====================================================================================
#Cell 1: Load data
#Step 1: Load all necessary data
# Load the raw competitors table: read it from the catalog as a Spark
# DataFrame, then convert it to a pandas DataFrame for the cells below.
COMPETITORS_TABLE = "finmgmtcourse.finmgmtdata.competitors"
competitors_spark_df = spark.table(COMPETITORS_TABLE)
competitors = competitors_spark_df.toPandas()

========================================================================================

#Cell 2: Extract XYZ data
import pandas as pd
# Only rows strictly after this date are included in the totals below.
# NOTE(review): the comparison is strict, so rows dated exactly on the
# cutoff are excluded -- confirm that this is the intended window.
LAST_YEAR_CUTOFF = pd.Timestamp("2024-08-01")

# Revenue: load the table, parse the "Month" column (stored as m/d/Y text)
# and keep only the rows after the cutoff.
revenue_spark_df = spark.table("finmgmtcourse.finmgmtdata.revenue")
revenue = revenue_spark_df.toPandas()

revenue["Month"] = pd.to_datetime(revenue["Month"], format="%m/%d/%Y")
revenue_last_year = revenue[revenue["Month"] > LAST_YEAR_CUTOFF]

# Cost: same treatment as revenue so both totals cover the same window.
cost_spark_df = spark.table("finmgmtcourse.finmgmtdata.cost_summary")
cost = cost_spark_df.toPandas()

cost["Month"] = pd.to_datetime(cost["Month"], format="%m/%d/%Y")
cost_last_year = cost[cost["Month"] > LAST_YEAR_CUTOFF]

# Totals over the selected window (each computed exactly once).
tot_revenue = revenue_last_year["Revenue"].sum()
tot_cost = cost_last_year["TotCost"].sum()

print(tot_revenue)
print(tot_cost)


====================================================================================
#Cell 3: Estimate median multiples

# Median price-to-earnings multiple across every row of the competitors table.
pe_multiples = competitors['P/E']
median_PE = pe_multiples.median()
print(median_PE)

# Median price-to-sales multiple across every row of the competitors table.
ps_multiples = competitors['P/S']
median_PS = ps_multiples.median()
print(median_PS)


====================================================================================
#Cell 4: Estimate comparable group median multiples
# Group the competitors once by their "Revenue group" label and take the
# median of each multiple within every group.
by_revenue_group = competitors.groupby('Revenue group')
median_group_PE = by_revenue_group['P/E'].median()
median_group_PS = by_revenue_group['P/S'].median()

print(median_group_PE)
print(median_group_PS)


====================================================================================


#Cell 5: Regression model
import statsmodels.api as sm
import numpy as np

# Dependent variable: natural log of each competitor's market capitalisation.
y = np.log(competitors['Market Cap'])

# Explanatory variables: the two margin columns. statsmodels' OLS does not
# include an intercept unless one is supplied, so a constant column is added
# explicitly; without it the fit is forced through the origin and the margin
# coefficients absorb the overall level of log market cap.
x = sm.add_constant(competitors[['EBIT Margin', 'Net Margin']])

model = sm.OLS(y, x).fit()

# Fitted parameters ("const" plus one row per margin) as a one-column table.
coef_df = model.params.to_frame(name='coefficient')
print(coef_df)
display(model.summary())