# ====================================================================================
# Cell 1: Load data
# Step 1: All necessary data
# Load the raw competitors table from the catalog and convert it to pandas.
competitors_spark_df = spark.table("finmgmtcourse.finmgmtdata.competitors")
competitors = competitors_spark_df.toPandas()

# ====================================================================================
# Cell 2: Extract XYZ data
import pandas as pd

# Rows whose "Month" is strictly later than this date are kept by the revenue
# and cost filters below. Defined once so both filters always agree.
LAST_YEAR_CUTOFF = pd.Timestamp("2024-08-01")

# Revenue: load, parse the "Month" strings (MM/DD/YYYY), keep rows after the cutoff.
revenue_spark_df = spark.table("finmgmtcourse.finmgmtdata.revenue")
revenue = revenue_spark_df.toPandas()
revenue["Month"] = pd.to_datetime(revenue["Month"], format="%m/%d/%Y")
revenue_last_year = revenue[revenue["Month"] > LAST_YEAR_CUTOFF]

# Cost summary: same load / parse / filter steps as for revenue.
cost_spark_df = spark.table("finmgmtcourse.finmgmtdata.cost_summary")
cost = cost_spark_df.toPandas()
cost["Month"] = pd.to_datetime(cost["Month"], format="%m/%d/%Y")
cost_last_year = cost[cost["Month"] > LAST_YEAR_CUTOFF]

# Totals over the filtered rows.
tot_revenue = revenue_last_year["Revenue"].sum()
tot_cost = cost_last_year["TotCost"].sum()
print(tot_revenue)
print(tot_cost)

# ====================================================================================
# Cell 3: Estimate median multiples
# Medians of the "P/E" and "P/S" columns over all rows of `competitors`.
median_PE = competitors["P/E"].median()
print(median_PE)
median_PS = competitors["P/S"].median()
print(median_PS)

# ====================================================================================
# Cell 4: Estimate comparable group median multiples
# Same medians, computed separately for each value of "Revenue group".
median_group_PE = competitors.groupby("Revenue group")["P/E"].median()
median_group_PS = competitors.groupby("Revenue group")["P/S"].median()
print(median_group_PE)
print(median_group_PS)

# ====================================================================================
# Cell 5: Regression model
import statsmodels.api as sm
import numpy as np

# Dependent variable: natural log of market capitalisation.
# NOTE(review): assumes "Market Cap" is strictly positive; zero or negative
# values would yield -inf / NaN here -- TODO confirm against the data.
y = np.log(competitors["Market Cap"])
x = competitors[["EBIT Margin", "Net Margin"]]

# NOTE(review): sm.OLS does not add an intercept on its own, so this fits a
# regression through the origin. If an intercept is intended, fit on
# sm.add_constant(x) instead (and pass the same constant column to predict).
model = sm.OLS(y, x).fit()

# One row per regressor, with the fitted value in a "coefficient" column.
coef_df = model.params.to_frame(name="coefficient")
print(coef_df)
display(model.summary())