====================================================================================================================================================== #Cell 1: #Step 1: All necessary data #Load raw revenue tabe revenue_spark_df = spark.table("finmgmtcourse.finmgmtdata.revenue") revenue_raw = revenue_spark_df.toPandas() #Load client mapping table clients_spark_df = spark.table("finmgmtcourse.finmgmtdata.client_mapping") client_mapping = clients_spark_df.toPandas() #Load product mapping product_spark_df = spark.table("finmgmtcourse.finmgmtdata.product_mapping") product_mapping = product_spark_df.toPandas() #Load business segment structure segment_spark_df = spark.table("finmgmtcourse.finmgmtdata.business_segment_mapping") segment_mapping = segment_spark_df.toPandas() #Load forecast data rev_fcst_spark_df = spark.table("finmgmtcourse.finmgmtdata.revenue_forecast") revenue_forecast_raw = rev_fcst_spark_df.toPandas() ====================================================================================================================================================== #Cell 2: #Step 2 Perform data transformations import pandas as pd #Add client structure revenue_table = pd.merge(revenue_raw, client_mapping, left_on='Client', right_on='Customer_code', how='left') #Add product mapping revenue_table = pd.merge(revenue_table, product_mapping, left_on='Product', right_on='Prd_Leaf_Code', how='left') revenue_forecast = pd.merge(revenue_forecast_raw, product_mapping, left_on='Product_code', right_on='Prd_Leaf_Code', how='left') #Add business segment mapping revenue_table = pd.merge(revenue_table, segment_mapping, left_on='Segment', right_on='Seg_Leaf_Code', how='left') revenue_forecast = pd.merge(revenue_forecast, segment_mapping, left_on='Business_segment', right_on='Seg_Leaf_Code', how='left') #Drop unnecessary columns revenue_table = revenue_table.drop(["Customer_code", "Product", "Segment", 'Client', "Prd_Lvl1_Code", "Prd_Lvl2_Code", "Prd_Leaf_Code", "Seg_Leaf_Code", "Seg_Lvl1_Code"], axis=1) revenue_forecast = revenue_forecast.drop(["Product_code", "Business_segment", "Prd_Lvl1_Code", "Prd_Lvl2_Code", "Prd_Leaf_Code", "Seg_Leaf_Code", "Seg_Lvl1_Code"], axis=1) #Transform 'Month' column to a date format revenue_table["Month"] = pd.to_datetime(revenue_table["Month"], format="%m/%d/%Y") revenue_forecast["Month"] = pd.to_datetime(revenue_forecast["Month"], format="%m/%d/%Y") ====================================================================================================================================================== #Cell 3: # Task 1: Filter only relevant data for Production A revenue_prodA = revenue_table[revenue_table["Seg_Leaf_Desc"] == "Production A"] revenue_fcst_prodA = revenue_forecast[revenue_forecast["Seg_Leaf_Desc"] == "Production A"] # Task 2: Find total revenue and total quantity for latest month and also total forecast for latest month latest_month = revenue_prodA["Month"].max() # creates filter for latest month latest_month_revenue = revenue_prodA[revenue_prodA["Month"] == latest_month] #filters the data target_date = pd.to_datetime("2025-08-01") latest_month_forecast = revenue_fcst_prodA[revenue_fcst_prodA["Month"] == latest_month] #filters the data lm_total_revenue = latest_month_revenue["Revenue"].sum() #estimates the total lm_total_forecast = latest_month_forecast["Forecast"].sum() #estimates the total lm_total_quantity = latest_month_revenue["Q"].sum() lm_var_actual_vs_forecast = lm_total_revenue - lm_total_forecast print("Total rev: ",lm_total_revenue/1000000) #display results in millions print("Total fcst: ",lm_total_forecast/1000000) print("Total Q: ",lm_total_quantity) print("Var vs Fcst: ", lm_var_actual_vs_forecast/1000000) ====================================================================================================================================================== #Cell 4: # Task 3: Compute MoM growth # 🔹 Derive previous month and same month last year previous_month = (latest_month - pd.DateOffset(months=1)).replace(day=1) same_month_prev_year = (latest_month - pd.DateOffset(years=1)).replace(day=1) # 🔹 Helper function to get revenue for a given month and segment def get_monthly_revenue(df, month): return revenue_prodA.loc[(df['Month'] == month) , 'Revenue'].sum() # 🔹 Compute revenues revenue_current = get_monthly_revenue(revenue_prodA, latest_month, ) revenue_prev_month = get_monthly_revenue(revenue_prodA, previous_month, ) revenue_prev_year = get_monthly_revenue(revenue_prodA, same_month_prev_year) # 🔹 Convert to billions rev_cur_bn = revenue_current / 1_000_000 rev_prev_m_bn = revenue_prev_month / 1_000_000 rev_prev_y_bn = revenue_prev_year / 1_000_000 # 🔹 Calculate growth rates mom_growth = ((revenue_current - revenue_prev_month) / revenue_prev_month * 100) if revenue_prev_month else None yoy_growth = ((revenue_current - revenue_prev_year) / revenue_prev_year * 100) if revenue_prev_year else None # 🔹 Print results print(f"Revenue trend :") print(f" Current month ({latest_month.strftime('%B %Y')}): {rev_cur_bn:.2f} mln") print(f" Previous month ({previous_month.strftime('%B %Y')}): {rev_prev_m_bn:.2f} mln") print(f" Same month last year ({same_month_prev_year.strftime('%B %Y')}): {rev_prev_y_bn:.2f} mln") print() print("Growth rates:") if mom_growth is not None: print(f" • MoM growth: {mom_growth:+.2f}%") else: print(" • MoM growth: N/A (no previous month data)") if yoy_growth is not None: print(f" • YoY growth: {yoy_growth:+.2f}%") else: print(" • YoY growth: N/A (no previous year data)") ====================================================================================================================================================== #Cell 5: # Task 4: Estimate YTD performance vs forecast import pandas as pd # Ensure 'month' column is datetime revenue_prodA['Month'] = pd.to_datetime(revenue_prodA['Month']) # Define target year and segment target_year = 2025 # Find the latest month available in the data latest_month = revenue_prodA['Month'].max() # Filter for YTD (revenue) ytd_revenue = revenue_prodA[ (revenue_prodA['Month'].dt.year == target_year) & (revenue_prodA['Month'] <= latest_month) ] # Filter for YTD (forecast) ytd_forecast = revenue_fcst_prodA[ (revenue_fcst_prodA['Month'].dt.year == target_year) & (revenue_fcst_prodA['Month'] <= latest_month) ] # Calculate YTD totals ytd_total_revenue = ytd_revenue['Revenue'].sum() ytd_total_forecast = ytd_forecast['Forecast'].sum() # Convert to billions ytd_revenue_billion = ytd_total_revenue / 1_000_000_000 ytd_forecast_billion = ytd_total_forecast / 1_000_000_000 # 🔹 Print formatted output print(f"YTD ({target_year}) up to {latest_month.strftime('%B %Y')} :") print(f" • Revenue: {ytd_revenue_billion:.2f} Bn") print(f" • Forecast: {ytd_forecast_billion:.2f} Bn") # (Optional) Variance and achievement % ytd_variance = ytd_total_revenue - ytd_total_forecast progress = (ytd_total_revenue / ytd_total_forecast * 100) if ytd_total_forecast else 0 print(f" • Variance: {ytd_variance/1_000_000_000:.2f} Bn") print(f" • % of Forecast Achieved: {progress:.1f}%") ====================================================================================================================================================== #Cell 6: # Task 5: Estimate YTD growth import pandas as pd # 🔹 Define segment and target year target_year = 2025 # 🔹 Define YTD periods (same cutoff month for both years) current_ytd_end = latest_month previous_ytd_end = current_ytd_end - pd.DateOffset(years=1) # 🔹 Filter for YTD current year ytd_current = revenue_prodA[ (revenue_prodA['Month'].dt.year == target_year) & (revenue_prodA['Month'] <= current_ytd_end) ] # 🔹 Filter for YTD previous year (same cutoff month) ytd_previous = revenue_prodA[ (revenue_prodA['Month'].dt.year == target_year - 1) & (revenue_prodA['Month'] <= previous_ytd_end) ] # 🔹 Calculate YTD revenues ytd_revenue_current = ytd_current['Revenue'].sum() ytd_revenue_previous = ytd_previous['Revenue'].sum() # 🔹 Convert to billions ytd_rev_cur_bn = ytd_revenue_current / 1_000_000_000 ytd_rev_prev_bn = ytd_revenue_previous / 1_000_000_000 # 🔹 Calculate YTD growth % ytd_growth = ((ytd_revenue_current - ytd_revenue_previous) / ytd_revenue_previous * 100) if ytd_revenue_previous else None # 🔹 Print results print(f" • Current YTD ({target_year} up to {current_ytd_end.strftime('%B %Y')}): {ytd_rev_cur_bn:.2f} Bn") print(f" • Previous YTD ({target_year - 1} up to {previous_ytd_end.strftime('%B %Y')}): {ytd_rev_prev_bn:.2f} Bn") print() if ytd_growth is not None: print(f" → YTD YoY Growth: {ytd_growth:+.2f}%") else: print(" → YTD YoY Growth: N/A (no previous year data)") ====================================================================================================================================================== #Cell 7: # Task 6: Compute trend vs forecast import pandas as pd import matplotlib.pyplot as plt # 🔹 Aggregate monthly totals (if needed) rev_monthly = revenue_prodA.groupby('Month', as_index=False)['Revenue'].sum() fc_monthly = revenue_fcst_prodA.groupby('Month', as_index=False)['Forecast'].sum() # 🔹 Merge both datasets merged = pd.merge(fc_monthly, rev_monthly, on='Month', how='inner').sort_values('Month') # 🔹 Filter only the target year this_year = merged[merged['Month'].dt.year == target_year].copy() # 🔹 Convert to millions this_year['forecast_m'] = this_year['Forecast'] / 1_000_000 this_year['revenue_m'] = this_year['Revenue'] / 1_000_000 # 🔹 Plot fig, ax1 = plt.subplots(figsize=(10, 5)) # Bars = forecast ax1.bar(this_year['Month'].dt.strftime('%b'), this_year['forecast_m'], color='#AB1E9A', label='Forecast') # Line = revenue ax1.plot(this_year['Month'].dt.strftime('%b'), this_year['revenue_m'], color='black', marker='o', linewidth=2, label='Revenue') # Labels & title ax1.set_title(f'Monthly Forecast vs Revenue ({target_year})', fontsize=14, weight='bold') ax1.set_xlabel('Month', fontsize=12) ax1.set_ylabel('Value (Millions)', fontsize=12) ax1.legend() # Add value labels for revenue points for x, y in zip(this_year['Month'].dt.strftime('%b'), this_year['revenue_m']): plt.text(x, y, f'{y:.1f}', ha='center', va='bottom', fontsize=9) plt.tight_layout() plt.show() ====================================================================================================================================================== #Cell 8: # Task 7: Variance vs forecast by product import pandas as pd import matplotlib.pyplot as plt # 🔹 Filter both DataFrames for the latest month rev_latest = revenue_prodA[revenue_prodA['Month'] == latest_month] fc_latest = revenue_fcst_prodA[revenue_fcst_prodA['Month'] == latest_month] # 🔹 Aggregate by product rev_by_product = rev_latest.groupby('Prd_Leaf_Desc', as_index=False)['Revenue'].sum() fc_by_product = fc_latest.groupby('Prd_Leaf_Desc', as_index=False)['Forecast'].sum() # 🔹 Merge and compute variance % summary = pd.merge(fc_by_product, rev_by_product, on='Prd_Leaf_Desc', how='outer').fillna(0) summary['variance_pct'] = ((summary['Revenue'] - summary['Forecast']) / summary['Forecast'] * 100).replace([float('inf'), -float('inf')], 0) # 🔹 Sort by variance % summary = summary.sort_values('variance_pct', ascending=True) # 🔹 Plot plt.figure(figsize=(10, 6)) bars = plt.barh(summary['Prd_Leaf_Desc'], summary['variance_pct'], color='#AB1E9A') # Add reference line at 0% plt.axvline(0, color='black', linewidth=1) # Add value labels for bar in bars: x = bar.get_width() y = bar.get_y() + bar.get_height() / 2 plt.text(x + (1 if x >= 0 else -1), y, f'{x:+.1f}%', va='center', ha='left' if x >= 0 else 'right', fontsize=9) # Titles and labels plt.title(f'Revenue vs Forecast Variance by Product – {latest_month.strftime("%B %Y")}', fontsize=14, weight='bold') plt.xlabel('Variance (%)', fontsize=12) plt.ylabel('Product', fontsize=12) plt.tight_layout() plt.show() ====================================================================================================================================================== #Cell 9: # Task 8: Show revenue by product group & industry import numpy as np rev_latest = revenue_prodA[revenue_prodA['Month'] == latest_month] revenue_by_nested = rev_latest.groupby(["Prd_Lvl2_Desc", "Industry"])["Revenue"].sum().reset_index() pivot=revenue_by_nested.pivot(index='Industry', columns='Prd_Lvl2_Desc', values='Revenue') # Replace NaNs with 0 pivot = pivot.fillna(0) import matplotlib.pyplot as plt import seaborn as sns # Replace NaNs with 0 and convert to millions pivot = pivot.fillna(0) / 1_000_000 # Plot heatmap plt.figure(figsize=(6, 4)) sns.heatmap(pivot, annot=True, fmt=".1f", cmap="YlGnBu", cbar_kws={'label': 'Value (M)'}) plt.title("Pivot Table Heatmap in Millions") plt.show()