Pareto e2e allocator (#1142)

* Fixed visualizer instantiation
* Fixed clustering and minor Pareto things
* Added utils to implement Pareto result logic after clustering
* Added baseliner function and changed logic in generate_waterfall
* Corrected generate_bootstrap_confidence to handle some edge cases
* Updated notebooks
* Cleared outputs from notebooks
* Added budget_allocator to Pareto e2e
* Removed unused parameter in budget allocator
* Added one-pager to clustering notebook
* Added clustering and allocator integration into e2e notebook

---------

Co-authored-by: Dhaval Patel <[email protected]>
facebookexperimental · Nov 16, 2024 · 997c7cd · 997c7cd
1 parent d2452e8
commit 997c7cd
Show file tree

Hide file tree

Showing 8 changed files with 423 additions and 44,551 deletions.
diff --git a/python/src/robyn/allocator/budget_allocator.py b/python/src/robyn/allocator/budget_allocator.py
@@ -24,18 +24,16 @@ def __init__(
         self,
         mmm_data: MMMData,
         featurized_mmm_data: FeaturizedMMMData,
-        model_outputs: ModelOutputs,
         pareto_result: ParetoResult,
         select_model: str,
     ):
         """Initialize the BudgetAllocator."""
         logger.info("Initializing BudgetAllocator")
-        logger.debug("Input parameters: mmm_data=%s, model_outputs=%s, pareto_result=%s, select_model=%s",
-                    mmm_data, model_outputs, pareto_result, select_model)
+        logger.debug("Input parameters: mmm_data=%s, pareto_result=%s, select_model=%s",
+                    mmm_data, pareto_result, select_model)
 
         self.mmm_data = mmm_data
         self.featurized_mmm_data = featurized_mmm_data
-        self.model_outputs = model_outputs
         self.pareto_result = pareto_result
         self.select_model = select_model
 
@@ -138,8 +136,7 @@ def _calculate_initial_metrics(
         logger.debug("Calculating initial metrics with date_range=%s, total_budget=%s", date_range, total_budget)
 
         try:
-            dt_mod = self.mmm_data.data
-            hist_spend = dt_mod.loc[date_range.start_index : date_range.end_index, media_spend_sorted]
+            hist_spend = self.featurized_mmm_data.dt_mod.loc[date_range.start_index : date_range.end_index, media_spend_sorted]
 
             logger.debug("Historical spend statistics: total=%s, mean=%s", 
                         hist_spend.sum(), hist_spend.mean())

diff --git a/python/src/robyn/robyn.py b/python/src/robyn/robyn.py
@@ -265,7 +265,6 @@ def budget_allocator(
         allocator = BudgetAllocator(
             mmm_data=self.mmm_data,
             featurized_mmm_data=self.featurized_mmm_data,
-            model_outputs=self.model_outputs,
             pareto_result=self.pareto_result,
             select_model=select_model,
         )

diff --git a/python/src/robyn/tutorials/e2e_test/tutorial4_pareto_e2e_test.ipynb b/python/src/robyn/tutorials/e2e_test/tutorial4_pareto_e2e_test.ipynb
diff --git a/python/src/robyn/tutorials/e2e_test/tutorial4_pareto_e2e_test_with_allocator.ipynb b/python/src/robyn/tutorials/e2e_test/tutorial4_pareto_e2e_test_with_allocator.ipynb
diff --git a/python/src/robyn/tutorials/tutorial1_src.ipynb b/python/src/robyn/tutorials/tutorial1_src.ipynb
diff --git a/python/src/robyn/tutorials/tutorial4_pareto.ipynb b/python/src/robyn/tutorials/tutorial4_pareto.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,10 +30,10 @@
    "source": [
     "# Load data from JSON\n",
     "inputCollect = load_data_from_json(\n",
-    "    \"/Users/funny/Documents/git/Robyn/python/src/robyn/tutorials/resources/InputCollect.json\"\n",
+    "    \"/Users/funny/Documents/git/Robyn/python/src/robyn/tutorials/resources/Pareto_InputCollect.json\"\n",
     ")\n",
     "outputModel = load_data_from_json(\n",
-    "    \"/Users/funny/Documents/git/Robyn/python/src/robyn/tutorials/resources/OutputModels.json\"\n",
+    "    \"/Users/funny/Documents/git/Robyn/python/src/robyn/tutorials/resources/Pareto_OutputModels.json\"\n",
     ")\n",
     "input_collect = import_input_collect(inputCollect)\n",
     "model_outputs = import_output_models(outputModel)\n",
@@ -103,8 +103,8 @@
     "print(\"\\nCarryover percentage all:\")\n",
     "print(pareto_result.df_caov_pct_all.shape, pareto_result.df_caov_pct_all)\n",
     "print(\"\\Plot Data Collected\")\n",
-    "print(\"NUMBER OF PLOTS Data collected for:\", len(pareto_result.plot_data_collect[\"3_206_6\"]))\n",
-    "print(\"Plot data for solid 3_206_6\", pareto_result.plot_data_collect[\"3_206_6\"])\n",
+    "# print(\"NUMBER OF PLOTS Data collected for:\", len(pareto_result.plot_data_collect[\"3_206_6\"]))\n",
+    "# print(\"Plot data for solid 3_206_6\", pareto_result.plot_data_collect[\"3_206_6\"])\n",
     "\n",
     "# 6. Validate logic\n",
     "assert pareto_result.pareto_fronts == \"auto\" or isinstance(\n",
@@ -122,7 +122,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(pareto_result.x_decomp_agg[pareto_result.x_decomp_agg[\"sol_id\"] == '3_206_6'])"
+    "print(pareto_result.x_decomp_agg[pareto_result.x_decomp_agg[\"sol_id\"] == '5_221_9'])"
    ]
   },
   {
@@ -145,7 +145,9 @@
     "with open(\"mmmdata.pkl\", \"wb\") as f:\n",
     "    pickle.dump(mmm_data, f)\n",
     "with open(\"holidays_data.pkl\", \"wb\") as f:\n",
-    "    pickle.dump(holidays_data, f)"
+    "    pickle.dump(holidays_data, f)\n",
+    "with open(\"featurized_mmm.pkl\", \"wb\") as f:\n",
+    "    pickle.dump(featurized_mmm_data, f)"
    ]
   }
  ],

diff --git a/python/src/robyn/tutorials/tutorial6_allocator.ipynb b/python/src/robyn/tutorials/tutorial6_allocator.ipynb
@@ -248,7 +248,6 @@
     "allocator = BudgetAllocator(\n",
     "    mmm_data=mmm_data,\n",
     "    featurized_mmm_data=featurized_mmm_data,\n",
-    "    model_outputs=model_outputs,\n",
     "    pareto_result=r_output_collect[\"pareto_result\"],  # Get ParetoResult from import_output_collect()\n",
     "    select_model=select_model,\n",
     ")"

diff --git a/python/src/robyn/tutorials/tutorial7_clustering.ipynb b/python/src/robyn/tutorials/tutorial7_clustering.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -110,6 +110,15 @@
     "\"\\nsim_n: \", cluster_results.cluster_ci.sim_n)\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(cluster_results.cluster_ci.cluster_confidence_interval_df)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -121,7 +130,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [