161161 "execution_count" : null ,
162162 "metadata" : {},
163163 "outputs" : [],
164- "source" : " # Fit TROP with automatic tuning via LOOCV\n trop_est = TROP(\n lambda_time_grid=[0.0, 1.0], # Reduced time decay grid\n lambda_unit_grid=[0.0, 1.0], # Reduced unit distance grid \n lambda_nn_grid=[0.0, 0.1], # Reduced nuclear norm grid\n n_bootstrap=50, # Reduced bootstrap replications for SE\n seed=42\n )\n\n # Note: TROP infers treatment periods from the treatment indicator column.\n # The 'treated' column should be an absorbing state (D=1 for all periods\n # during and after treatment starts).\n\n # For SDID comparison later, we keep post_periods for SDID\n post_periods = list(range(n_pre, n_pre + n_post))\n\n results = trop_est.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n\n print(results.summary())"
164+ "source" : [
165+ " # Fit TROP with automatic tuning via LOOCV\n " ,
166+ " trop_est = TROP(\n " ,
167+ " lambda_time_grid=[0.0, 1.0], # Reduced time decay grid\n " ,
168+ " lambda_unit_grid=[0.0, 1.0], # Reduced unit distance grid \n " ,
169+ " lambda_nn_grid=[0.0, 0.1], # Reduced nuclear norm grid\n " ,
170+ " n_bootstrap=50, # Reduced bootstrap replications for SE\n " ,
171+ " seed=42\n " ,
172+ " )\n " ,
173+ " \n " ,
174+ " # Note: TROP infers treatment periods from the treatment indicator column.\n " ,
175+ " # The 'treated' column should be an absorbing state (D=1 for all periods\n " ,
176+ " # during and after treatment starts).\n " ,
177+ " \n " ,
178+ " # For SDID comparison later, we keep post_periods for SDID\n " ,
179+ " post_periods = list(range(n_pre, n_pre + n_post))\n " ,
180+ " \n " ,
181+ " results = trop_est.fit(\n " ,
182+ " df,\n " ,
183+ " outcome='outcome',\n " ,
184+ " treatment='treated',\n " ,
185+ " unit='unit',\n " ,
186+ " time='period'\n " ,
187+ " )\n " ,
188+ " \n " ,
189+ " print(results.summary())"
190+ ]
165191 },
166192 {
167193 "cell_type" : " code" ,
217243 "execution_count" : null ,
218244 "metadata" : {},
219245 "outputs" : [],
220- "source" : " # Effect of different nuclear norm regularization levels\n print(\" Effect of nuclear norm regularization (λ_nn):\" )\n print(\" =\" *65)\n print(f\" {'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\" )\n print(\" -\" *65)\n\n for lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n trop_fixed = TROP(\n lambda_time_grid=[1.0], # Fixed\n lambda_unit_grid=[1.0], # Fixed\n lambda_nn_grid=[lambda_nn], # Vary this\n n_bootstrap=20, # Reduced for faster execution\n seed=42\n )\n \n res = trop_fixed.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n bias = res.att - true_att\n print(f\" {lambda_nn:>10.1f} {res.att:>12.4f} {bias:>12.4f} {res.effective_rank:>15.2f}\" )"
246+ "source" : [
247+ " # Effect of different nuclear norm regularization levels\n " ,
248+ " print(\" Effect of nuclear norm regularization (λ_nn):\" )\n " ,
249+ " print(\" =\" *65)\n " ,
250+ " print(f\" {'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\" )\n " ,
251+ " print(\" -\" *65)\n " ,
252+ " \n " ,
253+ " for lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n " ,
254+ " trop_fixed = TROP(\n " ,
255+ " lambda_time_grid=[1.0], # Fixed\n " ,
256+ " lambda_unit_grid=[1.0], # Fixed\n " ,
257+ " lambda_nn_grid=[lambda_nn], # Vary this\n " ,
258+ " n_bootstrap=20, # Reduced for faster execution\n " ,
259+ " seed=42\n " ,
260+ " )\n " ,
261+ " \n " ,
262+ " res = trop_fixed.fit(\n " ,
263+ " df,\n " ,
264+ " outcome='outcome',\n " ,
265+ " treatment='treated',\n " ,
266+ " unit='unit',\n " ,
267+ " time='period'\n " ,
268+ " )\n " ,
269+ " \n " ,
270+ " bias = res.att - true_att\n " ,
271+ " print(f\" {lambda_nn:>10.1f} {res.att:>12.4f} {bias:>12.4f} {res.effective_rank:>15.2f}\" )"
272+ ]
221273 },
222274 {
223275 "cell_type" : " markdown" ,
353405 "execution_count" : null ,
354406 "metadata" : {},
355407 "outputs" : [],
356- "source": "# SDID (no factor adjustment)\n# Note: SDID uses 'treat' (unit-level ever-treated indicator)\nsdid = SyntheticDiD(\n n_bootstrap=50, # Reduced for faster execution\n seed=42\n)\n\n# SDID still uses post_periods parameter\nsdid_results = sdid.fit(\n df,\n outcome='outcome',\n treatment='treat', # Unit-level ever-treated indicator\n unit='unit',\n time='period',\n post_periods=post_periods\n)\n\n# TROP (with factor adjustment)\n# Note: TROP uses 'treated' (observation-level treatment indicator)\n# and infers treatment periods automatically\ntrop_est2 = TROP(\n lambda_nn_grid=[0.0, 0.1], # Reduced grid for faster execution\n n_bootstrap=50, # Reduced for faster execution\n seed=42\n)\n\ntrop_results = trop_est2.fit(\n df,\n outcome='outcome',\n treatment='treated', # Observation-level indicator\n unit='unit',\n time='period'\n)\n\nprint(\"Comparison: SDID vs TROP\")\nprint(\"=\"*60)\nprint(f\"True ATT: {true_att:.4f}\")\nprint()\nprint(f\"Synthetic DiD (no factor adjustment):\")\nprint(f\" ATT: {sdid_results.att:.4f}\")\nprint(f\" SE: {sdid_results.se:.4f}\")\nprint(f\" Bias: {sdid_results.att - true_att:.4f}\")\nprint()\nprint(f\"TROP (with factor adjustment):\")\nprint(f\" ATT: {trop_results.att:.4f}\")\nprint(f\" SE: {trop_results.se:.4f}\")\nprint(f\" Bias: {trop_results.att - true_att:.4f}\")\nprint(f\" Effective rank: {trop_results.effective_rank:.2f}\")"
408+ "source" : [
409+ " # SDID (no factor adjustment)\n " ,
410+ " # Note: SDID uses 'treat' (unit-level ever-treated indicator)\n " ,
411+ " sdid = SyntheticDiD(\n " ,
412+ " n_bootstrap=50, # Reduced for faster execution\n " ,
413+ " seed=42\n " ,
414+ " )\n " ,
415+ " \n " ,
416+ " # SDID still uses post_periods parameter\n " ,
417+ " sdid_results = sdid.fit(\n " ,
418+ " df,\n " ,
419+ " outcome='outcome',\n " ,
420+ " treatment='treat', # Unit-level ever-treated indicator\n " ,
421+ " unit='unit',\n " ,
422+ " time='period',\n " ,
423+ " post_periods=post_periods\n " ,
424+ " )\n " ,
425+ " \n " ,
426+ " # TROP (with factor adjustment)\n " ,
427+ " # Note: TROP uses 'treated' (observation-level treatment indicator)\n " ,
428+ " # and infers treatment periods automatically\n " ,
429+ " trop_est2 = TROP(\n " ,
430+ " lambda_nn_grid=[0.0, 0.1], # Reduced grid for faster execution\n " ,
431+ " n_bootstrap=50, # Reduced for faster execution\n " ,
432+ " seed=42\n " ,
433+ " )\n " ,
434+ " \n " ,
435+ " trop_results = trop_est2.fit(\n " ,
436+ " df,\n " ,
437+ " outcome='outcome',\n " ,
438+ " treatment='treated', # Observation-level indicator\n " ,
439+ " unit='unit',\n " ,
440+ " time='period'\n " ,
441+ " )\n " ,
442+ " \n " ,
443+ " print(\" Comparison: SDID vs TROP\" )\n " ,
444+ " print(\" =\" *60)\n " ,
445+ " print(f\" True ATT: {true_att:.4f}\" )\n " ,
446+ " print()\n " ,
447+ " print(f\" Synthetic DiD (no factor adjustment):\" )\n " ,
448+ " print(f\" ATT: {sdid_results.att:.4f}\" )\n " ,
449+ " print(f\" SE: {sdid_results.se:.4f}\" )\n " ,
450+ " print(f\" Bias: {sdid_results.att - true_att:.4f}\" )\n " ,
451+ " print()\n " ,
452+ " print(f\" TROP (with factor adjustment):\" )\n " ,
453+ " print(f\" ATT: {trop_results.att:.4f}\" )\n " ,
454+ " print(f\" SE: {trop_results.se:.4f}\" )\n " ,
455+ " print(f\" Bias: {trop_results.att - true_att:.4f}\" )\n " ,
456+ " print(f\" Effective rank: {trop_results.effective_rank:.2f}\" )"
457+ ]
357458 },
358459 {
359460 "cell_type" : " markdown" ,
369470 "execution_count" : null ,
370471 "metadata" : {},
371472 "outputs" : [],
372- "source": "# Monte Carlo comparison (reduced for faster tutorial execution)\nn_sims = 5 # Reduced from 20 for faster validation\ntrop_estimates = []\nsdid_estimates = []\n\nprint(f\"Running {n_sims} simulations...\")\n\nfor sim in range(n_sims):\n # Generate new data using the library function\n # (includes both 'treated' and 'treat' columns)\n sim_data = generate_factor_data(\n n_units=50,\n n_pre=10,\n n_post=5,\n n_treated=10,\n n_factors=2,\n treatment_effect=2.0,\n factor_strength=1.5,\n noise_sd=0.5,\n seed=100 + sim\n )\n \n # TROP (uses observation-level 'treated')\n # Note: TROP infers treatment periods from the treatment indicator\n try:\n trop_m = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0],\n lambda_nn_grid=[0.1],\n n_bootstrap=10, \n seed=42 + sim\n )\n trop_res = trop_m.fit(\n sim_data,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n trop_estimates.append(trop_res.att)\n except Exception as e:\n print(f\"TROP failed on sim {sim}: {e}\")\n \n # SDID (uses unit-level 'treat')\n # Note: SDID still uses post_periods parameter\n try:\n sdid_m = SyntheticDiD(n_bootstrap=10, seed=42 + sim)\n sdid_res = sdid_m.fit(\n sim_data,\n outcome='outcome',\n treatment='treat', # Unit-level ever-treated indicator\n unit='unit',\n time='period',\n post_periods=list(range(10, 15))\n )\n sdid_estimates.append(sdid_res.att)\n except Exception as e:\n print(f\"SDID failed on sim {sim}: {e}\")\n\nprint(f\"\\nMonte Carlo Results (True ATT = {true_att})\")\nprint(\"=\"*60)\nprint(f\"{'Estimator':<15} {'Mean':>12} {'Bias':>12} {'RMSE':>12}\")\nprint(\"-\"*60)\n\nif trop_estimates:\n trop_mean = np.mean(trop_estimates)\n trop_bias = trop_mean - true_att\n trop_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in trop_estimates]))\n print(f\"{'TROP':<15} {trop_mean:>12.4f} {trop_bias:>12.4f} {trop_rmse:>12.4f}\")\n\nif sdid_estimates:\n sdid_mean = np.mean(sdid_estimates)\n sdid_bias = sdid_mean - true_att\n sdid_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in sdid_estimates]))\n print(f\"{'SDID':<15} {sdid_mean:>12.4f} {sdid_bias:>12.4f} {sdid_rmse:>12.4f}\")"
473+ "source" : [
474+ " # Monte Carlo comparison (reduced for faster tutorial execution)\n " ,
475+ " n_sims = 5 # Reduced from 20 for faster validation\n " ,
476+ " trop_estimates = []\n " ,
477+ " sdid_estimates = []\n " ,
478+ " \n " ,
479+ " print(f\" Running {n_sims} simulations...\" )\n " ,
480+ " \n " ,
481+ " for sim in range(n_sims):\n " ,
482+ " # Generate new data using the library function\n " ,
483+ " # (includes both 'treated' and 'treat' columns)\n " ,
484+ " sim_data = generate_factor_data(\n " ,
485+ " n_units=50,\n " ,
486+ " n_pre=10,\n " ,
487+ " n_post=5,\n " ,
488+ " n_treated=10,\n " ,
489+ " n_factors=2,\n " ,
490+ " treatment_effect=2.0,\n " ,
491+ " factor_strength=1.5,\n " ,
492+ " noise_sd=0.5,\n " ,
493+ " seed=100 + sim\n " ,
494+ " )\n " ,
495+ " \n " ,
496+ " # TROP (uses observation-level 'treated')\n " ,
497+ " # Note: TROP infers treatment periods from the treatment indicator\n " ,
498+ " try:\n " ,
499+ " trop_m = TROP(\n " ,
500+ " lambda_time_grid=[1.0],\n " ,
501+ " lambda_unit_grid=[1.0],\n " ,
502+ " lambda_nn_grid=[0.1],\n " ,
503+ " n_bootstrap=10, \n " ,
504+ " seed=42 + sim\n " ,
505+ " )\n " ,
506+ " trop_res = trop_m.fit(\n " ,
507+ " sim_data,\n " ,
508+ " outcome='outcome',\n " ,
509+ " treatment='treated',\n " ,
510+ " unit='unit',\n " ,
511+ " time='period'\n " ,
512+ " )\n " ,
513+ " trop_estimates.append(trop_res.att)\n " ,
514+ " except Exception as e:\n " ,
515+ " print(f\" TROP failed on sim {sim}: {e}\" )\n " ,
516+ " \n " ,
517+ " # SDID (uses unit-level 'treat')\n " ,
518+ " # Note: SDID still uses post_periods parameter\n " ,
519+ " try:\n " ,
520+ " sdid_m = SyntheticDiD(n_bootstrap=10, seed=42 + sim)\n " ,
521+ " sdid_res = sdid_m.fit(\n " ,
522+ " sim_data,\n " ,
523+ " outcome='outcome',\n " ,
524+ " treatment='treat', # Unit-level ever-treated indicator\n " ,
525+ " unit='unit',\n " ,
526+ " time='period',\n " ,
527+ " post_periods=list(range(10, 15))\n " ,
528+ " )\n " ,
529+ " sdid_estimates.append(sdid_res.att)\n " ,
530+ " except Exception as e:\n " ,
531+ " print(f\" SDID failed on sim {sim}: {e}\" )\n " ,
532+ " \n " ,
533+ " print(f\"\\ nMonte Carlo Results (True ATT = {true_att})\" )\n " ,
534+ " print(\" =\" *60)\n " ,
535+ " print(f\" {'Estimator':<15} {'Mean':>12} {'Bias':>12} {'RMSE':>12}\" )\n " ,
536+ " print(\" -\" *60)\n " ,
537+ " \n " ,
538+ " if trop_estimates:\n " ,
539+ " trop_mean = np.mean(trop_estimates)\n " ,
540+ " trop_bias = trop_mean - true_att\n " ,
541+ " trop_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in trop_estimates]))\n " ,
542+ " print(f\" {'TROP':<15} {trop_mean:>12.4f} {trop_bias:>12.4f} {trop_rmse:>12.4f}\" )\n " ,
543+ " \n " ,
544+ " if sdid_estimates:\n " ,
545+ " sdid_mean = np.mean(sdid_estimates)\n " ,
546+ " sdid_bias = sdid_mean - true_att\n " ,
547+ " sdid_rmse = np.sqrt(np.mean([(e - true_att)**2 for e in sdid_estimates]))\n " ,
548+ " print(f\" {'SDID':<15} {sdid_mean:>12.4f} {sdid_bias:>12.4f} {sdid_rmse:>12.4f}\" )"
549+ ]
373550 },
374551 {
375552 "cell_type" : " code" ,
407584 "execution_count" : null ,
408585 "metadata" : {},
409586 "outputs" : [],
410- "source" : " # One-liner estimation with default tuning grid\n # Note: TROP infers treatment periods from the treatment indicator\n quick_results = trop(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period',\n n_bootstrap=20, # Reduced for faster execution\n seed=42\n )\n\n print(f\" Quick estimation:\" )\n print(f\" ATT: {quick_results.att:.4f}\" )\n print(f\" SE: {quick_results.se:.4f}\" )\n print(f\" λ_time: {quick_results.lambda_time:.2f}\" )\n print(f\" λ_unit: {quick_results.lambda_unit:.2f}\" )\n print(f\" λ_nn: {quick_results.lambda_nn:.2f}\" )\n print(f\" Effective rank: {quick_results.effective_rank:.2f}\" )"
587+ "source" : [
588+ " # One-liner estimation with default tuning grid\n " ,
589+ " # Note: TROP infers treatment periods from the treatment indicator\n " ,
590+ " quick_results = trop(\n " ,
591+ " df,\n " ,
592+ " outcome='outcome',\n " ,
593+ " treatment='treated',\n " ,
594+ " unit='unit',\n " ,
595+ " time='period',\n " ,
596+ " n_bootstrap=20, # Reduced for faster execution\n " ,
597+ " seed=42\n " ,
598+ " )\n " ,
599+ " \n " ,
600+ " print(f\" Quick estimation:\" )\n " ,
601+ " print(f\" ATT: {quick_results.att:.4f}\" )\n " ,
602+ " print(f\" SE: {quick_results.se:.4f}\" )\n " ,
603+ " print(f\" λ_time: {quick_results.lambda_time:.2f}\" )\n " ,
604+ " print(f\" λ_unit: {quick_results.lambda_unit:.2f}\" )\n " ,
605+ " print(f\" λ_nn: {quick_results.lambda_nn:.2f}\" )\n " ,
606+ " print(f\" Effective rank: {quick_results.effective_rank:.2f}\" )"
607+ ]
411608 },
412609 {
413610 "cell_type" : " markdown" ,
425622 "execution_count" : null ,
426623 "metadata" : {},
427624 "outputs" : [],
428- "source" : " # Compare variance estimation methods\n print(\" Variance estimation comparison:\" )\n print(\" =\" *50)\n\n for method in ['bootstrap', 'jackknife']:\n trop_var = TROP(\n lambda_time_grid=[1.0],\n lambda_unit_grid=[1.0], \n lambda_nn_grid=[0.1],\n variance_method=method,\n n_bootstrap=30, # Reduced for faster execution\n seed=42\n )\n \n res = trop_var.fit(\n df,\n outcome='outcome',\n treatment='treated',\n unit='unit',\n time='period'\n )\n \n print(f\"\\ n{method.capitalize()}:\" )\n print(f\" ATT: {res.att:.4f}\" )\n print(f\" SE: {res.se:.4f}\" )\n print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\" )"
625+ "source" : [
626+ " # Compare variance estimation methods\n " ,
627+ " print(\" Variance estimation comparison:\" )\n " ,
628+ " print(\" =\" *50)\n " ,
629+ " \n " ,
630+ " for method in ['bootstrap', 'jackknife']:\n " ,
631+ " trop_var = TROP(\n " ,
632+ " lambda_time_grid=[1.0],\n " ,
633+ " lambda_unit_grid=[1.0], \n " ,
634+ " lambda_nn_grid=[0.1],\n " ,
635+ " variance_method=method,\n " ,
636+ " n_bootstrap=30, # Reduced for faster execution\n " ,
637+ " seed=42\n " ,
638+ " )\n " ,
639+ " \n " ,
640+ " res = trop_var.fit(\n " ,
641+ " df,\n " ,
642+ " outcome='outcome',\n " ,
643+ " treatment='treated',\n " ,
644+ " unit='unit',\n " ,
645+ " time='period'\n " ,
646+ " )\n " ,
647+ " \n " ,
648+ " print(f\"\\ n{method.capitalize()}:\" )\n " ,
649+ " print(f\" ATT: {res.att:.4f}\" )\n " ,
650+ " print(f\" SE: {res.se:.4f}\" )\n " ,
651+ " print(f\" 95% CI: [{res.conf_int[0]:.4f}, {res.conf_int[1]:.4f}]\" )"
652+ ]
429653 },
430654 {
431655 "cell_type" : " markdown" ,
508732 },
509733 "nbformat" : 4 ,
510734 "nbformat_minor" : 4
511- }
735+ }
0 commit comments