Skip to content

Commit b9abbb8

Browse files
committed
Add more model examples to notebook + add time-varying predictors
The `generate_event_study_data` function now supports optional time-varying predictors generated as AR(1) processes, controlled by new parameters: `predictor_effects`, `ar_phi`, and `ar_scale`. Added the `generate_ar1_series` utility function. Updated docstrings and examples to reflect these changes. The event study PyMC notebook was updated with additional analysis and improved section headings.
1 parent e4f82a3 commit b9abbb8

File tree

2 files changed

+753
-18
lines changed

2 files changed

+753
-18
lines changed

causalpy/data/simulate_data.py

Lines changed: 106 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -450,14 +450,18 @@ def generate_event_study_data(
450450
unit_fe_sigma: float = 1.0,
451451
time_fe_sigma: float = 0.5,
452452
noise_sigma: float = 0.2,
453+
predictor_effects: dict[str, float] | None = None,
454+
ar_phi: float = 0.9,
455+
ar_scale: float = 1.0,
453456
seed: int | None = None,
454457
) -> pd.DataFrame:
455458
"""
456459
Generate synthetic panel data for event study / dynamic DiD analysis.
457460
458461
Creates panel data with unit and time fixed effects, where a fraction of units
459462
receive treatment at a common treatment time. Treatment effects can vary by
460-
event time (time relative to treatment).
463+
event time (time relative to treatment). Optionally includes time-varying
464+
predictor variables generated via AR(1) processes.
461465
462466
Parameters
463467
----------
@@ -482,6 +486,19 @@ def generate_event_study_data(
482486
Standard deviation for time fixed effects. Default 0.5.
483487
noise_sigma : float
484488
Standard deviation for observation noise. Default 0.2.
489+
predictor_effects : dict[str, float], optional
490+
Dictionary mapping predictor names to their true coefficients.
491+
Each predictor is generated as an AR(1) time series that varies over time
492+
but is the same for all units at a given time. For example,
493+
``{'temperature': 0.3, 'humidity': -0.1}`` creates two predictors.
494+
Default None (no predictors).
495+
ar_phi : float
496+
AR(1) autoregressive coefficient controlling persistence of predictors.
497+
Values closer to 1 produce smoother, more persistent series.
498+
Default 0.9.
499+
ar_scale : float
500+
Standard deviation of the AR(1) innovation noise for predictors.
501+
Default 1.0.
485502
seed : int, optional
486503
Random seed for reproducibility.
487504
@@ -494,6 +511,7 @@ def generate_event_study_data(
494511
- y: Outcome variable
495512
- treat_time: Treatment time for unit (NaN if never treated)
496513
- treated: Whether unit is in treated group (0 or 1)
514+
- <predictor_name>: One column per predictor (if predictor_effects provided)
497515
498516
Example
499517
--------
@@ -505,6 +523,20 @@ def generate_event_study_data(
505523
(400, 5)
506524
>>> df.columns.tolist()
507525
['unit', 'time', 'y', 'treat_time', 'treated']
526+
527+
With predictors:
528+
529+
>>> df = generate_event_study_data(
530+
... n_units=10,
531+
... n_time=10,
532+
... treatment_time=5,
533+
... seed=42,
534+
... predictor_effects={"temperature": 0.3, "humidity": -0.1},
535+
... )
536+
>>> df.shape
537+
(100, 7)
538+
>>> "temperature" in df.columns and "humidity" in df.columns
539+
True
508540
"""
509541
if seed is not None:
510542
np.random.seed(seed)
@@ -529,6 +561,16 @@ def generate_event_study_data(
529561
# Generate time fixed effects
530562
time_fe = np.random.normal(0, time_fe_sigma, n_time)
531563

564+
# Generate predictor time series (if any)
565+
# Each predictor is an AR(1) series that varies over time but is the same
566+
# for all units at a given time
567+
predictors: dict[str, np.ndarray] = {}
568+
if predictor_effects is not None:
569+
for predictor_name in predictor_effects:
570+
predictors[predictor_name] = generate_ar1_series(
571+
n=n_time, phi=ar_phi, scale=ar_scale
572+
)
573+
532574
# Build panel data
533575
data = []
534576
for unit in range(n_units):
@@ -539,6 +581,11 @@ def generate_event_study_data(
539581
# Base outcome: unit FE + time FE + noise
540582
y = unit_fe[unit] + time_fe[t] + np.random.normal(0, noise_sigma)
541583

584+
# Add predictor contributions to outcome
585+
if predictor_effects is not None:
586+
for predictor_name, coef in predictor_effects.items():
587+
y += coef * predictors[predictor_name][t]
588+
542589
# Add treatment effect for treated units in event window
543590
if is_treated:
544591
event_time = t - treatment_time
@@ -548,15 +595,18 @@ def generate_event_study_data(
548595
):
549596
y += treatment_effects[event_time]
550597

551-
data.append(
552-
{
553-
"unit": unit,
554-
"time": t,
555-
"y": y,
556-
"treat_time": unit_treat_time,
557-
"treated": 1 if is_treated else 0,
558-
}
559-
)
598+
row = {
599+
"unit": unit,
600+
"time": t,
601+
"y": y,
602+
"treat_time": unit_treat_time,
603+
"treated": 1 if is_treated else 0,
604+
}
605+
# Add predictor values to the row
606+
for predictor_name, series in predictors.items():
607+
row[predictor_name] = series[t]
608+
609+
data.append(row)
560610

561611
return pd.DataFrame(data)
562612

@@ -566,6 +616,52 @@ def generate_event_study_data(
566616
# -----------------
567617

568618

619+
def generate_ar1_series(
    n: int,
    phi: float = 0.9,
    scale: float = 1.0,
    initial: float = 0.0,
) -> np.ndarray:
    """
    Generate an AR(1) autoregressive time series.

    The AR(1) process is defined as:
        x_{t+1} = phi * x_t + eta_t, where eta_t ~ N(0, scale^2)

    Parameters
    ----------
    n : int
        Length of the time series to generate. If ``n <= 0``, an empty
        array is returned.
    phi : float
        Autoregressive coefficient controlling persistence. Values closer to 1
        produce smoother, more persistent series. Must be in (-1, 1) for
        stationarity. Default 0.9.
    scale : float
        Standard deviation of the innovation noise. Default 1.0.
    initial : float
        Initial value of the series. Default 0.0.

    Returns
    -------
    np.ndarray
        Array of length n containing the AR(1) time series.

    Example
    -------
    >>> from causalpy.data.simulate_data import generate_ar1_series
    >>> np.random.seed(42)
    >>> series = generate_ar1_series(n=10, phi=0.9, scale=0.5)
    >>> len(series)
    10
    """
    # Guard against non-positive lengths: np.random.normal(..., n - 1) raises
    # ValueError for a negative size, and series[0] would fail for n == 0.
    if n <= 0:
        return np.zeros(0)
    series = np.zeros(n)
    series[0] = initial
    # Draw all n - 1 innovations up front; the recurrence below is then the
    # only Python-level loop.
    innovations = np.random.normal(0, scale, n - 1)
    for t in range(1, n):
        series[t] = phi * series[t - 1] + innovations[t - 1]
    return series
663+
664+
569665
def generate_seasonality(
570666
n: int = 12, amplitude: int = 1, length_scale: float = 0.5
571667
) -> np.ndarray:

docs/source/notebooks/event_study_pymc.ipynb

Lines changed: 647 additions & 8 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)