@@ -120,9 +120,9 @@ def test_fit_predict(self, coords, rng) -> None:
120120 ).shape == (20, 2 * 2)
121121 assert isinstance(score, pd.Series)
122122 assert score.shape == (2,)
123- # Test that the score follows the new standardized format
124- assert "unit_r2" in score.index
125- assert "unit_r2_std" in score.index
123+ # Test that the score follows the new unified format
124+ assert "unit_0_r2" in score.index
125+ assert "unit_0_r2_std" in score.index
126126 assert isinstance(predictions, az.InferenceData)
127127
128128
@@ -423,15 +423,15 @@ def test_scoring_multi_unit(self, synthetic_control_data):
423423 # Score should be a pandas Series with separate r2 and r2_std for each treated unit
424424 assert isinstance(score, pd.Series)
425425 
426- # Check that we have r2 and r2_std for each treated unit
427- for unit in treated_units:
428- assert f"{unit}_r2" in score.index
429- assert f"{unit}_r2_std" in score.index
426+ # Check that we have r2 and r2_std for each treated unit using unified format
427+ for i, unit in enumerate(treated_units):
428+ assert f"unit_{i}_r2" in score.index
429+ assert f"unit_{i}_r2_std" in score.index
430430 
431431 # R2 should be reasonable (between 0 and 1 typically, though can be negative)
432- assert score[f"{unit}_r2"] >= -1  # R2 can be negative for very bad fits
432+ assert score[f"unit_{i}_r2"] >= -1  # R2 can be negative for very bad fits
433433 assert (
434- score[f"{unit}_r2_std"] >= 0
434+ score[f"unit_{i}_r2_std"] >= 0
435435 )  # Standard deviation should be non-negative
436436
437437 def test_scoring_single_unit(self, single_treated_data):
@@ -444,16 +444,14 @@ def test_scoring_single_unit(self, single_treated_data):
444444 # Test scoring
445445 score = wsf.score(X, y)
446446 
447- # Now consistently uses treated unit name prefix even for single unit
447+ # Now consistently uses unified unit indexing even for single unit
448448 assert isinstance(score, pd.Series)
449- assert "treated_0_r2" in score.index
450- assert "treated_0_r2_std" in score.index
449+ assert "unit_0_r2" in score.index
450+ assert "unit_0_r2_std" in score.index
451451 
452452 # R2 should be reasonable
453- assert score["treated_0_r2"] >= -1  # R2 can be negative for very bad fits
454- assert (
455- score["treated_0_r2_std"] >= 0
456- )  # Standard deviation should be non-negative
453+ assert score["unit_0_r2"] >= -1  # R2 can be negative for very bad fits
454+ assert score["unit_0_r2_std"] >= 0  # Standard deviation should be non-negative
457455
458456 def test_r2_scores_differ_across_units(self, rng):
459457 """Test that R² scores are different for different treated units.
@@ -523,8 +521,8 @@ def test_r2_scores_differ_across_units(self, rng):
523521 wsf.fit(X, y, coords=coords)
524522 scores = wsf.score(X, y)
525523 
526- # Extract R² values for each treated unit
527- r2_values = [scores[f"{unit}_r2"] for unit in treated_units]
524+ # Extract R² values for each treated unit using unified format
525+ r2_values = [scores[f"unit_{i}_r2"] for i in range(len(treated_units))]
528526
529527 # Test that not all R² values are the same
530528 # Use a tolerance to avoid issues with floating point precision
0 commit comments