Merge pull request #286 from pymc-labs/random_seed

drbenvincent · web-flow · commit 6283c7625b72 · 2024-01-05T21:08:03.000Z
Make results fully reproducible
diff --git a/causalpy/pymc_experiments.py b/causalpy/pymc_experiments.py
@@ -143,8 +143,8 @@ class PrePostFit(ExperimentalDesign):
     Formula: actual ~ 0 + a + g
     Model coefficients:
     a                             0.6, 94% HDI [0.6, 0.6]
-    g                             0.3, 94% HDI [0.3, 0.3]
-    sigma                         0.7, 94% HDI [0.6, 0.9]
+    g                             0.4, 94% HDI [0.4, 0.4]
+    sigma                         0.8, 94% HDI [0.6, 0.9]
     """
 
     def __init__(
diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py
@@ -96,12 +96,22 @@ def fit(self, X, y, coords: Optional[Dict[str, Any]] = None) -> None:
         """Draw samples from posterior, prior predictive, and posterior predictive
         distributions, placing them in the model's idata attribute.
         """
+
+        # Ensure random_seed is used in sample_prior_predictive() and
+        # sample_posterior_predictive() if provided in sample_kwargs.
+        if "random_seed" in self.sample_kwargs:
+            random_seed = self.sample_kwargs["random_seed"]
+        else:
+            random_seed = None
+
         self.build_model(X, y, coords)
         with self.model:
             self.idata = pm.sample(**self.sample_kwargs)
-            self.idata.extend(pm.sample_prior_predictive())
+            self.idata.extend(pm.sample_prior_predictive(random_seed=random_seed))
             self.idata.extend(
-                pm.sample_posterior_predictive(self.idata, progressbar=False)
+                pm.sample_posterior_predictive(
+                    self.idata, progressbar=False, random_seed=random_seed
+                )
             )
         return self.idata
 
diff --git a/causalpy/tests/test_pymc_models.py b/causalpy/tests/test_pymc_models.py
@@ -123,3 +123,42 @@ def test_idata_property():
     )
     assert hasattr(result, "idata")
     assert isinstance(result.idata, az.InferenceData)
+
+
+seeds = [1234, 42, 123456789]
+
+
+@pytest.mark.parametrize("seed", seeds)
+def test_result_reproducibility(seed):
+    """Test that we can reproduce the results from the model. We could in theory test
+    this with all the model and experiment types, but what is being targeted is
+    the ModelBuilder.fit method, so we should be safe testing with just one model. Here
+    we use the DifferenceInDifferences experiment class."""
+    # Load the data
+    df = cp.load_data("did")
+    # Seed a copy so the shared, global sample_kwargs dict is left unmodified
+    seeded_kwargs = {**sample_kwargs, "random_seed": seed}
+    # Calculate the result twice
+    result1 = cp.pymc_experiments.DifferenceInDifferences(
+        df,
+        formula="y ~ 1 + group + t + group:post_treatment",
+        time_variable_name="t",
+        group_variable_name="group",
+        treated=1,
+        untreated=0,
+        model=cp.pymc_models.LinearRegression(sample_kwargs=seeded_kwargs),
+    )
+    result2 = cp.pymc_experiments.DifferenceInDifferences(
+        df,
+        formula="y ~ 1 + group + t + group:post_treatment",
+        time_variable_name="t",
+        group_variable_name="group",
+        treated=1,
+        untreated=0,
+        model=cp.pymc_models.LinearRegression(sample_kwargs=seeded_kwargs),
+    )
+    assert np.all(result1.idata.posterior.mu == result2.idata.posterior.mu)
+    assert np.all(result1.idata.prior.mu == result2.idata.prior.mu)
+    assert np.all(
+        result1.idata.prior_predictive.y_hat == result2.idata.prior_predictive.y_hat
+    )