From 34c18aa6e2f92781463be739a69df7ca14e38517 Mon Sep 17 00:00:00 2001 From: Amit Sharma Date: Sat, 4 Nov 2023 20:11:53 +0530 Subject: [PATCH 1/2] fixed frontdoor bug Signed-off-by: Amit Sharma --- .../two_stage_regression_estimator.py | 1 - .../test_two_stage_regression_estimator.py | 79 +++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/dowhy/causal_estimators/two_stage_regression_estimator.py b/dowhy/causal_estimators/two_stage_regression_estimator.py index 3b91a8a90d..4c2010ae89 100644 --- a/dowhy/causal_estimators/two_stage_regression_estimator.py +++ b/dowhy/causal_estimators/two_stage_regression_estimator.py @@ -125,7 +125,6 @@ def __init__( ) ) else: - modified_target_estimand = copy.deepcopy(self._target_estimand) self._second_stage_model = self.__class__.DEFAULT_SECOND_STAGE_MODEL( modified_target_estimand, test_significance=self._significance_test, diff --git a/tests/causal_estimators/test_two_stage_regression_estimator.py b/tests/causal_estimators/test_two_stage_regression_estimator.py index e932a8f79c..9230d2bd63 100644 --- a/tests/causal_estimators/test_two_stage_regression_estimator.py +++ b/tests/causal_estimators/test_two_stage_regression_estimator.py @@ -2,6 +2,10 @@ from pytest import mark from dowhy.causal_estimators.two_stage_regression_estimator import TwoStageRegressionEstimator +from dowhy import CausalModel +import numpy as np +import pandas as pd + from .base import TestEstimator @@ -71,3 +75,78 @@ def test_average_treatment_effect( ], method_params={"num_simulations": 10, "num_null_simulations": 10}, ) + + def test_frontdoor_estimator(self): + """ + Test for frontdoor estimation, from @AlxndrMlk + See issue #616 https://github.com/py-why/dowhy/issues/616 + """ + + # Create the graph describing the causal structure + graph = """ + graph [ + directed 1 + + node [ + id "X" + label "X" + ] + node [ + id "Z" + label "Z" + ] + node [ + id "Y" + label "Y" + ] + node [ + id "U" + label "U" + ] + + edge [ + source "X" + target "Z" + ] + + edge [ + source "Z" + target "Y" + ] + + edge [ + source "U" + target "Y" + ] + + edge [ + source "U" + target "X" + ] + ] + """.replace('\n', '') + + N_SAMPLES = 10000 + # Generate the data + U = np.random.randn(N_SAMPLES) + X = np.random.randn(N_SAMPLES) + 0.3*U + Z = 0.7*X + 0.3*np.random.randn(N_SAMPLES) + Y = 0.65*Z + 0.2*U + + # Data to df + df = pd.DataFrame(np.vstack([X, Z, Y]).T, columns=['X', 'Z', 'Y']) + + # Create a model + model = CausalModel( + data=df, + treatment='X', + outcome='Y', + graph=graph + ) + estimand = model.identify_effect(proceed_when_unidentifiable=True) + # Estimate the effect with front-door + estimate = model.estimate_effect( + identified_estimand=estimand, + method_name='frontdoor.two_stage_regression' + ) + assert estimate.value == pytest.approx(0.45, 0.025) From 9d7dba7696e970f65f8395e690ac2fa7c38d9cd4 Mon Sep 17 00:00:00 2001 From: Amit Sharma Date: Sat, 4 Nov 2023 20:18:46 +0530 Subject: [PATCH 2/2] fixed formatting issues Signed-off-by: Amit Sharma --- .../test_two_stage_regression_estimator.py | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/tests/causal_estimators/test_two_stage_regression_estimator.py b/tests/causal_estimators/test_two_stage_regression_estimator.py index 9230d2bd63..5be515d0c5 100644 --- a/tests/causal_estimators/test_two_stage_regression_estimator.py +++ b/tests/causal_estimators/test_two_stage_regression_estimator.py @@ -1,11 +1,10 @@ +import numpy as np +import pandas as pd import pytest from pytest import mark -from dowhy.causal_estimators.two_stage_regression_estimator import TwoStageRegressionEstimator from dowhy import CausalModel -import numpy as np -import pandas as pd - +from dowhy.causal_estimators.two_stage_regression_estimator import TwoStageRegressionEstimator from .base import TestEstimator @@ -124,29 +123,23 @@ def test_frontdoor_estimator(self): target "X" ] ] - """.replace('\n', '') + """.replace( + "\n", "" + ) N_SAMPLES = 10000 # Generate the data U = np.random.randn(N_SAMPLES) - X = np.random.randn(N_SAMPLES) + 0.3*U - Z = 0.7*X + 0.3*np.random.randn(N_SAMPLES) - Y = 0.65*Z + 0.2*U + X = np.random.randn(N_SAMPLES) + 0.3 * U + Z = 0.7 * X + 0.3 * np.random.randn(N_SAMPLES) + Y = 0.65 * Z + 0.2 * U # Data to df - df = pd.DataFrame(np.vstack([X, Z, Y]).T, columns=['X', 'Z', 'Y']) + df = pd.DataFrame(np.vstack([X, Z, Y]).T, columns=["X", "Z", "Y"]) # Create a model - model = CausalModel( - data=df, - treatment='X', - outcome='Y', - graph=graph - ) + model = CausalModel(data=df, treatment="X", outcome="Y", graph=graph) estimand = model.identify_effect(proceed_when_unidentifiable=True) # Estimate the effect with front-door - estimate = model.estimate_effect( - identified_estimand=estimand, - method_name='frontdoor.two_stage_regression' - ) + estimate = model.estimate_effect(identified_estimand=estimand, method_name="frontdoor.two_stage_regression") assert estimate.value == pytest.approx(0.45, 0.025)