From fbd5b1ebcb64990eda2ebd993c288fb0588dfb39 Mon Sep 17 00:00:00 2001
From: Amit Sharma <amit_sharma@live.com>
Date: Sun, 5 Nov 2023 13:32:14 +0530
Subject: [PATCH 1/2] Auto-identify the effect modifier columns in EconML effect methods

Signed-off-by: Amit Sharma <amit_sharma@live.com>
---
 dowhy/causal_estimators/econml.py             | 13 ++--
 .../test_econml_estimator.py                  | 62 +++++++++++++++++--
 2 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/dowhy/causal_estimators/econml.py b/dowhy/causal_estimators/econml.py
index a98c178306..e7cca8f7f8 100755
--- a/dowhy/causal_estimators/econml.py
+++ b/dowhy/causal_estimators/econml.py
@@ -245,7 +245,6 @@ def estimate_effect(
         # Changing shape to a list for a singleton value
         # Note that self._control_value is assumed to be a singleton value
         self._treatment_value = parse_state(self._treatment_value)
-
         est = self.effect(X_test)
         ate = np.mean(est, axis=0)  # one value per treatment value
 
@@ -305,7 +304,6 @@ def apply_multitreatment(self, df: pd.DataFrame, fun: Callable, *args, **kwargs)
             filtered_df = None
         else:
             filtered_df = df.values
-
         for tv in self._treatment_value:
             ests.append(
                 fun(
@@ -330,8 +328,9 @@ def effect(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray:
 
         def effect_fun(filtered_df, T0, T1, *args, **kwargs):
             return self.estimator.effect(filtered_df, T0=T0, T1=T1, *args, **kwargs)
-
-        return self.apply_multitreatment(df, effect_fun, *args, **kwargs)
+        
+        Xdf = df[self._effect_modifier_names] if df is not None else df
+        return self.apply_multitreatment(Xdf, effect_fun, *args, **kwargs)
 
     def effect_interval(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray:
         """
@@ -346,7 +345,8 @@ def effect_interval_fun(filtered_df, T0, T1, *args, **kwargs):
                 filtered_df, T0=T0, T1=T1, alpha=1 - self.confidence_level, *args, **kwargs
             )
 
-        return self.apply_multitreatment(df, effect_interval_fun, *args, **kwargs)
+        Xdf = df[self._effect_modifier_names] if df is not None else df
+        return self.apply_multitreatment(Xdf, effect_interval_fun, *args, **kwargs)
 
     def effect_inference(self, df: pd.DataFrame, *args, **kwargs):
         """
@@ -359,7 +359,8 @@ def effect_inference(self, df: pd.DataFrame, *args, **kwargs):
         def effect_inference_fun(filtered_df, T0, T1, *args, **kwargs):
             return self.estimator.effect_inference(filtered_df, T0=T0, T1=T1, *args, **kwargs)
 
-        return self.apply_multitreatment(df, effect_inference_fun, *args, **kwargs)
+        Xdf = df[self._effect_modifier_names] if df is not None else df
+        return self.apply_multitreatment(Xdf, effect_inference_fun, *args, **kwargs)
 
     def effect_tt(self, df: pd.DataFrame, treatment_value, *args, **kwargs):
         """
diff --git a/tests/causal_estimators/test_econml_estimator.py b/tests/causal_estimators/test_econml_estimator.py
index 97b6c8adf0..009e649032 100644
--- a/tests/causal_estimators/test_econml_estimator.py
+++ b/tests/causal_estimators/test_econml_estimator.py
@@ -8,7 +8,6 @@
 from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import PolynomialFeatures
-
 import dowhy
 from dowhy import CausalModel, datasets
 
@@ -28,7 +27,7 @@ def test_backdoor_estimators(self):
         data = datasets.linear_dataset(
             10,
             num_common_causes=4,
-            num_samples=10000,
+            num_samples=1000,
             num_instruments=2,
             num_effect_modifiers=2,
             num_treatments=1,
@@ -59,6 +58,9 @@ def test_backdoor_estimators(self):
                 "fit_params": {},
             },
         )
+        # Checking that the CATE estimates are not identical 
+        dml_cate_estimates_f = dml_estimate.cate_estimates.flatten()
+        assert pytest.approx(dml_cate_estimates_f[0], 0.01) != dml_cate_estimates_f[1]
         # Test ContinuousTreatmentOrthoForest
         orthoforest_estimate = model.estimate_effect(
             identified_estimand,
@@ -66,11 +68,15 @@ def test_backdoor_estimators(self):
             target_units=lambda df: df["X0"] > 2,
             method_params={"init_params": {"n_trees": 10}, "fit_params": {}},
         )
+        # Checking that the CATE estimates are not identical 
+        orthoforest_cate_estimates_f = orthoforest_estimate.cate_estimates.flatten()
+        assert pytest.approx(orthoforest_cate_estimates_f[0], 0.01) != orthoforest_cate_estimates_f[1]
+
         # Test LinearDRLearner
         data_binary = datasets.linear_dataset(
             10,
             num_common_causes=4,
-            num_samples=10000,
+            num_samples=1000,
             num_instruments=2,
             num_effect_modifiers=2,
             treatment_is_binary=True,
@@ -94,6 +100,52 @@ def test_backdoor_estimators(self):
                 "fit_params": {},
             },
         )
+        drlearner_cate_estimates_f = drlearner_estimate.cate_estimates.flatten()
+        assert pytest.approx(drlearner_cate_estimates_f[0], 0.01) != drlearner_cate_estimates_f[1]
+
+    def test_metalearners(self):
+        data = datasets.linear_dataset(
+            10,
+            num_common_causes=4,
+            num_samples=1000,
+            num_instruments=2,
+            num_effect_modifiers=2,
+            num_treatments=1,
+            treatment_is_binary=True,
+        )
+        df = data["df"]
+        model = CausalModel(
+            data=data["df"],
+            treatment=data["treatment_name"],
+            outcome=data["outcome_name"],
+            effect_modifiers=data["effect_modifier_names"],
+            graph=data["gml_graph"],
+        )
+        identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
+        # Test LinearDML
+        sl_estimate = model.estimate_effect(
+            identified_estimand,
+            method_name='backdoor.econml.metalearners.SLearner',
+            target_units='ate',
+            method_params={
+                'init_params': {
+                'overall_model': GradientBoostingRegressor()
+            },
+            'fit_params': {}
+        })
+        # checking that CATE estimates are not identical
+        sl_cate_estimates_f = sl_estimate.cate_estimates.flatten()
+        assert pytest.approx(sl_cate_estimates_f[0], 0.01) != sl_cate_estimates_f[1]
+
+        # predict on new data
+        sl_estimate_test = model.estimate_effect(
+            identified_estimand,
+            method_name='backdoor.econml.metalearners.SLearner',
+            fit_estimator=False,
+            target_units=data["df"].sample(frac=0.1)
+            )
+        sl_cate_estimates_test_f = sl_estimate_test.cate_estimates.flatten()
+        assert pytest.approx(sl_cate_estimates_test_f[0], 0.01) != sl_cate_estimates_test_f[1]
 
     def test_iv_estimators(self):
         keras = pytest.importorskip("keras")
@@ -101,7 +153,7 @@ def test_iv_estimators(self):
         data = datasets.linear_dataset(
             10,
             num_common_causes=4,
-            num_samples=10000,
+            num_samples=1000,
             num_instruments=2,
             num_effect_modifiers=2,
             num_treatments=1,
@@ -164,7 +216,7 @@ def test_iv_estimators(self):
         data = datasets.linear_dataset(
             10,
             num_common_causes=4,
-            num_samples=10000,
+            num_samples=1000,
             num_instruments=1,
             num_effect_modifiers=2,
             num_treatments=1,

From 53a26be4640b790f99905447aebc1bd55b2666af Mon Sep 17 00:00:00 2001
From: Amit Sharma <amit_sharma@live.com>
Date: Sun, 5 Nov 2023 13:38:59 +0530
Subject: [PATCH 2/2] Fix formatting errors (trailing whitespace, quotes, import spacing)

Signed-off-by: Amit Sharma <amit_sharma@live.com>
---
 dowhy/causal_estimators/econml.py             |  2 +-
 .../test_econml_estimator.py                  | 23 ++++++++-----------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/dowhy/causal_estimators/econml.py b/dowhy/causal_estimators/econml.py
index e7cca8f7f8..819d3211cb 100755
--- a/dowhy/causal_estimators/econml.py
+++ b/dowhy/causal_estimators/econml.py
@@ -328,7 +328,7 @@ def effect(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray:
 
         def effect_fun(filtered_df, T0, T1, *args, **kwargs):
             return self.estimator.effect(filtered_df, T0=T0, T1=T1, *args, **kwargs)
-        
+
         Xdf = df[self._effect_modifier_names] if df is not None else df
         return self.apply_multitreatment(Xdf, effect_fun, *args, **kwargs)
 
diff --git a/tests/causal_estimators/test_econml_estimator.py b/tests/causal_estimators/test_econml_estimator.py
index 009e649032..335b31555a 100644
--- a/tests/causal_estimators/test_econml_estimator.py
+++ b/tests/causal_estimators/test_econml_estimator.py
@@ -8,6 +8,7 @@
 from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import PolynomialFeatures
+
 import dowhy
 from dowhy import CausalModel, datasets
 
@@ -58,7 +59,7 @@ def test_backdoor_estimators(self):
                 "fit_params": {},
             },
         )
-        # Checking that the CATE estimates are not identical 
+        # Checking that the CATE estimates are not identical
         dml_cate_estimates_f = dml_estimate.cate_estimates.flatten()
         assert pytest.approx(dml_cate_estimates_f[0], 0.01) != dml_cate_estimates_f[1]
         # Test ContinuousTreatmentOrthoForest
@@ -68,7 +69,7 @@ def test_backdoor_estimators(self):
             target_units=lambda df: df["X0"] > 2,
             method_params={"init_params": {"n_trees": 10}, "fit_params": {}},
         )
-        # Checking that the CATE estimates are not identical 
+        # Checking that the CATE estimates are not identical
         orthoforest_cate_estimates_f = orthoforest_estimate.cate_estimates.flatten()
         assert pytest.approx(orthoforest_cate_estimates_f[0], 0.01) != orthoforest_cate_estimates_f[1]
 
@@ -125,14 +126,10 @@ def test_metalearners(self):
         # Test LinearDML
         sl_estimate = model.estimate_effect(
             identified_estimand,
-            method_name='backdoor.econml.metalearners.SLearner',
-            target_units='ate',
-            method_params={
-                'init_params': {
-                'overall_model': GradientBoostingRegressor()
-            },
-            'fit_params': {}
-        })
+            method_name="backdoor.econml.metalearners.SLearner",
+            target_units="ate",
+            method_params={"init_params": {"overall_model": GradientBoostingRegressor()}, "fit_params": {}},
+        )
         # checking that CATE estimates are not identical
         sl_cate_estimates_f = sl_estimate.cate_estimates.flatten()
         assert pytest.approx(sl_cate_estimates_f[0], 0.01) != sl_cate_estimates_f[1]
@@ -140,10 +137,10 @@ def test_metalearners(self):
         # predict on new data
         sl_estimate_test = model.estimate_effect(
             identified_estimand,
-            method_name='backdoor.econml.metalearners.SLearner',
+            method_name="backdoor.econml.metalearners.SLearner",
             fit_estimator=False,
-            target_units=data["df"].sample(frac=0.1)
-            )
+            target_units=data["df"].sample(frac=0.1),
+        )
         sl_cate_estimates_test_f = sl_estimate_test.cate_estimates.flatten()
         assert pytest.approx(sl_cate_estimates_test_f[0], 0.01) != sl_cate_estimates_test_f[1]