py-why · github-actions · Apr 2, 2026
diff --git a/dowhy/causal_estimators/econml.py b/dowhy/causal_estimators/econml.py
@@ -194,7 +194,7 @@ def fit(
     def _get_econml_class_object(self, module_method_name, *args, **kwargs):
         # from https://www.bnmetrics.com/blog/factory-pattern-in-python3-simple-version
         try:
-            (module_name, _, class_name) = module_method_name.rpartition(".")
+            module_name, _, class_name = module_method_name.rpartition(".")
             estimator_module = import_module(module_name)
             estimator_class = getattr(estimator_module, class_name)
 
@@ -233,7 +233,7 @@ def estimate_effect(
         X_test = X
         if X is not None:
             if type(target_units) is pd.DataFrame:
-                X_test = target_units
+                X_test = self._encode(target_units[self._effect_modifier_names], "effect_modifiers")
             elif callable(target_units):
                 filtered_rows = data.where(target_units)
                 boolean_criterion = np.array(filtered_rows.notnull().iloc[:, 0])
@@ -289,7 +289,8 @@ def construct_symbolic_estimator(self, estimand):
         return expr
 
     def shap_values(self, df: pd.DataFrame, *args, **kwargs):
-        return self.estimator.shap_values(df[self._effect_modifier_names].values, *args, **kwargs)
+        em_encoded = self._encode(df[self._effect_modifier_names], "effect_modifiers")
+        return self.estimator.shap_values(em_encoded.values, *args, **kwargs)
 
     def apply_multitreatment(self, df: pd.DataFrame, fun: Callable, *args, **kwargs):
         ests = []
@@ -316,16 +317,15 @@ def effect(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray:
         """
         Pointwise estimated treatment effect,
         output shape n_units x n_treatment_values (not counting control)
-        :param df: Features of the units to evaluate
+        :param df: Already-encoded effect modifiers of the units to evaluate (not subset or encoded here)
         :param args: passed through to the underlying estimator
         :param kwargs: passed through to the underlying estimator
         """
 
         def effect_fun(filtered_df, T0, T1, *args, **kwargs):
             return self.estimator.effect(filtered_df, T0=T0, T1=T1, *args, **kwargs)
 
-        Xdf = df[self._effect_modifier_names] if df is not None else df
-        return self.apply_multitreatment(Xdf, effect_fun, *args, **kwargs)
+        return self.apply_multitreatment(df, effect_fun, *args, **kwargs)
 
     def effect_interval(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray:
         """
@@ -340,8 +340,7 @@ def effect_interval_fun(filtered_df, T0, T1, *args, **kwargs):
                 filtered_df, T0=T0, T1=T1, alpha=1 - self.confidence_level, *args, **kwargs
             )
 
-        Xdf = df[self._effect_modifier_names] if df is not None else df
-        return self.apply_multitreatment(Xdf, effect_interval_fun, *args, **kwargs)
+        return self.apply_multitreatment(df, effect_interval_fun, *args, **kwargs)
 
     def effect_inference(self, df: pd.DataFrame, *args, **kwargs):
         """
@@ -354,8 +353,7 @@ def effect_inference(self, df: pd.DataFrame, *args, **kwargs):
         def effect_inference_fun(filtered_df, T0, T1, *args, **kwargs):
             return self.estimator.effect_inference(filtered_df, T0=T0, T1=T1, *args, **kwargs)
 
-        Xdf = df[self._effect_modifier_names] if df is not None else df
-        return self.apply_multitreatment(Xdf, effect_inference_fun, *args, **kwargs)
+        return self.apply_multitreatment(df, effect_inference_fun, *args, **kwargs)
 
     def effect_tt(self, df: pd.DataFrame, treatment_value, *args, **kwargs):
         """
@@ -365,8 +363,8 @@ def effect_tt(self, df: pd.DataFrame, treatment_value, *args, **kwargs):
         :param args: passed through to estimator.effect()
         :param kwargs: passed through to estimator.effect()
         """
-
-        eff = self.effect(df[self._effect_modifier_names], *args, **kwargs).reshape((len(df), len(treatment_value)))
+        em_encoded = self._encode(df[self._effect_modifier_names], "effect_modifiers")
+        eff = self.effect(em_encoded, *args, **kwargs).reshape((len(df), len(treatment_value)))
 
         out = np.zeros(len(df))
         treatment_value = parse_state(treatment_value)

diff --git a/tests/causal_estimators/test_econml_estimator.py b/tests/causal_estimators/test_econml_estimator.py
@@ -447,3 +447,50 @@ def test_effect_modifier_input2(self):
         )
 
         assert np.array_equal(est1.cate_estimates, est2.cate_estimates)
+
+    def test_categorical_effect_modifiers(self):
+        """Regression test for #820: KeyError when effect modifiers contain categorical columns.
+
+        When effect modifiers include categorical (dtype='category') columns, they get
+        one-hot encoded internally. The EconML estimator should still receive the correct
+        encoded data without raising a KeyError.
+        """
+        np.random.seed(42)
+        n = 200
+        df = pd.DataFrame(
+            {
+                "x0": np.random.binomial(1, 0.5, size=n).astype("float"),
+                "x1": np.random.randn(n),
+                "x2": np.random.randint(3, size=n),
+                "x3": np.random.randn(n),
+            }
+        )
+        # Make x2 categorical -- this triggers one-hot encoding inside CausalEstimator
+        df = df.astype({"x2": "category"})
+
+        graph_str = """
+        digraph {
+            x0; x1; x2; x3;
+            x3 -> x0;
+            x3 -> x1;
+            x2 -> x1;
+            x0 -> x1;
+        }
+        """
+        model = CausalModel(data=df, treatment="x0", outcome="x1", graph=graph_str)
+        identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
+
+        # Should not raise KeyError for categorical effect modifier x2
+        estimate = model.estimate_effect(
+            identified_estimand,
+            method_name="backdoor.econml.dml.DML",
+            method_params={
+                "init_params": {
+                    "model_y": GradientBoostingRegressor(),
+                    "model_t": GradientBoostingClassifier(),
+                    "discrete_treatment": True,
+                },
+                "fit_params": {},
+            },
+        )
+        assert estimate.value is not None