Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 10 additions & 12 deletions dowhy/causal_estimators/econml.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def fit(
def _get_econml_class_object(self, module_method_name, *args, **kwargs):
# from https://www.bnmetrics.com/blog/factory-pattern-in-python3-simple-version
try:
(module_name, _, class_name) = module_method_name.rpartition(".")
module_name, _, class_name = module_method_name.rpartition(".")
estimator_module = import_module(module_name)
estimator_class = getattr(estimator_module, class_name)

Expand Down Expand Up @@ -233,7 +233,7 @@ def estimate_effect(
X_test = X
if X is not None:
if type(target_units) is pd.DataFrame:
X_test = target_units
X_test = self._encode(target_units[self._effect_modifier_names], "effect_modifiers")
elif callable(target_units):
filtered_rows = data.where(target_units)
boolean_criterion = np.array(filtered_rows.notnull().iloc[:, 0])
Expand Down Expand Up @@ -289,7 +289,8 @@ def construct_symbolic_estimator(self, estimand):
return expr

def shap_values(self, df: pd.DataFrame, *args, **kwargs):
    """
    Compute SHAP values from the underlying estimator for the given units.

    The effect-modifier columns are selected from ``df`` and passed through
    ``self._encode`` (with the ``"effect_modifiers"`` key) before being handed
    to the estimator, so the estimator receives the encoded representation
    rather than the raw columns.

    :param df: Data containing at least the effect-modifier columns
    :param args: passed through to the underlying estimator's shap_values()
    :param kwargs: passed through to the underlying estimator's shap_values()
    :returns: whatever the underlying estimator's shap_values() returns
    """
    # Encode first: returning on the raw columns would skip the encoding step entirely.
    em_encoded = self._encode(df[self._effect_modifier_names], "effect_modifiers")
    return self.estimator.shap_values(em_encoded.values, *args, **kwargs)

def apply_multitreatment(self, df: pd.DataFrame, fun: Callable, *args, **kwargs):
ests = []
Expand All @@ -316,16 +317,15 @@ def effect(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray:
"""
Pointwise estimated treatment effect,
output shape n_units x n_treatment_values (not counting control)
:param df: Features of the units to evaluate
:param df: Features of the units to evaluate (already encoded effect modifiers)
:param args: passed through to the underlying estimator
:param kwargs: passed through to the underlying estimator
"""

def effect_fun(filtered_df, T0, T1, *args, **kwargs):
return self.estimator.effect(filtered_df, T0=T0, T1=T1, *args, **kwargs)

Xdf = df[self._effect_modifier_names] if df is not None else df
return self.apply_multitreatment(Xdf, effect_fun, *args, **kwargs)
return self.apply_multitreatment(df, effect_fun, *args, **kwargs)

def effect_interval(self, df: pd.DataFrame, *args, **kwargs) -> np.ndarray:
"""
Expand All @@ -340,8 +340,7 @@ def effect_interval_fun(filtered_df, T0, T1, *args, **kwargs):
filtered_df, T0=T0, T1=T1, alpha=1 - self.confidence_level, *args, **kwargs
)

Xdf = df[self._effect_modifier_names] if df is not None else df
return self.apply_multitreatment(Xdf, effect_interval_fun, *args, **kwargs)
return self.apply_multitreatment(df, effect_interval_fun, *args, **kwargs)

def effect_inference(self, df: pd.DataFrame, *args, **kwargs):
"""
Expand All @@ -354,8 +353,7 @@ def effect_inference(self, df: pd.DataFrame, *args, **kwargs):
def effect_inference_fun(filtered_df, T0, T1, *args, **kwargs):
return self.estimator.effect_inference(filtered_df, T0=T0, T1=T1, *args, **kwargs)

Xdf = df[self._effect_modifier_names] if df is not None else df
return self.apply_multitreatment(Xdf, effect_inference_fun, *args, **kwargs)
return self.apply_multitreatment(df, effect_inference_fun, *args, **kwargs)

def effect_tt(self, df: pd.DataFrame, treatment_value, *args, **kwargs):
"""
Expand All @@ -365,8 +363,8 @@ def effect_tt(self, df: pd.DataFrame, treatment_value, *args, **kwargs):
:param args: passed through to estimator.effect()
:param kwargs: passed through to estimator.effect()
"""

eff = self.effect(df[self._effect_modifier_names], *args, **kwargs).reshape((len(df), len(treatment_value)))
em_encoded = self._encode(df[self._effect_modifier_names], "effect_modifiers")
eff = self.effect(em_encoded, *args, **kwargs).reshape((len(df), len(treatment_value)))

out = np.zeros(len(df))
treatment_value = parse_state(treatment_value)
Expand Down
47 changes: 47 additions & 0 deletions tests/causal_estimators/test_econml_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,50 @@ def test_effect_modifier_input2(self):
)

assert np.array_equal(est1.cate_estimates, est2.cate_estimates)

def test_categorical_effect_modifiers(self):
    """Regression test for #820: categorical effect modifiers must not raise KeyError.

    A column with dtype='category' among the effect modifiers is one-hot encoded
    internally. Estimation through the EconML wrapper should still complete and
    produce a value instead of failing with a KeyError.
    """
    np.random.seed(42)
    n_rows = 200

    # Draw in the order x0, x1, x2, x3 so the seeded random stream is consumed
    # in a fixed sequence.
    x0_values = np.random.binomial(1, 0.5, size=n_rows).astype("float")
    x1_values = np.random.randn(n_rows)
    x2_values = np.random.randint(3, size=n_rows)
    x3_values = np.random.randn(n_rows)
    frame = pd.DataFrame({"x0": x0_values, "x1": x1_values, "x2": x2_values, "x3": x3_values})

    # x2 becomes categorical -- this is what triggers one-hot encoding inside CausalEstimator
    frame = frame.astype({"x2": "category"})

    graph_str = """
digraph {
x0; x1; x2; x3;
x3 -> x0;
x3 -> x1;
x2 -> x1;
x0 -> x1;
}
"""
    causal_model = CausalModel(data=frame, treatment="x0", outcome="x1", graph=graph_str)
    estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

    init_params = {
        "model_y": GradientBoostingRegressor(),
        "model_t": GradientBoostingClassifier(),
        "discrete_treatment": True,
    }
    dml_params = {"init_params": init_params, "fit_params": {}}

    # Must not raise KeyError for the categorical effect modifier x2
    result = causal_model.estimate_effect(
        estimand,
        method_name="backdoor.econml.dml.DML",
        method_params=dml_params,
    )
    assert result.value is not None