Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions dowhy/causal_estimators/linear_regression_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,22 @@ def _ate_and_se_for_treatment(self, treatment_index: int):
:returns: Tuple of (ate_unscaled, se_unscaled).
"""
n_treatments = len(self._target_estimand.treatment_variable)
n_common_causes = len(self._observed_common_causes_names)
n_effect_modifiers = len(self._effect_modifier_names)

em_means = np.asarray(self._effect_modifiers.mean(axis=0))
# Use actual encoded column counts, not variable name counts.
# Categorical variables are one-hot encoded and expand into multiple columns,
# so len(names) underestimates the true column count.
n_common_causes = self._observed_common_causes.shape[1] if self._observed_common_causes is not None else 0
em_means = self._effect_modifiers.mean(axis=0).to_numpy()
n_effect_modifiers = len(em_means)

params = self.model.params.to_numpy()
cov = self.model.cov_params().to_numpy()

n_params = len(params)
assert n_params == 1 + n_treatments + n_common_causes + n_treatments * n_effect_modifiers, (
f"Model has {n_params} params but expected "
f"{1 + n_treatments + n_common_causes + n_treatments * n_effect_modifiers}. "
"Column ordering assumption in _ate_and_se_for_treatment may be broken."
)
c = np.zeros(n_params)
# Direct treatment coefficient (offset by 1 for the intercept)
c[1 + treatment_index] = 1.0
Expand All @@ -144,10 +151,6 @@ def _ate_and_se_for_treatment(self, treatment_index: int):

def _estimate_confidence_intervals(self, confidence_level, method=None):
if self._effect_modifier_names:
# Use the Delta method to compute asymptotic confidence intervals for the
# ATE when effect modifiers are present. The ATE is a linear combination
# of the OLS coefficients: ATE = b_T + b_{TX_1}*E[X_1] + …
# Reference: Gelman & Hill, ARM Book, Chapter 9
n_treatments = len(self._target_estimand.treatment_variable)
scale = self._treatment_value - self._control_value
t_score = scipy.stats.t.ppf((1.0 + confidence_level) / 2.0, df=self.model.df_resid)
Expand All @@ -169,7 +172,6 @@ def _estimate_confidence_intervals(self, confidence_level, method=None):

def _estimate_std_error(self, method=None):
if self._effect_modifier_names:
# Delta method: SE(scale * ATE) = |scale| * sqrt(c' Σ c)
scale = self._treatment_value - self._control_value
ses = np.array(
[
Expand Down
114 changes: 114 additions & 0 deletions tests/causal_estimators/test_linear_regression_estimator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pandas as pd
from pytest import mark

import dowhy.datasets
Expand Down Expand Up @@ -297,3 +298,116 @@ def test_ci_consistent_with_no_effect_modifier(self):
assert ci is not None
lower, upper = ci[0]
assert lower < upper

# Causal graph shared by the tests below (GML format):
#   W -> T, W -> Y   W is a common cause (confounder) of treatment and outcome
#   T -> Y           the treatment effect under test
#   X -> Y           X affects only the outcome; the tests use it as an effect modifier
GML_GRAPH = """
graph [
directed 1
node [ id "W" label "W" ]
node [ id "T" label "T" ]
node [ id "X" label "X" ]
node [ id "Y" label "Y" ]
edge [ source "W" target "T" ]
edge [ source "W" target "Y" ]
edge [ source "T" target "Y" ]
edge [ source "X" target "Y" ]
]
"""

def _make_estimand(self, df):
    """Identify the backdoor estimand for T -> Y on the class's test graph.

    :param df: DataFrame whose columns are treated as the observed nodes.
    :returns: an identified estimand with the identifier method set to "backdoor".
    """
    causal_graph = build_graph_from_str(self.GML_GRAPH)
    identified = identify_effect_auto(
        causal_graph,
        observed_nodes=list(df.columns),
        action_nodes=["T"],
        outcome_nodes=["Y"],
        estimand_type=EstimandType.NONPARAMETRIC_ATE,
    )
    identified.set_identifier_method("backdoor")
    return identified

def _make_continuous_dataset(self, n=3000, seed=0):
rng = np.random.default_rng(seed)
W = rng.normal(0, 1, size=n)
X = rng.normal(0, 1, size=n)
T = 0.5 * W + rng.normal(0, 1, size=n)
Y = 5.0 * T + 3.0 * T * X + 2.0 * W + rng.normal(0, 1, size=n)
return pd.DataFrame({"T": T, "W": W, "X": X, "Y": Y})

def _make_categorical_dataset(self, n=3000, seed=0):
"""Common cause W is categorical with 3 levels → 2 encoded columns after one-hot encoding."""
rng = np.random.default_rng(seed)
W = rng.choice(["a", "b", "c"], size=n)
W_effect = np.where(W == "a", 0.0, np.where(W == "b", 2.0, 4.0))
X = rng.normal(0, 1, size=n)
T = 0.3 * W_effect + rng.normal(0, 1, size=n)
Y = 5.0 * T + 3.0 * T * X + W_effect + rng.normal(0, 1, size=n)
return pd.DataFrame({"T": T, "W": pd.Categorical(W), "X": X, "Y": Y})

def _fit_and_get_ci(self, df):
    """Fit a LinearRegressionEstimator on *df* with X as an effect modifier.

    Uses a 95% confidence level and a unit treatment contrast (1 vs 0).

    :param df: simulated dataset with columns T, W, X, Y.
    :returns: tuple of (fitted estimator, effect estimate with CIs).
    """
    target_estimand = self._make_estimand(df)
    lr_estimator = LinearRegressionEstimator(
        identified_estimand=target_estimand,
        confidence_intervals=True,
        confidence_level=0.95,
    )
    lr_estimator.fit(df, effect_modifier_names=["X"])
    result = lr_estimator.estimate_effect(
        df,
        treatment_value=1,
        control_value=0,
        confidence_intervals=True,
    )
    return lr_estimator, result

def test_ci_no_error_continuous_common_cause(self):
    """CI computation does not raise with a continuous common cause and effect modifier."""
    data = self._make_continuous_dataset()
    _, result = self._fit_and_get_ci(data)
    intervals = result.get_confidence_intervals()
    assert intervals is not None
    # One treatment → a single (lower, upper) row.
    assert intervals.shape == (1, 2)
    lower_bound, upper_bound = intervals[0]
    assert lower_bound < upper_bound, "CI lower must be less than upper"

def test_ci_no_error_categorical_common_cause(self):
    """CI computation does not raise when the common cause is categorical (one-hot encoded)."""
    data = self._make_categorical_dataset()
    _, result = self._fit_and_get_ci(data)
    intervals = result.get_confidence_intervals()
    assert intervals is not None
    # One treatment → a single (lower, upper) row.
    assert intervals.shape == (1, 2)
    lower_bound, upper_bound = intervals[0]
    assert lower_bound < upper_bound, "CI lower must be less than upper"

def test_ci_uses_actual_encoded_column_count_not_name_count(self):
    """Regression test: interaction_start must use shape[1] not len(names).

    With a 3-level categorical common cause, one-hot encoding (drop_first=True)
    produces 2 columns from 1 variable name. The buggy code used len(names)=1,
    making interaction_start point at the wrong coefficient.
    This test confirms the fix uses the actual encoded column count.
    """
    data = self._make_categorical_dataset()
    est, _ = self._fit_and_get_ci(data)

    name_count = len(est._observed_common_causes_names)
    encoded_count = est._observed_common_causes.shape[1]

    # The categorical W must expand from 1 name to >1 encoded column,
    # otherwise this test cannot distinguish the fixed from the buggy indexing.
    assert encoded_count > name_count, (
        "Expected categorical variable to expand beyond 1 column, "
        "but got n_cols == n_names. Check dataset has categorical variable."
    )

    # The assert inside _ate_and_se_for_treatment will catch wrong indexing;
    # if it passes, the fixed column count is being used correctly.
    # Here we independently verify the parameter accounting:
    #   intercept + treatments + encoded common causes + treatment*modifier interactions.
    treatment_count = 1
    modifier_means = est._effect_modifiers.mean(axis=0).to_numpy()
    modifier_count = len(modifier_means)
    expected_param_count = 1 + treatment_count + encoded_count + treatment_count * modifier_count
    assert len(est.model.params) == expected_param_count

def test_ci_contains_estimate(self):
    """The CI should be centered around the ATE estimate (not population truth)."""
    data = self._make_categorical_dataset()
    _, result = self._fit_and_get_ci(data)
    lo, hi = result.get_confidence_intervals()[0]
    assert lo <= result.value <= hi, (
        f"CI [{lo:.4f}, {hi:.4f}] does not contain estimate {result.value:.4f}"
    )