diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29..a90a995 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,5 @@
+- bump: minor
+ changes:
+ added:
+ - Error bars and grid lines to visualizations.
+ - Notebook benchmarking models on additional datasets.
diff --git a/docs/imputation-benchmarking/cross-validation.md b/docs/imputation-benchmarking/cross-validation.md
index 134772d..5e440e8 100644
--- a/docs/imputation-benchmarking/cross-validation.md
+++ b/docs/imputation-benchmarking/cross-validation.md
@@ -44,20 +44,28 @@ Returns a dictionary containing separate results for each metric type:
```python
{
"quantile_loss": {
- "results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles
+ "results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (mean across folds)
+ "results_std": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (std across folds)
"mean_train": float,
"mean_test": float,
- "variables": List[str] # numerical variables evaluated
+ "std_train": float,
+ "std_test": float,
+ "variables": List[str] # numerical variables evaluated
},
"log_loss": {
- "results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles
+ "results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles
+ "results_std": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (std across folds)
"mean_train": float,
"mean_test": float,
- "variables": List[str] # categorical variables evaluated
+ "std_train": float,
+ "std_test": float,
+ "variables": List[str] # categorical variables evaluated
}
}
```
+The `results_std` DataFrame and `std_train`/`std_test` values provide the standard deviation of the loss across cross-validation folds, which can be used to visualize uncertainty via error bars.
+
If `tune_hyperparameters=True`, returns a tuple of `(results_dict, best_hyperparameters)`.
## Example usage
diff --git a/docs/imputation-benchmarking/visualizations.md b/docs/imputation-benchmarking/visualizations.md
index 86867ac..d6c9450 100644
--- a/docs/imputation-benchmarking/visualizations.md
+++ b/docs/imputation-benchmarking/visualizations.md
@@ -37,6 +37,7 @@ class MethodComparisonResults:
show_mean: bool = True,
figsize: Tuple[int, int] = (PLOT_CONFIG["width"], PLOT_CONFIG["height"]),
plot_type: str = "bar",
+ show_error_bars: bool = True,
) -> go.Figure
def summary(self, format: str = "wide") -> pd.DataFrame
@@ -53,6 +54,7 @@ class MethodComparisonResults:
| show_mean | bool | True | Show horizontal lines for mean loss |
| figsize | tuple | (width, height) | Figure dimensions in pixels |
| plot_type | str | "bar" | Plot type: "bar" for grouped bars, "stacked" for contribution analysis |
+| show_error_bars | bool | True | Show error bars representing standard deviation across CV folds |
The `"stacked"` plot type shows rank-based contribution scores, useful for understanding how each variable contributes to overall model performance.
@@ -134,6 +136,7 @@ class PerformanceResults:
title: Optional[str] = None,
save_path: Optional[str] = None,
figsize: Tuple[int, int] = (PLOT_CONFIG["width"], PLOT_CONFIG["height"]),
+ show_error_bars: bool = True,
) -> go.Figure
def summary(self) -> pd.DataFrame
@@ -146,6 +149,7 @@ class PerformanceResults:
| title | str | None | Custom plot title |
| save_path | str | None | Path to save the plot |
| figsize | tuple | (width, height) | Figure dimensions in pixels |
+| show_error_bars | bool | True | Show error bars representing standard deviation across CV folds |
For quantile loss, the plot shows train and test loss across quantiles as grouped bars. For log loss, the plot includes the loss bars and optionally confusion matrix and class probability distribution subplots. For combined metrics, both are shown in subplots.
diff --git a/microimpute/__init__.py b/microimpute/__init__.py
index 170ef61..2667c7e 100644
--- a/microimpute/__init__.py
+++ b/microimpute/__init__.py
@@ -72,6 +72,11 @@
except ImportError:
pass
+try:
+ from microimpute.models.mdn import MDN
+except ImportError:
+ pass
+
# Import visualization modules
from microimpute.visualizations import (
MethodComparisonResults,
diff --git a/microimpute/config.py b/microimpute/config.py
index bcd51b3..c2b0eb7 100644
--- a/microimpute/config.py
+++ b/microimpute/config.py
@@ -84,5 +84,28 @@
PLOT_CONFIG: Dict[str, Any] = {
"width": 750,
"height": 600,
- "colors": {},
+ # Plotly Safe palette - colorblind-friendly
+ "color_palette": [
+ "#88CCEE", # Cyan
+ "#CC6677", # Rose
+ "#DDCC77", # Sand
+ "#117733", # Green
+ "#332288", # Indigo
+ "#AA4499", # Purple
+ "#44AA99", # Teal
+ "#999933", # Olive
+ "#882255", # Wine
+ "#661100", # Brown
+ ],
+ # Background colors (same for both)
+ "plot_bgcolor": "#FAFAFA",
+ "paper_bgcolor": "#FAFAFA",
+ # Grid styling (horizontal only)
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "showgrid_x": False,
+ "showgrid_y": True,
+ # Axis line styling
+ "linecolor": "#CCCCCC",
+ "showline": True,
}
diff --git a/microimpute/evaluations/cross_validation.py b/microimpute/evaluations/cross_validation.py
index 371ca1d..664adb8 100644
--- a/microimpute/evaluations/cross_validation.py
+++ b/microimpute/evaluations/cross_validation.py
@@ -494,15 +494,21 @@ def cross_validate_model(
Dictionary containing separate results for quantile_loss and log_loss:
{
"quantile_loss": {
- "results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles
+ "results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (mean across folds)
+ "results_std": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (std across folds)
"mean_train": float,
"mean_test": float,
+ "std_train": float, # std of mean loss across folds
+ "std_test": float, # std of mean loss across folds
"variables": List[str]
},
"log_loss": {
"results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (constant values)
+ "results_std": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (std across folds)
"mean_train": float,
"mean_test": float,
+ "std_train": float,
+ "std_test": float,
"variables": List[str]
}
}
@@ -676,26 +682,54 @@ def cross_validate_model(
index=["train", "test"],
)
- # Calculate means
+ # Create std DataFrame for error bars
+ std_df = pd.DataFrame(
+ [
+ {
+ q: np.std(values) if len(values) > 1 else 0.0
+ for q, values in metric_results[metric_type][
+ "train"
+ ].items()
+ },
+ {
+ q: np.std(values) if len(values) > 1 else 0.0
+ for q, values in metric_results[metric_type][
+ "test"
+ ].items()
+ },
+ ],
+ index=["train", "test"],
+ )
+
+ # Calculate means and stds across all quantiles
mean_test = combined_df.loc["test"].mean()
mean_train = combined_df.loc["train"].mean()
+ std_test = std_df.loc["test"].mean()
+ std_train = std_df.loc["train"].mean()
final_results[metric_type] = {
"results": combined_df, # Single DataFrame with train/test rows
+ "results_std": std_df, # Std across folds for each quantile
"mean_train": mean_train,
"mean_test": mean_test,
+ "std_train": std_train,
+ "std_test": std_test,
"variables": metric_results[metric_type]["variables"],
}
log.info(
- f"{metric_type} - Mean Train: {mean_train:.6f}, Mean Test: {mean_test:.6f}"
+ f"{metric_type} - Mean Train: {mean_train:.6f} (±{std_train:.6f}), "
+ f"Mean Test: {mean_test:.6f} (±{std_test:.6f})"
)
else:
# No variables use this metric
final_results[metric_type] = {
"results": pd.DataFrame(), # Empty DataFrame
+ "results_std": pd.DataFrame(), # Empty DataFrame
"mean_train": np.nan,
"mean_test": np.nan,
+ "std_train": np.nan,
+ "std_test": np.nan,
"variables": [],
}
diff --git a/microimpute/models/mdn.py b/microimpute/models/mdn.py
index 7558e7f..096e18c 100644
--- a/microimpute/models/mdn.py
+++ b/microimpute/models/mdn.py
@@ -291,6 +291,9 @@ def fit(
verbose=False,
suppress_lightning_logger=True,
)
+ # Disable Lightning's default CSVLogger to avoid
+ # "dict contains fields not in fieldnames" errors
+ self.model.logger = False
self.model.fit(train=train_data)
@@ -477,6 +480,9 @@ def fit(
verbose=False,
suppress_lightning_logger=True,
)
+ # Disable Lightning's default CSVLogger to avoid
+ # "dict contains fields not in fieldnames" errors
+ self.model.logger = False
self.model.fit(train=train_data)
diff --git a/microimpute/models/quantreg.py b/microimpute/models/quantreg.py
index 1d701e9..2f2c686 100644
--- a/microimpute/models/quantreg.py
+++ b/microimpute/models/quantreg.py
@@ -32,6 +32,7 @@ def __init__(
quantiles_specified: bool = False,
boolean_targets: Optional[Dict[str, Dict]] = None,
constant_targets: Optional[Dict[str, Dict]] = None,
+ dummy_processor: Optional[Any] = None,
) -> None:
"""Initialize the QuantReg results.
@@ -46,6 +47,7 @@ def __init__(
names before dummy encoding.
quantiles_specified: Whether quantiles were explicitly specified during fit.
boolean_targets: Dictionary of boolean target info for conversion back to bool.
+ dummy_processor: Processor for handling dummy encoding in test data.
"""
super().__init__(
predictors,
@@ -59,6 +61,7 @@ def __init__(
self.quantiles_specified = quantiles_specified
self.boolean_targets = boolean_targets or {}
self.constant_targets = constant_targets or {}
+ self.dummy_processor = dummy_processor
@validate_call(config=VALIDATE_CONFIG)
def _predict(
@@ -414,6 +417,7 @@ def _fit(
quantiles_specified=(quantiles is not None),
boolean_targets=boolean_targets,
constant_targets=constant_targets,
+ dummy_processor=getattr(self, "dummy_processor", None),
)
except Exception as e:
self.logger.error(f"Error fitting QuantReg model: {str(e)}")
diff --git a/microimpute/utils/dashboard_formatter.py b/microimpute/utils/dashboard_formatter.py
index c8847d4..fe8dbb6 100644
--- a/microimpute/utils/dashboard_formatter.py
+++ b/microimpute/utils/dashboard_formatter.py
@@ -179,6 +179,7 @@ def _format_histogram_rows(
"quantile": "N/A",
"metric_name": "categorical_distribution",
"metric_value": None, # Not used for histograms
+ "metric_std": None,
"split": "full",
"additional_info": json.dumps(
{
@@ -211,6 +212,7 @@ def _format_histogram_rows(
"quantile": "N/A",
"metric_name": "histogram_distribution",
"metric_value": None, # Not used for histograms
+ "metric_std": None,
"split": "full",
"additional_info": json.dumps(
{
@@ -375,6 +377,7 @@ def format_csv(
if metric_type in cv_result:
data = cv_result[metric_type]
results_df = data.get("results")
+ results_std_df = data.get("results_std")
variables = data.get("variables", [])
if results_df is not None:
@@ -382,7 +385,18 @@ def format_csv(
for split in ["train", "test"]:
if split in results_df.index:
for quantile in results_df.columns:
- # Add mean_all row for this quantile
+ # Get std value if available
+ std_value = None
+ if (
+ results_std_df is not None
+ and split in results_std_df.index
+ ):
+ std_value = float(
+ results_std_df.loc[
+ split, quantile
+ ]
+ )
+
rows.append(
{
"type": "benchmark_loss",
@@ -393,6 +407,7 @@ def format_csv(
"metric_value": results_df.loc[
split, quantile
],
+ "metric_std": std_value,
"split": split,
"additional_info": json.dumps(
{
@@ -406,6 +421,11 @@ def format_csv(
# Add mean across all quantiles
if "mean_train" in data:
+ std_train = (
+ float(data["std_train"])
+ if "std_train" in data
+ else None
+ )
rows.append(
{
"type": "benchmark_loss",
@@ -414,6 +434,7 @@ def format_csv(
"quantile": "mean",
"metric_name": metric_type,
"metric_value": data["mean_train"],
+ "metric_std": std_train,
"split": "train",
"additional_info": json.dumps(
{"n_variables": len(variables)}
@@ -422,6 +443,11 @@ def format_csv(
)
if "mean_test" in data:
+ std_test = (
+ float(data["std_test"])
+ if "std_test" in data
+ else None
+ )
rows.append(
{
"type": "benchmark_loss",
@@ -430,6 +456,7 @@ def format_csv(
"quantile": "mean",
"metric_name": metric_type,
"metric_value": data["mean_test"],
+ "metric_std": std_test,
"split": "test",
"additional_info": json.dumps(
{"n_variables": len(variables)}
@@ -461,6 +488,13 @@ def format_csv(
else:
quantile = float(percentile) if pd.notna(percentile) else "N/A"
+ # Get std if available in the DataFrame
+ std_value = (
+ float(row["Std"])
+ if "Std" in row.index and pd.notna(row["Std"])
+ else None
+ )
+
rows.append(
{
"type": "benchmark_loss",
@@ -469,6 +503,7 @@ def format_csv(
"quantile": quantile,
"metric_name": row["Metric"],
"metric_value": row["Loss"],
+ "metric_std": std_value,
"split": "test", # Comparison metrics are typically on test set
"additional_info": "{}",
}
@@ -490,6 +525,7 @@ def format_csv(
.lower()
.replace(" ", "_"), # e.g., "wasserstein_distance"
"metric_value": row["Distance"],
+ "metric_std": None,
"split": "full",
"additional_info": "{}",
}
@@ -514,6 +550,7 @@ def format_csv(
"quantile": "N/A",
"metric_name": corr_type,
"metric_value": corr_matrix.iloc[i, j],
+ "metric_std": None,
"split": "full",
"additional_info": json.dumps(
{"predictor2": pred2}
@@ -534,6 +571,7 @@ def format_csv(
"quantile": "N/A",
"metric_name": "mutual_info",
"metric_value": mi_df.loc[predictor, target],
+ "metric_std": None,
"split": "full",
"additional_info": json.dumps({"target": target}),
}
@@ -556,6 +594,7 @@ def format_csv(
"quantile": "N/A",
"metric_name": "relative_impact",
"metric_value": row["relative_impact"],
+ "metric_std": None,
"split": "test",
"additional_info": json.dumps(
{"removed_predictor": predictor}
@@ -572,6 +611,7 @@ def format_csv(
"quantile": "N/A",
"metric_name": "loss_increase",
"metric_value": row["loss_increase"],
+ "metric_std": None,
"split": "test",
"additional_info": json.dumps(
{"removed_predictor": predictor}
@@ -603,6 +643,7 @@ def format_csv(
"quantile": "N/A",
"metric_name": "cumulative_improvement",
"metric_value": row["cumulative_improvement"],
+ "metric_std": None,
"split": "test",
"additional_info": json.dumps(
{
@@ -632,6 +673,7 @@ def format_csv(
"quantile": "N/A",
"metric_name": "marginal_improvement",
"metric_value": row["marginal_improvement"],
+ "metric_std": None,
"split": "test",
"additional_info": json.dumps(
{
@@ -693,6 +735,7 @@ def format_csv(
"quantile",
"metric_name",
"metric_value",
+ "metric_std",
"split",
"additional_info",
]
diff --git a/microimpute/visualizations/comparison_plots.py b/microimpute/visualizations/comparison_plots.py
index 46dbeef..4e1b85a 100644
--- a/microimpute/visualizations/comparison_plots.py
+++ b/microimpute/visualizations/comparison_plots.py
@@ -192,17 +192,29 @@ def _process_dual_metrics_input(
# Get test results (single row)
if "test" in ql_data["results"].index:
test_results = ql_data["results"].loc["test"]
+ # Get std results if available
+ std_results = None
+ if (
+ ql_data.get("results_std") is not None
+ and "test" in ql_data["results_std"].index
+ ):
+ std_results = ql_data["results_std"].loc["test"]
+
for quantile in test_results.index:
for var in ql_data.get("variables", ["y"]):
- long_format_data.append(
- {
- "Method": method_name,
- "Imputed Variable": var,
- "Percentile": quantile,
- "Loss": test_results[quantile],
- "Metric": "quantile_loss",
- }
- )
+ row = {
+ "Method": method_name,
+ "Imputed Variable": var,
+ "Percentile": quantile,
+ "Loss": test_results[quantile],
+ "Metric": "quantile_loss",
+ "Std": (
+ std_results[quantile]
+ if std_results is not None
+ else np.nan
+ ),
+ }
+ long_format_data.append(row)
# Add mean loss
if "mean_test" in ql_data:
@@ -214,6 +226,7 @@ def _process_dual_metrics_input(
"Percentile": "mean_quantile_loss",
"Loss": ql_data["mean_test"],
"Metric": "quantile_loss",
+ "Std": ql_data.get("std_test", np.nan),
}
)
@@ -230,6 +243,7 @@ def _process_dual_metrics_input(
# Log loss is constant across quantiles
if "test" in ll_data["results"].index:
test_loss = ll_data["results"].loc["test"].mean()
+ test_std = ll_data.get("std_test", np.nan)
for var in ll_data.get("variables", []):
long_format_data.append(
{
@@ -238,6 +252,7 @@ def _process_dual_metrics_input(
"Percentile": "log_loss",
"Loss": test_loss,
"Metric": "log_loss",
+ "Std": test_std,
}
)
@@ -251,6 +266,7 @@ def _process_dual_metrics_input(
"Percentile": "mean_log_loss",
"Loss": ll_data["mean_test"],
"Metric": "log_loss",
+ "Std": ll_data.get("std_test", np.nan),
}
)
@@ -266,6 +282,7 @@ def plot(
PLOT_CONFIG["height"],
),
plot_type: str = "bar",
+ show_error_bars: bool = True,
) -> go.Figure:
"""Plot a comparison of performance across different imputation methods.
@@ -275,6 +292,7 @@ def plot(
show_mean: Whether to show horizontal lines for mean loss values.
figsize: Figure size as (width, height) in pixels.
plot_type: Type of plot: 'bar' (default) or 'stacked' (for contribution analysis)
+ show_error_bars: Whether to show error bars for variation across quantiles.
Returns:
Plotly figure object
@@ -290,12 +308,14 @@ def plot(
if plot_type == "stacked":
return self._plot_stacked_contribution(title, save_path, figsize)
elif self.metric == "log_loss":
- return self._plot_log_loss_comparison(title, save_path, figsize)
+ return self._plot_log_loss_comparison(
+ title, save_path, figsize, show_error_bars
+ )
elif self.metric == "combined":
return self._plot_combined_metrics(title, save_path, figsize)
else:
return self._plot_quantile_loss_comparison(
- title, save_path, show_mean, figsize
+ title, save_path, show_mean, figsize, show_error_bars
)
def _plot_quantile_loss_comparison(
@@ -304,6 +324,7 @@ def _plot_quantile_loss_comparison(
save_path: Optional[str],
show_mean: bool,
figsize: Tuple[int, int],
+ show_error_bars: bool = True,
) -> go.Figure:
"""Plot quantile loss comparison across methods."""
@@ -354,6 +375,12 @@ def _plot_quantile_loss_comparison(
if title is None:
title = f"Test {self.metric_name} Across Quantiles for Different Imputation Methods"
+ # Use Std column for error bars if requested
+ error_y_col = None
+ if show_error_bars and "Std" in melted_df.columns:
+ melted_df["Std"] = melted_df["Std"].fillna(0)
+ error_y_col = "Std"
+
# Create the bar chart
logger.debug("Creating bar chart with plotly express")
fig = px.bar(
@@ -361,13 +388,14 @@ def _plot_quantile_loss_comparison(
x="Percentile",
y=self.metric_name,
color="Method",
- color_discrete_sequence=px.colors.qualitative.Plotly,
+ color_discrete_sequence=PLOT_CONFIG["color_palette"],
barmode="group",
title=title,
labels={
"Percentile": "Quantiles",
- self.metric_name: f"Test {self.metric_name}",
+ self.metric_name: f"{self.metric_name}",
},
+ error_y=error_y_col,
)
# Add horizontal lines for mean loss if requested
@@ -385,8 +413,8 @@ def _plot_quantile_loss_comparison(
x1=n_percentiles - 0.5,
y1=mean_loss,
line=dict(
- color=px.colors.qualitative.Plotly[
- i % len(px.colors.qualitative.Plotly)
+ color=PLOT_CONFIG["color_palette"][
+ i % len(PLOT_CONFIG["color_palette"])
],
width=2,
dash="dot",
@@ -398,15 +426,29 @@ def _plot_quantile_loss_comparison(
title_font_size=14,
xaxis_title_font_size=12,
yaxis_title_font_size=12,
- paper_bgcolor="#F0F0F0",
- plot_bgcolor="#F0F0F0",
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
legend_title="Method",
height=figsize[1],
width=figsize[0],
)
- fig.update_xaxes(showgrid=False, zeroline=False)
- fig.update_yaxes(showgrid=False, zeroline=False)
+ fig.update_xaxes(
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
+ fig.update_yaxes(
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
# Save or show the plot
if save_path:
@@ -426,6 +468,7 @@ def _plot_log_loss_comparison(
title: Optional[str],
save_path: Optional[str],
figsize: Tuple[int, int],
+ show_error_bars: bool = True,
) -> go.Figure:
"""Plot log loss comparison across methods."""
try:
@@ -439,38 +482,55 @@ def _plot_log_loss_comparison(
logger.warning("No log loss data available")
return go.Figure()
- # Get mean log loss per method
- method_means = (
- log_loss_df.groupby("Method")["Loss"].mean().reset_index()
+ # Get mean log loss per method (Loss already contains the mean)
+ method_stats = log_loss_df.groupby("Method", as_index=False).agg(
+ {"Loss": "mean", "Std": "mean"}
)
+ method_stats["Std"] = method_stats["Std"].fillna(0)
if title is None:
- title = f"Log Loss Comparison Across Methods"
+ title = "Log Loss Comparison Across Methods"
# Create bar chart
+ error_y_col = "Std" if show_error_bars else None
fig = px.bar(
- method_means,
+ method_stats,
x="Method",
y="Loss",
color="Method",
title=title,
labels={"Loss": "Log Loss"},
- color_discrete_sequence=px.colors.qualitative.Plotly,
+ color_discrete_sequence=PLOT_CONFIG["color_palette"],
+ error_y=error_y_col,
)
fig.update_layout(
title_font_size=14,
xaxis_title_font_size=12,
yaxis_title_font_size=12,
- paper_bgcolor="#F0F0F0",
- plot_bgcolor="#F0F0F0",
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
height=figsize[1],
width=figsize[0],
showlegend=False,
)
- fig.update_xaxes(showgrid=False, zeroline=False)
- fig.update_yaxes(showgrid=False, zeroline=False)
+ fig.update_xaxes(
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
+ fig.update_yaxes(
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
if save_path:
_save_figure(fig, save_path)
@@ -543,8 +603,8 @@ def _plot_combined_metrics(
y=method_data["Loss"],
name=method,
legendgroup=method,
- marker_color=px.colors.qualitative.Plotly[
- i % len(px.colors.qualitative.Plotly)
+ marker_color=PLOT_CONFIG["color_palette"][
+ i % len(PLOT_CONFIG["color_palette"])
],
),
row=1,
@@ -564,8 +624,8 @@ def _plot_combined_metrics(
x=list(method_means.index),
y=list(method_means.values),
marker_color=[
- px.colors.qualitative.Plotly[
- i % len(px.colors.qualitative.Plotly)
+ PLOT_CONFIG["color_palette"][
+ i % len(PLOT_CONFIG["color_palette"])
]
for i in range(len(method_means))
],
@@ -583,18 +643,46 @@ def _plot_combined_metrics(
barmode="group",
height=figsize[1] * 1.5,
width=figsize[0],
- paper_bgcolor="#F0F0F0",
- plot_bgcolor="#F0F0F0",
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
showlegend=True,
)
fig.update_xaxes(
- title_text="Quantile", row=1, col=1, showgrid=False
+ title_text="Quantile",
+ row=1,
+ col=1,
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ )
+ fig.update_xaxes(
+ title_text="Method",
+ row=2,
+ col=1,
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
)
- fig.update_xaxes(title_text="Method", row=2, col=1, showgrid=False)
- fig.update_yaxes(title_text="Loss", row=1, col=1, showgrid=False)
fig.update_yaxes(
- title_text="Log loss", row=2, col=1, showgrid=False
+ title_text="Loss",
+ row=1,
+ col=1,
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ )
+ fig.update_yaxes(
+ title_text="Log loss",
+ row=2,
+ col=1,
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
)
if save_path:
@@ -726,13 +814,27 @@ def _plot_stacked_contribution(
yaxis_title="Total rank score",
height=figsize[1],
width=figsize[0],
- paper_bgcolor="#F0F0F0",
- plot_bgcolor="#F0F0F0",
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
legend_title="Variable (Metric)",
)
- fig.update_xaxes(showgrid=False, zeroline=False)
- fig.update_yaxes(showgrid=False, zeroline=False)
+ fig.update_xaxes(
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
+ fig.update_yaxes(
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
if save_path:
_save_figure(fig, save_path)
@@ -819,7 +921,6 @@ def __repr__(self) -> str:
def method_comparison_results(
data: Union[pd.DataFrame, Dict[str, Dict[str, Dict]]],
- metric_name: Optional[str] = None,
metric: str = "quantile_loss",
data_format: str = "wide",
) -> MethodComparisonResults:
diff --git a/microimpute/visualizations/performance_plots.py b/microimpute/visualizations/performance_plots.py
index eda2a8f..8e9de4a 100644
--- a/microimpute/visualizations/performance_plots.py
+++ b/microimpute/visualizations/performance_plots.py
@@ -98,6 +98,7 @@ def plot(
PLOT_CONFIG["width"],
PLOT_CONFIG["height"],
),
+ show_error_bars: bool = True,
) -> go.Figure:
"""Plot the performance based on the specified metric.
@@ -105,6 +106,7 @@ def plot(
title: Custom title for the plot. If None, a default title is used.
save_path: Path to save the plot. If None, the plot is displayed.
figsize: Figure size as (width, height) in pixels.
+ show_error_bars: Whether to show error bars for variation across CV folds.
Returns:
Plotly figure object
@@ -115,11 +117,17 @@ def plot(
logger.debug(f"Creating performance plot for metric: {self.metric}")
if self.metric == "quantile_loss":
- return self._plot_quantile_loss(title, save_path, figsize)
+ return self._plot_quantile_loss(
+ title, save_path, figsize, show_error_bars
+ )
elif self.metric == "log_loss":
- return self._plot_log_loss(title, save_path, figsize)
+ return self._plot_log_loss(
+ title, save_path, figsize, show_error_bars
+ )
elif self.metric == "combined":
- return self._plot_combined(title, save_path, figsize)
+ return self._plot_combined(
+ title, save_path, figsize, show_error_bars
+ )
else:
raise ValueError(f"Invalid metric: {self.metric}")
@@ -128,13 +136,14 @@ def _plot_quantile_loss(
title: Optional[str],
save_path: Optional[str],
figsize: Tuple[int, int],
+ show_error_bars: bool = True,
) -> go.Figure:
"""Plot quantile loss performance across quantiles."""
if not self.has_quantile_loss:
logger.warning("No quantile loss data available")
return go.Figure()
- palette = px.colors.qualitative.Plotly
+ palette = PLOT_CONFIG["color_palette"]
train_color = palette[2]
test_color = palette[3]
@@ -144,25 +153,48 @@ def _plot_quantile_loss(
# Get the DataFrame for quantile loss
ql_data = self.results["quantile_loss"]["results"]
+ # Get std data for error bars if available
+ ql_std = self.results["quantile_loss"].get("results_std")
+
# Add bars for training data
if "train" in ql_data.index:
+ error_y_dict = None
+ if (
+ show_error_bars
+ and ql_std is not None
+ and "train" in ql_std.index
+ ):
+ error_y_dict = dict(
+ type="data", array=ql_std.loc["train"].values
+ )
fig.add_trace(
go.Bar(
x=[str(x) for x in ql_data.columns],
y=ql_data.loc["train"].values,
name="Train",
marker_color=train_color,
+ error_y=error_y_dict,
)
)
# Add bars for test data
if "test" in ql_data.index:
+ error_y_dict = None
+ if (
+ show_error_bars
+ and ql_std is not None
+ and "test" in ql_std.index
+ ):
+ error_y_dict = dict(
+ type="data", array=ql_std.loc["test"].values
+ )
fig.add_trace(
go.Bar(
x=[str(x) for x in ql_data.columns],
y=ql_data.loc["test"].values,
name="Test",
marker_color=test_color,
+ error_y=error_y_dict,
)
)
@@ -176,14 +208,28 @@ def _plot_quantile_loss(
barmode="group",
width=figsize[0],
height=figsize[1],
- paper_bgcolor="#F0F0F0",
- plot_bgcolor="#F0F0F0",
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99),
margin=dict(l=50, r=50, t=80, b=50),
)
- fig.update_xaxes(showgrid=False, zeroline=False)
- fig.update_yaxes(showgrid=False, zeroline=False)
+ fig.update_xaxes(
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
+ fig.update_yaxes(
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
if save_path:
_save_figure(fig, save_path)
@@ -200,6 +246,7 @@ def _plot_log_loss(
title: Optional[str],
save_path: Optional[str],
figsize: Tuple[int, int],
+ show_error_bars: bool = True,
) -> go.Figure:
"""Plot log loss performance and additional categorical metrics."""
if not self.has_log_loss:
@@ -235,16 +282,23 @@ def _plot_log_loss(
)
# Plot 1: Log Loss bars
- palette = px.colors.qualitative.Plotly
+ palette = PLOT_CONFIG["color_palette"]
train_color = palette[2]
test_color = palette[3]
# Get log loss values from the results DataFrame
ll_results_df = ll_data["results"]
+ # Get std values for error bars
+ ll_std = ll_data.get("std_train")
+ ll_std_test = ll_data.get("std_test")
+
if "train" in ll_results_df.index:
# Log loss should be constant across quantiles, so take the mean
train_loss = ll_results_df.loc["train"].mean()
+ error_y_dict = None
+ if show_error_bars and ll_std is not None:
+ error_y_dict = dict(type="data", array=[ll_std])
fig.add_trace(
go.Bar(
x=["Train"],
@@ -252,6 +306,7 @@ def _plot_log_loss(
name="Train",
marker_color=train_color,
showlegend=True,
+ error_y=error_y_dict,
),
row=1,
col=1,
@@ -259,6 +314,9 @@ def _plot_log_loss(
if "test" in ll_results_df.index:
test_loss = ll_results_df.loc["test"].mean()
+ error_y_dict = None
+ if show_error_bars and ll_std_test is not None:
+ error_y_dict = dict(type="data", array=[ll_std_test])
fig.add_trace(
go.Bar(
x=["Test"],
@@ -266,6 +324,7 @@ def _plot_log_loss(
name="Test",
marker_color=test_color,
showlegend=True,
+ error_y=error_y_dict,
),
row=1,
col=1,
@@ -336,11 +395,26 @@ def _plot_log_loss(
title=title,
height=figsize[1] * num_subplots * 0.7,
width=figsize[0],
- paper_bgcolor="#F0F0F0",
- plot_bgcolor="#F0F0F0",
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
showlegend=True,
)
+ fig.update_xaxes(
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ )
+ fig.update_yaxes(
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ )
+
if save_path:
_save_figure(fig, save_path)
@@ -351,6 +425,7 @@ def _plot_combined(
title: Optional[str],
save_path: Optional[str],
figsize: Tuple[int, int],
+ show_error_bars: bool = True,
) -> go.Figure:
"""Plot combined view of both metrics."""
if not self.has_quantile_loss and not self.has_log_loss:
@@ -375,7 +450,7 @@ def _plot_combined(
vertical_spacing=0.2,
)
- palette = px.colors.qualitative.Plotly
+ palette = PLOT_CONFIG["color_palette"]
train_color = palette[2]
test_color = palette[3]
current_row = 1
@@ -383,8 +458,18 @@ def _plot_combined(
# Add quantile loss plot
if self.has_quantile_loss:
ql_data = self.results["quantile_loss"]["results"]
+ ql_std = self.results["quantile_loss"].get("results_std")
if "train" in ql_data.index:
+ error_y_dict = None
+ if (
+ show_error_bars
+ and ql_std is not None
+ and "train" in ql_std.index
+ ):
+ error_y_dict = dict(
+ type="data", array=ql_std.loc["train"].values
+ )
fig.add_trace(
go.Bar(
x=[str(x) for x in ql_data.columns],
@@ -392,12 +477,22 @@ def _plot_combined(
name="QL Train",
marker_color=train_color,
legendgroup="train",
+ error_y=error_y_dict,
),
row=current_row,
col=1,
)
if "test" in ql_data.index:
+ error_y_dict = None
+ if (
+ show_error_bars
+ and ql_std is not None
+ and "test" in ql_std.index
+ ):
+ error_y_dict = dict(
+ type="data", array=ql_std.loc["test"].values
+ )
fig.add_trace(
go.Bar(
x=[str(x) for x in ql_data.columns],
@@ -405,6 +500,7 @@ def _plot_combined(
name="QL Test",
marker_color=test_color,
legendgroup="test",
+ error_y=error_y_dict,
),
row=current_row,
col=1,
@@ -417,9 +513,14 @@ def _plot_combined(
# Add log loss plot
if self.has_log_loss:
ll_data = self.results["log_loss"]["results"]
+ ll_std_train = self.results["log_loss"].get("std_train")
+ ll_std_test = self.results["log_loss"].get("std_test")
if "train" in ll_data.index:
train_loss = ll_data.loc["train"].mean()
+ error_y_dict = None
+ if show_error_bars and ll_std_train is not None:
+ error_y_dict = dict(type="data", array=[ll_std_train])
fig.add_trace(
go.Bar(
x=["Log loss"],
@@ -427,7 +528,8 @@ def _plot_combined(
name="Log loss train",
marker_color=train_color,
legendgroup="train",
- showlegend=self.has_quantile_loss == False,
+ showlegend=self.has_quantile_loss is False,
+ error_y=error_y_dict,
),
row=current_row,
col=1,
@@ -435,6 +537,9 @@ def _plot_combined(
if "test" in ll_data.index:
test_loss = ll_data.loc["test"].mean()
+ error_y_dict = None
+ if show_error_bars and ll_std_test is not None:
+ error_y_dict = dict(type="data", array=[ll_std_test])
fig.add_trace(
go.Bar(
x=["Log loss"],
@@ -442,7 +547,8 @@ def _plot_combined(
name="Log loss test",
marker_color=test_color,
legendgroup="test",
- showlegend=self.has_quantile_loss == False,
+ showlegend=self.has_quantile_loss is False,
+ error_y=error_y_dict,
),
row=current_row,
col=1,
@@ -458,13 +564,27 @@ def _plot_combined(
barmode="group",
height=figsize[1] * num_subplots * 0.6,
width=figsize[0],
- paper_bgcolor="#F0F0F0",
- plot_bgcolor="#F0F0F0",
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
showlegend=True,
)
- fig.update_xaxes(showgrid=False, zeroline=False)
- fig.update_yaxes(showgrid=False, zeroline=False)
+ fig.update_xaxes(
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
+ fig.update_yaxes(
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ gridwidth=PLOT_CONFIG["gridwidth"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ zeroline=False,
+ )
if save_path:
_save_figure(fig, save_path)
diff --git a/paper/benchmarking_datasets/benchmark_utils.py b/paper/benchmarking_datasets/benchmark_utils.py
new file mode 100644
index 0000000..eb4489b
--- /dev/null
+++ b/paper/benchmarking_datasets/benchmark_utils.py
@@ -0,0 +1,811 @@
+"""Benchmark utilities for CIA sensitivity analysis and cross-dataset comparison.
+
+This module provides functions for:
+1. CIA (Conditional Independence Assumption) sensitivity analysis via progressive
+ predictor exclusion
+2. Cross-dataset results visualization (summary tables and heatmaps)
+"""
+
+import logging
+from typing import Any, Dict, List, Optional, Type, Union
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from scipy.integrate import trapezoid
+from sklearn.feature_selection import mutual_info_regression
+from sklearn.model_selection import train_test_split
+from tqdm.auto import tqdm
+
+from microimpute.comparisons.metrics import quantile_loss
+from microimpute.config import (
+ PLOT_CONFIG,
+ RANDOM_STATE,
+ TRAIN_SIZE,
+)
+from microimpute.models import Imputer
+
+log = logging.getLogger(__name__)
+
+# Method colors from PLOT_CONFIG (Safe palette - colorblind-friendly)
+METHOD_COLORS = {
+ "QRF": "#88CCEE", # Cyan
+ "OLS": "#CC6677", # Rose
+ "QuantReg": "#DDCC77", # Sand
+ "Matching": "#117733", # Green
+ "MDN": "#332288", # Indigo
+}
+
+# Use a simple set of quantiles for CIA analysis to avoid QuantReg issues
+CIA_QUANTILES = [0.1, 0.3, 0.5, 0.7, 0.9]
+
+
+def progressive_predictor_exclusion(
+ data: pd.DataFrame,
+ predictors: List[str],
+ imputed_variables: List[str],
+ model_class: Type[Imputer],
+ ordering: str = "mutual_info",
+ weight_col: Optional[Union[str, np.ndarray, pd.Series]] = None,
+ quantiles: Optional[List[float]] = None,
+ train_size: float = TRAIN_SIZE,
+ random_state: int = RANDOM_STATE,
+) -> Dict[str, Any]:
+ """Progressively REMOVE predictors to measure CIA sensitivity.
+
+ This function assesses sensitivity to the Conditional Independence Assumption
+ by measuring how performance degrades when key predictors are unavailable
+ (simulating incomplete linking variables between surveys).
+
+ Args:
+ data: DataFrame containing the data.
+ predictors: List of predictor column names.
+ imputed_variables: List of variables to impute.
+ model_class: The Imputer class to use for evaluation.
+ ordering: How to order predictor removal:
+ - "mutual_info": Remove most informative first (default)
+ - "correlation": Remove highest correlated first
+ - "random": Remove in random order (control)
+ weight_col: Optional column name or array of sampling weights.
+ quantiles: List of quantiles for evaluation (default: [0.1, 0.3, 0.5, 0.7, 0.9]).
+ train_size: Proportion of data to use for training.
+ random_state: Random state for reproducibility.
+
+ Returns:
+ Dictionary containing:
+ - results_df: DataFrame with columns ['step', 'predictor_removed',
+ 'remaining_predictors', 'quantile_loss', 'normalized_loss']
+ - predictor_order: List of predictors in removal order
+ - baseline_loss: Performance with all predictors
+ - sensitivity_score: Area under degradation curve (higher = more sensitive)
+ """
+ if quantiles is None:
+ quantiles = CIA_QUANTILES
+
+ # Split data
+ train_data, test_data = train_test_split(
+ data, train_size=train_size, random_state=random_state
+ )
+
+ # Order predictors by importance
+ predictor_order = _order_predictors(
+ train_data, predictors, imputed_variables, ordering, random_state
+ )
+
+ # Compute baseline performance with all predictors
+ log.info("Computing baseline performance with all predictors")
+ baseline_loss = _evaluate_model(
+ train_data=train_data,
+ test_data=test_data,
+ predictors=predictors,
+ imputed_variables=imputed_variables,
+ model_class=model_class,
+ weight_col=weight_col,
+ quantiles=quantiles,
+ )
+
+ # Track results
+ results = []
+ current_predictors = predictors.copy()
+
+ # Add baseline (step 0, no predictors removed)
+ results.append(
+ {
+ "step": 0,
+ "predictor_removed": None,
+ "remaining_predictors": current_predictors.copy(),
+ "num_predictors": len(current_predictors),
+ "quantile_loss": baseline_loss,
+ "normalized_loss": 1.0,
+ }
+ )
+
+ # Progressively remove predictors
+ for step, pred_to_remove in enumerate(
+ tqdm(predictor_order, desc="Progressive exclusion"), start=1
+ ):
+ current_predictors = [
+ p for p in current_predictors if p != pred_to_remove
+ ]
+
+ if len(current_predictors) == 0:
+ # No predictors left - record as maximum degradation
+ results.append(
+ {
+ "step": step,
+ "predictor_removed": pred_to_remove,
+ "remaining_predictors": [],
+ "num_predictors": 0,
+ "quantile_loss": np.nan,
+ "normalized_loss": np.nan,
+ }
+ )
+ break
+
+ try:
+ loss = _evaluate_model(
+ train_data=train_data,
+ test_data=test_data,
+ predictors=current_predictors,
+ imputed_variables=imputed_variables,
+ model_class=model_class,
+ weight_col=weight_col,
+ quantiles=quantiles,
+ )
+
+ normalized_loss = (
+ loss / baseline_loss if baseline_loss > 0 else np.nan
+ )
+
+ results.append(
+ {
+ "step": step,
+ "predictor_removed": pred_to_remove,
+ "remaining_predictors": current_predictors.copy(),
+ "num_predictors": len(current_predictors),
+ "quantile_loss": loss,
+ "normalized_loss": normalized_loss,
+ }
+ )
+
+ except Exception as e:
+ log.warning(
+ f"Failed to evaluate after removing {pred_to_remove}: {e}"
+ )
+ results.append(
+ {
+ "step": step,
+ "predictor_removed": pred_to_remove,
+ "remaining_predictors": current_predictors.copy(),
+ "num_predictors": len(current_predictors),
+ "quantile_loss": np.nan,
+ "normalized_loss": np.nan,
+ }
+ )
+
+ results_df = pd.DataFrame(results)
+
+ # Compute sensitivity score (AUC of normalized loss curve)
+ # Higher = more sensitive to predictor removal
+ valid_results = results_df[results_df["normalized_loss"].notna()]
+ if len(valid_results) > 1:
+ # Use trapezoidal rule for AUC calculation
+ x = valid_results["step"].values
+ y = valid_results["normalized_loss"].values
+ # Normalize x to [0, 1] range
+ x_norm = x / x.max() if x.max() > 0 else x
+ sensitivity_score = trapezoid(y, x_norm)
+ else:
+ sensitivity_score = 1.0
+
+ return {
+ "results_df": results_df,
+ "predictor_order": predictor_order,
+ "baseline_loss": baseline_loss,
+ "sensitivity_score": sensitivity_score,
+ }
+
+
+def _order_predictors(
+ data: pd.DataFrame,
+ predictors: List[str],
+ imputed_variables: List[str],
+ ordering: str,
+ random_state: int,
+) -> List[str]:
+ """Order predictors by importance for removal.
+
+ Args:
+ data: Training data.
+ predictors: List of predictor names.
+ imputed_variables: Target variables.
+ ordering: Ordering method ("mutual_info", "correlation", "random").
+ random_state: Random state.
+
+ Returns:
+ List of predictors ordered by importance (most important first).
+ """
+ if ordering == "random":
+ rng = np.random.RandomState(random_state)
+ order = predictors.copy()
+ rng.shuffle(order)
+ return order
+
+ # Compute importance scores
+ importance_scores = {}
+
+ for pred in predictors:
+ scores = []
+ for target in imputed_variables:
+ # Get predictor values - encode categorical if needed
+ X_series = data[pred]
+ if X_series.dtype == "object" or str(X_series.dtype) == "category":
+ # Encode categorical predictor as numeric codes
+ X = pd.Categorical(X_series).codes.reshape(-1, 1)
+ else:
+ X = X_series.values.reshape(-1, 1)
+
+ y = data[target].values
+
+ # Handle missing values using pd.isna (works with all types)
+ mask = ~(pd.isna(X.flatten()) | pd.isna(y))
+ X_clean = X[mask].astype(float)
+ y_clean = y[mask].astype(float)
+
+ if len(X_clean) < 10:
+ continue
+
+ if ordering == "mutual_info":
+ mi = mutual_info_regression(
+ X_clean, y_clean, random_state=random_state
+ )[0]
+ scores.append(mi)
+ elif ordering == "correlation":
+ corr = np.abs(np.corrcoef(X_clean.flatten(), y_clean)[0, 1])
+ scores.append(corr if not np.isnan(corr) else 0)
+
+ importance_scores[pred] = np.mean(scores) if scores else 0
+
+ # Sort by importance (highest first - these will be removed first)
+ sorted_preds = sorted(
+ importance_scores.keys(),
+ key=lambda x: importance_scores[x],
+ reverse=True,
+ )
+
+ return sorted_preds
+
+
+def _evaluate_model(
+ train_data: pd.DataFrame,
+ test_data: pd.DataFrame,
+ predictors: List[str],
+ imputed_variables: List[str],
+ model_class: Type[Imputer],
+ weight_col: Optional[Union[str, np.ndarray, pd.Series]],
+ quantiles: List[float],
+) -> float:
+ """Train a model and evaluate its quantile loss.
+
+ Returns:
+ Mean quantile loss across all quantiles and variables.
+ """
+ model_name = model_class.__name__
+
+ # Initialize and fit the model
+ model = model_class()
+
+ # QuantReg needs quantiles at fit time
+ if model_name == "QuantReg":
+ fitted_model = model.fit(
+ X_train=train_data,
+ predictors=predictors,
+ imputed_variables=imputed_variables,
+ weight_col=weight_col,
+ quantiles=quantiles,
+ )
+ else:
+ fitted_model = model.fit(
+ X_train=train_data,
+ predictors=predictors,
+ imputed_variables=imputed_variables,
+ weight_col=weight_col,
+ )
+
+ # Get predictions
+ predictions = fitted_model.predict(test_data, quantiles)
+
+ # Compute quantile loss using the existing function from metrics
+ losses = []
+ for q in quantiles:
+ if q not in predictions:
+ continue
+ for var in imputed_variables:
+ if var not in predictions[q].columns:
+ continue
+
+ true_values = test_data[var].values
+ pred_values = predictions[q][var].values
+
+ # Use existing quantile_loss function
+ loss_array = quantile_loss(q, true_values, pred_values)
+ losses.append(np.mean(loss_array))
+
+ return np.mean(losses) if losses else np.nan
+
+
+def plot_cia_degradation_curves(
+ results: Dict[str, Dict[str, Any]],
+ title: Optional[str] = None,
+ save_path: Optional[str] = None,
+ figsize: tuple = (PLOT_CONFIG["width"], PLOT_CONFIG["height"]),
+ use_absolute_loss: bool = True,
+) -> go.Figure:
+ """Plot CIA degradation curves for multiple methods.
+
+ Args:
+ results: Dict mapping method name to progressive_predictor_exclusion results.
+ title: Plot title.
+ save_path: Path to save the figure.
+ figsize: Figure size as (width, height).
+ use_absolute_loss: If True (default), plot absolute quantile loss values.
+ If False, plot normalized loss (relative to each method's baseline).
+
+ Returns:
+ Plotly figure object.
+ """
+ fig = go.Figure()
+
+ y_col = "quantile_loss" if use_absolute_loss else "normalized_loss"
+
+ for method_name, method_results in results.items():
+ df = method_results["results_df"]
+
+ # Skip if results_df is empty or missing required column
+ if df.empty or y_col not in df.columns:
+ continue
+
+ valid_df = df[df[y_col].notna()]
+
+ if valid_df.empty:
+ continue
+
+ color = METHOD_COLORS.get(method_name, "#999999")
+ baseline = method_results.get("baseline_loss", np.nan)
+
+ # Build label with baseline info
+ label = f"{method_name}"
+ if not np.isnan(baseline):
+ label += f" (baseline={baseline:.4f})"
+
+ fig.add_trace(
+ go.Scatter(
+ x=valid_df["step"],
+ y=valid_df[y_col],
+ mode="lines+markers",
+ name=label,
+ line=dict(color=color, width=2),
+ marker=dict(color=color, size=8),
+ )
+ )
+
+ if title is None:
+ title = "CIA Sensitivity: Performance Degradation as Predictors are Removed"
+
+ y_axis_title = (
+ "Quantile Loss"
+ if use_absolute_loss
+ else "Normalized Quantile Loss (1.0 = baseline)"
+ )
+
+ fig.update_layout(
+ title=title,
+ xaxis_title="Number of Predictors Removed",
+ yaxis_title=y_axis_title,
+ height=figsize[1],
+ width=figsize[0],
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
+ legend=dict(
+ yanchor="top",
+ y=0.99,
+ xanchor="left",
+ x=0.01,
+ ),
+ hovermode="x unified",
+ )
+
+ fig.update_xaxes(
+ showgrid=PLOT_CONFIG["showgrid_x"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ )
+ fig.update_yaxes(
+ showgrid=PLOT_CONFIG["showgrid_y"],
+ gridcolor=PLOT_CONFIG["gridcolor"],
+ showline=PLOT_CONFIG["showline"],
+ linecolor=PLOT_CONFIG["linecolor"],
+ )
+
+ if save_path:
+ fig.write_html(save_path)
+
+ return fig
+
+
+def create_benchmark_summary_table(
+ cv_results: Dict[str, Dict[str, Dict[str, Any]]],
+ cia_results: Optional[Dict[str, Dict[str, Dict[str, Any]]]] = None,
+) -> pd.DataFrame:
+ """Create a summary table of benchmark results with rankings.
+
+ Args:
+ cv_results: Dict mapping dataset name to cv_results from autoimpute.
+ cia_results: Optional dict mapping dataset name to CIA sensitivity results.
+
+ Returns:
+ DataFrame with columns: Dataset, Best Method, and rank columns for each method.
+ """
+ methods = ["QRF", "OLS", "QuantReg", "Matching", "MDN"]
+ rows = []
+
+ for dataset_name, dataset_cv_results in cv_results.items():
+ row = {"Dataset": dataset_name}
+
+ # Extract mean quantile loss for each method
+ losses = {}
+ for method in methods:
+ if method in dataset_cv_results:
+ method_data = dataset_cv_results[method]
+ if "quantile_loss" in method_data:
+ ql = method_data["quantile_loss"]
+ losses[method] = ql.get("mean_test", np.nan)
+ else:
+ losses[method] = np.nan
+ else:
+ losses[method] = np.nan
+
+ # Compute ranks
+ valid_losses = {k: v for k, v in losses.items() if not np.isnan(v)}
+ if valid_losses:
+ sorted_methods = sorted(
+ valid_losses.keys(), key=lambda x: valid_losses[x]
+ )
+ ranks = {m: i + 1 for i, m in enumerate(sorted_methods)}
+
+ # Fill in ranks for methods with NaN losses
+ max_rank = len(valid_losses) + 1
+ for method in methods:
+ if method not in ranks:
+ ranks[method] = max_rank
+
+ row["Best Method"] = sorted_methods[0]
+ else:
+ ranks = {m: np.nan for m in methods}
+ row["Best Method"] = "N/A"
+
+ # Add rank columns
+ for method in methods:
+ row[f"{method} Rank"] = ranks.get(method, np.nan)
+ row[f"{method} Loss"] = losses.get(method, np.nan)
+
+ # Add CIA sensitivity if provided
+ if cia_results and dataset_name in cia_results:
+ for method in methods:
+ if method in cia_results[dataset_name]:
+ row[f"{method} CIA"] = cia_results[dataset_name][
+ method
+ ].get("sensitivity_score", np.nan)
+ else:
+ row[f"{method} CIA"] = np.nan
+
+ rows.append(row)
+
+ df = pd.DataFrame(rows)
+
+ # Add mean rank row
+ mean_row = {"Dataset": "Mean Rank", "Best Method": "-"}
+ for method in methods:
+ rank_col = f"{method} Rank"
+ if rank_col in df.columns:
+ mean_row[rank_col] = df[rank_col].mean()
+ loss_col = f"{method} Loss"
+ if loss_col in df.columns:
+ mean_row[loss_col] = df[loss_col].mean()
+ cia_col = f"{method} CIA"
+ if cia_col in df.columns:
+ mean_row[cia_col] = df[cia_col].mean()
+
+ df = pd.concat([df, pd.DataFrame([mean_row])], ignore_index=True)
+
+ return df
+
+
+def create_benchmark_heatmap(
+ cv_results: Dict[str, Dict[str, Dict[str, Any]]],
+ wasserstein_results: Optional[Dict[str, Dict[str, float]]] = None,
+ title: Optional[str] = None,
+ save_path: Optional[str] = None,
+ figsize: Optional[tuple] = None,
+) -> go.Figure:
+ """Create a heatmap showing quantile loss and Wasserstein distance.
+
+ The heatmap uses:
+ - Method-specific colors (consistent per column)
+ - Row-type opacity: Q-Loss rows are darker (0.8), W-Dist rows lighter (0.5)
+ - Font weight/size to indicate performance: best = bold + larger
+
+ Args:
+ cv_results: Dict mapping dataset name to cv_results from autoimpute.
+ wasserstein_results: Optional dict mapping dataset to {method: distance}.
+ title: Plot title.
+ save_path: Path to save the figure.
+ figsize: Figure size (width, height). Auto-calculated if None.
+
+ Returns:
+ Plotly figure object.
+ """
+ methods = ["QRF", "OLS", "QuantReg", "Matching", "MDN"]
+ datasets = list(cv_results.keys())
+
+ # Build data matrix with row type tracking
+ row_labels = []
+ row_types = [] # 'qloss' or 'wdist'
+ data_matrix = []
+
+ for dataset in datasets:
+ dataset_cv = cv_results[dataset]
+
+ # Quantile Loss row
+ ql_row = []
+ for method in methods:
+ if method in dataset_cv:
+ val = dataset_cv[method]
+ # Handle both nested format and simple float format
+ if isinstance(val, dict) and "quantile_loss" in val:
+ # Nested format: cv_results[dataset][method]["quantile_loss"]["mean_test"]
+ ql_row.append(
+ val["quantile_loss"].get("mean_test", np.nan)
+ )
+ elif isinstance(val, (int, float)):
+ # Simple format: cv_results[dataset][method] = float
+ ql_row.append(val)
+ else:
+ ql_row.append(np.nan)
+ else:
+ ql_row.append(np.nan)
+ row_labels.append(f"{dataset} (Q-Loss)")
+ row_types.append("qloss")
+ data_matrix.append(ql_row)
+
+ # Wasserstein Distance row
+ if wasserstein_results and dataset in wasserstein_results:
+ wd_row = []
+ for method in methods:
+ wd_row.append(wasserstein_results[dataset].get(method, np.nan))
+ row_labels.append(f"{dataset} (W-Dist)")
+ row_types.append("wdist")
+ data_matrix.append(wd_row)
+
+ data_array = np.array(data_matrix)
+ n_rows = len(row_labels)
+ n_cols = len(methods)
+
+ # Compute ranks for each row (1 = best, higher = worse)
+ rank_matrix = np.zeros_like(data_array)
+ for i in range(len(data_array)):
+ row = data_array[i]
+ valid_mask = ~np.isnan(row)
+ if valid_mask.sum() > 0:
+ # Rank valid values (lower value = better = rank 1)
+ valid_vals = row[valid_mask]
+ ranks = np.argsort(np.argsort(valid_vals)) + 1
+ rank_matrix[i, valid_mask] = ranks
+ # Set NaN positions to max rank + 1
+ rank_matrix[i, ~valid_mask] = valid_mask.sum() + 1
+
+ # Create figure
+ fig = go.Figure()
+
+ # Create cells using shapes (rectangles)
+ annotations = []
+
+ for i in range(n_rows):
+ row_type = row_types[i]
+ # Set alpha based on row type: Q-Loss darker, W-Dist lighter
+ alpha = 0.8 if row_type == "qloss" else 0.5
+
+ for j, method in enumerate(methods):
+ value = data_array[i, j]
+ rank = rank_matrix[i, j]
+
+ base_color = METHOD_COLORS.get(method, "#999999")
+ r = int(base_color[1:3], 16)
+ g = int(base_color[3:5], 16)
+ b = int(base_color[5:7], 16)
+
+ if np.isnan(value):
+ cell_color = "rgba(200, 200, 200, 0.3)"
+ text = "N/A"
+ font_size = 10
+ font_weight = "normal"
+ else:
+ cell_color = f"rgba({r}, {g}, {b}, {alpha:.2f})"
+ text = f"{value:.4f}"
+ # Best performer (rank 1) gets bold + larger font
+ if rank == 1:
+ font_size = 12
+ font_weight = "bold"
+ else:
+ font_size = 10
+ font_weight = "normal"
+
+ # Add rectangle shape for cell background
+ fig.add_shape(
+ type="rect",
+ x0=j - 0.5,
+ x1=j + 0.5,
+ y0=i - 0.5,
+ y1=i + 0.5,
+ fillcolor=cell_color,
+ line=dict(color="white", width=1),
+ )
+
+ # Text color: white for MDN (dark indigo), black for others
+ if method == "MDN" and not np.isnan(value):
+ text_color = "white"
+ else:
+ text_color = "black"
+
+ # Add text annotation with bold formatting for best
+ if font_weight == "bold":
+ text = f"{text}"
+
+ annotations.append(
+ dict(
+ x=j,
+ y=i,
+ text=text,
+ showarrow=False,
+ font=dict(color=text_color, size=font_size),
+ xanchor="center",
+ yanchor="middle",
+ )
+ )
+
+ # Add method color legend
+ for method in methods:
+ fig.add_trace(
+ go.Scatter(
+ x=[None],
+ y=[None],
+ mode="markers",
+ marker=dict(
+ size=12, color=METHOD_COLORS.get(method, "#999999")
+ ),
+ name=method,
+ )
+ )
+
+ if title is None:
+ title = "Cross-Dataset Benchmark Results"
+
+ # Calculate compact figure size if not provided
+ if figsize is None:
+ width = max(500, n_cols * 100 + 200)
+ height = max(300, n_rows * 40 + 100)
+ figsize = (width, height)
+
+ fig.update_layout(
+ title=dict(text=title, font=dict(size=14)),
+ xaxis=dict(
+ tickmode="array",
+ tickvals=list(range(n_cols)),
+ ticktext=methods,
+ side="top",
+ showgrid=False,
+ zeroline=False,
+ range=[-0.5, n_cols - 0.5],
+ ),
+ yaxis=dict(
+ tickmode="array",
+ tickvals=list(range(n_rows)),
+ ticktext=row_labels,
+ showgrid=False,
+ zeroline=False,
+ autorange="reversed",
+ range=[-0.5, n_rows - 0.5],
+ ),
+ height=figsize[1],
+ width=figsize[0],
+ paper_bgcolor=PLOT_CONFIG["paper_bgcolor"],
+ plot_bgcolor=PLOT_CONFIG["plot_bgcolor"],
+ annotations=annotations,
+ legend=dict(
+ orientation="h",
+ yanchor="top",
+ y=-0.02,
+ xanchor="center",
+ x=0.5,
+ ),
+ margin=dict(l=180, r=20, t=60, b=40),
+ )
+
+ if save_path:
+ fig.write_html(save_path)
+
+ return fig
+
+
+def run_cia_analysis_for_dataset(
+ data: pd.DataFrame,
+ predictors: List[str],
+ imputed_variables: List[str],
+ model_classes: Optional[List[Type[Imputer]]] = None,
+ ordering: str = "mutual_info",
+ train_size: float = TRAIN_SIZE,
+ random_state: int = RANDOM_STATE,
+) -> Dict[str, Dict[str, Any]]:
+ """Run CIA sensitivity analysis for all models on a dataset.
+
+ Args:
+ data: DataFrame containing the data.
+ predictors: List of predictor column names.
+ imputed_variables: List of variables to impute.
+ model_classes: List of Imputer classes to evaluate.
+ ordering: Predictor ordering method.
+ train_size: Proportion for training.
+ random_state: Random state.
+
+ Returns:
+ Dict mapping method name to progressive_predictor_exclusion results.
+ """
+ if model_classes is None:
+ from microimpute.models import OLS, QRF, QuantReg
+
+ model_classes = [QRF, OLS, QuantReg]
+ try:
+ from microimpute.models import Matching
+
+ model_classes.append(Matching)
+ except ImportError:
+ pass
+ try:
+ from microimpute.models import MDN
+
+ model_classes.append(MDN)
+ except ImportError:
+ pass
+
+ results = {}
+
+ for model_class in model_classes:
+ method_name = model_class.__name__
+
+ try:
+ method_results = progressive_predictor_exclusion(
+ data=data,
+ predictors=predictors,
+ imputed_variables=imputed_variables,
+ model_class=model_class,
+ ordering=ordering,
+ train_size=train_size,
+ random_state=random_state,
+ )
+ results[method_name] = method_results
+ print(
+ f" {method_name} sensitivity score: "
+ f"{method_results['sensitivity_score']:.3f}"
+ )
+ except Exception as e:
+ log.warning(f"CIA analysis failed for {method_name}: {e}")
+ print(f" CIA analysis failed for {method_name}: {e}")
+ results[method_name] = {
+ "results_df": pd.DataFrame(),
+ "predictor_order": [],
+ "baseline_loss": np.nan,
+ "sensitivity_score": np.nan,
+ }
+
+ return results
diff --git a/paper/benchmarking_datasets/data_loader.py b/paper/benchmarking_datasets/data_loader.py
new file mode 100644
index 0000000..32421bd
--- /dev/null
+++ b/paper/benchmarking_datasets/data_loader.py
@@ -0,0 +1,583 @@
+"""
+OpenML AutoML Benchmark Regression Suite Data Loader
+
+This module retrieves and analyzes datasets from the OpenML AutoML
+Benchmark Regression suite (ID: 269) for evaluating statistical
+matching models in microimpute.
+"""
+
+import re
+
+import openml
+import pandas as pd
+
+# Patterns that indicate generic/non-interpretable feature names
+GENERIC_FEATURE_PATTERNS = [
+ r"^col_?\d+$", # col_1, col1, col_2
+ r"^[fvx]\d+$", # f1, v1, x1, V1, X1
+ r"^feat(ure)?_?\d+$", # feat1, feature_1
+ r"^var_?\d+$", # var1, var_1
+ r"^att(r)?_?\d+$", # attr1, att_1
+ r"^[a-z]\d+[a-z]*\d*$", # P1, P1p2, H2p2 (encoded names)
+ r"^[a-f0-9]{8,}$", # Hashed/anonymized names (e.g., 48df886f9)
+]
+
+
+def is_generic_feature_name(name: str) -> bool:
+ """
+ Check if a feature name appears to be generic/non-interpretable.
+
+ Parameters
+ ----------
+ name : str
+ Feature name to check
+
+ Returns
+ -------
+ bool
+ True if the name matches a generic pattern
+ """
+ name_lower = name.lower().strip()
+ for pattern in GENERIC_FEATURE_PATTERNS:
+ if re.match(pattern, name_lower):
+ return True
+ return False
+
+
+def calculate_interpretability_score(feature_names: list) -> float:
+ """
+ Calculate the proportion of features with interpretable names.
+
+ Parameters
+ ----------
+ feature_names : list
+ List of feature names
+
+ Returns
+ -------
+ float
+ Proportion of features with non-generic names (0 to 1)
+ """
+ if not feature_names:
+ return 0.0
+
+ interpretable_count = sum(
+ 1 for name in feature_names if not is_generic_feature_name(name)
+ )
+ return interpretable_count / len(feature_names)
+
+
+def list_all_qualities(dataset_id: int = 269) -> list:
+ """
+ List all available qualities for a sample dataset to understand
+ what metadata OpenML provides.
+
+ Parameters
+ ----------
+ dataset_id : int
+ Sample dataset ID to inspect qualities
+
+ Returns
+ -------
+ list
+ List of all available quality names
+ """
+ dataset = openml.datasets.get_dataset(
+ dataset_id,
+ download_data=False,
+ download_qualities=True,
+ )
+ if dataset.qualities:
+ return sorted(dataset.qualities.keys())
+ return []
+
+
+def get_benchmark_suite_metadata(suite_id: int = 269) -> pd.DataFrame:
+ """
+ Retrieve metadata for all datasets in an OpenML benchmark suite.
+
+ Parameters
+ ----------
+ suite_id : int
+ The OpenML benchmark suite ID (default: 269 for AutoML
+ Benchmark Regression)
+
+ Returns
+ -------
+ pd.DataFrame
+ DataFrame containing metadata for each dataset in the suite
+ """
+ # Get the benchmark suite
+ suite = openml.study.get_suite(suite_id)
+ print(f"Suite: {suite.name}")
+ print(f"Description: {suite.description}")
+ print(f"Number of datasets: {len(suite.data)}")
+ print("-" * 80)
+
+ # Collect metadata for each dataset
+ metadata_list = []
+
+ for dataset_id in suite.data:
+ try:
+ dataset = openml.datasets.get_dataset(
+ dataset_id,
+ download_data=False,
+ download_qualities=True,
+ download_features_meta_data=True,
+ )
+
+ # Get qualities (dataset characteristics)
+ qualities = dataset.qualities if dataset.qualities else {}
+
+ # Get feature information
+ features = dataset.features if dataset.features else {}
+ feature_names = []
+ feature_types = {}
+ if features:
+ for feat in features.values():
+ feature_names.append(feat.name)
+ feat_type = feat.data_type
+ feature_types[feat_type] = (
+ feature_types.get(feat_type, 0) + 1
+ )
+
+ # Calculate interpretability score
+ interpretability = calculate_interpretability_score(feature_names)
+
+ metadata = {
+ "dataset_id": dataset_id,
+ "name": dataset.name,
+ "version": dataset.version,
+ "n_instances": qualities.get("NumberOfInstances"),
+ "n_features": qualities.get("NumberOfFeatures"),
+ "n_numeric_features": qualities.get("NumberOfNumericFeatures"),
+ "n_categorical_features": qualities.get(
+ "NumberOfSymbolicFeatures"
+ ),
+ "n_missing_values": qualities.get("NumberOfMissingValues"),
+ "pct_missing": qualities.get("PercentageOfMissingValues"),
+ "n_instances_with_missing": qualities.get(
+ "NumberOfInstancesWithMissingValues"
+ ),
+ # Relationship/correlation measures
+ "mean_mutual_info": qualities.get("MeanMutualInformation"),
+ "mean_attr_entropy": qualities.get("MeanAttributeEntropy"),
+ "equiv_num_attr": qualities.get("EquivalentNumberOfAtts"),
+ "noise_signal_ratio": qualities.get("NoiseToSignalRatio"),
+ "class_entropy": qualities.get("ClassEntropy"),
+ "mean_kurtosis": qualities.get("MeanKurtosisOfNumericAtts"),
+ "mean_skewness": qualities.get("MeanSkewnessOfNumericAtts"),
+ "target_variable": dataset.default_target_attribute,
+ "interpretability_score": interpretability,
+ "feature_names": feature_names,
+ "description": (
+ dataset.description[:200] + "..."
+ if dataset.description and len(dataset.description) > 200
+ else dataset.description
+ ),
+ "format": dataset.format,
+ "upload_date": dataset.upload_date,
+ }
+
+ metadata_list.append(metadata)
+ print(f"Loaded: {dataset.name} (ID: {dataset_id})")
+
+ except Exception as e:
+ print(f"Error loading dataset {dataset_id}: {e}")
+ metadata_list.append(
+ {
+ "dataset_id": dataset_id,
+ "name": "ERROR",
+ "error": str(e),
+ }
+ )
+
+ return pd.DataFrame(metadata_list)
+
+
+def analyze_missingness(df_metadata: pd.DataFrame) -> pd.DataFrame:
+ """
+ Analyze missingness patterns in the benchmark datasets.
+
+ Parameters
+ ----------
+ df_metadata : pd.DataFrame
+ DataFrame with dataset metadata
+
+ Returns
+ -------
+ pd.DataFrame
+ Summary of datasets with missing values
+ """
+ missing_df = df_metadata[df_metadata["n_missing_values"] > 0].copy()
+ missing_df = missing_df.sort_values("pct_missing", ascending=False)
+ return missing_df[
+ [
+ "name",
+ "n_instances",
+ "n_features",
+ "n_missing_values",
+ "pct_missing",
+ "n_instances_with_missing",
+ ]
+ ]
+
+
+def filter_datasets(
+ df_metadata: pd.DataFrame,
+ min_numeric_proportion: float = 0.5,
+ max_missing_values: int = 0,
+ max_instances: int = 40000,
+ min_instances: int = 0,
+ min_features: int = 0,
+ max_features: int = None,
+ exclude_name_patterns: list = None,
+ min_interpretability_score: float = None,
+) -> pd.DataFrame:
+ """
+ Filter datasets based on specified criteria.
+
+ Parameters
+ ----------
+ df_metadata : pd.DataFrame
+ DataFrame with dataset metadata from get_benchmark_suite_metadata()
+ min_numeric_proportion : float
+ Minimum proportion of features that must be numeric (default: 0.5)
+ max_missing_values : int
+ Maximum number of missing values allowed (default: 0 for complete)
+ max_instances : int
+ Maximum number of instances (default: 40000)
+ min_instances : int
+ Minimum number of instances (default: 0)
+ min_features : int
+ Minimum number of features (default: 0)
+ max_features : int
+ Maximum number of features (default: None for no limit)
+ exclude_name_patterns : list
+ List of substrings to exclude from dataset names (default: None)
+ Case-insensitive matching.
+ min_interpretability_score : float
+ Minimum proportion of features with interpretable names (default:
+ None for no filtering). Value between 0 and 1, where 1 means all
+ features must have interpretable names.
+
+ Returns
+ -------
+ pd.DataFrame
+ Filtered DataFrame containing only datasets meeting all criteria
+ """
+ df = df_metadata.copy()
+
+ # Calculate numeric proportion
+ df["numeric_proportion"] = df["n_numeric_features"] / df["n_features"]
+
+ # Apply filters
+ mask = (
+ (df["numeric_proportion"] >= min_numeric_proportion)
+ & (df["n_missing_values"] <= max_missing_values)
+ & (df["n_instances"] <= max_instances)
+ & (df["n_instances"] >= min_instances)
+ & (df["n_features"] >= min_features)
+ )
+
+ if max_features is not None:
+ mask = mask & (df["n_features"] <= max_features)
+
+ # Filter by interpretability score
+ if min_interpretability_score is not None:
+ mask = mask & (
+ df["interpretability_score"] >= min_interpretability_score
+ )
+
+ # Exclude datasets by name pattern
+ if exclude_name_patterns:
+ for pattern in exclude_name_patterns:
+ mask = mask & (
+ ~df["name"].str.lower().str.contains(pattern.lower())
+ )
+
+ filtered_df = df[mask].copy()
+
+ return filtered_df
+
+
+def save_filtered_dataset_ids(
+ df_filtered: pd.DataFrame,
+ output_path: str = "paper/benchmarking_datasets/selected_dataset_ids.txt",
+) -> list:
+ """
+ Save filtered dataset IDs to a file.
+
+ Parameters
+ ----------
+ df_filtered : pd.DataFrame
+ Filtered DataFrame from filter_datasets()
+ output_path : str
+ Path to save the dataset IDs
+
+ Returns
+ -------
+ list
+ List of selected dataset IDs
+ """
+ dataset_ids = df_filtered["dataset_id"].tolist()
+
+ with open(output_path, "w") as f:
+ f.write("# Selected OpenML Dataset IDs\n")
+ f.write("# Filtering criteria applied - see data_loader.py\n")
+ for did in dataset_ids:
+ f.write(f"{int(did)}\n")
+
+ return dataset_ids
+
+
+def download_filtered_datasets(
+ df_filtered: pd.DataFrame,
+ output_dir: str = "paper/benchmarking_datasets/datasets",
+ file_format: str = "csv",
+) -> dict:
+ """
+ Download all datasets that passed filtering criteria.
+
+ Parameters
+ ----------
+ df_filtered : pd.DataFrame
+ Filtered DataFrame from filter_datasets()
+ output_dir : str
+ Directory to save downloaded datasets
+ file_format : str
+ Output format: 'csv' or 'parquet' (default: 'csv')
+
+ Returns
+ -------
+ dict
+ Dictionary with dataset names as keys and info dict as values
+ containing 'path', 'shape', 'target', 'features'
+ """
+ import os
+
+ os.makedirs(output_dir, exist_ok=True)
+
+ downloaded = {}
+ total = len(df_filtered)
+
+ for _, row in df_filtered.iterrows():
+ dataset_id = int(row["dataset_id"])
+ dataset_name = row["name"]
+
+ print(
+ f"[{len(downloaded) + 1}/{total}] Downloading {dataset_name} "
+ f"(ID: {dataset_id})..."
+ )
+
+ try:
+ dataset = openml.datasets.get_dataset(
+ dataset_id, download_data=True
+ )
+ X, y, categorical_indicator, attribute_names = dataset.get_data(
+ target=dataset.default_target_attribute
+ )
+
+ # Combine features and target into single DataFrame
+ df = X.copy()
+ target_name = dataset.default_target_attribute
+ if target_name:
+ df[target_name] = y
+
+ # Clean filename
+ clean_name = (
+ dataset_name.lower().replace(" ", "_").replace("-", "_")
+ )
+
+ # Save to file
+ if file_format == "parquet":
+ output_path = os.path.join(output_dir, f"{clean_name}.parquet")
+ df.to_parquet(output_path, index=False)
+ else:
+ output_path = os.path.join(output_dir, f"{clean_name}.csv")
+ df.to_csv(output_path, index=False)
+
+ downloaded[dataset_name] = {
+ "dataset_id": dataset_id,
+ "path": output_path,
+ "shape": df.shape,
+ "target": target_name,
+ "features": list(X.columns),
+ "categorical_features": (
+ [
+ name
+ for name, is_cat in zip(
+ attribute_names, categorical_indicator
+ )
+ if is_cat
+ ]
+ if categorical_indicator
+ else []
+ ),
+ }
+
+ print(
+ f" Saved: {output_path} ({df.shape[0]} rows, "
+ f"{df.shape[1]} cols)"
+ )
+
+ except Exception as e:
+ print(f" ERROR: {e}")
+ downloaded[dataset_name] = {"error": str(e)}
+
+ # Save summary
+ summary_path = os.path.join(output_dir, "download_summary.csv")
+ summary_data = []
+ for name, info in downloaded.items():
+ if "error" not in info:
+ summary_data.append(
+ {
+ "name": name,
+ "dataset_id": info["dataset_id"],
+ "path": info["path"],
+ "n_rows": info["shape"][0],
+ "n_cols": info["shape"][1],
+ "target": info["target"],
+ "n_categorical": len(info["categorical_features"]),
+ }
+ )
+ pd.DataFrame(summary_data).to_csv(summary_path, index=False)
+ print(f"\nSummary saved to: {summary_path}")
+ print(f"Successfully downloaded: {len(summary_data)}/{total} datasets")
+
+ return downloaded
+
+
+def get_dataset_details(dataset_id: int) -> dict:
+ """
+ Get detailed information about a specific dataset including
+ feature relationships.
+
+ Parameters
+ ----------
+ dataset_id : int
+ OpenML dataset ID
+
+ Returns
+ -------
+ dict
+ Detailed dataset information
+ """
+ dataset = openml.datasets.get_dataset(
+ dataset_id,
+ download_data=True,
+ download_qualities=True,
+ download_features_meta_data=True,
+ )
+
+ X, _, categorical_indicator, attribute_names = dataset.get_data(
+ target=dataset.default_target_attribute
+ )
+
+ details = {
+ "name": dataset.name,
+ "description": dataset.description,
+ "target": dataset.default_target_attribute,
+ "features": attribute_names,
+ "categorical_features": [
+ name
+ for name, is_cat in zip(attribute_names, categorical_indicator)
+ if is_cat
+ ],
+ "numeric_features": [
+ name
+ for name, is_cat in zip(attribute_names, categorical_indicator)
+ if not is_cat
+ ],
+ "shape": X.shape,
+ "missing_per_feature": X.isnull().sum().to_dict(),
+ "data_sample": X.head(),
+ }
+
+ return details
+
+
+if __name__ == "__main__":
+ # Use first dataset in suite to discover available qualities
+ suite = openml.study.get_suite(269)
+ first_dataset_id = suite.data[0]
+ all_qualities = list_all_qualities(first_dataset_id)
+
+ # Fetch and display benchmark suite metadata
+ print("\n" + "=" * 80)
+ print("OpenML AutoML Benchmark Regression Suite Analysis")
+ print("=" * 80)
+
+ metadata_df = get_benchmark_suite_metadata(269)
+
+ # Display summary table
+ print("\n" + "=" * 80)
+ print("DATASET SUMMARY TABLE - Basic Info")
+ print("=" * 80)
+
+ # Set display options for better viewing
+ pd.set_option("display.max_columns", None)
+ pd.set_option("display.width", None)
+ pd.set_option("display.max_colwidth", 50)
+
+ summary_cols = [
+ "dataset_id",
+ "name",
+ "n_instances",
+ "n_features",
+ "n_numeric_features",
+ "n_categorical_features",
+ ]
+
+ print(metadata_df[summary_cols].to_string(index=False))
+
+ # Filter datasets based on criteria
+ print("\n" + "=" * 80)
+ print("FILTERED DATASETS FOR BENCHMARKING")
+ print("=" * 80)
+
+ # Define filtering parameters
+ FILTER_PARAMS = {
+ "min_numeric_proportion": 0.5, # At least 50% numeric features
+ "max_missing_values": 0, # No missing values
+ "max_instances": 40000, # Less than 40k instances
+ "min_instances": 1000, # At least 1k instances
+ "min_features": 0,
+ "max_features": None,
+ "exclude_name_patterns": ["QSAR", "MIP-2016", "yprop", "topo", "wine"],
+ "min_interpretability_score": 0.8, # 80% interpretable features
+ }
+
+ print("\nFiltering criteria:")
+ for param, value in FILTER_PARAMS.items():
+ print(f" {param}: {value}")
+
+ filtered_df = filter_datasets(metadata_df, **FILTER_PARAMS)
+
+ print(
+ f"\nDatasets meeting criteria: {len(filtered_df)} / {len(metadata_df)}"
+ )
+ print("\n" + "-" * 80)
+
+ filter_cols = [
+ "dataset_id",
+ "name",
+ "n_instances",
+ "n_features",
+ "n_numeric_features",
+ "numeric_proportion",
+ "interpretability_score",
+ ]
+
+ print(filtered_df[filter_cols].to_string(index=False))
+
+ # Print filtered dataset IDs
+ selected_ids = filtered_df["dataset_id"].tolist()
+ print(f"IDs: {selected_ids}")
+
+ # Download all filtered datasets
+ downloaded = download_filtered_datasets(
+ filtered_df,
+ output_dir="paper/benchmarking_datasets/datasets",
+ file_format="csv",
+ )
diff --git a/paper/benchmarking_datasets/dataset_analysis.ipynb b/paper/benchmarking_datasets/dataset_analysis.ipynb
new file mode 100644
index 0000000..19e57fb
--- /dev/null
+++ b/paper/benchmarking_datasets/dataset_analysis.ipynb
@@ -0,0 +1,29933 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Benchmarking Datasets Analysis\n",
+ "\n",
+ "This notebook analyzes each dataset in the benchmarking datasets folder:\n",
+ "1. Computes predictor correlations (Pearson, Spearman, and Mutual Information)\n",
+ "2. Runs leave-one-out analysis to identify important predictors\n",
+ "3. Creates 60/40 train/test splits for donor/receiver data\n",
+ "4. Runs autoimpute using the donor/receiver splits"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from microimpute import (\n",
+ " method_comparison_results,\n",
+ " compute_predictor_correlations,\n",
+ " leave_one_out_analysis,\n",
+ " autoimpute,\n",
+ " QRF,\n",
+ " QuantReg, \n",
+ " OLS, \n",
+ " Matching,\n",
+ ")\n",
+ "from microimpute.comparisons import compare_distributions\n",
+ "\n",
+ "# Import benchmark utilities for CIA analysis and cross-dataset summary\n",
+ "from benchmark_utils import (\n",
+ " progressive_predictor_exclusion,\n",
+ " plot_cia_degradation_curves,\n",
+ " create_benchmark_summary_table,\n",
+ " create_benchmark_heatmap,\n",
+ " run_cia_analysis_for_dataset,\n",
+ ")\n",
+ "\n",
+ "pd.set_option('display.max_columns', None)\n",
+ "pd.set_option('display.width', None)\n",
+ "pd.set_option('display.max_colwidth', 50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Available datasets:\n",
+ " name target n_rows n_cols\n",
+ "0 space_ga ln(VOTES/POP) 3107 7\n",
+ "1 wine_quality quality 6497 12\n",
+ "2 elevators Goal 16599 19\n",
+ "3 Brazilian_houses total_(BRL) 10692 13\n",
+ "4 OnlineNewsPopularity shares 39644 60\n",
+ "5 abalone Class_number_of_rings 4177 9\n",
+ "6 house_sales price 21613 22\n"
+ ]
+ }
+ ],
+ "source": [
+ "DATASETS_DIR = \"datasets\"\n",
+ "\n",
+ "summary_df = pd.read_csv(os.path.join(DATASETS_DIR, \"download_summary.csv\"))\n",
+ "print(\"Available datasets:\")\n",
+ "print(summary_df[[\"name\", \"target\", \"n_rows\", \"n_cols\"]])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def load_dataset(name):\n",
+ " \"\"\"Load a dataset by name and return the DataFrame.\"\"\"\n",
+ " # Clean name to match file naming convention\n",
+ " clean_name = name.lower().replace(\" \", \"_\").replace(\"-\", \"_\")\n",
+ " file_path = os.path.join(DATASETS_DIR, f\"{clean_name}.csv\")\n",
+ " return pd.read_csv(file_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## 1. Space GA Dataset\n",
+ "\n",
+ "Georgia county voting data with geographic variables."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Shape: (3107, 7)\n",
+ "\n",
+ "Column names:\n",
+ " 1. POP\n",
+ " 2. EDUCATION\n",
+ " 3. HOUSES\n",
+ " 4. INCOME\n",
+ " 5. XCOORD\n",
+ " 6. YCOORD\n",
+ " 7. ln(VOTES/POP)\n",
+ "\n",
+ "Data types:\n",
+ "POP float64\n",
+ "EDUCATION float64\n",
+ "HOUSES float64\n",
+ "INCOME float64\n",
+ "XCOORD float64\n",
+ "YCOORD float64\n",
+ "ln(VOTES/POP) float64\n",
+ "dtype: object\n",
+ "\n",
+ "First few rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " POP | \n",
+ " EDUCATION | \n",
+ " HOUSES | \n",
+ " INCOME | \n",
+ " XCOORD | \n",
+ " YCOORD | \n",
+ " ln(VOTES/POP) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 9.972920 | \n",
+ " 9.246286 | \n",
+ " 9.004054 | \n",
+ " 12.134915 | \n",
+ " -86641472.0 | \n",
+ " 32542207.0 | \n",
+ " -0.661559 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 10.903347 | \n",
+ " 10.221214 | \n",
+ " 9.965758 | \n",
+ " 13.056638 | \n",
+ " -87754736.0 | \n",
+ " 30654881.0 | \n",
+ " -0.650859 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 9.722205 | \n",
+ " 8.753529 | \n",
+ " 8.707648 | \n",
+ " 11.630628 | \n",
+ " -85388993.0 | \n",
+ " 31863073.0 | \n",
+ " -0.617114 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 9.273691 | \n",
+ " 8.183118 | \n",
+ " 8.277412 | \n",
+ " 11.243712 | \n",
+ " -87126855.0 | \n",
+ " 32996943.0 | \n",
+ " -0.639070 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 10.151519 | \n",
+ " 9.207737 | \n",
+ " 9.240676 | \n",
+ " 12.155100 | \n",
+ " -86566214.0 | \n",
+ " 33979740.0 | \n",
+ " -0.700274 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " POP EDUCATION HOUSES INCOME XCOORD YCOORD \\\n",
+ "0 9.972920 9.246286 9.004054 12.134915 -86641472.0 32542207.0 \n",
+ "1 10.903347 10.221214 9.965758 13.056638 -87754736.0 30654881.0 \n",
+ "2 9.722205 8.753529 8.707648 11.630628 -85388993.0 31863073.0 \n",
+ "3 9.273691 8.183118 8.277412 11.243712 -87126855.0 32996943.0 \n",
+ "4 10.151519 9.207737 9.240676 12.155100 -86566214.0 33979740.0 \n",
+ "\n",
+ " ln(VOTES/POP) \n",
+ "0 -0.661559 \n",
+ "1 -0.650859 \n",
+ "2 -0.617114 \n",
+ "3 -0.639070 \n",
+ "4 -0.700274 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_space_ga = load_dataset(\"space_ga\")\n",
+ "print(f\"\\nShape: {df_space_ga.shape}\")\n",
+ "print(f\"\\nColumn names:\")\n",
+ "for i, col in enumerate(df_space_ga.columns):\n",
+ " print(f\" {i+1}. {col}\")\n",
+ "print(f\"\\nData types:\")\n",
+ "print(df_space_ga.dtypes)\n",
+ "print(f\"\\nFirst few rows:\")\n",
+ "df_space_ga.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Target variable: ln(VOTES/POP)\n",
+ "Predictors (6): ['POP', 'EDUCATION', 'HOUSES', 'INCOME', 'XCOORD', 'YCOORD']\n"
+ ]
+ }
+ ],
+ "source": [
+ "space_ga_target = \"ln(VOTES/POP)\" # Target variable to impute\n",
+ "space_ga_predictors = [col for col in df_space_ga.columns if col != space_ga_target]\n",
+ "\n",
+ "print(f\"Target variable: {space_ga_target}\")\n",
+ "print(f\"Predictors ({len(space_ga_predictors)}): {space_ga_predictors}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing predictor correlations...\n",
+ "\n",
+ "--- Pearson Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " POP | \n",
+ " EDUCATION | \n",
+ " HOUSES | \n",
+ " INCOME | \n",
+ " XCOORD | \n",
+ " YCOORD | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | POP | \n",
+ " 1.000000 | \n",
+ " 0.987472 | \n",
+ " 0.994002 | \n",
+ " 0.992899 | \n",
+ " 0.267719 | \n",
+ " -0.040601 | \n",
+ "
\n",
+ " \n",
+ " | EDUCATION | \n",
+ " 0.987472 | \n",
+ " 1.000000 | \n",
+ " 0.984365 | \n",
+ " 0.993133 | \n",
+ " 0.199129 | \n",
+ " 0.047424 | \n",
+ "
\n",
+ " \n",
+ " | HOUSES | \n",
+ " 0.994002 | \n",
+ " 0.984365 | \n",
+ " 1.000000 | \n",
+ " 0.988084 | \n",
+ " 0.268707 | \n",
+ " -0.030783 | \n",
+ "
\n",
+ " \n",
+ " | INCOME | \n",
+ " 0.992899 | \n",
+ " 0.993133 | \n",
+ " 0.988084 | \n",
+ " 1.000000 | \n",
+ " 0.231344 | \n",
+ " -0.008157 | \n",
+ "
\n",
+ " \n",
+ " | XCOORD | \n",
+ " 0.267719 | \n",
+ " 0.199129 | \n",
+ " 0.268707 | \n",
+ " 0.231344 | \n",
+ " 1.000000 | \n",
+ " -0.193174 | \n",
+ "
\n",
+ " \n",
+ " | YCOORD | \n",
+ " -0.040601 | \n",
+ " 0.047424 | \n",
+ " -0.030783 | \n",
+ " -0.008157 | \n",
+ " -0.193174 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " POP EDUCATION HOUSES INCOME XCOORD YCOORD\n",
+ "POP 1.000000 0.987472 0.994002 0.992899 0.267719 -0.040601\n",
+ "EDUCATION 0.987472 1.000000 0.984365 0.993133 0.199129 0.047424\n",
+ "HOUSES 0.994002 0.984365 1.000000 0.988084 0.268707 -0.030783\n",
+ "INCOME 0.992899 0.993133 0.988084 1.000000 0.231344 -0.008157\n",
+ "XCOORD 0.267719 0.199129 0.268707 0.231344 1.000000 -0.193174\n",
+ "YCOORD -0.040601 0.047424 -0.030783 -0.008157 -0.193174 1.000000"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Spearman Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " POP | \n",
+ " EDUCATION | \n",
+ " HOUSES | \n",
+ " INCOME | \n",
+ " XCOORD | \n",
+ " YCOORD | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | POP | \n",
+ " 1.000000 | \n",
+ " 0.982208 | \n",
+ " 0.994249 | \n",
+ " 0.990724 | \n",
+ " 0.353281 | \n",
+ " -0.022140 | \n",
+ "
\n",
+ " \n",
+ " | EDUCATION | \n",
+ " 0.982208 | \n",
+ " 1.000000 | \n",
+ " 0.980917 | \n",
+ " 0.991055 | \n",
+ " 0.280971 | \n",
+ " 0.082580 | \n",
+ "
\n",
+ " \n",
+ " | HOUSES | \n",
+ " 0.994249 | \n",
+ " 0.980917 | \n",
+ " 1.000000 | \n",
+ " 0.987013 | \n",
+ " 0.350405 | \n",
+ " -0.010133 | \n",
+ "
\n",
+ " \n",
+ " | INCOME | \n",
+ " 0.990724 | \n",
+ " 0.991055 | \n",
+ " 0.987013 | \n",
+ " 1.000000 | \n",
+ " 0.316362 | \n",
+ " 0.020701 | \n",
+ "
\n",
+ " \n",
+ " | XCOORD | \n",
+ " 0.353281 | \n",
+ " 0.280971 | \n",
+ " 0.350405 | \n",
+ " 0.316362 | \n",
+ " 1.000000 | \n",
+ " -0.139110 | \n",
+ "
\n",
+ " \n",
+ " | YCOORD | \n",
+ " -0.022140 | \n",
+ " 0.082580 | \n",
+ " -0.010133 | \n",
+ " 0.020701 | \n",
+ " -0.139110 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " POP EDUCATION HOUSES INCOME XCOORD YCOORD\n",
+ "POP 1.000000 0.982208 0.994249 0.990724 0.353281 -0.022140\n",
+ "EDUCATION 0.982208 1.000000 0.980917 0.991055 0.280971 0.082580\n",
+ "HOUSES 0.994249 0.980917 1.000000 0.987013 0.350405 -0.010133\n",
+ "INCOME 0.990724 0.991055 0.987013 1.000000 0.316362 0.020701\n",
+ "XCOORD 0.353281 0.280971 0.350405 0.316362 1.000000 -0.139110\n",
+ "YCOORD -0.022140 0.082580 -0.010133 0.020701 -0.139110 1.000000"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Mutual Information Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " POP | \n",
+ " EDUCATION | \n",
+ " HOUSES | \n",
+ " INCOME | \n",
+ " XCOORD | \n",
+ " YCOORD | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | POP | \n",
+ " 1.000000 | \n",
+ " 0.173367 | \n",
+ " 0.229931 | \n",
+ " 0.187904 | \n",
+ " 0.018096 | \n",
+ " 0.004794 | \n",
+ "
\n",
+ " \n",
+ " | EDUCATION | \n",
+ " 0.173367 | \n",
+ " 1.000000 | \n",
+ " 0.167237 | \n",
+ " 0.195508 | \n",
+ " 0.016309 | \n",
+ " 0.004089 | \n",
+ "
\n",
+ " \n",
+ " | HOUSES | \n",
+ " 0.229931 | \n",
+ " 0.167237 | \n",
+ " 1.000000 | \n",
+ " 0.178265 | \n",
+ " 0.019829 | \n",
+ " 0.006102 | \n",
+ "
\n",
+ " \n",
+ " | INCOME | \n",
+ " 0.187904 | \n",
+ " 0.195508 | \n",
+ " 0.178265 | \n",
+ " 1.000000 | \n",
+ " 0.015291 | \n",
+ " 0.006218 | \n",
+ "
\n",
+ " \n",
+ " | XCOORD | \n",
+ " 0.018096 | \n",
+ " 0.016309 | \n",
+ " 0.019829 | \n",
+ " 0.015291 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | YCOORD | \n",
+ " 0.004794 | \n",
+ " 0.004089 | \n",
+ " 0.006102 | \n",
+ " 0.006218 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " POP EDUCATION HOUSES INCOME XCOORD YCOORD\n",
+ "POP 1.000000 0.173367 0.229931 0.187904 0.018096 0.004794\n",
+ "EDUCATION 0.173367 1.000000 0.167237 0.195508 0.016309 0.004089\n",
+ "HOUSES 0.229931 0.167237 1.000000 0.178265 0.019829 0.006102\n",
+ "INCOME 0.187904 0.195508 0.178265 1.000000 0.015291 0.006218\n",
+ "XCOORD 0.018096 0.016309 0.019829 0.015291 1.000000 0.000000\n",
+ "YCOORD 0.004794 0.004089 0.006102 0.006218 0.000000 1.000000"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Predictor-Target Mutual Information ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ln(VOTES/POP) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | YCOORD | \n",
+ " 0.021069 | \n",
+ "
\n",
+ " \n",
+ " | XCOORD | \n",
+ " 0.017490 | \n",
+ "
\n",
+ " \n",
+ " | POP | \n",
+ " 0.012771 | \n",
+ "
\n",
+ " \n",
+ " | HOUSES | \n",
+ " 0.011503 | \n",
+ "
\n",
+ " \n",
+ " | INCOME | \n",
+ " 0.010931 | \n",
+ "
\n",
+ " \n",
+ " | EDUCATION | \n",
+ " 0.009178 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ln(VOTES/POP)\n",
+ "YCOORD 0.021069\n",
+ "XCOORD 0.017490\n",
+ "POP 0.012771\n",
+ "HOUSES 0.011503\n",
+ "INCOME 0.010931\n",
+ "EDUCATION 0.009178"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Computing predictor correlations...\")\n",
+ "space_ga_correlations = compute_predictor_correlations(\n",
+ " data=df_space_ga,\n",
+ " predictors=space_ga_predictors,\n",
+ " imputed_variables=[space_ga_target],\n",
+ " method=\"all\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Pearson Correlation Matrix ---\")\n",
+ "display(space_ga_correlations[\"pearson\"])\n",
+ "\n",
+ "print(\"\\n--- Spearman Correlation Matrix ---\")\n",
+ "display(space_ga_correlations[\"spearman\"])\n",
+ "\n",
+ "print(\"\\n--- Mutual Information Matrix ---\")\n",
+ "display(space_ga_correlations[\"mutual_info\"])\n",
+ "\n",
+ "print(\"\\n--- Predictor-Target Mutual Information ---\")\n",
+ "display(space_ga_correlations[\"predictor_target_mi\"].sort_values(\n",
+ " by=space_ga_target, ascending=False\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running leave-one-out analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "261f3646fa2b4cdfa55c0aa2a4da3e7e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Leave-one-out analysis: 0%| | 0/6 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Leave-One-Out Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " predictor_removed | \n",
+ " avg_quantile_loss | \n",
+ " avg_log_loss | \n",
+ " loss_increase | \n",
+ " relative_impact | \n",
+ " baseline_quantile_loss | \n",
+ " baseline_log_loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 5 | \n",
+ " YCOORD | \n",
+ " 0.047892 | \n",
+ " 0 | \n",
+ " 0.008613 | \n",
+ " 21.927196 | \n",
+ " 0.039279 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " XCOORD | \n",
+ " 0.045017 | \n",
+ " 0 | \n",
+ " 0.005738 | \n",
+ " 14.608486 | \n",
+ " 0.039279 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " HOUSES | \n",
+ " 0.039934 | \n",
+ " 0 | \n",
+ " 0.000655 | \n",
+ " 1.666995 | \n",
+ " 0.039279 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " POP | \n",
+ " 0.039710 | \n",
+ " 0 | \n",
+ " 0.000431 | \n",
+ " 1.096066 | \n",
+ " 0.039279 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " EDUCATION | \n",
+ " 0.039361 | \n",
+ " 0 | \n",
+ " 0.000082 | \n",
+ " 0.208418 | \n",
+ " 0.039279 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " INCOME | \n",
+ " 0.039025 | \n",
+ " 0 | \n",
+ " -0.000254 | \n",
+ " -0.647140 | \n",
+ " 0.039279 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " predictor_removed avg_quantile_loss avg_log_loss loss_increase \\\n",
+ "5 YCOORD 0.047892 0 0.008613 \n",
+ "4 XCOORD 0.045017 0 0.005738 \n",
+ "2 HOUSES 0.039934 0 0.000655 \n",
+ "0 POP 0.039710 0 0.000431 \n",
+ "1 EDUCATION 0.039361 0 0.000082 \n",
+ "3 INCOME 0.039025 0 -0.000254 \n",
+ "\n",
+ " relative_impact baseline_quantile_loss baseline_log_loss \n",
+ "5 21.927196 0.039279 0 \n",
+ "4 14.608486 0.039279 0 \n",
+ "2 1.666995 0.039279 0 \n",
+ "0 1.096066 0.039279 0 \n",
+ "1 0.208418 0.039279 0 \n",
+ "3 -0.647140 0.039279 0 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Running leave-one-out analysis...\")\n",
+ "space_ga_loo = leave_one_out_analysis(\n",
+ " data=df_space_ga,\n",
+ " predictors=space_ga_predictors,\n",
+ " imputed_variables=[space_ga_target],\n",
+ " model_class=QRF,\n",
+ " train_size=0.6,\n",
+ " n_jobs=1\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Leave-One-Out Results ---\")\n",
+ "display(space_ga_loo)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating 60/40 donor/receiver split...\n",
+ "Donor size: 1864\n",
+ "Receiver size: 1243\n",
+ "\n",
+ "Running autoimpute...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9c2c432f784c44549683f59d501cfe0f",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "AutoImputation progress: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4c2b64882928417ba9b4f2b167872a07",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Evaluating models: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 9.1s remaining: 13.7s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 9.1s remaining: 6.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 9.2s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Batch computation too fast (0.19906830787658691s.) Setting batch_size=2.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.2s remaining: 0.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 0.2s remaining: 0.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.2s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Batch computation too fast (0.05776691436767578s.) Setting batch_size=2.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.1s remaining: 0.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 0.1s remaining: 0.0s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.1s finished\n",
+ "[Parallel(n_jobs=1)]: Done 1 tasks | elapsed: 2.9s\n",
+ "[Parallel(n_jobs=1)]: Done 4 tasks | elapsed: 8.3s\n",
+ "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 9.9s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 2.3s remaining: 3.4s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 2.3s remaining: 1.5s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 2.3s finished\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Benchmarking Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.001289182090203,
+ 0.001641586846550629,
+ 0.0014254160586643666,
+ 0.0016126628972894675,
+ 0.0018542282294491822,
+ 0.001254829481032883,
+ 0.0012616422274823419,
+ 0.0008925075876101453,
+ 0.0009789985599894782,
+ 0.0020550687969806075,
+ 0.0011614534446129574,
+ 0.0008602584652660213,
+ 0.0008918468252295444,
+ 0.001238592332939211,
+ 0.0015322596313849216,
+ 0.0012259448177210923,
+ 0.0012954073110602942,
+ 0.0009536104707202338,
+ 0.0007957629959803788
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.01817500633531141,
+ 0.026700310464725936,
+ 0.03332652926829131,
+ 0.03909195798332123,
+ 0.04481586833567268,
+ 0.04930413917960528,
+ 0.05246454656928429,
+ 0.052874904051341154,
+ 0.052681695396049846,
+ 0.05153127849087411,
+ 0.05240536885595963,
+ 0.04965082859710386,
+ 0.04572216335007338,
+ 0.04178626351653487,
+ 0.037213526486628165,
+ 0.032431151042838766,
+ 0.02616058441739506,
+ 0.020133327326486324,
+ 0.013122355392717968
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.0010833911164227247,
+ 0.0011706862196016997,
+ 0.0009853048017531005,
+ 0.000892210416357871,
+ 0.0009172973122542029,
+ 0.001164622135044908,
+ 0.0014272021984131826,
+ 0.001686676762208881,
+ 0.001831444691663165,
+ 0.0019462562032744573,
+ 0.00194753722126197,
+ 0.0019407490556580332,
+ 0.0018799036550497994,
+ 0.0017377065265933025,
+ 0.001591968134523271,
+ 0.0013560685519335932,
+ 0.0010980755543928862,
+ 0.0008820058771501513,
+ 0.0007627430667108099
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.013601495172351546,
+ 0.022039520319063255,
+ 0.028582415903448954,
+ 0.033730970460644716,
+ 0.03799893409026435,
+ 0.04147346522482332,
+ 0.04428975359762964,
+ 0.04629995838053462,
+ 0.047597906635362466,
+ 0.04809691002792157,
+ 0.047873756746492246,
+ 0.046863588384280196,
+ 0.04505972475506838,
+ 0.042536489732329034,
+ 0.03925222528048754,
+ 0.03509874687230753,
+ 0.02978220718670343,
+ 0.022938535366078407,
+ 0.01405148700683857
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.011188471044211747,
+ 0.011170466264537793,
+ 0.007572416709230087,
+ 0.008046557525503314,
+ 0.006599233742856614,
+ 0.005785321345071802,
+ 0.004151473977884341,
+ 0.0034082976292562793,
+ 0.00280840105564643,
+ 0.002763961428761023,
+ 0.0030371975282827667,
+ 0.003647703367441269,
+ 0.0039005773788334955,
+ 0.004922978653361434,
+ 0.005063572136316329,
+ 0.005799875185466762,
+ 0.006854700224737245,
+ 0.007405866165028072,
+ 0.008082787419597216
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.08281870777807214,
+ 0.08151629060086235,
+ 0.08802879746544752,
+ 0.08784592140541951,
+ 0.09231444655153817,
+ 0.09395360366830244,
+ 0.09430540688928825,
+ 0.09437480622748887,
+ 0.09371971036321496,
+ 0.09283779711962045,
+ 0.09115149545375179,
+ 0.08931422613245024,
+ 0.0865236553694542,
+ 0.0837545184989678,
+ 0.0780786632773851,
+ 0.07512390387644333,
+ 0.06999364643389736,
+ 0.06438587458499198,
+ 0.05699896802855233
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.004990500230487147,
+ 0.004500933987506405,
+ 0.004039452394904967,
+ 0.0036168217018443067,
+ 0.0032482422133479484,
+ 0.002954015980260504,
+ 0.0027580424268144484,
+ 0.0026819471146907023,
+ 0.0027357516892624442,
+ 0.0029122652636774096,
+ 0.0031911902651137842,
+ 0.0035484583035040856,
+ 0.003962937299109404,
+ 0.004418556599507702,
+ 0.004903862507335305,
+ 0.005410873057100847,
+ 0.005934027441012225,
+ 0.006469410389110239,
+ 0.007014222307297095
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.0662964790001646,
+ 0.06590329223614041,
+ 0.06551010547211625,
+ 0.06511691870809205,
+ 0.06472373194406786,
+ 0.06433054518004368,
+ 0.06393735841601951,
+ 0.06354417165199532,
+ 0.06315098488797113,
+ 0.06275779812394694,
+ 0.06236461135992276,
+ 0.06197142459589857,
+ 0.0615782378318744,
+ 0.0611850510678502,
+ 0.060791864303826014,
+ 0.060398677539801826,
+ 0.060005490775777645,
+ 0.05961230401175346,
+ 0.05921911724772928
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.0033196477456942774,
+ 0.004851665534085506,
+ 0.005580743734245581,
+ 0.00595074087756197,
+ 0.00507136013529618,
+ 0.004385024588997225,
+ 0.005090702063578518,
+ 0.00490024195318755,
+ 0.004839620212289013,
+ 0.004905549507001035,
+ 0.005263668304284575,
+ 0.005931056011551634,
+ 0.006897960128047957,
+ 0.008196001934724374,
+ 0.009316496895623303,
+ 0.00969934721819898,
+ 0.009198206736044214,
+ 0.00772767756159229,
+ 0.005294394589765286
+ ]
+ },
+ "hovertemplate": "Method=MDN
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "MDN",
+ "marker": {
+ "color": "#332288",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "MDN",
+ "offsetgroup": "MDN",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.022783922599610946,
+ 0.03559129951075892,
+ 0.04464404245361313,
+ 0.05051369261676569,
+ 0.05519761013291058,
+ 0.05817472426610678,
+ 0.059641272680632317,
+ 0.061052271278365086,
+ 0.061756079716894605,
+ 0.061930196518500166,
+ 0.06169980566566542,
+ 0.06096723283012295,
+ 0.0597625597927293,
+ 0.05776486409427974,
+ 0.05472986967183059,
+ 0.05020685073900701,
+ 0.04374091826469292,
+ 0.03486241920427831,
+ 0.02278261198030372
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.03892588447685344,
+ "y1": 0.03892588447685344
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.03616674163908578,
+ "y1": 0.03616674163908578
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.08405475998553413,
+ "y1": 0.08405475998553413
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.06275779812394694,
+ "y1": 0.06275779812394694
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "MDN Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.05041064442195096,
+ "y1": 0.05041064442195096
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "Space GA Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Creating 60/40 donor/receiver split...\")\n",
+ "space_ga_donor, space_ga_receiver = train_test_split(\n",
+ " df_space_ga, train_size=0.6, random_state=42\n",
+ ")\n",
+ "\n",
+ "# Remove target from receiver (simulating missing data)\n",
+ "space_ga_receiver_no_target = space_ga_receiver.drop(columns=[space_ga_target])\n",
+ "\n",
+ "print(f\"Donor size: {len(space_ga_donor)}\")\n",
+ "print(f\"Receiver size: {len(space_ga_receiver)}\")\n",
+ "\n",
+ "print(\"\\nRunning autoimpute...\")\n",
+ "space_ga_result = autoimpute(\n",
+ " donor_data=space_ga_donor,\n",
+ " receiver_data=space_ga_receiver_no_target.copy(),\n",
+ " predictors=space_ga_predictors,\n",
+ " imputed_variables=[space_ga_target],\n",
+ " impute_all=True,\n",
+ " log_level=\"INFO\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Benchmarking Results ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=space_ga_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ " data_format=\"wide\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"Space GA Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Distribution Comparison: Imputed vs Ground Truth (Space GA) ---\n",
+ "OLS: Wasserstein Distance = 0.0272\n",
+ "QRF: Wasserstein Distance = 0.0101\n",
+ "QuantReg: Wasserstein Distance = 0.0981\n",
+ "Matching: Wasserstein Distance = 0.0111\n",
+ "MDN: Wasserstein Distance = 0.0504\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"\\n--- Distribution Comparison: Imputed vs Ground Truth (Space GA) ---\")\n",
+ "# Store Wasserstein distances for all methods\n",
+ "space_ga_wasserstein = {}\n",
+ "\n",
+ "distribution_comparison_space_ga = compare_distributions(\n",
+ " donor_data=space_ga_receiver, # Ground truth test split\n",
+ " receiver_data=space_ga_result.receiver_data, # Imputed values\n",
+ " imputed_variables=[space_ga_target],\n",
+ ")\n",
+ "best_method_name = space_ga_result.fitted_models[\"best_method\"].__class__.__name__ \n",
+ "best_method_name = best_method_name.replace(\"Results\", \"\")\n",
+ "space_ga_wasserstein[best_method_name] = distribution_comparison_space_ga[\n",
+ " distribution_comparison_space_ga['Metric'] == 'wasserstein_distance'\n",
+ "]['Distance'].values[0]\n",
+ "print(f\"{best_method_name}: Wasserstein Distance = {space_ga_wasserstein.get(best_method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "for method_name, imputations in space_ga_result.imputations.items():\n",
+ " # Skip the 'best_method' (it's a duplicate)\n",
+ " if method_name == 'best_method': \n",
+ " continue\n",
+ " \n",
+ " # Create a copy of receiver data with this method's imputations \n",
+ " receiver_with_imputations = space_ga_receiver_no_target.copy() \n",
+ " \n",
+ " # Handle both dict (quantile->DataFrame) and DataFrame formats \n",
+ " if isinstance(imputations, dict): \n",
+ " # Get median quantile (0.5) imputations \n",
+ " imp_df = imputations.get(0.5, list(imputations.values())[0]) \n",
+ " else: \n",
+ " imp_df = imputations\n",
+ " \n",
+ " # Add imputed values \n",
+ " for var in [space_ga_target]:\n",
+ " if var in imp_df.columns: \n",
+ " receiver_with_imputations[var] = imp_df[var].values \n",
+ " \n",
+ " # Calculate distribution comparison \n",
+ " dist_comparison = compare_distributions( \n",
+ " donor_data=space_ga_receiver, # Ground truth \n",
+ " receiver_data=receiver_with_imputations,\n",
+ " imputed_variables=[space_ga_target], \n",
+ " )\n",
+ "\n",
+ " # Extract Wasserstein distance \n",
+ " wd = dist_comparison[dist_comparison['Metric'] == 'wasserstein_distance']['Distance'].values \n",
+ " space_ga_wasserstein[method_name] = wd[0]\n",
+ " \n",
+ " print(f\"{method_name}: Wasserstein Distance = {space_ga_wasserstein.get(method_name, 'N/A'):.4f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CIA Sensitivity Analysis\n",
+ "\n",
+ "Measuring sensitivity to the Conditional Independence Assumption by progressively removing predictors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running CIA sensitivity analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3b2c921f8eab4a1e85a1930e593a3c22",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/6 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QRF sensitivity score: 1.443\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "63a5034ff25f4c0ca0344a47be61188e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/6 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " OLS sensitivity score: 1.192\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f1c1894d9e3a4ee6b7fd34ea13d220a5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/6 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QuantReg sensitivity score: 0.688\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d0111125e4eb4aa6a202b3631b564069",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/6 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Matching sensitivity score: 1.267\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "89f91f54e4bc46b5aed9c42622831bae",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/6 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " MDN sensitivity score: 1.312\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "width": 2
+ },
+ "marker": {
+ "color": "#88CCEE",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QRF (baseline=0.0385)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "y": [
+ 0.038467598301926476,
+ 0.046349112806030976,
+ 0.04892122406975649,
+ 0.057574528391621846,
+ 0.06414660703151685,
+ 0.08246282216150921
+ ]
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "width": 2
+ },
+ "marker": {
+ "color": "#CC6677",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "OLS (baseline=0.0378)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "y": [
+ 0.03776066596019584,
+ 0.04081358386541088,
+ 0.04095723660354414,
+ 0.04724657150716196,
+ 0.05019125026945488,
+ 0.053877746882966635
+ ]
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "width": 2
+ },
+ "marker": {
+ "color": "#DDCC77",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QuantReg (baseline=0.0813)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "y": [
+ 0.0812705799474595,
+ 0.0756943013337473,
+ 0.039832312046727415,
+ 0.0464272933462065,
+ 0.04990904439994729,
+ 0.05389388531046596
+ ]
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "width": 2
+ },
+ "marker": {
+ "color": "#117733",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "Matching (baseline=0.0668)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "y": [
+ 0.06683934854938364,
+ 0.09370864563310403,
+ 0.06999962605586944,
+ 0.08382087593050129,
+ 0.08962890291527795,
+ 0.10580349377942175
+ ]
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "width": 2
+ },
+ "marker": {
+ "color": "#332288",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "MDN (baseline=0.0358)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "y": [
+ 0.035834278797175254,
+ 0.041149260361601624,
+ 0.04103186805064114,
+ 0.05255487742165259,
+ 0.05091405950566863,
+ 0.06316861701453608
+ ]
+ }
+ ],
+ "layout": {
+ "height": 600,
+ "hovermode": "x unified",
+ "legend": {
+ "x": 0.01,
+ "xanchor": "left",
+ "y": 0.99,
+ "yanchor": "top"
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "CIA Sensitivity: Space Ga"
+ },
+ "width": 750,
+ "xaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "text": "Number of Predictors Removed"
+ }
+ },
+ "yaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "text": "Quantile Loss"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Sensitivity Scores (higher = more sensitive to predictor removal):\n",
+ " QRF: 1.443\n",
+ " OLS: 1.192\n",
+ " QuantReg: 0.688\n",
+ " Matching: 1.267\n",
+ " MDN: 1.312\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Running CIA sensitivity analysis...\")\n",
+ "space_ga_cia_results = run_cia_analysis_for_dataset(\n",
+ " data=df_space_ga,\n",
+ " predictors=space_ga_predictors,\n",
+ " imputed_variables=[space_ga_target],\n",
+ " ordering=\"mutual_info\",\n",
+ ")\n",
+ "\n",
+ "# Plot degradation curves\n",
+ "space_ga_cia_fig = plot_cia_degradation_curves(\n",
+ " space_ga_cia_results,\n",
+ " title=\"CIA Sensitivity: Space Ga\"\n",
+ ")\n",
+ "space_ga_cia_fig.show()\n",
+ "\n",
+ "# Display sensitivity scores\n",
+ "print(\"\\nSensitivity Scores (higher = more sensitive to predictor removal):\")\n",
+ "for method, results in space_ga_cia_results.items():\n",
+ " print(f\" {method}: {results['sensitivity_score']:.3f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## 2. Elevators Dataset\n",
+ "\n",
+ "Aircraft control system data from an F-16 simulation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Shape: (16599, 19)\n",
+ "\n",
+ "Column names:\n",
+ " 1. climbRate\n",
+ " 2. Sgz\n",
+ " 3. p\n",
+ " 4. q\n",
+ " 5. curRoll\n",
+ " 6. absRoll\n",
+ " 7. diffClb\n",
+ " 8. diffRollRate\n",
+ " 9. diffDiffClb\n",
+ " 10. SaTime1\n",
+ " 11. SaTime2\n",
+ " 12. SaTime3\n",
+ " 13. SaTime4\n",
+ " 14. diffSaTime1\n",
+ " 15. diffSaTime2\n",
+ " 16. diffSaTime3\n",
+ " 17. diffSaTime4\n",
+ " 18. Sa\n",
+ " 19. Goal\n",
+ "\n",
+ "Data types:\n",
+ "climbRate float64\n",
+ "Sgz float64\n",
+ "p float64\n",
+ "q float64\n",
+ "curRoll float64\n",
+ "absRoll float64\n",
+ "diffClb float64\n",
+ "diffRollRate float64\n",
+ "diffDiffClb float64\n",
+ "SaTime1 float64\n",
+ "SaTime2 float64\n",
+ "SaTime3 float64\n",
+ "SaTime4 float64\n",
+ "diffSaTime1 float64\n",
+ "diffSaTime2 float64\n",
+ "diffSaTime3 float64\n",
+ "diffSaTime4 float64\n",
+ "Sa float64\n",
+ "Goal float64\n",
+ "dtype: object\n",
+ "\n",
+ "First few rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " climbRate | \n",
+ " Sgz | \n",
+ " p | \n",
+ " q | \n",
+ " curRoll | \n",
+ " absRoll | \n",
+ " diffClb | \n",
+ " diffRollRate | \n",
+ " diffDiffClb | \n",
+ " SaTime1 | \n",
+ " SaTime2 | \n",
+ " SaTime3 | \n",
+ " SaTime4 | \n",
+ " diffSaTime1 | \n",
+ " diffSaTime2 | \n",
+ " diffSaTime3 | \n",
+ " diffSaTime4 | \n",
+ " Sa | \n",
+ " Goal | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 118.0 | \n",
+ " -55.0 | \n",
+ " -0.28 | \n",
+ " -0.08 | \n",
+ " -0.2 | \n",
+ " -11.0 | \n",
+ " 11.0 | \n",
+ " 0.005 | \n",
+ " -0.2 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " 0.0000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -0.0010 | \n",
+ " 0.031 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 390.0 | \n",
+ " -45.0 | \n",
+ " -0.06 | \n",
+ " -0.07 | \n",
+ " -0.6 | \n",
+ " -12.0 | \n",
+ " 11.0 | \n",
+ " 0.010 | \n",
+ " -0.2 | \n",
+ " -0.0008 | \n",
+ " -0.0008 | \n",
+ " -0.0008 | \n",
+ " -0.0008 | \n",
+ " 0.0000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -0.0008 | \n",
+ " 0.034 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 68.0 | \n",
+ " 6.0 | \n",
+ " 0.11 | \n",
+ " 0.15 | \n",
+ " 0.6 | \n",
+ " -10.0 | \n",
+ " -9.0 | \n",
+ " -0.003 | \n",
+ " -0.2 | \n",
+ " -0.0011 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " -0.0002 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -0.0010 | \n",
+ " 0.033 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -358.0 | \n",
+ " -12.0 | \n",
+ " -0.20 | \n",
+ " 0.13 | \n",
+ " -0.3 | \n",
+ " -11.0 | \n",
+ " -7.0 | \n",
+ " 0.001 | \n",
+ " -0.1 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " 0.0000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -0.0010 | \n",
+ " 0.032 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -411.0 | \n",
+ " -19.0 | \n",
+ " -0.18 | \n",
+ " 0.02 | \n",
+ " -0.5 | \n",
+ " -11.0 | \n",
+ " -3.0 | \n",
+ " 0.002 | \n",
+ " 1.2 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " -0.0010 | \n",
+ " 0.0000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -0.0010 | \n",
+ " 0.030 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " climbRate Sgz p q curRoll absRoll diffClb diffRollRate \\\n",
+ "0 118.0 -55.0 -0.28 -0.08 -0.2 -11.0 11.0 0.005 \n",
+ "1 390.0 -45.0 -0.06 -0.07 -0.6 -12.0 11.0 0.010 \n",
+ "2 68.0 6.0 0.11 0.15 0.6 -10.0 -9.0 -0.003 \n",
+ "3 -358.0 -12.0 -0.20 0.13 -0.3 -11.0 -7.0 0.001 \n",
+ "4 -411.0 -19.0 -0.18 0.02 -0.5 -11.0 -3.0 0.002 \n",
+ "\n",
+ " diffDiffClb SaTime1 SaTime2 SaTime3 SaTime4 diffSaTime1 diffSaTime2 \\\n",
+ "0 -0.2 -0.0010 -0.0010 -0.0010 -0.0010 0.0000 0.0 \n",
+ "1 -0.2 -0.0008 -0.0008 -0.0008 -0.0008 0.0000 0.0 \n",
+ "2 -0.2 -0.0011 -0.0010 -0.0010 -0.0010 -0.0002 0.0 \n",
+ "3 -0.1 -0.0010 -0.0010 -0.0010 -0.0010 0.0000 0.0 \n",
+ "4 1.2 -0.0010 -0.0010 -0.0010 -0.0010 0.0000 0.0 \n",
+ "\n",
+ " diffSaTime3 diffSaTime4 Sa Goal \n",
+ "0 0.0 0.0 -0.0010 0.031 \n",
+ "1 0.0 0.0 -0.0008 0.034 \n",
+ "2 0.0 0.0 -0.0010 0.033 \n",
+ "3 0.0 0.0 -0.0010 0.032 \n",
+ "4 0.0 0.0 -0.0010 0.030 "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_elevators = load_dataset(\"elevators\")\n",
+ "print(f\"\\nShape: {df_elevators.shape}\")\n",
+ "print(f\"\\nColumn names:\")\n",
+ "for i, col in enumerate(df_elevators.columns):\n",
+ " print(f\" {i+1}. {col}\")\n",
+ "print(f\"\\nData types:\")\n",
+ "print(df_elevators.dtypes)\n",
+ "print(f\"\\nFirst few rows:\")\n",
+ "df_elevators.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Target variable: Goal\n",
+ "Predictors (8): ['climbRate', 'Sgz', 'curRoll', 'absRoll', 'SaTime1', 'SaTime2', 'SaTime3', 'SaTime4']\n"
+ ]
+ }
+ ],
+ "source": [
+ "elevators_target = \"Goal\" # Target variable to impute\n",
+ "elevators_predictors = [\"climbRate\", \"Sgz\", \"curRoll\", \"absRoll\", \"SaTime1\", \"SaTime2\", \"SaTime3\", \"SaTime4\"]\n",
+ "\n",
+ "print(f\"Target variable: {elevators_target}\")\n",
+ "print(f\"Predictors ({len(elevators_predictors)}): {elevators_predictors}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing predictor correlations...\n",
+ "\n",
+ "--- Pearson Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " climbRate | \n",
+ " Sgz | \n",
+ " curRoll | \n",
+ " absRoll | \n",
+ " SaTime1 | \n",
+ " SaTime2 | \n",
+ " SaTime3 | \n",
+ " SaTime4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | climbRate | \n",
+ " 1.000000 | \n",
+ " -0.013900 | \n",
+ " -0.146812 | \n",
+ " -0.023482 | \n",
+ " 0.094790 | \n",
+ " 0.104038 | \n",
+ " 0.104072 | \n",
+ " 0.107007 | \n",
+ "
\n",
+ " \n",
+ " | Sgz | \n",
+ " -0.013900 | \n",
+ " 1.000000 | \n",
+ " 0.017043 | \n",
+ " 0.077119 | \n",
+ " 0.062284 | \n",
+ " 0.070309 | \n",
+ " 0.070308 | \n",
+ " 0.076198 | \n",
+ "
\n",
+ " \n",
+ " | curRoll | \n",
+ " -0.146812 | \n",
+ " 0.017043 | \n",
+ " 1.000000 | \n",
+ " 0.194465 | \n",
+ " -0.230882 | \n",
+ " -0.227539 | \n",
+ " -0.227519 | \n",
+ " -0.224023 | \n",
+ "
\n",
+ " \n",
+ " | absRoll | \n",
+ " -0.023482 | \n",
+ " 0.077119 | \n",
+ " 0.194465 | \n",
+ " 1.000000 | \n",
+ " 0.683132 | \n",
+ " 0.678935 | \n",
+ " 0.678954 | \n",
+ " 0.678385 | \n",
+ "
\n",
+ " \n",
+ " | SaTime1 | \n",
+ " 0.094790 | \n",
+ " 0.062284 | \n",
+ " -0.230882 | \n",
+ " 0.683132 | \n",
+ " 1.000000 | \n",
+ " 0.988001 | \n",
+ " 0.987995 | \n",
+ " 0.976332 | \n",
+ "
\n",
+ " \n",
+ " | SaTime2 | \n",
+ " 0.104038 | \n",
+ " 0.070309 | \n",
+ " -0.227539 | \n",
+ " 0.678935 | \n",
+ " 0.988001 | \n",
+ " 1.000000 | \n",
+ " 0.999991 | \n",
+ " 0.994399 | \n",
+ "
\n",
+ " \n",
+ " | SaTime3 | \n",
+ " 0.104072 | \n",
+ " 0.070308 | \n",
+ " -0.227519 | \n",
+ " 0.678954 | \n",
+ " 0.987995 | \n",
+ " 0.999991 | \n",
+ " 1.000000 | \n",
+ " 0.994408 | \n",
+ "
\n",
+ " \n",
+ " | SaTime4 | \n",
+ " 0.107007 | \n",
+ " 0.076198 | \n",
+ " -0.224023 | \n",
+ " 0.678385 | \n",
+ " 0.976332 | \n",
+ " 0.994399 | \n",
+ " 0.994408 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " climbRate Sgz curRoll absRoll SaTime1 SaTime2 \\\n",
+ "climbRate 1.000000 -0.013900 -0.146812 -0.023482 0.094790 0.104038 \n",
+ "Sgz -0.013900 1.000000 0.017043 0.077119 0.062284 0.070309 \n",
+ "curRoll -0.146812 0.017043 1.000000 0.194465 -0.230882 -0.227539 \n",
+ "absRoll -0.023482 0.077119 0.194465 1.000000 0.683132 0.678935 \n",
+ "SaTime1 0.094790 0.062284 -0.230882 0.683132 1.000000 0.988001 \n",
+ "SaTime2 0.104038 0.070309 -0.227539 0.678935 0.988001 1.000000 \n",
+ "SaTime3 0.104072 0.070308 -0.227519 0.678954 0.987995 0.999991 \n",
+ "SaTime4 0.107007 0.076198 -0.224023 0.678385 0.976332 0.994399 \n",
+ "\n",
+ " SaTime3 SaTime4 \n",
+ "climbRate 0.104072 0.107007 \n",
+ "Sgz 0.070308 0.076198 \n",
+ "curRoll -0.227519 -0.224023 \n",
+ "absRoll 0.678954 0.678385 \n",
+ "SaTime1 0.987995 0.976332 \n",
+ "SaTime2 0.999991 0.994399 \n",
+ "SaTime3 1.000000 0.994408 \n",
+ "SaTime4 0.994408 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Spearman Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " climbRate | \n",
+ " Sgz | \n",
+ " curRoll | \n",
+ " absRoll | \n",
+ " SaTime1 | \n",
+ " SaTime2 | \n",
+ " SaTime3 | \n",
+ " SaTime4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | climbRate | \n",
+ " 1.000000 | \n",
+ " -0.017436 | \n",
+ " -0.147546 | \n",
+ " -0.019983 | \n",
+ " 0.118637 | \n",
+ " 0.127793 | \n",
+ " 0.127798 | \n",
+ " 0.131181 | \n",
+ "
\n",
+ " \n",
+ " | Sgz | \n",
+ " -0.017436 | \n",
+ " 1.000000 | \n",
+ " 0.014349 | \n",
+ " 0.088700 | \n",
+ " 0.056970 | \n",
+ " 0.065403 | \n",
+ " 0.065405 | \n",
+ " 0.071326 | \n",
+ "
\n",
+ " \n",
+ " | curRoll | \n",
+ " -0.147546 | \n",
+ " 0.014349 | \n",
+ " 1.000000 | \n",
+ " 0.196730 | \n",
+ " -0.257374 | \n",
+ " -0.252452 | \n",
+ " -0.252451 | \n",
+ " -0.247426 | \n",
+ "
\n",
+ " \n",
+ " | absRoll | \n",
+ " -0.019983 | \n",
+ " 0.088700 | \n",
+ " 0.196730 | \n",
+ " 1.000000 | \n",
+ " 0.706120 | \n",
+ " 0.700693 | \n",
+ " 0.700695 | \n",
+ " 0.699823 | \n",
+ "
\n",
+ " \n",
+ " | SaTime1 | \n",
+ " 0.118637 | \n",
+ " 0.056970 | \n",
+ " -0.257374 | \n",
+ " 0.706120 | \n",
+ " 1.000000 | \n",
+ " 0.986084 | \n",
+ " 0.986084 | \n",
+ " 0.974161 | \n",
+ "
\n",
+ " \n",
+ " | SaTime2 | \n",
+ " 0.127793 | \n",
+ " 0.065403 | \n",
+ " -0.252452 | \n",
+ " 0.700693 | \n",
+ " 0.986084 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.993382 | \n",
+ "
\n",
+ " \n",
+ " | SaTime3 | \n",
+ " 0.127798 | \n",
+ " 0.065405 | \n",
+ " -0.252451 | \n",
+ " 0.700695 | \n",
+ " 0.986084 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.993382 | \n",
+ "
\n",
+ " \n",
+ " | SaTime4 | \n",
+ " 0.131181 | \n",
+ " 0.071326 | \n",
+ " -0.247426 | \n",
+ " 0.699823 | \n",
+ " 0.974161 | \n",
+ " 0.993382 | \n",
+ " 0.993382 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " climbRate Sgz curRoll absRoll SaTime1 SaTime2 \\\n",
+ "climbRate 1.000000 -0.017436 -0.147546 -0.019983 0.118637 0.127793 \n",
+ "Sgz -0.017436 1.000000 0.014349 0.088700 0.056970 0.065403 \n",
+ "curRoll -0.147546 0.014349 1.000000 0.196730 -0.257374 -0.252452 \n",
+ "absRoll -0.019983 0.088700 0.196730 1.000000 0.706120 0.700693 \n",
+ "SaTime1 0.118637 0.056970 -0.257374 0.706120 1.000000 0.986084 \n",
+ "SaTime2 0.127793 0.065403 -0.252452 0.700693 0.986084 1.000000 \n",
+ "SaTime3 0.127798 0.065405 -0.252451 0.700695 0.986084 1.000000 \n",
+ "SaTime4 0.131181 0.071326 -0.247426 0.699823 0.974161 0.993382 \n",
+ "\n",
+ " SaTime3 SaTime4 \n",
+ "climbRate 0.127798 0.131181 \n",
+ "Sgz 0.065405 0.071326 \n",
+ "curRoll -0.252451 -0.247426 \n",
+ "absRoll 0.700695 0.699823 \n",
+ "SaTime1 0.986084 0.974161 \n",
+ "SaTime2 1.000000 0.993382 \n",
+ "SaTime3 1.000000 0.993382 \n",
+ "SaTime4 0.993382 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Mutual Information Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " climbRate | \n",
+ " Sgz | \n",
+ " curRoll | \n",
+ " absRoll | \n",
+ " SaTime1 | \n",
+ " SaTime2 | \n",
+ " SaTime3 | \n",
+ " SaTime4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | climbRate | \n",
+ " 1.000000 | \n",
+ " 0.001757 | \n",
+ " 0.001254 | \n",
+ " 0.002738 | \n",
+ " 0.001881 | \n",
+ " 0.004853 | \n",
+ " 0.004776 | \n",
+ " 0.006018 | \n",
+ "
\n",
+ " \n",
+ " | Sgz | \n",
+ " 0.001757 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 0.004338 | \n",
+ " 0.003036 | \n",
+ " 0.003655 | \n",
+ " 0.003656 | \n",
+ " 0.004861 | \n",
+ "
\n",
+ " \n",
+ " | curRoll | \n",
+ " 0.001254 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 0.053293 | \n",
+ " 0.013725 | \n",
+ " 0.013256 | \n",
+ " 0.013214 | \n",
+ " 0.012820 | \n",
+ "
\n",
+ " \n",
+ " | absRoll | \n",
+ " 0.002738 | \n",
+ " 0.004338 | \n",
+ " 0.053293 | \n",
+ " 1.000000 | \n",
+ " 0.095271 | \n",
+ " 0.093617 | \n",
+ " 0.093553 | \n",
+ " 0.094038 | \n",
+ "
\n",
+ " \n",
+ " | SaTime1 | \n",
+ " 0.001881 | \n",
+ " 0.003036 | \n",
+ " 0.013725 | \n",
+ " 0.095271 | \n",
+ " 1.000000 | \n",
+ " 0.505716 | \n",
+ " 0.505646 | \n",
+ " 0.458947 | \n",
+ "
\n",
+ " \n",
+ " | SaTime2 | \n",
+ " 0.004853 | \n",
+ " 0.003655 | \n",
+ " 0.013256 | \n",
+ " 0.093617 | \n",
+ " 0.505716 | \n",
+ " 1.000000 | \n",
+ " 0.691706 | \n",
+ " 0.584177 | \n",
+ "
\n",
+ " \n",
+ " | SaTime3 | \n",
+ " 0.004776 | \n",
+ " 0.003656 | \n",
+ " 0.013214 | \n",
+ " 0.093553 | \n",
+ " 0.505646 | \n",
+ " 0.691706 | \n",
+ " 1.000000 | \n",
+ " 0.584307 | \n",
+ "
\n",
+ " \n",
+ " | SaTime4 | \n",
+ " 0.006018 | \n",
+ " 0.004861 | \n",
+ " 0.012820 | \n",
+ " 0.094038 | \n",
+ " 0.458947 | \n",
+ " 0.584177 | \n",
+ " 0.584307 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " climbRate Sgz curRoll absRoll SaTime1 SaTime2 \\\n",
+ "climbRate 1.000000 0.001757 0.001254 0.002738 0.001881 0.004853 \n",
+ "Sgz 0.001757 1.000000 0.000000 0.004338 0.003036 0.003655 \n",
+ "curRoll 0.001254 0.000000 1.000000 0.053293 0.013725 0.013256 \n",
+ "absRoll 0.002738 0.004338 0.053293 1.000000 0.095271 0.093617 \n",
+ "SaTime1 0.001881 0.003036 0.013725 0.095271 1.000000 0.505716 \n",
+ "SaTime2 0.004853 0.003655 0.013256 0.093617 0.505716 1.000000 \n",
+ "SaTime3 0.004776 0.003656 0.013214 0.093553 0.505646 0.691706 \n",
+ "SaTime4 0.006018 0.004861 0.012820 0.094038 0.458947 0.584177 \n",
+ "\n",
+ " SaTime3 SaTime4 \n",
+ "climbRate 0.004776 0.006018 \n",
+ "Sgz 0.003656 0.004861 \n",
+ "curRoll 0.013214 0.012820 \n",
+ "absRoll 0.093553 0.094038 \n",
+ "SaTime1 0.505646 0.458947 \n",
+ "SaTime2 0.691706 0.584177 \n",
+ "SaTime3 1.000000 0.584307 \n",
+ "SaTime4 0.584307 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Predictor-Target Mutual Information ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Goal | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | SaTime1 | \n",
+ " 0.055697 | \n",
+ "
\n",
+ " \n",
+ " | SaTime3 | \n",
+ " 0.048387 | \n",
+ "
\n",
+ " \n",
+ " | SaTime2 | \n",
+ " 0.048359 | \n",
+ "
\n",
+ " \n",
+ " | SaTime4 | \n",
+ " 0.047626 | \n",
+ "
\n",
+ " \n",
+ " | absRoll | \n",
+ " 0.025873 | \n",
+ "
\n",
+ " \n",
+ " | climbRate | \n",
+ " 0.013537 | \n",
+ "
\n",
+ " \n",
+ " | Sgz | \n",
+ " 0.005171 | \n",
+ "
\n",
+ " \n",
+ " | curRoll | \n",
+ " 0.000220 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Goal\n",
+ "SaTime1 0.055697\n",
+ "SaTime3 0.048387\n",
+ "SaTime2 0.048359\n",
+ "SaTime4 0.047626\n",
+ "absRoll 0.025873\n",
+ "climbRate 0.013537\n",
+ "Sgz 0.005171\n",
+ "curRoll 0.000220"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Computing predictor correlations...\")\n",
+ "elevators_correlations = compute_predictor_correlations(\n",
+ " data=df_elevators,\n",
+ " predictors=elevators_predictors,\n",
+ " imputed_variables=[elevators_target],\n",
+ " method=\"all\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Pearson Correlation Matrix ---\")\n",
+ "display(elevators_correlations[\"pearson\"])\n",
+ "\n",
+ "print(\"\\n--- Spearman Correlation Matrix ---\")\n",
+ "display(elevators_correlations[\"spearman\"])\n",
+ "\n",
+ "print(\"\\n--- Mutual Information Matrix ---\")\n",
+ "display(elevators_correlations[\"mutual_info\"])\n",
+ "\n",
+ "print(\"\\n--- Predictor-Target Mutual Information ---\")\n",
+ "display(elevators_correlations[\"predictor_target_mi\"].sort_values(\n",
+ " by=elevators_target, ascending=False\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running leave-one-out analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a8796c1175244bfe8b0d41bed1cc6609",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Leave-one-out analysis: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Leave-One-Out Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " predictor_removed | \n",
+ " avg_quantile_loss | \n",
+ " avg_log_loss | \n",
+ " loss_increase | \n",
+ " relative_impact | \n",
+ " baseline_quantile_loss | \n",
+ " baseline_log_loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 3 | \n",
+ " absRoll | \n",
+ " 0.001463 | \n",
+ " 0 | \n",
+ " 1.562464e-04 | \n",
+ " 11.955247 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " curRoll | \n",
+ " 0.001458 | \n",
+ " 0 | \n",
+ " 1.509115e-04 | \n",
+ " 11.547046 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " climbRate | \n",
+ " 0.001414 | \n",
+ " 0 | \n",
+ " 1.066808e-04 | \n",
+ " 8.162716 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Sgz | \n",
+ " 0.001355 | \n",
+ " 0 | \n",
+ " 4.820981e-05 | \n",
+ " 3.688790 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " SaTime1 | \n",
+ " 0.001331 | \n",
+ " 0 | \n",
+ " 2.440591e-05 | \n",
+ " 1.867426 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " SaTime4 | \n",
+ " 0.001314 | \n",
+ " 0 | \n",
+ " 7.272511e-06 | \n",
+ " 0.556459 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " SaTime3 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ " -6.777108e-08 | \n",
+ " -0.005186 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " SaTime2 | \n",
+ " 0.001306 | \n",
+ " 0 | \n",
+ " -5.084813e-07 | \n",
+ " -0.038907 | \n",
+ " 0.001307 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " predictor_removed avg_quantile_loss avg_log_loss loss_increase \\\n",
+ "3 absRoll 0.001463 0 1.562464e-04 \n",
+ "2 curRoll 0.001458 0 1.509115e-04 \n",
+ "0 climbRate 0.001414 0 1.066808e-04 \n",
+ "1 Sgz 0.001355 0 4.820981e-05 \n",
+ "4 SaTime1 0.001331 0 2.440591e-05 \n",
+ "7 SaTime4 0.001314 0 7.272511e-06 \n",
+ "6 SaTime3 0.001307 0 -6.777108e-08 \n",
+ "5 SaTime2 0.001306 0 -5.084813e-07 \n",
+ "\n",
+ " relative_impact baseline_quantile_loss baseline_log_loss \n",
+ "3 11.955247 0.001307 0 \n",
+ "2 11.547046 0.001307 0 \n",
+ "0 8.162716 0.001307 0 \n",
+ "1 3.688790 0.001307 0 \n",
+ "4 1.867426 0.001307 0 \n",
+ "7 0.556459 0.001307 0 \n",
+ "6 -0.005186 0.001307 0 \n",
+ "5 -0.038907 0.001307 0 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Running leave-one-out analysis...\")\n",
+ "elevators_loo = leave_one_out_analysis(\n",
+ " data=df_elevators,\n",
+ " predictors=elevators_predictors,\n",
+ " imputed_variables=[elevators_target],\n",
+ " model_class=QRF,\n",
+ " train_size=0.6,\n",
+ " n_jobs=1\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Leave-One-Out Results ---\")\n",
+ "display(elevators_loo)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating 60/40 donor/receiver split...\n",
+ "Donor size: 9959\n",
+ "Receiver size: 6640\n",
+ "\n",
+ "Running autoimpute...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d1202b9e6f5943fb9588f3ff0ef0bc42",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "AutoImputation progress: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a7960940c02b41acba5c5608d4e40cd3",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Evaluating models: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 8.2s remaining: 12.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 8.2s remaining: 5.5s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 8.3s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.4s remaining: 0.6s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 0.4s remaining: 0.2s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.6s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 1.6s remaining: 2.4s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 1.7s remaining: 1.2s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 2.0s finished\n",
+ "[Parallel(n_jobs=1)]: Done 1 tasks | elapsed: 8.6s\n",
+ "[Parallel(n_jobs=1)]: Done 4 tasks | elapsed: 29.4s\n",
+ "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 37.1s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 7.4s remaining: 11.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 7.5s remaining: 5.0s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 7.6s finished\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.000007279832919092497,
+ 0.00002946086781168022,
+ 0.000017680015605553013,
+ 0.00003260064726269813,
+ 0.00003557650694753496,
+ 0.000019613045828998836,
+ 0.00001814932280261359,
+ 0.000028694976305096084,
+ 0.000046368533212824535,
+ 0.000024293989296681853,
+ 0.00003541115170254013,
+ 0.00002710084542648461,
+ 0.000031513500876534857,
+ 0.000020319534850442018,
+ 0.000021683408229124396,
+ 0.000027385567802449857,
+ 0.00002121114757429098,
+ 0.000024668014215883887,
+ 0.00001862200308553563
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.00041733075698071036,
+ 0.0006945274770604267,
+ 0.0009126544374383521,
+ 0.0011107153879203403,
+ 0.0012569050309727105,
+ 0.0014289171149691683,
+ 0.0015593428510627136,
+ 0.0016715553625854499,
+ 0.0017485400744111554,
+ 0.0018047005450228843,
+ 0.0017793039233780928,
+ 0.0017537879897288806,
+ 0.001709326862951555,
+ 0.001633510137990435,
+ 0.0015007547013770807,
+ 0.0013647536706343202,
+ 0.0011642097218608235,
+ 0.0009397450424500616,
+ 0.0006514380374335112
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.00001016377262637028,
+ 0.000016322284753405822,
+ 0.000018987350280795512,
+ 0.00001816726623433957,
+ 0.000018542001529198758,
+ 0.000020704097797901034,
+ 0.000021673718592615825,
+ 0.000023117582879046623,
+ 0.000024665445964902992,
+ 0.00002570622785701564,
+ 0.00002791406542225051,
+ 0.000033108083242520206,
+ 0.00003834331900962756,
+ 0.00004268819946188325,
+ 0.00004613676606862346,
+ 0.00004734341041703108,
+ 0.00004705324688034628,
+ 0.000044173751641560596,
+ 0.000041323795815408205
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.00042523916093980705,
+ 0.0007002687973446931,
+ 0.0009127106131948651,
+ 0.0010828208840845696,
+ 0.0012231928812307582,
+ 0.0013408541000878575,
+ 0.0014416738271257018,
+ 0.0015249423539199505,
+ 0.0015901556236961128,
+ 0.0016398446421357926,
+ 0.001671536236653303,
+ 0.0016807631390262927,
+ 0.0016638363311461753,
+ 0.0016178087853864383,
+ 0.001536460577821025,
+ 0.0014138034451574141,
+ 0.0012402417531022125,
+ 0.0009999176374546677,
+ 0.0006625295568369386
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.000010084322371820636,
+ 0.0000159273685916936,
+ 0.000020316394108550417,
+ 0.00002502702016488793,
+ 0.00002952830318623912,
+ 0.00003181429293905948,
+ 0.00003323814954964788,
+ 0.00003455508726395601,
+ 0.00003729659692395035,
+ 0.0000396063558108858,
+ 0.000042004792443265195,
+ 0.0000435656599070157,
+ 0.000043505840286401174,
+ 0.000044223567391292146,
+ 0.00004437533549716239,
+ 0.00004081007347614774,
+ 0.00003288217809198435,
+ 0.000027547328921898425,
+ 0.000021850959006030572
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.0002817784169756737,
+ 0.0005113577150345891,
+ 0.0007144241132768496,
+ 0.0008961487871232647,
+ 0.0010595183080782506,
+ 0.0012034494757487118,
+ 0.0013292688760591382,
+ 0.0014318493580148816,
+ 0.0015135825366891982,
+ 0.0015732220659344747,
+ 0.001607286011995681,
+ 0.001617206960286656,
+ 0.0015990604549444656,
+ 0.0015507169047136707,
+ 0.001469917186888279,
+ 0.0013516167592724619,
+ 0.0011785829817773212,
+ 0.0009424738375426881,
+ 0.0006030690360714073
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.00016461815990283907,
+ 0.00015311689125606818,
+ 0.00014182058836153944,
+ 0.00013078237364288925,
+ 0.00012007344546271649,
+ 0.00010979020189466915,
+ 0.0001000639677207584,
+ 0.00009107337655503052,
+ 0.00008305766235220195,
+ 0.0000763245944034747,
+ 0.00007123879016001055,
+ 0.00006816993335212892,
+ 0.00006739412051633259,
+ 0.00006898875433192543,
+ 0.0000727982283131681,
+ 0.00007850075760122417,
+ 0.00008571936140429814,
+ 0.00009410580201198965,
+ 0.00010337624534796863
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.0025563195070588732,
+ 0.0025649158260364416,
+ 0.0025735121450140086,
+ 0.0025821084639915766,
+ 0.0025907047829691445,
+ 0.0025993011019467115,
+ 0.00260789742092428,
+ 0.002616493739901848,
+ 0.0026250900588794153,
+ 0.002633686377856983,
+ 0.0026422826968345507,
+ 0.0026508790158121186,
+ 0.0026594753347896857,
+ 0.002668071653767254,
+ 0.002676667972744822,
+ 0.0026852642917223894,
+ 0.0026938606106999574,
+ 0.002702456929677525,
+ 0.002711053248655093
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.012748900086031252,
+ 0.01846694746217978,
+ 0.02083981882861257,
+ 0.02216094526331568,
+ 0.021535820349572767,
+ 0.020391680612038707,
+ 0.019511224717398017,
+ 0.01767458689932801,
+ 0.017130388858639684,
+ 0.015366509392069976,
+ 0.012751788597809451,
+ 0.010685870542255928,
+ 0.009577198397340676,
+ 0.008226655534004472,
+ 0.006859804131544297,
+ 0.005400338302880356,
+ 0.0047259279217197695,
+ 0.0032804109515719234,
+ 0.002846915284857999
+ ]
+ },
+ "hovertemplate": "Method=MDN
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "MDN",
+ "marker": {
+ "color": "#332288",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "MDN",
+ "offsetgroup": "MDN",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.012602380012443976,
+ 0.016513906298007766,
+ 0.018324844300578954,
+ 0.019283949361179616,
+ 0.01913778648904192,
+ 0.01832900282100227,
+ 0.018181329483216564,
+ 0.017506212654085765,
+ 0.017624749494014276,
+ 0.016933351144075036,
+ 0.01615656918312625,
+ 0.015366839451844794,
+ 0.014958599544623951,
+ 0.014316459444683854,
+ 0.013600495835174111,
+ 0.012506342483222849,
+ 0.01161429740353466,
+ 0.009405304824591973,
+ 0.00668313134903145
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0013211589013804564,
+ "y1": 0.0013211589013804564
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0012825579129655041,
+ "y1": 0.0012825579129655041
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0011807647256014558,
+ "y1": 0.0011807647256014558
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0026336863778569828,
+ "y1": 0.0026336863778569828
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "MDN Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.015212923767235787,
+ "y1": 0.015212923767235787
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "Elevators Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Creating 60/40 donor/receiver split...\")\n",
+ "elevators_donor, elevators_receiver = train_test_split(\n",
+ " df_elevators, train_size=0.6, random_state=42\n",
+ ")\n",
+ "\n",
+ "elevators_receiver_no_target = elevators_receiver.drop(columns=[elevators_target])\n",
+ "\n",
+ "print(f\"Donor size: {len(elevators_donor)}\")\n",
+ "print(f\"Receiver size: {len(elevators_receiver)}\")\n",
+ "\n",
+ "print(\"\\nRunning autoimpute...\")\n",
+ "elevators_result = autoimpute(\n",
+ " donor_data=elevators_donor,\n",
+ " receiver_data=elevators_receiver_no_target.copy(),\n",
+ " predictors=elevators_predictors,\n",
+ " imputed_variables=[elevators_target],\n",
+ " impute_all=True,\n",
+ " log_level=\"INFO\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Results ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=elevators_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"Elevators Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Results (Zoomed In Without MDN) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.000007279832919092497,
+ 0.00002946086781168022,
+ 0.000017680015605553013,
+ 0.00003260064726269813,
+ 0.00003557650694753496,
+ 0.000019613045828998836,
+ 0.00001814932280261359,
+ 0.000028694976305096084,
+ 0.000046368533212824535,
+ 0.000024293989296681853,
+ 0.00003541115170254013,
+ 0.00002710084542648461,
+ 0.000031513500876534857,
+ 0.000020319534850442018,
+ 0.000021683408229124396,
+ 0.000027385567802449857,
+ 0.00002121114757429098,
+ 0.000024668014215883887,
+ 0.00001862200308553563
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.00041733075698071036,
+ 0.0006945274770604267,
+ 0.0009126544374383521,
+ 0.0011107153879203403,
+ 0.0012569050309727105,
+ 0.0014289171149691683,
+ 0.0015593428510627136,
+ 0.0016715553625854499,
+ 0.0017485400744111554,
+ 0.0018047005450228843,
+ 0.0017793039233780928,
+ 0.0017537879897288806,
+ 0.001709326862951555,
+ 0.001633510137990435,
+ 0.0015007547013770807,
+ 0.0013647536706343202,
+ 0.0011642097218608235,
+ 0.0009397450424500616,
+ 0.0006514380374335112
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.00001016377262637028,
+ 0.000016322284753405822,
+ 0.000018987350280795512,
+ 0.00001816726623433957,
+ 0.000018542001529198758,
+ 0.000020704097797901034,
+ 0.000021673718592615825,
+ 0.000023117582879046623,
+ 0.000024665445964902992,
+ 0.00002570622785701564,
+ 0.00002791406542225051,
+ 0.000033108083242520206,
+ 0.00003834331900962756,
+ 0.00004268819946188325,
+ 0.00004613676606862346,
+ 0.00004734341041703108,
+ 0.00004705324688034628,
+ 0.000044173751641560596,
+ 0.000041323795815408205
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.00042523916093980705,
+ 0.0007002687973446931,
+ 0.0009127106131948651,
+ 0.0010828208840845696,
+ 0.0012231928812307582,
+ 0.0013408541000878575,
+ 0.0014416738271257018,
+ 0.0015249423539199505,
+ 0.0015901556236961128,
+ 0.0016398446421357926,
+ 0.001671536236653303,
+ 0.0016807631390262927,
+ 0.0016638363311461753,
+ 0.0016178087853864383,
+ 0.001536460577821025,
+ 0.0014138034451574141,
+ 0.0012402417531022125,
+ 0.0009999176374546677,
+ 0.0006625295568369386
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.000010084322371820636,
+ 0.0000159273685916936,
+ 0.000020316394108550417,
+ 0.00002502702016488793,
+ 0.00002952830318623912,
+ 0.00003181429293905948,
+ 0.00003323814954964788,
+ 0.00003455508726395601,
+ 0.00003729659692395035,
+ 0.0000396063558108858,
+ 0.000042004792443265195,
+ 0.0000435656599070157,
+ 0.000043505840286401174,
+ 0.000044223567391292146,
+ 0.00004437533549716239,
+ 0.00004081007347614774,
+ 0.00003288217809198435,
+ 0.000027547328921898425,
+ 0.000021850959006030572
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.0002817784169756737,
+ 0.0005113577150345891,
+ 0.0007144241132768496,
+ 0.0008961487871232647,
+ 0.0010595183080782506,
+ 0.0012034494757487118,
+ 0.0013292688760591382,
+ 0.0014318493580148816,
+ 0.0015135825366891982,
+ 0.0015732220659344747,
+ 0.001607286011995681,
+ 0.001617206960286656,
+ 0.0015990604549444656,
+ 0.0015507169047136707,
+ 0.001469917186888279,
+ 0.0013516167592724619,
+ 0.0011785829817773212,
+ 0.0009424738375426881,
+ 0.0006030690360714073
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.00016461815990283907,
+ 0.00015311689125606818,
+ 0.00014182058836153944,
+ 0.00013078237364288925,
+ 0.00012007344546271649,
+ 0.00010979020189466915,
+ 0.0001000639677207584,
+ 0.00009107337655503052,
+ 0.00008305766235220195,
+ 0.0000763245944034747,
+ 0.00007123879016001055,
+ 0.00006816993335212892,
+ 0.00006739412051633259,
+ 0.00006898875433192543,
+ 0.0000727982283131681,
+ 0.00007850075760122417,
+ 0.00008571936140429814,
+ 0.00009410580201198965,
+ 0.00010337624534796863
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.0025563195070588732,
+ 0.0025649158260364416,
+ 0.0025735121450140086,
+ 0.0025821084639915766,
+ 0.0025907047829691445,
+ 0.0025993011019467115,
+ 0.00260789742092428,
+ 0.002616493739901848,
+ 0.0026250900588794153,
+ 0.002633686377856983,
+ 0.0026422826968345507,
+ 0.0026508790158121186,
+ 0.0026594753347896857,
+ 0.002668071653767254,
+ 0.002676667972744822,
+ 0.0026852642917223894,
+ 0.0026938606106999574,
+ 0.002702456929677525,
+ 0.002711053248655093
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0013211589013804564,
+ "y1": 0.0013211589013804564
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0012825579129655041,
+ "y1": 0.0012825579129655041
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0011807647256014558,
+ "y1": 0.0011807647256014558
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0026336863778569828,
+ "y1": 0.0026336863778569828
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "Elevators Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "elevators_result.cv_results.pop('MDN', None)\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Results (Zoomed In Without MDN) ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=elevators_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"Elevators Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Distribution Comparison: Imputed vs Ground Truth (Elevators) ---\n",
+ "QuantReg: Wasserstein Distance = 0.0015\n",
+ "QRF: Wasserstein Distance = 0.0003\n",
+ "OLS: Wasserstein Distance = 0.0014\n",
+ "Matching: Wasserstein Distance = 0.0002\n",
+ "MDN: Wasserstein Distance = 0.0200\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"\\n--- Distribution Comparison: Imputed vs Ground Truth (Elevators) ---\")\n",
+ "# Store Wasserstein distances for all methods\n",
+ "elevators_wasserstein = {}\n",
+ "\n",
+ "distribution_comparison_elevators = compare_distributions(\n",
+ " donor_data=elevators_receiver, # Ground truth test split\n",
+ " receiver_data=elevators_result.receiver_data, # Imputed values\n",
+ " imputed_variables=[elevators_target],\n",
+ ")\n",
+ "best_method_name = elevators_result.fitted_models[\"best_method\"].__class__.__name__ \n",
+ "best_method_name = best_method_name.replace(\"Results\", \"\")\n",
+ "elevators_wasserstein[best_method_name] = distribution_comparison_elevators[\n",
+ " distribution_comparison_elevators['Metric'] == 'wasserstein_distance'\n",
+ "]['Distance'].values[0]\n",
+ "print(f\"{best_method_name}: Wasserstein Distance = {elevators_wasserstein.get(best_method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "for method_name, imputations in elevators_result.imputations.items():\n",
+ " # Skip the 'best_method' (it's a duplicate)\n",
+ " if method_name == 'best_method': \n",
+ " continue\n",
+ " \n",
+ " # Create a copy of receiver data with this method's imputations \n",
+ " receiver_with_imputations = elevators_receiver_no_target.copy() \n",
+ " \n",
+ " # Handle both dict (quantile->DataFrame) and DataFrame formats \n",
+ " if isinstance(imputations, dict): \n",
+ " # Get median quantile (0.5) imputations \n",
+ " imp_df = imputations.get(0.5, list(imputations.values())[0]) \n",
+ " else: \n",
+ " imp_df = imputations\n",
+ " \n",
+ " # Add imputed values \n",
+ " for var in [elevators_target]:\n",
+ " if var in imp_df.columns: \n",
+ " receiver_with_imputations[var] = imp_df[var].values \n",
+ " \n",
+ " # Calculate distribution comparison \n",
+ " dist_comparison = compare_distributions( \n",
+ " donor_data=elevators_receiver, # Ground truth \n",
+ " receiver_data=receiver_with_imputations,\n",
+ " imputed_variables=[elevators_target], \n",
+ " )\n",
+ "\n",
+ " # Extract Wasserstein distance \n",
+ " wd = dist_comparison[dist_comparison['Metric'] == 'wasserstein_distance']['Distance'].values \n",
+ " elevators_wasserstein[method_name] = wd[0]\n",
+ " \n",
+ " print(f\"{method_name}: Wasserstein Distance = {elevators_wasserstein.get(method_name, 'N/A'):.4f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CIA Sensitivity Analysis\n",
+ "\n",
+ "Measuring sensitivity to the Conditional Independence Assumption by progressively removing predictors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running CIA sensitivity analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a42e0008daff49d3a8df67366598ebb1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QRF sensitivity score: 1.293\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4a9c722bd09f45cfa86009ac0e5e9a66",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " OLS sensitivity score: 1.180\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f323a02b5af44d73bb05567f580e54cc",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QuantReg sensitivity score: 1.174\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c2c1ffac637d4fc68371423309dd3b11",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Matching sensitivity score: 1.050\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8e23b7dbc08d4d9ca6d156ffb8a3ca1d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " MDN sensitivity score: 2.055\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "width": 2
+ },
+ "marker": {
+ "color": "#88CCEE",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QRF (baseline=0.0013)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.0012763855421686746,
+ 0.0012834036144578314,
+ 0.001284433734939759,
+ 0.0012911746987951808,
+ 0.001779144578313253,
+ 0.0020770542168674693,
+ 0.002232885542168675,
+ 0.001930421686746988
+ ]
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "width": 2
+ },
+ "marker": {
+ "color": "#CC6677",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "OLS (baseline=0.0013)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.0012500624185658913,
+ 0.0012790174353942765,
+ 0.0012791453105987344,
+ 0.0012995060752243423,
+ 0.0015676697180221847,
+ 0.0016981605659520615,
+ 0.0017195681477192664,
+ 0.0017193279810154838
+ ]
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "width": 2
+ },
+ "marker": {
+ "color": "#DDCC77",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QuantReg (baseline=0.0011)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.0011327409962583422,
+ 0.00115436848288294,
+ 0.0011543742509937177,
+ 0.0011703123646945586,
+ 0.0013811625011721685,
+ 0.0015298947409129993,
+ 0.0015657802624936227,
+ 0.0015718894935134787
+ ]
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "width": 2
+ },
+ "marker": {
+ "color": "#117733",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "Matching (baseline=0.0026)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.0026465361445783134,
+ 0.002648192771084337,
+ 0.002646385542168675,
+ 0.0026460843373493975,
+ 0.0026566265060240964,
+ 0.0029864457831325305,
+ 0.003002710843373494,
+ 0.003099096385542169
+ ]
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "width": 2
+ },
+ "marker": {
+ "color": "#332288",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "MDN (baseline=0.0092)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.009209963862038689,
+ 0.0047474434456332235,
+ 0.01281756774463437,
+ 0.0017777999726410238,
+ 0.005320881888823777,
+ 0.03880265320018465,
+ 0.045545391523959186,
+ 0.037742739286400165
+ ]
+ }
+ ],
+ "layout": {
+ "height": 600,
+ "hovermode": "x unified",
+ "legend": {
+ "x": 0.01,
+ "xanchor": "left",
+ "y": 0.99,
+ "yanchor": "top"
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "CIA Sensitivity: Elevators"
+ },
+ "width": 750,
+ "xaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "text": "Number of Predictors Removed"
+ }
+ },
+ "yaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "text": "Quantile Loss"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Sensitivity Scores (higher = more sensitive to predictor removal):\n",
+ " QRF: 1.293\n",
+ " OLS: 1.180\n",
+ " QuantReg: 1.174\n",
+ " Matching: 1.050\n",
+ " MDN: 2.055\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Running CIA sensitivity analysis...\")\n",
+ "elevators_cia_results = run_cia_analysis_for_dataset(\n",
+ " data=df_elevators,\n",
+ " predictors=elevators_predictors,\n",
+ " imputed_variables=[elevators_target],\n",
+ " ordering=\"mutual_info\",\n",
+ ")\n",
+ "\n",
+ "# Plot degradation curves\n",
+ "elevators_cia_fig = plot_cia_degradation_curves(\n",
+ " elevators_cia_results,\n",
+ " title=\"CIA Sensitivity: Elevators\"\n",
+ ")\n",
+ "elevators_cia_fig.show()\n",
+ "\n",
+ "# Display sensitivity scores\n",
+ "print(\"\\nSensitivity Scores (higher = more sensitive to predictor removal):\")\n",
+ "for method, results in elevators_cia_results.items():\n",
+ " print(f\" {method}: {results['sensitivity_score']:.3f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## 3. Brazilian Houses Dataset\n",
+ "\n",
+ "Real estate data from Brazil with property characteristics and prices."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Shape: (10692, 13)\n",
+ "\n",
+ "Column names:\n",
+ " 1. city\n",
+ " 2. area\n",
+ " 3. rooms\n",
+ " 4. bathroom\n",
+ " 5. parking_spaces\n",
+ " 6. floor\n",
+ " 7. animal\n",
+ " 8. furniture\n",
+ " 9. hoa_(BRL)\n",
+ " 10. rent_amount_(BRL)\n",
+ " 11. property_tax_(BRL)\n",
+ " 12. fire_insurance_(BRL)\n",
+ " 13. total_(BRL)\n",
+ "\n",
+ "Data types:\n",
+ "city object\n",
+ "area int64\n",
+ "rooms int64\n",
+ "bathroom int64\n",
+ "parking_spaces int64\n",
+ "floor int64\n",
+ "animal object\n",
+ "furniture object\n",
+ "hoa_(BRL) int64\n",
+ "rent_amount_(BRL) int64\n",
+ "property_tax_(BRL) int64\n",
+ "fire_insurance_(BRL) int64\n",
+ "total_(BRL) int64\n",
+ "dtype: object\n",
+ "\n",
+ "First few rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " city | \n",
+ " area | \n",
+ " rooms | \n",
+ " bathroom | \n",
+ " parking_spaces | \n",
+ " floor | \n",
+ " animal | \n",
+ " furniture | \n",
+ " hoa_(BRL) | \n",
+ " rent_amount_(BRL) | \n",
+ " property_tax_(BRL) | \n",
+ " fire_insurance_(BRL) | \n",
+ " total_(BRL) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Sao Paulo | \n",
+ " 70 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ " acept | \n",
+ " furnished | \n",
+ " 2065 | \n",
+ " 3300 | \n",
+ " 211 | \n",
+ " 42 | \n",
+ " 5618 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Sao Paulo | \n",
+ " 320 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 20 | \n",
+ " acept | \n",
+ " not furnished | \n",
+ " 1200 | \n",
+ " 4960 | \n",
+ " 1750 | \n",
+ " 63 | \n",
+ " 7973 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Porto Alegre | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " acept | \n",
+ " not furnished | \n",
+ " 1000 | \n",
+ " 2800 | \n",
+ " 0 | \n",
+ " 41 | \n",
+ " 3841 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Porto Alegre | \n",
+ " 51 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " acept | \n",
+ " not furnished | \n",
+ " 270 | \n",
+ " 1112 | \n",
+ " 22 | \n",
+ " 17 | \n",
+ " 1421 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Sao Paulo | \n",
+ " 25 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " not acept | \n",
+ " not furnished | \n",
+ " 0 | \n",
+ " 800 | \n",
+ " 25 | \n",
+ " 11 | \n",
+ " 836 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " city area rooms bathroom parking_spaces floor animal \\\n",
+ "0 Sao Paulo 70 2 1 1 7 acept \n",
+ "1 Sao Paulo 320 4 4 0 20 acept \n",
+ "2 Porto Alegre 80 1 1 1 6 acept \n",
+ "3 Porto Alegre 51 2 1 0 2 acept \n",
+ "4 Sao Paulo 25 1 1 0 1 not acept \n",
+ "\n",
+ " furniture hoa_(BRL) rent_amount_(BRL) property_tax_(BRL) \\\n",
+ "0 furnished 2065 3300 211 \n",
+ "1 not furnished 1200 4960 1750 \n",
+ "2 not furnished 1000 2800 0 \n",
+ "3 not furnished 270 1112 22 \n",
+ "4 not furnished 0 800 25 \n",
+ "\n",
+ " fire_insurance_(BRL) total_(BRL) \n",
+ "0 42 5618 \n",
+ "1 63 7973 \n",
+ "2 41 3841 \n",
+ "3 17 1421 \n",
+ "4 11 836 "
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_brazilian = load_dataset(\"brazilian_houses\")\n",
+ "print(f\"\\nShape: {df_brazilian.shape}\")\n",
+ "print(f\"\\nColumn names:\")\n",
+ "for i, col in enumerate(df_brazilian.columns):\n",
+ " print(f\" {i+1}. {col}\")\n",
+ "print(f\"\\nData types:\")\n",
+ "print(df_brazilian.dtypes)\n",
+ "print(f\"\\nFirst few rows:\")\n",
+ "df_brazilian.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Target variable: rent_amount_(BRL)\n",
+ "Predictors (5): ['rooms', 'floor', 'hoa_(BRL)', 'property_tax_(BRL)', 'fire_insurance_(BRL)']\n"
+ ]
+ }
+ ],
+ "source": [
+ "brazilian_target = \"rent_amount_(BRL)\" # Target variable to impute\n",
+ "brazilian_predictors = [\"rooms\", \"floor\", \"hoa_(BRL)\", \"property_tax_(BRL)\", \"fire_insurance_(BRL)\"]\n",
+ "\n",
+ "print(f\"Target variable: {brazilian_target}\")\n",
+ "print(f\"Predictors ({len(brazilian_predictors)}): {brazilian_predictors}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing predictor correlations...\n",
+ "\n",
+ "--- Pearson Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rooms | \n",
+ " floor | \n",
+ " hoa_(BRL) | \n",
+ " property_tax_(BRL) | \n",
+ " fire_insurance_(BRL) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | rooms | \n",
+ " 1.000000 | \n",
+ " -0.078687 | \n",
+ " 0.007139 | \n",
+ " 0.075252 | \n",
+ " 0.565148 | \n",
+ "
\n",
+ " \n",
+ " | floor | \n",
+ " -0.078687 | \n",
+ " 1.000000 | \n",
+ " 0.019872 | \n",
+ " 0.012626 | \n",
+ " 0.013652 | \n",
+ "
\n",
+ " \n",
+ " | hoa_(BRL) | \n",
+ " 0.007139 | \n",
+ " 0.019872 | \n",
+ " 1.000000 | \n",
+ " 0.007627 | \n",
+ " 0.029535 | \n",
+ "
\n",
+ " \n",
+ " | property_tax_(BRL) | \n",
+ " 0.075252 | \n",
+ " 0.012626 | \n",
+ " 0.007627 | \n",
+ " 1.000000 | \n",
+ " 0.105661 | \n",
+ "
\n",
+ " \n",
+ " | fire_insurance_(BRL) | \n",
+ " 0.565148 | \n",
+ " 0.013652 | \n",
+ " 0.029535 | \n",
+ " 0.105661 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rooms floor hoa_(BRL) property_tax_(BRL) \\\n",
+ "rooms 1.000000 -0.078687 0.007139 0.075252 \n",
+ "floor -0.078687 1.000000 0.019872 0.012626 \n",
+ "hoa_(BRL) 0.007139 0.019872 1.000000 0.007627 \n",
+ "property_tax_(BRL) 0.075252 0.012626 0.007627 1.000000 \n",
+ "fire_insurance_(BRL) 0.565148 0.013652 0.029535 0.105661 \n",
+ "\n",
+ " fire_insurance_(BRL) \n",
+ "rooms 0.565148 \n",
+ "floor 0.013652 \n",
+ "hoa_(BRL) 0.029535 \n",
+ "property_tax_(BRL) 0.105661 \n",
+ "fire_insurance_(BRL) 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Spearman Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rooms | \n",
+ " floor | \n",
+ " hoa_(BRL) | \n",
+ " property_tax_(BRL) | \n",
+ " fire_insurance_(BRL) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | rooms | \n",
+ " 1.000000 | \n",
+ " -0.117345 | \n",
+ " 0.206182 | \n",
+ " 0.595516 | \n",
+ " 0.613788 | \n",
+ "
\n",
+ " \n",
+ " | floor | \n",
+ " -0.117345 | \n",
+ " 1.000000 | \n",
+ " 0.618250 | \n",
+ " 0.048770 | \n",
+ " -0.004572 | \n",
+ "
\n",
+ " \n",
+ " | hoa_(BRL) | \n",
+ " 0.206182 | \n",
+ " 0.618250 | \n",
+ " 1.000000 | \n",
+ " 0.392537 | \n",
+ " 0.293228 | \n",
+ "
\n",
+ " \n",
+ " | property_tax_(BRL) | \n",
+ " 0.595516 | \n",
+ " 0.048770 | \n",
+ " 0.392537 | \n",
+ " 1.000000 | \n",
+ " 0.656049 | \n",
+ "
\n",
+ " \n",
+ " | fire_insurance_(BRL) | \n",
+ " 0.613788 | \n",
+ " -0.004572 | \n",
+ " 0.293228 | \n",
+ " 0.656049 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rooms floor hoa_(BRL) property_tax_(BRL) \\\n",
+ "rooms 1.000000 -0.117345 0.206182 0.595516 \n",
+ "floor -0.117345 1.000000 0.618250 0.048770 \n",
+ "hoa_(BRL) 0.206182 0.618250 1.000000 0.392537 \n",
+ "property_tax_(BRL) 0.595516 0.048770 0.392537 1.000000 \n",
+ "fire_insurance_(BRL) 0.613788 -0.004572 0.293228 0.656049 \n",
+ "\n",
+ " fire_insurance_(BRL) \n",
+ "rooms 0.613788 \n",
+ "floor -0.004572 \n",
+ "hoa_(BRL) 0.293228 \n",
+ "property_tax_(BRL) 0.656049 \n",
+ "fire_insurance_(BRL) 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Mutual Information Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rooms | \n",
+ " floor | \n",
+ " hoa_(BRL) | \n",
+ " property_tax_(BRL) | \n",
+ " fire_insurance_(BRL) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | rooms | \n",
+ " 1.000000 | \n",
+ " 0.019668 | \n",
+ " 0.102992 | \n",
+ " 0.118588 | \n",
+ " 0.119550 | \n",
+ "
\n",
+ " \n",
+ " | floor | \n",
+ " 0.019668 | \n",
+ " 1.000000 | \n",
+ " 0.107208 | \n",
+ " 0.014203 | \n",
+ " 0.039879 | \n",
+ "
\n",
+ " \n",
+ " | hoa_(BRL) | \n",
+ " 0.102992 | \n",
+ " 0.107208 | \n",
+ " 1.000000 | \n",
+ " 0.065145 | \n",
+ " 0.066869 | \n",
+ "
\n",
+ " \n",
+ " | property_tax_(BRL) | \n",
+ " 0.118588 | \n",
+ " 0.014203 | \n",
+ " 0.065145 | \n",
+ " 1.000000 | \n",
+ " 0.063229 | \n",
+ "
\n",
+ " \n",
+ " | fire_insurance_(BRL) | \n",
+ " 0.119550 | \n",
+ " 0.039879 | \n",
+ " 0.066869 | \n",
+ " 0.063229 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rooms floor hoa_(BRL) property_tax_(BRL) \\\n",
+ "rooms 1.000000 0.019668 0.102992 0.118588 \n",
+ "floor 0.019668 1.000000 0.107208 0.014203 \n",
+ "hoa_(BRL) 0.102992 0.107208 1.000000 0.065145 \n",
+ "property_tax_(BRL) 0.118588 0.014203 0.065145 1.000000 \n",
+ "fire_insurance_(BRL) 0.119550 0.039879 0.066869 0.063229 \n",
+ "\n",
+ " fire_insurance_(BRL) \n",
+ "rooms 0.119550 \n",
+ "floor 0.039879 \n",
+ "hoa_(BRL) 0.066869 \n",
+ "property_tax_(BRL) 0.063229 \n",
+ "fire_insurance_(BRL) 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Predictor-Target Mutual Information ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " rent_amount_(BRL) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | fire_insurance_(BRL) | \n",
+ " 0.412287 | \n",
+ "
\n",
+ " \n",
+ " | property_tax_(BRL) | \n",
+ " 0.057195 | \n",
+ "
\n",
+ " \n",
+ " | hoa_(BRL) | \n",
+ " 0.051921 | \n",
+ "
\n",
+ " \n",
+ " | rooms | \n",
+ " 0.031211 | \n",
+ "
\n",
+ " \n",
+ " | floor | \n",
+ " 0.009949 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " rent_amount_(BRL)\n",
+ "fire_insurance_(BRL) 0.412287\n",
+ "property_tax_(BRL) 0.057195\n",
+ "hoa_(BRL) 0.051921\n",
+ "rooms 0.031211\n",
+ "floor 0.009949"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Computing predictor correlations...\")\n",
+ "brazilian_correlations = compute_predictor_correlations(\n",
+ " data=df_brazilian,\n",
+ " predictors=brazilian_predictors,\n",
+ " imputed_variables=[brazilian_target],\n",
+ " method=\"all\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Pearson Correlation Matrix ---\")\n",
+ "display(brazilian_correlations[\"pearson\"])\n",
+ "\n",
+ "print(\"\\n--- Spearman Correlation Matrix ---\")\n",
+ "display(brazilian_correlations[\"spearman\"])\n",
+ "\n",
+ "print(\"\\n--- Mutual Information Matrix ---\")\n",
+ "display(brazilian_correlations[\"mutual_info\"])\n",
+ "\n",
+ "print(\"\\n--- Predictor-Target Mutual Information ---\")\n",
+ "display(brazilian_correlations[\"predictor_target_mi\"].sort_values(\n",
+ " by=brazilian_target, ascending=False\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running leave-one-out analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "27335e43812e468c9dd620f897266fdc",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Leave-one-out analysis: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Leave-One-Out Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " predictor_removed | \n",
+ " avg_quantile_loss | \n",
+ " avg_log_loss | \n",
+ " loss_increase | \n",
+ " relative_impact | \n",
+ " baseline_quantile_loss | \n",
+ " baseline_log_loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 4 | \n",
+ " fire_insurance_(BRL) | \n",
+ " 662.949094 | \n",
+ " 0 | \n",
+ " 611.600644 | \n",
+ " 1191.079089 | \n",
+ " 51.348449 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " floor | \n",
+ " 56.874794 | \n",
+ " 0 | \n",
+ " 5.526345 | \n",
+ " 10.762437 | \n",
+ " 51.348449 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " hoa_(BRL) | \n",
+ " 53.955997 | \n",
+ " 0 | \n",
+ " 2.607547 | \n",
+ " 5.078142 | \n",
+ " 51.348449 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " property_tax_(BRL) | \n",
+ " 52.573124 | \n",
+ " 0 | \n",
+ " 1.224674 | \n",
+ " 2.385027 | \n",
+ " 51.348449 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " rooms | \n",
+ " 52.341734 | \n",
+ " 0 | \n",
+ " 0.993285 | \n",
+ " 1.934401 | \n",
+ " 51.348449 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " predictor_removed avg_quantile_loss avg_log_loss loss_increase \\\n",
+ "4 fire_insurance_(BRL) 662.949094 0 611.600644 \n",
+ "1 floor 56.874794 0 5.526345 \n",
+ "2 hoa_(BRL) 53.955997 0 2.607547 \n",
+ "3 property_tax_(BRL) 52.573124 0 1.224674 \n",
+ "0 rooms 52.341734 0 0.993285 \n",
+ "\n",
+ " relative_impact baseline_quantile_loss baseline_log_loss \n",
+ "4 1191.079089 51.348449 0 \n",
+ "1 10.762437 51.348449 0 \n",
+ "2 5.078142 51.348449 0 \n",
+ "3 2.385027 51.348449 0 \n",
+ "0 1.934401 51.348449 0 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Running leave-one-out analysis...\")\n",
+ "brazilian_loo = leave_one_out_analysis(\n",
+ " data=df_brazilian,\n",
+ " predictors=brazilian_predictors,\n",
+ " imputed_variables=[brazilian_target],\n",
+ " model_class=QRF,\n",
+ " train_size=0.6,\n",
+ " n_jobs=1\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Leave-One-Out Results ---\")\n",
+ "display(brazilian_loo)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating 60/40 donor/receiver split...\n",
+ "Donor size: 6415\n",
+ "Receiver size: 4277\n",
+ "\n",
+ "Running autoimpute...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f796139cda7e4e28a2eab0e7474d88d9",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "AutoImputation progress: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f52742daf25e47a2b6916f9df7a1fb0a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Evaluating models: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 17.8s remaining: 26.7s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 17.8s remaining: 11.9s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 18.0s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Batch computation too fast (0.18162083625793457s.) Setting batch_size=2.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.2s remaining: 0.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 0.2s remaining: 0.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.3s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 2.7s remaining: 4.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 3.2s remaining: 2.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 3.8s finished\n",
+ "[Parallel(n_jobs=1)]: Done 1 tasks | elapsed: 5.2s\n",
+ "[Parallel(n_jobs=1)]: Done 4 tasks | elapsed: 18.9s\n",
+ "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 23.4s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 4.2s remaining: 6.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 4.2s remaining: 2.8s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 4.2s finished\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 3.1072974576558354,
+ 3.173472785580769,
+ 2.845580589626428,
+ 1.7549878759386435,
+ 4.177878882735426,
+ 3.347338117083344,
+ 3.3550935697327104,
+ 3.649098163471762,
+ 4.019663168790427,
+ 4.78874671805982,
+ 6.2170904100295425,
+ 4.460696816140001,
+ 7.5800773055275545,
+ 7.8227182830889435,
+ 7.401020283305162,
+ 10.023676321476369,
+ 9.723242741104022,
+ 10.253533450814857,
+ 11.13805573184919
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 27.434123148869837,
+ 35.71683554169915,
+ 43.922470771628994,
+ 49.21275136399065,
+ 55.548480124707716,
+ 60.572860483242394,
+ 65.50799688230708,
+ 66.43289166017146,
+ 67.35795011691349,
+ 65.8282151208106,
+ 64.32813717848792,
+ 62.38007794232267,
+ 60.019088074824616,
+ 56.780280592361656,
+ 52.44731098986749,
+ 47.88885424785658,
+ 42.467918939984415,
+ 37.35066250974279,
+ 29.9317614964926
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 9.557234604861549,
+ 8.956439155330603,
+ 8.289368354662047,
+ 7.596266973236795,
+ 6.9520128338534635,
+ 6.296534342783538,
+ 5.6211587352247445,
+ 4.908122927624412,
+ 3.7955282448487293,
+ 2.818644336351974,
+ 2.09174379627377,
+ 1.8420185929381039,
+ 2.05294500583899,
+ 2.2709424329333614,
+ 2.6048666108839944,
+ 2.8950856016426756,
+ 3.289546046612837,
+ 3.475716713562988,
+ 3.1826312874153015
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 60.21841618347967,
+ 89.20306359511531,
+ 109.6712176243354,
+ 124.46568009905874,
+ 134.78429758257494,
+ 141.76553013766224,
+ 146.21444277019924,
+ 148.99821816017044,
+ 151.32972095844445,
+ 153.51725530867773,
+ 154.60351178771742,
+ 154.74132339224963,
+ 153.35356004060458,
+ 149.65104502519253,
+ 142.39016058555669,
+ 131.17725926043133,
+ 114.78010041972375,
+ 91.63331439897473,
+ 59.110454376877534
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 8.15670690198403,
+ 7.809585963580716,
+ 5.578023744610601,
+ 5.913426158297302,
+ 8.782705554389233,
+ 2.656850537091216,
+ 2.479605603420727,
+ 10.004387902935983,
+ 15.30913095948508,
+ 15.387617206801348,
+ 10.2752171682886,
+ 8.783492202987343,
+ 7.602765808678525,
+ 5.96440876713662,
+ 5.116530437829269,
+ 15.632955439876456,
+ 31.718604969178777,
+ 3.770810507561032,
+ 3.564229901764414
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 43.39814190414394,
+ 69.36283126903236,
+ 85.77857204469834,
+ 102.36882753066385,
+ 114.15020625795276,
+ 122.93034306885575,
+ 133.79421716721043,
+ 152.16683197699382,
+ 167.39144465711925,
+ 162.35099337948319,
+ 147.43137042156485,
+ 131.3510126289026,
+ 118.20775772834531,
+ 104.96705980967423,
+ 91.40677696892523,
+ 85.65733745986346,
+ 79.54944934430779,
+ 48.28930320097427,
+ 33.231650551214386
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 20.787085449742314,
+ 18.133934780109254,
+ 15.596770860589444,
+ 13.242429289987362,
+ 11.186939137986156,
+ 9.623735377066708,
+ 8.818614374872773,
+ 8.977897188747919,
+ 10.055859879933255,
+ 11.803431682018678,
+ 13.971566933362238,
+ 16.394247992172986,
+ 18.97422075971559,
+ 21.655339784960336,
+ 24.404291231663677,
+ 27.200516932435963,
+ 30.030814404836313,
+ 32.88638788006087,
+ 35.76118291292973
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 354.65874512860483,
+ 359.5553702260328,
+ 364.45199532346066,
+ 369.34862042088855,
+ 374.24524551831644,
+ 379.1418706157443,
+ 384.03849571317227,
+ 388.93512081060015,
+ 393.831745908028,
+ 398.728371005456,
+ 403.6249961028839,
+ 408.5216212003118,
+ 413.41824629773964,
+ 418.3148713951676,
+ 423.2114964925954,
+ 428.1081215900234,
+ 433.0047466874513,
+ 437.9013717848792,
+ 442.7979968823071
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 17.82708018826267,
+ 25.705230261351847,
+ 19.507534048055877,
+ 14.297240685602839,
+ 33.842332189775505,
+ 66.59788185374984,
+ 105.56188466433146,
+ 150.7109411202626,
+ 200.2093336892162,
+ 254.519039676545,
+ 312.86215715520075,
+ 375.71540655326277,
+ 442.0045411281469,
+ 512.7219798864934,
+ 587.260642342192,
+ 667.7304038811378,
+ 753.0074651554911,
+ 845.6548833756536,
+ 947.8124427082157
+ ]
+ },
+ "hovertemplate": "Method=MDN
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "MDN",
+ "marker": {
+ "color": "#332288",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "MDN",
+ "offsetgroup": "MDN",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 211.95363543779845,
+ 406.6583397522585,
+ 588.5803247627703,
+ 760.992829991829,
+ 925.0212772214775,
+ 1080.747056574654,
+ 1230.5342890000138,
+ 1373.8658721081802,
+ 1511.7503385087427,
+ 1643.7695372411265,
+ 1770.2148889244006,
+ 1890.0146683348182,
+ 2004.0096345906543,
+ 2110.7926275320788,
+ 2209.449751827035,
+ 2297.591575550011,
+ 2372.695563690352,
+ 2427.2053368165393,
+ 2443.2381622618914
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 52.16466669401485,
+ "y1": 52.16466669401485
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 126.92676693194979,
+ "y1": 126.92676693194979
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 104.9360067036803,
+ "y1": 104.9360067036803
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 398.72837100545604,
+ "y1": 398.72837100545604
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "MDN Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 1539.9518794803491,
+ "y1": 1539.9518794803491
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "Brazilian Houses Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Creating 60/40 donor/receiver split...\")\n",
+ "brazilian_donor, brazilian_receiver = train_test_split(\n",
+ " df_brazilian, train_size=0.6, random_state=42\n",
+ ")\n",
+ "\n",
+ "brazilian_receiver_no_target = brazilian_receiver.drop(columns=[brazilian_target])\n",
+ "\n",
+ "print(f\"Donor size: {len(brazilian_donor)}\")\n",
+ "print(f\"Receiver size: {len(brazilian_receiver)}\")\n",
+ "\n",
+ "print(\"\\nRunning autoimpute...\")\n",
+ "brazilian_result = autoimpute(\n",
+ " donor_data=brazilian_donor,\n",
+ " receiver_data=brazilian_receiver_no_target.copy(),\n",
+ " predictors=brazilian_predictors,\n",
+ " imputed_variables=[brazilian_target],\n",
+ " impute_all=True,\n",
+ " log_level=\"INFO\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Results ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=brazilian_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"Brazilian Houses Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Results (Zoomed In Without MDN) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 3.1072974576558354,
+ 3.173472785580769,
+ 2.845580589626428,
+ 1.7549878759386435,
+ 4.177878882735426,
+ 3.347338117083344,
+ 3.3550935697327104,
+ 3.649098163471762,
+ 4.019663168790427,
+ 4.78874671805982,
+ 6.2170904100295425,
+ 4.460696816140001,
+ 7.5800773055275545,
+ 7.8227182830889435,
+ 7.401020283305162,
+ 10.023676321476369,
+ 9.723242741104022,
+ 10.253533450814857,
+ 11.13805573184919
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 27.434123148869837,
+ 35.71683554169915,
+ 43.922470771628994,
+ 49.21275136399065,
+ 55.548480124707716,
+ 60.572860483242394,
+ 65.50799688230708,
+ 66.43289166017146,
+ 67.35795011691349,
+ 65.8282151208106,
+ 64.32813717848792,
+ 62.38007794232267,
+ 60.019088074824616,
+ 56.780280592361656,
+ 52.44731098986749,
+ 47.88885424785658,
+ 42.467918939984415,
+ 37.35066250974279,
+ 29.9317614964926
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 9.557234604861549,
+ 8.956439155330603,
+ 8.289368354662047,
+ 7.596266973236795,
+ 6.9520128338534635,
+ 6.296534342783538,
+ 5.6211587352247445,
+ 4.908122927624412,
+ 3.7955282448487293,
+ 2.818644336351974,
+ 2.09174379627377,
+ 1.8420185929381039,
+ 2.05294500583899,
+ 2.2709424329333614,
+ 2.6048666108839944,
+ 2.8950856016426756,
+ 3.289546046612837,
+ 3.475716713562988,
+ 3.1826312874153015
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 60.21841618347967,
+ 89.20306359511531,
+ 109.6712176243354,
+ 124.46568009905874,
+ 134.78429758257494,
+ 141.76553013766224,
+ 146.21444277019924,
+ 148.99821816017044,
+ 151.32972095844445,
+ 153.51725530867773,
+ 154.60351178771742,
+ 154.74132339224963,
+ 153.35356004060458,
+ 149.65104502519253,
+ 142.39016058555669,
+ 131.17725926043133,
+ 114.78010041972375,
+ 91.63331439897473,
+ 59.110454376877534
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 8.15670690198403,
+ 7.809585963580716,
+ 5.578023744610601,
+ 5.913426158297302,
+ 8.782705554389233,
+ 2.656850537091216,
+ 2.479605603420727,
+ 10.004387902935983,
+ 15.30913095948508,
+ 15.387617206801348,
+ 10.2752171682886,
+ 8.783492202987343,
+ 7.602765808678525,
+ 5.96440876713662,
+ 5.116530437829269,
+ 15.632955439876456,
+ 31.718604969178777,
+ 3.770810507561032,
+ 3.564229901764414
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 43.39814190414394,
+ 69.36283126903236,
+ 85.77857204469834,
+ 102.36882753066385,
+ 114.15020625795276,
+ 122.93034306885575,
+ 133.79421716721043,
+ 152.16683197699382,
+ 167.39144465711925,
+ 162.35099337948319,
+ 147.43137042156485,
+ 131.3510126289026,
+ 118.20775772834531,
+ 104.96705980967423,
+ 91.40677696892523,
+ 85.65733745986346,
+ 79.54944934430779,
+ 48.28930320097427,
+ 33.231650551214386
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 20.787085449742314,
+ 18.133934780109254,
+ 15.596770860589444,
+ 13.242429289987362,
+ 11.186939137986156,
+ 9.623735377066708,
+ 8.818614374872773,
+ 8.977897188747919,
+ 10.055859879933255,
+ 11.803431682018678,
+ 13.971566933362238,
+ 16.394247992172986,
+ 18.97422075971559,
+ 21.655339784960336,
+ 24.404291231663677,
+ 27.200516932435963,
+ 30.030814404836313,
+ 32.88638788006087,
+ 35.76118291292973
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 354.65874512860483,
+ 359.5553702260328,
+ 364.45199532346066,
+ 369.34862042088855,
+ 374.24524551831644,
+ 379.1418706157443,
+ 384.03849571317227,
+ 388.93512081060015,
+ 393.831745908028,
+ 398.728371005456,
+ 403.6249961028839,
+ 408.5216212003118,
+ 413.41824629773964,
+ 418.3148713951676,
+ 423.2114964925954,
+ 428.1081215900234,
+ 433.0047466874513,
+ 437.9013717848792,
+ 442.7979968823071
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 52.16466669401485,
+ "y1": 52.16466669401485
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 126.92676693194979,
+ "y1": 126.92676693194979
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 104.9360067036803,
+ "y1": 104.9360067036803
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 398.72837100545604,
+ "y1": 398.72837100545604
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "Brazilian Houses Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "brazilian_result.cv_results.pop('MDN', None)\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Results (Zoomed In Without MDN) ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=brazilian_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"Brazilian Houses Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Distribution Comparison: Imputed vs Ground Truth (Brazilian Houses) ---\n",
+ "QRF: Wasserstein Distance = 27.1747\n",
+ "OLS: Wasserstein Distance = 99.3468\n",
+ "QuantReg: Wasserstein Distance = 112.3643\n",
+ "Matching: Wasserstein Distance = 120.4604\n",
+ "MDN: Wasserstein Distance = 3378.6172\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Variable | \n",
+ " Metric | \n",
+ " Distance | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " rent_amount_(BRL) | \n",
+ " wasserstein_distance | \n",
+ " 27.174655 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Variable Metric Distance\n",
+ "0 rent_amount_(BRL) wasserstein_distance 27.174655"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"\\n--- Distribution Comparison: Imputed vs Ground Truth (Brazilian Houses) ---\")\n",
+ "# Store Wasserstein distances for all methods\n",
+ "brazilian_wasserstein = {}\n",
+ "\n",
+ "distribution_comparison_brazilian = compare_distributions(\n",
+ " donor_data=brazilian_receiver, # Ground truth test split\n",
+ " receiver_data=brazilian_result.receiver_data, # Imputed values\n",
+ " imputed_variables=[brazilian_target],\n",
+ ")\n",
+ "best_method_name = brazilian_result.fitted_models[\"best_method\"].__class__.__name__ \n",
+ "best_method_name = best_method_name.replace(\"Results\", \"\")\n",
+ "brazilian_wasserstein[best_method_name] = distribution_comparison_brazilian[\n",
+ " distribution_comparison_brazilian['Metric'] == 'wasserstein_distance'\n",
+ "]['Distance'].values[0]\n",
+ "print(f\"{best_method_name}: Wasserstein Distance = {brazilian_wasserstein.get(best_method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "for method_name, imputations in brazilian_result.imputations.items():\n",
+ " # Skip the 'best_method' (it's a duplicate)\n",
+ " if method_name == 'best_method': \n",
+ " continue\n",
+ " \n",
+ " # Create a copy of receiver data with this method's imputations \n",
+ " receiver_with_imputations = brazilian_receiver_no_target.copy() \n",
+ " \n",
+ " # Handle both dict (quantile->DataFrame) and DataFrame formats \n",
+ " if isinstance(imputations, dict): \n",
+ " # Get median quantile (0.5) imputations \n",
+ " imp_df = imputations.get(0.5, list(imputations.values())[0]) \n",
+ " else: \n",
+ " imp_df = imputations\n",
+ " \n",
+ " # Add imputed values \n",
+ " for var in [brazilian_target]:\n",
+ " if var in imp_df.columns: \n",
+ " receiver_with_imputations[var] = imp_df[var].values \n",
+ " \n",
+ " # Calculate distribution comparison \n",
+ " dist_comparison = compare_distributions( \n",
+ " donor_data=brazilian_receiver, # Ground truth \n",
+ " receiver_data=receiver_with_imputations,\n",
+ " imputed_variables=[brazilian_target], \n",
+ " )\n",
+ "\n",
+ " # Extract Wasserstein distance \n",
+ " wd = dist_comparison[dist_comparison['Metric'] == 'wasserstein_distance']['Distance'].values \n",
+ " brazilian_wasserstein[method_name] = wd[0]\n",
+ " \n",
+ " print(f\"{method_name}: Wasserstein Distance = {brazilian_wasserstein.get(method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "# Display full comparison for best method\n",
+ "display(distribution_comparison_brazilian)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CIA Sensitivity Analysis\n",
+ "\n",
+ "Measuring sensitivity to the Conditional Independence Assumption by progressively removing predictors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running CIA sensitivity analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "03a9802aea964ed0965c0e243041d378",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QRF sensitivity score: 14.692\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3b3368f6597845839f7017f6885372e5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " OLS sensitivity score: 5.602\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1eac1e16dbeb4d9d89a5e3b598a31d54",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QuantReg sensitivity score: 7.268\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d4688227d0ad427da9618dc097141c17",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Matching sensitivity score: 3.060\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "61ee8fbc3e754440b7e7e6aed8500657",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " MDN sensitivity score: 1.004\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "width": 2
+ },
+ "marker": {
+ "color": "#88CCEE",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QRF (baseline=45.7417)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4
+ ],
+ "y": [
+ 45.7416736792894,
+ 623.8108929406264,
+ 709.1143057503507,
+ 804.0901168770454,
+ 1056.5132024310426
+ ]
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "width": 2
+ },
+ "marker": {
+ "color": "#CC6677",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "OLS (baseline=123.5432)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4
+ ],
+ "y": [
+ 123.54322716393965,
+ 746.9635077743502,
+ 750.4300176323405,
+ 751.0494055667007,
+ 915.9085603679762
+ ]
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "width": 2
+ },
+ "marker": {
+ "color": "#DDCC77",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QuantReg (baseline=91.1461)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4
+ ],
+ "y": [
+ 91.14614542283971,
+ 720.2594169403153,
+ 783.1259101387384,
+ 679.5062916677432,
+ 842.4182299709439
+ ]
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "width": 2
+ },
+ "marker": {
+ "color": "#117733",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "Matching (baseline=333.5215)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4
+ ],
+ "y": [
+ 333.52150537634407,
+ 894.8218793828892,
+ 997.1390836839644,
+ 1226.3321645628798,
+ 1593.557737260402
+ ]
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "width": 2
+ },
+ "marker": {
+ "color": "#332288",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "MDN (baseline=1479.5715)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4
+ ],
+ "y": [
+ 1479.5714570515665,
+ 1332.5799732178648,
+ 1487.9663522328165,
+ 1679.526102480061,
+ 1409.471275445297
+ ]
+ }
+ ],
+ "layout": {
+ "height": 600,
+ "hovermode": "x unified",
+ "legend": {
+ "x": 0.01,
+ "xanchor": "left",
+ "y": 0.99,
+ "yanchor": "top"
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "CIA Sensitivity: Brazilian Houses"
+ },
+ "width": 750,
+ "xaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "text": "Number of Predictors Removed"
+ }
+ },
+ "yaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "text": "Quantile Loss"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Sensitivity Scores (higher = more sensitive to predictor removal):\n",
+ " QRF: 14.692\n",
+ " OLS: 5.602\n",
+ " QuantReg: 7.268\n",
+ " Matching: 3.060\n",
+ " MDN: 1.004\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Running CIA sensitivity analysis...\")\n",
+ "brazilian_cia_results = run_cia_analysis_for_dataset(\n",
+ " data=df_brazilian,\n",
+ " predictors=brazilian_predictors,\n",
+ " imputed_variables=[brazilian_target],\n",
+ " ordering=\"mutual_info\",\n",
+ ")\n",
+ "\n",
+ "# Plot degradation curves\n",
+ "brazilian_cia_fig = plot_cia_degradation_curves(\n",
+ " brazilian_cia_results,\n",
+ " title=\"CIA Sensitivity: Brazilian Houses\"\n",
+ ")\n",
+ "brazilian_cia_fig.show()\n",
+ "\n",
+ "# Display sensitivity scores\n",
+ "print(\"\\nSensitivity Scores (higher = more sensitive to predictor removal):\")\n",
+ "for method, results in brazilian_cia_results.items():\n",
+ " print(f\" {method}: {results['sensitivity_score']:.3f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## 4. Online News Popularity Dataset\n",
+ "\n",
+ "News article features and social media shares."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Shape: (39644, 60)\n",
+ "\n",
+ "Column names (first 30 and last 10):\n",
+ " 1. timedelta\n",
+ " 2. n_tokens_title\n",
+ " 3. n_tokens_content\n",
+ " 4. n_unique_tokens\n",
+ " 5. n_non_stop_words\n",
+ " 6. n_non_stop_unique_tokens\n",
+ " 7. num_hrefs\n",
+ " 8. num_self_hrefs\n",
+ " 9. num_imgs\n",
+ " 10. num_videos\n",
+ " 11. average_token_length\n",
+ " 12. num_keywords\n",
+ " 13. data_channel_is_lifestyle\n",
+ " 14. data_channel_is_entertainment\n",
+ " 15. data_channel_is_bus\n",
+ " 16. data_channel_is_socmed\n",
+ " 17. data_channel_is_tech\n",
+ " 18. data_channel_is_world\n",
+ " 19. kw_min_min\n",
+ " 20. kw_max_min\n",
+ " 21. kw_avg_min\n",
+ " 22. kw_min_max\n",
+ " 23. kw_max_max\n",
+ " 24. kw_avg_max\n",
+ " 25. kw_min_avg\n",
+ " 26. kw_max_avg\n",
+ " 27. kw_avg_avg\n",
+ " 28. self_reference_min_shares\n",
+ " 29. self_reference_max_shares\n",
+ " 30. self_reference_avg_sharess\n",
+ " ...\n",
+ " 51. min_positive_polarity\n",
+ " 52. max_positive_polarity\n",
+ " 53. avg_negative_polarity\n",
+ " 54. min_negative_polarity\n",
+ " 55. max_negative_polarity\n",
+ " 56. title_subjectivity\n",
+ " 57. title_sentiment_polarity\n",
+ " 58. abs_title_subjectivity\n",
+ " 59. abs_title_sentiment_polarity\n",
+ " 60. shares\n",
+ "\n",
+ "Data types (sample):\n",
+ "timedelta float64\n",
+ "n_tokens_title int64\n",
+ "n_tokens_content float64\n",
+ "n_unique_tokens float64\n",
+ "n_non_stop_words float64\n",
+ "n_non_stop_unique_tokens float64\n",
+ "num_hrefs float64\n",
+ "num_self_hrefs int64\n",
+ "num_imgs int64\n",
+ "num_videos int64\n",
+ "dtype: object\n",
+ "\n",
+ "First few rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " timedelta | \n",
+ " n_tokens_title | \n",
+ " n_tokens_content | \n",
+ " n_unique_tokens | \n",
+ " n_non_stop_words | \n",
+ " n_non_stop_unique_tokens | \n",
+ " num_hrefs | \n",
+ " num_self_hrefs | \n",
+ " num_imgs | \n",
+ " num_videos | \n",
+ " average_token_length | \n",
+ " num_keywords | \n",
+ " data_channel_is_lifestyle | \n",
+ " data_channel_is_entertainment | \n",
+ " data_channel_is_bus | \n",
+ " data_channel_is_socmed | \n",
+ " data_channel_is_tech | \n",
+ " data_channel_is_world | \n",
+ " kw_min_min | \n",
+ " kw_max_min | \n",
+ " kw_avg_min | \n",
+ " kw_min_max | \n",
+ " kw_max_max | \n",
+ " kw_avg_max | \n",
+ " kw_min_avg | \n",
+ " kw_max_avg | \n",
+ " kw_avg_avg | \n",
+ " self_reference_min_shares | \n",
+ " self_reference_max_shares | \n",
+ " self_reference_avg_sharess | \n",
+ " weekday_is_monday | \n",
+ " weekday_is_tuesday | \n",
+ " weekday_is_wednesday | \n",
+ " weekday_is_thursday | \n",
+ " weekday_is_friday | \n",
+ " weekday_is_saturday | \n",
+ " weekday_is_sunday | \n",
+ " is_weekend | \n",
+ " LDA_00 | \n",
+ " LDA_01 | \n",
+ " LDA_02 | \n",
+ " LDA_03 | \n",
+ " LDA_04 | \n",
+ " global_subjectivity | \n",
+ " global_sentiment_polarity | \n",
+ " global_rate_positive_words | \n",
+ " global_rate_negative_words | \n",
+ " rate_positive_words | \n",
+ " rate_negative_words | \n",
+ " avg_positive_polarity | \n",
+ " min_positive_polarity | \n",
+ " max_positive_polarity | \n",
+ " avg_negative_polarity | \n",
+ " min_negative_polarity | \n",
+ " max_negative_polarity | \n",
+ " title_subjectivity | \n",
+ " title_sentiment_polarity | \n",
+ " abs_title_subjectivity | \n",
+ " abs_title_sentiment_polarity | \n",
+ " shares | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 731.0 | \n",
+ " 12 | \n",
+ " 219.0 | \n",
+ " 0.663594 | \n",
+ " 1.0 | \n",
+ " 0.815385 | \n",
+ " 4.0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 4.680365 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 496.0 | \n",
+ " 496.0 | \n",
+ " 496.000000 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.500331 | \n",
+ " 0.378279 | \n",
+ " 0.040005 | \n",
+ " 0.041263 | \n",
+ " 0.040123 | \n",
+ " 0.521617 | \n",
+ " 0.092562 | \n",
+ " 0.045662 | \n",
+ " 0.013699 | \n",
+ " 0.769231 | \n",
+ " 0.230769 | \n",
+ " 0.378636 | \n",
+ " 0.100000 | \n",
+ " 0.7 | \n",
+ " -0.350000 | \n",
+ " -0.600 | \n",
+ " -0.200000 | \n",
+ " 0.500000 | \n",
+ " -0.187500 | \n",
+ " 0.000000 | \n",
+ " 0.187500 | \n",
+ " 593.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 731.0 | \n",
+ " 9 | \n",
+ " 255.0 | \n",
+ " 0.604743 | \n",
+ " 1.0 | \n",
+ " 0.791946 | \n",
+ " 3.0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 4.913725 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.799756 | \n",
+ " 0.050047 | \n",
+ " 0.050096 | \n",
+ " 0.050101 | \n",
+ " 0.050001 | \n",
+ " 0.341246 | \n",
+ " 0.148948 | \n",
+ " 0.043137 | \n",
+ " 0.015686 | \n",
+ " 0.733333 | \n",
+ " 0.266667 | \n",
+ " 0.286915 | \n",
+ " 0.033333 | \n",
+ " 0.7 | \n",
+ " -0.118750 | \n",
+ " -0.125 | \n",
+ " -0.100000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.500000 | \n",
+ " 0.000000 | \n",
+ " 711.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 731.0 | \n",
+ " 9 | \n",
+ " 211.0 | \n",
+ " 0.575130 | \n",
+ " 1.0 | \n",
+ " 0.663866 | \n",
+ " 3.0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 4.393365 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 918.0 | \n",
+ " 918.0 | \n",
+ " 918.000000 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.217792 | \n",
+ " 0.033334 | \n",
+ " 0.033351 | \n",
+ " 0.033334 | \n",
+ " 0.682188 | \n",
+ " 0.702222 | \n",
+ " 0.323333 | \n",
+ " 0.056872 | \n",
+ " 0.009479 | \n",
+ " 0.857143 | \n",
+ " 0.142857 | \n",
+ " 0.495833 | \n",
+ " 0.100000 | \n",
+ " 1.0 | \n",
+ " -0.466667 | \n",
+ " -0.800 | \n",
+ " -0.133333 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.500000 | \n",
+ " 0.000000 | \n",
+ " 1500.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 731.0 | \n",
+ " 9 | \n",
+ " 531.0 | \n",
+ " 0.503788 | \n",
+ " 1.0 | \n",
+ " 0.665635 | \n",
+ " 9.0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 4.404896 | \n",
+ " 7 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.000000 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.028573 | \n",
+ " 0.419300 | \n",
+ " 0.494651 | \n",
+ " 0.028905 | \n",
+ " 0.028572 | \n",
+ " 0.429850 | \n",
+ " 0.100705 | \n",
+ " 0.041431 | \n",
+ " 0.020716 | \n",
+ " 0.666667 | \n",
+ " 0.333333 | \n",
+ " 0.385965 | \n",
+ " 0.136364 | \n",
+ " 0.8 | \n",
+ " -0.369697 | \n",
+ " -0.600 | \n",
+ " -0.166667 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.500000 | \n",
+ " 0.000000 | \n",
+ " 1200.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 731.0 | \n",
+ " 13 | \n",
+ " 1072.0 | \n",
+ " 0.415646 | \n",
+ " 1.0 | \n",
+ " 0.540890 | \n",
+ " 19.0 | \n",
+ " 19 | \n",
+ " 20 | \n",
+ " 0 | \n",
+ " 4.682836 | \n",
+ " 7 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 545.0 | \n",
+ " 16000.0 | \n",
+ " 3151.157895 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.028633 | \n",
+ " 0.028794 | \n",
+ " 0.028575 | \n",
+ " 0.028572 | \n",
+ " 0.885427 | \n",
+ " 0.513502 | \n",
+ " 0.281003 | \n",
+ " 0.074627 | \n",
+ " 0.012127 | \n",
+ " 0.860215 | \n",
+ " 0.139785 | \n",
+ " 0.411127 | \n",
+ " 0.033333 | \n",
+ " 1.0 | \n",
+ " -0.220192 | \n",
+ " -0.500 | \n",
+ " -0.050000 | \n",
+ " 0.454545 | \n",
+ " 0.136364 | \n",
+ " 0.045455 | \n",
+ " 0.136364 | \n",
+ " 505.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " timedelta n_tokens_title n_tokens_content n_unique_tokens \\\n",
+ "0 731.0 12 219.0 0.663594 \n",
+ "1 731.0 9 255.0 0.604743 \n",
+ "2 731.0 9 211.0 0.575130 \n",
+ "3 731.0 9 531.0 0.503788 \n",
+ "4 731.0 13 1072.0 0.415646 \n",
+ "\n",
+ " n_non_stop_words n_non_stop_unique_tokens num_hrefs num_self_hrefs \\\n",
+ "0 1.0 0.815385 4.0 2 \n",
+ "1 1.0 0.791946 3.0 1 \n",
+ "2 1.0 0.663866 3.0 1 \n",
+ "3 1.0 0.665635 9.0 0 \n",
+ "4 1.0 0.540890 19.0 19 \n",
+ "\n",
+ " num_imgs num_videos average_token_length num_keywords \\\n",
+ "0 1 0 4.680365 5 \n",
+ "1 1 0 4.913725 4 \n",
+ "2 1 0 4.393365 6 \n",
+ "3 1 0 4.404896 7 \n",
+ "4 20 0 4.682836 7 \n",
+ "\n",
+ " data_channel_is_lifestyle data_channel_is_entertainment \\\n",
+ "0 0 1 \n",
+ "1 0 0 \n",
+ "2 0 0 \n",
+ "3 0 1 \n",
+ "4 0 0 \n",
+ "\n",
+ " data_channel_is_bus data_channel_is_socmed data_channel_is_tech \\\n",
+ "0 0 0 0 \n",
+ "1 1 0 0 \n",
+ "2 1 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 1 \n",
+ "\n",
+ " data_channel_is_world kw_min_min kw_max_min kw_avg_min kw_min_max \\\n",
+ "0 0 0.0 0.0 0.0 0.0 \n",
+ "1 0 0.0 0.0 0.0 0.0 \n",
+ "2 0 0.0 0.0 0.0 0.0 \n",
+ "3 0 0.0 0.0 0.0 0.0 \n",
+ "4 0 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ " kw_max_max kw_avg_max kw_min_avg kw_max_avg kw_avg_avg \\\n",
+ "0 0.0 0.0 0.0 0.0 0.0 \n",
+ "1 0.0 0.0 0.0 0.0 0.0 \n",
+ "2 0.0 0.0 0.0 0.0 0.0 \n",
+ "3 0.0 0.0 0.0 0.0 0.0 \n",
+ "4 0.0 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ " self_reference_min_shares self_reference_max_shares \\\n",
+ "0 496.0 496.0 \n",
+ "1 0.0 0.0 \n",
+ "2 918.0 918.0 \n",
+ "3 0.0 0.0 \n",
+ "4 545.0 16000.0 \n",
+ "\n",
+ " self_reference_avg_sharess weekday_is_monday weekday_is_tuesday \\\n",
+ "0 496.000000 1 0 \n",
+ "1 0.000000 1 0 \n",
+ "2 918.000000 1 0 \n",
+ "3 0.000000 1 0 \n",
+ "4 3151.157895 1 0 \n",
+ "\n",
+ " weekday_is_wednesday weekday_is_thursday weekday_is_friday \\\n",
+ "0 0 0 0 \n",
+ "1 0 0 0 \n",
+ "2 0 0 0 \n",
+ "3 0 0 0 \n",
+ "4 0 0 0 \n",
+ "\n",
+ " weekday_is_saturday weekday_is_sunday is_weekend LDA_00 LDA_01 \\\n",
+ "0 0 0 0 0.500331 0.378279 \n",
+ "1 0 0 0 0.799756 0.050047 \n",
+ "2 0 0 0 0.217792 0.033334 \n",
+ "3 0 0 0 0.028573 0.419300 \n",
+ "4 0 0 0 0.028633 0.028794 \n",
+ "\n",
+ " LDA_02 LDA_03 LDA_04 global_subjectivity \\\n",
+ "0 0.040005 0.041263 0.040123 0.521617 \n",
+ "1 0.050096 0.050101 0.050001 0.341246 \n",
+ "2 0.033351 0.033334 0.682188 0.702222 \n",
+ "3 0.494651 0.028905 0.028572 0.429850 \n",
+ "4 0.028575 0.028572 0.885427 0.513502 \n",
+ "\n",
+ " global_sentiment_polarity global_rate_positive_words \\\n",
+ "0 0.092562 0.045662 \n",
+ "1 0.148948 0.043137 \n",
+ "2 0.323333 0.056872 \n",
+ "3 0.100705 0.041431 \n",
+ "4 0.281003 0.074627 \n",
+ "\n",
+ " global_rate_negative_words rate_positive_words rate_negative_words \\\n",
+ "0 0.013699 0.769231 0.230769 \n",
+ "1 0.015686 0.733333 0.266667 \n",
+ "2 0.009479 0.857143 0.142857 \n",
+ "3 0.020716 0.666667 0.333333 \n",
+ "4 0.012127 0.860215 0.139785 \n",
+ "\n",
+ " avg_positive_polarity min_positive_polarity max_positive_polarity \\\n",
+ "0 0.378636 0.100000 0.7 \n",
+ "1 0.286915 0.033333 0.7 \n",
+ "2 0.495833 0.100000 1.0 \n",
+ "3 0.385965 0.136364 0.8 \n",
+ "4 0.411127 0.033333 1.0 \n",
+ "\n",
+ " avg_negative_polarity min_negative_polarity max_negative_polarity \\\n",
+ "0 -0.350000 -0.600 -0.200000 \n",
+ "1 -0.118750 -0.125 -0.100000 \n",
+ "2 -0.466667 -0.800 -0.133333 \n",
+ "3 -0.369697 -0.600 -0.166667 \n",
+ "4 -0.220192 -0.500 -0.050000 \n",
+ "\n",
+ " title_subjectivity title_sentiment_polarity abs_title_subjectivity \\\n",
+ "0 0.500000 -0.187500 0.000000 \n",
+ "1 0.000000 0.000000 0.500000 \n",
+ "2 0.000000 0.000000 0.500000 \n",
+ "3 0.000000 0.000000 0.500000 \n",
+ "4 0.454545 0.136364 0.045455 \n",
+ "\n",
+ " abs_title_sentiment_polarity shares \n",
+ "0 0.187500 593.0 \n",
+ "1 0.000000 711.0 \n",
+ "2 0.000000 1500.0 \n",
+ "3 0.000000 1200.0 \n",
+ "4 0.136364 505.0 "
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_news = load_dataset(\"onlinenewspopularity\")\n",
+ "print(f\"\\nShape: {df_news.shape}\")\n",
+ "print(f\"\\nColumn names (first 30 and last 10):\")\n",
+ "cols = list(df_news.columns)\n",
+ "for i, col in enumerate(cols[:30]):\n",
+ " print(f\" {i+1}. {col}\")\n",
+ "print(\" ...\")\n",
+ "for i, col in enumerate(cols[-10:]):\n",
+ " print(f\" {len(cols)-10+i+1}. {col}\")\n",
+ "print(f\"\\nData types (sample):\")\n",
+ "print(df_news.dtypes[:10])\n",
+ "print(f\"\\nFirst few rows:\")\n",
+ "df_news.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Target variable: shares\n",
+ "Predictors 9: ['n_tokens_title', 'n_tokens_content', 'num_hrefs', 'num_imgs', 'num_videos', 'global_subjectivity', 'global_sentiment_polarity', 'title_subjectivity', 'title_sentiment_polarity']\n"
+ ]
+ }
+ ],
+ "source": [
+ "news_target = \"shares\" # Target variable to impute\n",
+ "news_predictors = [\"n_tokens_title\", \"n_tokens_content\", \"num_hrefs\", \"num_imgs\", \"num_videos\", \"global_subjectivity\", \"global_sentiment_polarity\", \"title_subjectivity\", \"title_sentiment_polarity\"]\n",
+ "\n",
+ "print(f\"Target variable: {news_target}\")\n",
+ "print(f\"Predictors {len(news_predictors)}: {news_predictors}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing predictor correlations...\n",
+ "\n",
+ "--- Pearson Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " n_tokens_title | \n",
+ " n_tokens_content | \n",
+ " num_hrefs | \n",
+ " num_imgs | \n",
+ " num_videos | \n",
+ " global_subjectivity | \n",
+ " global_sentiment_polarity | \n",
+ " title_subjectivity | \n",
+ " title_sentiment_polarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | n_tokens_title | \n",
+ " 1.000000 | \n",
+ " 0.018160 | \n",
+ " -0.053496 | \n",
+ " -0.008858 | \n",
+ " 0.051460 | \n",
+ " -0.056804 | \n",
+ " -0.072226 | \n",
+ " 0.077245 | \n",
+ " 0.000240 | \n",
+ "
\n",
+ " \n",
+ " | n_tokens_content | \n",
+ " 0.018160 | \n",
+ " 1.000000 | \n",
+ " 0.423065 | \n",
+ " 0.342600 | \n",
+ " 0.103699 | \n",
+ " 0.127879 | \n",
+ " 0.021937 | \n",
+ " 0.004484 | \n",
+ " 0.023358 | \n",
+ "
\n",
+ " \n",
+ " | num_hrefs | \n",
+ " -0.053496 | \n",
+ " 0.423065 | \n",
+ " 1.000000 | \n",
+ " 0.342633 | \n",
+ " 0.114518 | \n",
+ " 0.203464 | \n",
+ " 0.086859 | \n",
+ " 0.043950 | \n",
+ " 0.039041 | \n",
+ "
\n",
+ " \n",
+ " | num_imgs | \n",
+ " -0.008858 | \n",
+ " 0.342600 | \n",
+ " 0.342633 | \n",
+ " 1.000000 | \n",
+ " -0.067336 | \n",
+ " 0.080468 | \n",
+ " 0.021082 | \n",
+ " 0.056815 | \n",
+ " 0.046310 | \n",
+ "
\n",
+ " \n",
+ " | num_videos | \n",
+ " 0.051460 | \n",
+ " 0.103699 | \n",
+ " 0.114518 | \n",
+ " -0.067336 | \n",
+ " 1.000000 | \n",
+ " 0.082052 | \n",
+ " -0.028434 | \n",
+ " 0.061028 | \n",
+ " 0.021980 | \n",
+ "
\n",
+ " \n",
+ " | global_subjectivity | \n",
+ " -0.056804 | \n",
+ " 0.127879 | \n",
+ " 0.203464 | \n",
+ " 0.080468 | \n",
+ " 0.082052 | \n",
+ " 1.000000 | \n",
+ " 0.339436 | \n",
+ " 0.114123 | \n",
+ " 0.034075 | \n",
+ "
\n",
+ " \n",
+ " | global_sentiment_polarity | \n",
+ " -0.072226 | \n",
+ " 0.021937 | \n",
+ " 0.086859 | \n",
+ " 0.021082 | \n",
+ " -0.028434 | \n",
+ " 0.339436 | \n",
+ " 1.000000 | \n",
+ " 0.023620 | \n",
+ " 0.238266 | \n",
+ "
\n",
+ " \n",
+ " | title_subjectivity | \n",
+ " 0.077245 | \n",
+ " 0.004484 | \n",
+ " 0.043950 | \n",
+ " 0.056815 | \n",
+ " 0.061028 | \n",
+ " 0.114123 | \n",
+ " 0.023620 | \n",
+ " 1.000000 | \n",
+ " 0.232130 | \n",
+ "
\n",
+ " \n",
+ " | title_sentiment_polarity | \n",
+ " 0.000240 | \n",
+ " 0.023358 | \n",
+ " 0.039041 | \n",
+ " 0.046310 | \n",
+ " 0.021980 | \n",
+ " 0.034075 | \n",
+ " 0.238266 | \n",
+ " 0.232130 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " n_tokens_title n_tokens_content num_hrefs \\\n",
+ "n_tokens_title 1.000000 0.018160 -0.053496 \n",
+ "n_tokens_content 0.018160 1.000000 0.423065 \n",
+ "num_hrefs -0.053496 0.423065 1.000000 \n",
+ "num_imgs -0.008858 0.342600 0.342633 \n",
+ "num_videos 0.051460 0.103699 0.114518 \n",
+ "global_subjectivity -0.056804 0.127879 0.203464 \n",
+ "global_sentiment_polarity -0.072226 0.021937 0.086859 \n",
+ "title_subjectivity 0.077245 0.004484 0.043950 \n",
+ "title_sentiment_polarity 0.000240 0.023358 0.039041 \n",
+ "\n",
+ " num_imgs num_videos global_subjectivity \\\n",
+ "n_tokens_title -0.008858 0.051460 -0.056804 \n",
+ "n_tokens_content 0.342600 0.103699 0.127879 \n",
+ "num_hrefs 0.342633 0.114518 0.203464 \n",
+ "num_imgs 1.000000 -0.067336 0.080468 \n",
+ "num_videos -0.067336 1.000000 0.082052 \n",
+ "global_subjectivity 0.080468 0.082052 1.000000 \n",
+ "global_sentiment_polarity 0.021082 -0.028434 0.339436 \n",
+ "title_subjectivity 0.056815 0.061028 0.114123 \n",
+ "title_sentiment_polarity 0.046310 0.021980 0.034075 \n",
+ "\n",
+ " global_sentiment_polarity title_subjectivity \\\n",
+ "n_tokens_title -0.072226 0.077245 \n",
+ "n_tokens_content 0.021937 0.004484 \n",
+ "num_hrefs 0.086859 0.043950 \n",
+ "num_imgs 0.021082 0.056815 \n",
+ "num_videos -0.028434 0.061028 \n",
+ "global_subjectivity 0.339436 0.114123 \n",
+ "global_sentiment_polarity 1.000000 0.023620 \n",
+ "title_subjectivity 0.023620 1.000000 \n",
+ "title_sentiment_polarity 0.238266 0.232130 \n",
+ "\n",
+ " title_sentiment_polarity \n",
+ "n_tokens_title 0.000240 \n",
+ "n_tokens_content 0.023358 \n",
+ "num_hrefs 0.039041 \n",
+ "num_imgs 0.046310 \n",
+ "num_videos 0.021980 \n",
+ "global_subjectivity 0.034075 \n",
+ "global_sentiment_polarity 0.238266 \n",
+ "title_subjectivity 0.232130 \n",
+ "title_sentiment_polarity 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Spearman Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " n_tokens_title | \n",
+ " n_tokens_content | \n",
+ " num_hrefs | \n",
+ " num_imgs | \n",
+ " num_videos | \n",
+ " global_subjectivity | \n",
+ " global_sentiment_polarity | \n",
+ " title_subjectivity | \n",
+ " title_sentiment_polarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | n_tokens_title | \n",
+ " 1.000000 | \n",
+ " 0.010237 | \n",
+ " -0.061430 | \n",
+ " -0.012670 | \n",
+ " 0.116492 | \n",
+ " -0.047503 | \n",
+ " -0.077222 | \n",
+ " 0.108684 | \n",
+ " 0.011174 | \n",
+ "
\n",
+ " \n",
+ " | n_tokens_content | \n",
+ " 0.010237 | \n",
+ " 1.000000 | \n",
+ " 0.506176 | \n",
+ " 0.391022 | \n",
+ " -0.037341 | \n",
+ " 0.073055 | \n",
+ " 0.029948 | \n",
+ " -0.010457 | \n",
+ " 0.010359 | \n",
+ "
\n",
+ " \n",
+ " | num_hrefs | \n",
+ " -0.061430 | \n",
+ " 0.506176 | \n",
+ " 1.000000 | \n",
+ " 0.349176 | \n",
+ " -0.032617 | \n",
+ " 0.193206 | \n",
+ " 0.130599 | \n",
+ " 0.020670 | \n",
+ " 0.026945 | \n",
+ "
\n",
+ " \n",
+ " | num_imgs | \n",
+ " -0.012670 | \n",
+ " 0.391022 | \n",
+ " 0.349176 | \n",
+ " 1.000000 | \n",
+ " -0.168468 | \n",
+ " 0.113552 | \n",
+ " 0.046920 | \n",
+ " 0.023520 | \n",
+ " 0.030852 | \n",
+ "
\n",
+ " \n",
+ " | num_videos | \n",
+ " 0.116492 | \n",
+ " -0.037341 | \n",
+ " -0.032617 | \n",
+ " -0.168468 | \n",
+ " 1.000000 | \n",
+ " 0.072735 | \n",
+ " -0.047764 | \n",
+ " 0.059727 | \n",
+ " 0.006032 | \n",
+ "
\n",
+ " \n",
+ " | global_subjectivity | \n",
+ " -0.047503 | \n",
+ " 0.073055 | \n",
+ " 0.193206 | \n",
+ " 0.113552 | \n",
+ " 0.072735 | \n",
+ " 1.000000 | \n",
+ " 0.336713 | \n",
+ " 0.134525 | \n",
+ " 0.050840 | \n",
+ "
\n",
+ " \n",
+ " | global_sentiment_polarity | \n",
+ " -0.077222 | \n",
+ " 0.029948 | \n",
+ " 0.130599 | \n",
+ " 0.046920 | \n",
+ " -0.047764 | \n",
+ " 0.336713 | \n",
+ " 1.000000 | \n",
+ " 0.025508 | \n",
+ " 0.222153 | \n",
+ "
\n",
+ " \n",
+ " | title_subjectivity | \n",
+ " 0.108684 | \n",
+ " -0.010457 | \n",
+ " 0.020670 | \n",
+ " 0.023520 | \n",
+ " 0.059727 | \n",
+ " 0.134525 | \n",
+ " 0.025508 | \n",
+ " 1.000000 | \n",
+ " 0.351178 | \n",
+ "
\n",
+ " \n",
+ " | title_sentiment_polarity | \n",
+ " 0.011174 | \n",
+ " 0.010359 | \n",
+ " 0.026945 | \n",
+ " 0.030852 | \n",
+ " 0.006032 | \n",
+ " 0.050840 | \n",
+ " 0.222153 | \n",
+ " 0.351178 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " n_tokens_title n_tokens_content num_hrefs \\\n",
+ "n_tokens_title 1.000000 0.010237 -0.061430 \n",
+ "n_tokens_content 0.010237 1.000000 0.506176 \n",
+ "num_hrefs -0.061430 0.506176 1.000000 \n",
+ "num_imgs -0.012670 0.391022 0.349176 \n",
+ "num_videos 0.116492 -0.037341 -0.032617 \n",
+ "global_subjectivity -0.047503 0.073055 0.193206 \n",
+ "global_sentiment_polarity -0.077222 0.029948 0.130599 \n",
+ "title_subjectivity 0.108684 -0.010457 0.020670 \n",
+ "title_sentiment_polarity 0.011174 0.010359 0.026945 \n",
+ "\n",
+ " num_imgs num_videos global_subjectivity \\\n",
+ "n_tokens_title -0.012670 0.116492 -0.047503 \n",
+ "n_tokens_content 0.391022 -0.037341 0.073055 \n",
+ "num_hrefs 0.349176 -0.032617 0.193206 \n",
+ "num_imgs 1.000000 -0.168468 0.113552 \n",
+ "num_videos -0.168468 1.000000 0.072735 \n",
+ "global_subjectivity 0.113552 0.072735 1.000000 \n",
+ "global_sentiment_polarity 0.046920 -0.047764 0.336713 \n",
+ "title_subjectivity 0.023520 0.059727 0.134525 \n",
+ "title_sentiment_polarity 0.030852 0.006032 0.050840 \n",
+ "\n",
+ " global_sentiment_polarity title_subjectivity \\\n",
+ "n_tokens_title -0.077222 0.108684 \n",
+ "n_tokens_content 0.029948 -0.010457 \n",
+ "num_hrefs 0.130599 0.020670 \n",
+ "num_imgs 0.046920 0.023520 \n",
+ "num_videos -0.047764 0.059727 \n",
+ "global_subjectivity 0.336713 0.134525 \n",
+ "global_sentiment_polarity 1.000000 0.025508 \n",
+ "title_subjectivity 0.025508 1.000000 \n",
+ "title_sentiment_polarity 0.222153 0.351178 \n",
+ "\n",
+ " title_sentiment_polarity \n",
+ "n_tokens_title 0.011174 \n",
+ "n_tokens_content 0.010359 \n",
+ "num_hrefs 0.026945 \n",
+ "num_imgs 0.030852 \n",
+ "num_videos 0.006032 \n",
+ "global_subjectivity 0.050840 \n",
+ "global_sentiment_polarity 0.222153 \n",
+ "title_subjectivity 0.351178 \n",
+ "title_sentiment_polarity 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Mutual Information Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " n_tokens_title | \n",
+ " n_tokens_content | \n",
+ " num_hrefs | \n",
+ " num_imgs | \n",
+ " num_videos | \n",
+ " global_subjectivity | \n",
+ " global_sentiment_polarity | \n",
+ " title_subjectivity | \n",
+ " title_sentiment_polarity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | n_tokens_title | \n",
+ " 1.000000 | \n",
+ " 0.000862 | \n",
+ " 0.000354 | \n",
+ " 0.003505 | \n",
+ " 0.003229 | \n",
+ " 0.000254 | \n",
+ " 0.000917 | \n",
+ " 0.008202 | \n",
+ " 0.007850 | \n",
+ "
\n",
+ " \n",
+ " | n_tokens_content | \n",
+ " 0.000862 | \n",
+ " 1.000000 | \n",
+ " 0.055766 | \n",
+ " 0.050216 | \n",
+ " 0.025189 | \n",
+ " 0.017313 | \n",
+ " 0.015528 | \n",
+ " 0.004697 | \n",
+ " 0.005491 | \n",
+ "
\n",
+ " \n",
+ " | num_hrefs | \n",
+ " 0.000354 | \n",
+ " 0.055766 | \n",
+ " 1.000000 | \n",
+ " 0.049407 | \n",
+ " 0.022149 | \n",
+ " 0.030201 | \n",
+ " 0.026079 | \n",
+ " 0.003155 | \n",
+ " 0.001383 | \n",
+ "
\n",
+ " \n",
+ " | num_imgs | \n",
+ " 0.003505 | \n",
+ " 0.050216 | \n",
+ " 0.049407 | \n",
+ " 1.000000 | \n",
+ " 0.044929 | \n",
+ " 0.020942 | \n",
+ " 0.016804 | \n",
+ " 0.006252 | \n",
+ " 0.007037 | \n",
+ "
\n",
+ " \n",
+ " | num_videos | \n",
+ " 0.003229 | \n",
+ " 0.025189 | \n",
+ " 0.022149 | \n",
+ " 0.044929 | \n",
+ " 1.000000 | \n",
+ " 0.018791 | \n",
+ " 0.016800 | \n",
+ " 0.006123 | \n",
+ " 0.006016 | \n",
+ "
\n",
+ " \n",
+ " | global_subjectivity | \n",
+ " 0.000254 | \n",
+ " 0.017313 | \n",
+ " 0.030201 | \n",
+ " 0.020942 | \n",
+ " 0.018791 | \n",
+ " 1.000000 | \n",
+ " 0.013351 | \n",
+ " 0.006450 | \n",
+ " 0.004167 | \n",
+ "
\n",
+ " \n",
+ " | global_sentiment_polarity | \n",
+ " 0.000917 | \n",
+ " 0.015528 | \n",
+ " 0.026079 | \n",
+ " 0.016804 | \n",
+ " 0.016800 | \n",
+ " 0.013351 | \n",
+ " 1.000000 | \n",
+ " 0.004410 | \n",
+ " 0.011626 | \n",
+ "
\n",
+ " \n",
+ " | title_subjectivity | \n",
+ " 0.008202 | \n",
+ " 0.004697 | \n",
+ " 0.003155 | \n",
+ " 0.006252 | \n",
+ " 0.006123 | \n",
+ " 0.006450 | \n",
+ " 0.004410 | \n",
+ " 1.000000 | \n",
+ " 0.330122 | \n",
+ "
\n",
+ " \n",
+ " | title_sentiment_polarity | \n",
+ " 0.007850 | \n",
+ " 0.005491 | \n",
+ " 0.001383 | \n",
+ " 0.007037 | \n",
+ " 0.006016 | \n",
+ " 0.004167 | \n",
+ " 0.011626 | \n",
+ " 0.330122 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " n_tokens_title n_tokens_content num_hrefs \\\n",
+ "n_tokens_title 1.000000 0.000862 0.000354 \n",
+ "n_tokens_content 0.000862 1.000000 0.055766 \n",
+ "num_hrefs 0.000354 0.055766 1.000000 \n",
+ "num_imgs 0.003505 0.050216 0.049407 \n",
+ "num_videos 0.003229 0.025189 0.022149 \n",
+ "global_subjectivity 0.000254 0.017313 0.030201 \n",
+ "global_sentiment_polarity 0.000917 0.015528 0.026079 \n",
+ "title_subjectivity 0.008202 0.004697 0.003155 \n",
+ "title_sentiment_polarity 0.007850 0.005491 0.001383 \n",
+ "\n",
+ " num_imgs num_videos global_subjectivity \\\n",
+ "n_tokens_title 0.003505 0.003229 0.000254 \n",
+ "n_tokens_content 0.050216 0.025189 0.017313 \n",
+ "num_hrefs 0.049407 0.022149 0.030201 \n",
+ "num_imgs 1.000000 0.044929 0.020942 \n",
+ "num_videos 0.044929 1.000000 0.018791 \n",
+ "global_subjectivity 0.020942 0.018791 1.000000 \n",
+ "global_sentiment_polarity 0.016804 0.016800 0.013351 \n",
+ "title_subjectivity 0.006252 0.006123 0.006450 \n",
+ "title_sentiment_polarity 0.007037 0.006016 0.004167 \n",
+ "\n",
+ " global_sentiment_polarity title_subjectivity \\\n",
+ "n_tokens_title 0.000917 0.008202 \n",
+ "n_tokens_content 0.015528 0.004697 \n",
+ "num_hrefs 0.026079 0.003155 \n",
+ "num_imgs 0.016804 0.006252 \n",
+ "num_videos 0.016800 0.006123 \n",
+ "global_subjectivity 0.013351 0.006450 \n",
+ "global_sentiment_polarity 1.000000 0.004410 \n",
+ "title_subjectivity 0.004410 1.000000 \n",
+ "title_sentiment_polarity 0.011626 0.330122 \n",
+ "\n",
+ " title_sentiment_polarity \n",
+ "n_tokens_title 0.007850 \n",
+ "n_tokens_content 0.005491 \n",
+ "num_hrefs 0.001383 \n",
+ "num_imgs 0.007037 \n",
+ "num_videos 0.006016 \n",
+ "global_subjectivity 0.004167 \n",
+ "global_sentiment_polarity 0.011626 \n",
+ "title_subjectivity 0.330122 \n",
+ "title_sentiment_polarity 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Predictor-Target Mutual Information ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " shares | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | num_imgs | \n",
+ " 0.002237 | \n",
+ "
\n",
+ " \n",
+ " | global_sentiment_polarity | \n",
+ " 0.000848 | \n",
+ "
\n",
+ " \n",
+ " | num_hrefs | \n",
+ " 0.000824 | \n",
+ "
\n",
+ " \n",
+ " | title_subjectivity | \n",
+ " 0.000505 | \n",
+ "
\n",
+ " \n",
+ " | n_tokens_title | \n",
+ " 0.000396 | \n",
+ "
\n",
+ " \n",
+ " | title_sentiment_polarity | \n",
+ " 0.000390 | \n",
+ "
\n",
+ " \n",
+ " | global_subjectivity | \n",
+ " 0.000360 | \n",
+ "
\n",
+ " \n",
+ " | n_tokens_content | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | num_videos | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " shares\n",
+ "num_imgs 0.002237\n",
+ "global_sentiment_polarity 0.000848\n",
+ "num_hrefs 0.000824\n",
+ "title_subjectivity 0.000505\n",
+ "n_tokens_title 0.000396\n",
+ "title_sentiment_polarity 0.000390\n",
+ "global_subjectivity 0.000360\n",
+ "n_tokens_content 0.000000\n",
+ "num_videos 0.000000"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Computing predictor correlations...\")\n",
+ "news_correlations = compute_predictor_correlations(\n",
+ " data=df_news,\n",
+ " predictors=news_predictors,\n",
+ " imputed_variables=[news_target],\n",
+ " method=\"all\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Pearson Correlation Matrix ---\")\n",
+ "display(news_correlations[\"pearson\"])\n",
+ "\n",
+ "print(\"\\n--- Spearman Correlation Matrix ---\")\n",
+ "display(news_correlations[\"spearman\"])\n",
+ "\n",
+ "print(\"\\n--- Mutual Information Matrix ---\")\n",
+ "display(news_correlations[\"mutual_info\"])\n",
+ "\n",
+ "print(\"\\n--- Predictor-Target Mutual Information ---\")\n",
+ "display(news_correlations[\"predictor_target_mi\"].sort_values(\n",
+ " by=news_target, ascending=False\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running leave-one-out analysis on top predictors...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "0da6e91ad8bb4b82ac700476832fbb15",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Leave-one-out analysis: 0%| | 0/9 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Leave-One-Out Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " predictor_removed | \n",
+ " avg_quantile_loss | \n",
+ " avg_log_loss | \n",
+ " loss_increase | \n",
+ " relative_impact | \n",
+ " baseline_quantile_loss | \n",
+ " baseline_log_loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 5 | \n",
+ " global_subjectivity | \n",
+ " 1446.384028 | \n",
+ " 0 | \n",
+ " 22.472743 | \n",
+ " 1.578240 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " num_videos | \n",
+ " 1445.642897 | \n",
+ " 0 | \n",
+ " 21.731611 | \n",
+ " 1.526191 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " global_sentiment_polarity | \n",
+ " 1436.540498 | \n",
+ " 0 | \n",
+ " 12.629212 | \n",
+ " 0.886938 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " title_sentiment_polarity | \n",
+ " 1427.685541 | \n",
+ " 0 | \n",
+ " 3.774255 | \n",
+ " 0.265063 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " title_subjectivity | \n",
+ " 1423.478042 | \n",
+ " 0 | \n",
+ " -0.433243 | \n",
+ " -0.030426 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " n_tokens_title | \n",
+ " 1423.140839 | \n",
+ " 0 | \n",
+ " -0.770446 | \n",
+ " -0.054108 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " n_tokens_content | \n",
+ " 1419.151211 | \n",
+ " 0 | \n",
+ " -4.760075 | \n",
+ " -0.334296 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " num_imgs | \n",
+ " 1413.187495 | \n",
+ " 0 | \n",
+ " -10.723790 | \n",
+ " -0.753122 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " num_hrefs | \n",
+ " 1400.835671 | \n",
+ " 0 | \n",
+ " -23.075615 | \n",
+ " -1.620580 | \n",
+ " 1423.911286 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " predictor_removed avg_quantile_loss avg_log_loss loss_increase \\\n",
+ "5 global_subjectivity 1446.384028 0 22.472743 \n",
+ "4 num_videos 1445.642897 0 21.731611 \n",
+ "6 global_sentiment_polarity 1436.540498 0 12.629212 \n",
+ "8 title_sentiment_polarity 1427.685541 0 3.774255 \n",
+ "7 title_subjectivity 1423.478042 0 -0.433243 \n",
+ "0 n_tokens_title 1423.140839 0 -0.770446 \n",
+ "1 n_tokens_content 1419.151211 0 -4.760075 \n",
+ "3 num_imgs 1413.187495 0 -10.723790 \n",
+ "2 num_hrefs 1400.835671 0 -23.075615 \n",
+ "\n",
+ " relative_impact baseline_quantile_loss baseline_log_loss \n",
+ "5 1.578240 1423.911286 0 \n",
+ "4 1.526191 1423.911286 0 \n",
+ "6 0.886938 1423.911286 0 \n",
+ "8 0.265063 1423.911286 0 \n",
+ "7 -0.030426 1423.911286 0 \n",
+ "0 -0.054108 1423.911286 0 \n",
+ "1 -0.334296 1423.911286 0 \n",
+ "3 -0.753122 1423.911286 0 \n",
+ "2 -1.620580 1423.911286 0 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Running leave-one-out analysis on top predictors...\")\n",
+ "news_loo = leave_one_out_analysis(\n",
+ " data=df_news,\n",
+ " predictors=news_predictors,\n",
+ " imputed_variables=[news_target],\n",
+ " model_class=QRF,\n",
+ " train_size=0.6,\n",
+ " n_jobs=1\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Leave-One-Out Results ---\")\n",
+ "display(news_loo)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating 60/40 donor/receiver split...\n",
+ "Donor size: 23786\n",
+ "Receiver size: 15858\n",
+ "\n",
+ "Running autoimpute with top predictors...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "076836bfd28f43db96c909103a354b45",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "AutoImputation progress: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "db03d98e585b45a1934823026f67be5e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Evaluating models: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 30.3s remaining: 45.5s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 30.4s remaining: 20.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 30.6s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.4s remaining: 0.6s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 0.4s remaining: 0.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.4s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 5.7s remaining: 8.5s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 6.1s remaining: 4.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 6.5s finished\n",
+ "[Parallel(n_jobs=1)]: Done 1 tasks | elapsed: 29.2s\n",
+ "[Parallel(n_jobs=1)]: Done 4 tasks | elapsed: 1.9min\n",
+ "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 2.4min finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 14.9s remaining: 22.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 15.0s remaining: 10.0s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 15.1s finished\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 15.560146071647623,
+ 60.47903144053638,
+ 15.745604364415303,
+ 10.507890019211057,
+ 54.00676077697914,
+ 61.679857230949004,
+ 32.233187446152684,
+ 28.7936291834078,
+ 49.9446676361043,
+ 43.857603822118776,
+ 54.0480647598996,
+ 59.100231367375066,
+ 75.36287533473256,
+ 85.78886401519938,
+ 69.18900733234582,
+ 66.81320701120315,
+ 81.37327096024065,
+ 87.26188169948132,
+ 90.5746664000503
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 223.8630796172769,
+ 458.84026692638434,
+ 579.6705542907807,
+ 781.7957689873281,
+ 965.9503445067965,
+ 1110.6244506911476,
+ 1263.9309570621926,
+ 1396.3557107328745,
+ 1567.414674054377,
+ 1666.0733698079766,
+ 1774.2381807111894,
+ 1841.788580990753,
+ 1877.6493608909611,
+ 1899.9221622620603,
+ 1903.3988327084717,
+ 1856.8536603963116,
+ 1739.2884707393005,
+ 1574.7065993779393,
+ 1304.629534601914
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 60.66262042015628,
+ 92.40862843596022,
+ 109.38663897969919,
+ 114.87514690423545,
+ 110.71243384983525,
+ 97.51259044999823,
+ 72.39778902095841,
+ 24.489905801095226,
+ 49.07917271377307,
+ 41.086084102999116,
+ 38.42291404644562,
+ 51.15325392240629,
+ 65.85759198446279,
+ 76.76583916930579,
+ 81.6257913697671,
+ 80.20353801031538,
+ 72.20311495082414,
+ 61.19170276598627,
+ 63.28109879404786
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 851.3189066921525,
+ 1326.5518535129859,
+ 1609.221044290723,
+ 1742.5809808365116,
+ 1746.6853738523198,
+ 1632.1026107358648,
+ 1413.2811207319949,
+ 1161.4009118822155,
+ 1215.9340325272856,
+ 1561.6328726709219,
+ 1923.9122801839508,
+ 2226.661242514924,
+ 2448.080017465779,
+ 2577.336010519308,
+ 2604.558954800209,
+ 2520.376100812142,
+ 2310.248440388663,
+ 1951.3192041531165,
+ 1389.5216274046845
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 4.91488994454613,
+ 9.948503450916574,
+ 15.11926481205003,
+ 20.241335802672026,
+ 25.124830985471814,
+ 30.035137787030546,
+ 35.078322511816275,
+ 40.07014533759876,
+ 44.9426815444261,
+ 49.6931115714545,
+ 54.12026828862763,
+ 58.35462095581471,
+ 62.81674257469332,
+ 67.51121552820844,
+ 72.4076611411818,
+ 77.69495973538872,
+ 82.53481296075533,
+ 88.40831545916754,
+ 91.24919098584027
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 145.36548795887333,
+ 280.411730240581,
+ 410.2865542050552,
+ 536.09591403514,
+ 658.0460032028413,
+ 775.9725259275932,
+ 889.6302925659251,
+ 998.6251910411793,
+ 1102.7752220692698,
+ 1201.1814439113527,
+ 1292.647877761528,
+ 1375.3670481007794,
+ 1447.1915914696597,
+ 1505.65883141304,
+ 1544.2728703297594,
+ 1556.0701090240518,
+ 1526.66017664746,
+ 1428.4784357764186,
+ 1186.3300439504387
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 242.2009251811794,
+ 226.78423066795636,
+ 211.42818424761032,
+ 196.14703057240666,
+ 180.95974360075425,
+ 165.89210559660626,
+ 150.97994333038858,
+ 136.2743063470363,
+ 121.84999198289339,
+ 107.81996668355704,
+ 94.36027012785775,
+ 81.75308333700983,
+ 70.45752894588836,
+ 61.204173295291646,
+ 55.03298071071827,
+ 53.031102162780925,
+ 55.650312854818324,
+ 62.3105732490546,
+ 71.89748569984397
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 1966.6652406524117,
+ 1968.6733507276683,
+ 1970.6814608029247,
+ 1972.6895708781813,
+ 1974.6976809534376,
+ 1976.7057910286935,
+ 1978.7139011039508,
+ 1980.7220111792071,
+ 1982.7301212544637,
+ 1984.7382313297198,
+ 1986.7463414049762,
+ 1988.7544514802328,
+ 1990.7625615554891,
+ 1992.7706716307457,
+ 1994.778781706002,
+ 1996.7868917812586,
+ 1998.7950018565148,
+ 2000.8031119317711,
+ 2002.8112220070282
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 266.34117800466385,
+ 253.38824166473228,
+ 231.15110026372184,
+ 205.8384739082198,
+ 179.88970809464792,
+ 154.26718277770672,
+ 129.39218067802256,
+ 105.50781777859422,
+ 83.89503130821453,
+ 65.7117562885142,
+ 53.414829272215954,
+ 49.69904604985873,
+ 54.860506687595695,
+ 66.83410550511452,
+ 80.48103879439213,
+ 94.99272759032986,
+ 109.22312522440023,
+ 122.50664828922461,
+ 135.32315717220257
+ ]
+ },
+ "hovertemplate": "Method=MDN
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "MDN",
+ "marker": {
+ "color": "#332288",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "MDN",
+ "offsetgroup": "MDN",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 371.5259677070179,
+ 532.4889352860997,
+ 670.1948056871282,
+ 793.8261486813573,
+ 906.0296681680782,
+ 1008.7435490980872,
+ 1103.321773520055,
+ 1190.6293059607194,
+ 1270.9509298679063,
+ 1344.7670541012708,
+ 1411.6253748651386,
+ 1471.2630880836289,
+ 1523.7629486014057,
+ 1569.196893669104,
+ 1606.055629435565,
+ 1633.0053395677787,
+ 1648.4900320672273,
+ 1648.2520695118499,
+ 1622.041442428474
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 1357.210239966107,
+ "y1": 1357.210239966107
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 1800.6696624197764,
+ "y1": 1800.6696624197764
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 1045.3193341911024,
+ "y1": 1045.3193341911024
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 1984.7382313297196,
+ "y1": 1984.7382313297196
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "MDN Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 1227.6932082267313,
+ "y1": 1227.6932082267313
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "News Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Creating 60/40 donor/receiver split...\")\n",
+ "\n",
+ "news_donor, news_receiver = train_test_split(\n",
+ " df_news, train_size=0.6, random_state=42\n",
+ ")\n",
+ "\n",
+ "news_receiver_no_target = news_receiver.drop(columns=[news_target])\n",
+ "\n",
+ "print(f\"Donor size: {len(news_donor)}\")\n",
+ "print(f\"Receiver size: {len(news_receiver)}\")\n",
+ "\n",
+ "print(\"\\nRunning autoimpute with top predictors...\")\n",
+ "news_result = autoimpute(\n",
+ " donor_data=news_donor,\n",
+ " receiver_data=news_receiver_no_target.copy(),\n",
+ " predictors=news_predictors,\n",
+ " imputed_variables=[news_target],\n",
+ " impute_all=True,\n",
+ " log_level=\"INFO\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Results ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=news_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"News Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Distribution Comparison: Imputed vs Ground Truth (Online News) ---\n",
+ "QuantReg: Wasserstein Distance = 2376.8148\n",
+ "QRF: Wasserstein Distance = 783.7171\n",
+ "OLS: Wasserstein Distance = 2777.1152\n",
+ "Matching: Wasserstein Distance = 239.8596\n",
+ "MDN: Wasserstein Distance = 2038.0328\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Variable | \n",
+ " Metric | \n",
+ " Distance | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " shares | \n",
+ " wasserstein_distance | \n",
+ " 2376.814836 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Variable Metric Distance\n",
+ "0 shares wasserstein_distance 2376.814836"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"\\n--- Distribution Comparison: Imputed vs Ground Truth (Online News) ---\")\n",
+ "# Store Wasserstein distances for all methods\n",
+ "news_wasserstein = {}\n",
+ "\n",
+ "distribution_comparison_news = compare_distributions(\n",
+ " donor_data=news_receiver, # Ground truth test split\n",
+ " receiver_data=news_result.receiver_data, # Imputed values\n",
+ " imputed_variables=[news_target],\n",
+ ")\n",
+ "best_method_name = news_result.fitted_models[\"best_method\"].__class__.__name__ \n",
+ "best_method_name = best_method_name.replace(\"Results\", \"\")\n",
+ "news_wasserstein[best_method_name] = distribution_comparison_news[\n",
+ " distribution_comparison_news['Metric'] == 'wasserstein_distance'\n",
+ "]['Distance'].values[0]\n",
+ "print(f\"{best_method_name}: Wasserstein Distance = {news_wasserstein.get(best_method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "for method_name, imputations in news_result.imputations.items():\n",
+ " # Skip the 'best_method' (it's a duplicate)\n",
+ " if method_name == 'best_method': \n",
+ " continue\n",
+ " \n",
+ " # Create a copy of receiver data with this method's imputations \n",
+ " receiver_with_imputations = news_receiver_no_target.copy() \n",
+ " \n",
+ " # Handle both dict (quantile->DataFrame) and DataFrame formats \n",
+ " if isinstance(imputations, dict): \n",
+ " # Get median quantile (0.5) imputations \n",
+ " imp_df = imputations.get(0.5, list(imputations.values())[0]) \n",
+ " else: \n",
+ " imp_df = imputations\n",
+ " \n",
+ " # Add imputed values \n",
+ " for var in [news_target]:\n",
+ " if var in imp_df.columns: \n",
+ " receiver_with_imputations[var] = imp_df[var].values \n",
+ " \n",
+ " # Calculate distribution comparison \n",
+ " dist_comparison = compare_distributions( \n",
+ " donor_data=news_receiver, # Ground truth \n",
+ " receiver_data=receiver_with_imputations,\n",
+ " imputed_variables=[news_target], \n",
+ " )\n",
+ "\n",
+ " # Extract Wasserstein distance \n",
+ " wd = dist_comparison[dist_comparison['Metric'] == 'wasserstein_distance']['Distance'].values \n",
+ " news_wasserstein[method_name] = wd[0]\n",
+ " \n",
+ " print(f\"{method_name}: Wasserstein Distance = {news_wasserstein.get(method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "# Display full comparison for best method \n",
+ "display(distribution_comparison_news)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CIA Sensitivity Analysis\n",
+ "\n",
+ "Measuring sensitivity to the Conditional Independence Assumption by progressively removing predictors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running CIA sensitivity analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "53b3b2254999454281449d68d51816db",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/9 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QRF sensitivity score: 1.020\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "feb0b19b6d8c4b2887573736f0c8da6a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/9 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " OLS sensitivity score: 1.003\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a4c193c5228e40b4aee8c5a4f71884d3",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/9 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QuantReg sensitivity score: 1.006\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7b6fab2c4748457f95800503612a70c6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/9 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Matching sensitivity score: 1.001\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "89ea4a90ceb54a6098c5f46fdb57ca4d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/9 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " MDN sensitivity score: 1.006\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "width": 2
+ },
+ "marker": {
+ "color": "#88CCEE",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QRF (baseline=1349.1703)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8
+ ],
+ "y": [
+ 1349.170298902762,
+ 1338.5774423004161,
+ 1340.6648202799847,
+ 1344.2901425148189,
+ 1388.673739437508,
+ 1410.5695346197504,
+ 1413.1374170765544,
+ 1447.8591701349476,
+ 1296.766785218817
+ ]
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "width": 2
+ },
+ "marker": {
+ "color": "#CC6677",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "OLS (baseline=1944.3781)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8
+ ],
+ "y": [
+ 1944.3780501930905,
+ 1946.2777404678332,
+ 1950.0966315694538,
+ 1949.746157368029,
+ 1949.73516300043,
+ 1950.6366114918412,
+ 1951.1446767165871,
+ 1952.2563210878816,
+ 1952.7410768309833
+ ]
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "width": 2
+ },
+ "marker": {
+ "color": "#DDCC77",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QuantReg (baseline=1028.4152)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8
+ ],
+ "y": [
+ 1028.4151968891467,
+ 1031.2314284887393,
+ 1035.1585047313433,
+ 1035.5391673039437,
+ 1035.7129814449897,
+ 1036.1345577111144,
+ 1036.287670986901,
+ 1037.121203579935,
+ 1038.0987340299885
+ ]
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "width": 2
+ },
+ "marker": {
+ "color": "#117733",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "Matching (baseline=1942.0466)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8
+ ],
+ "y": [
+ 1942.046601084626,
+ 1860.1268760247192,
+ 1946.1806028502965,
+ 1913.2927859755328,
+ 1875.07951822424,
+ 1965.5614831630724,
+ 1963.9431201917014,
+ 2026.2640938327659,
+ 2075.2908311262454
+ ]
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "width": 2
+ },
+ "marker": {
+ "color": "#332288",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "MDN (baseline=1480.7821)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8
+ ],
+ "y": [
+ 1480.7821241797008,
+ 1595.6940868019838,
+ 1150.7572682511513,
+ 1530.8876713279699,
+ 1689.688169321423,
+ 1673.4469904669077,
+ 1515.8517578336116,
+ 1459.1484442286085,
+ 1125.472434504482
+ ]
+ }
+ ],
+ "layout": {
+ "height": 600,
+ "hovermode": "x unified",
+ "legend": {
+ "x": 0.01,
+ "xanchor": "left",
+ "y": 0.99,
+ "yanchor": "top"
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "CIA Sensitivity: News"
+ },
+ "width": 750,
+ "xaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "text": "Number of Predictors Removed"
+ }
+ },
+ "yaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "text": "Quantile Loss"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Sensitivity Scores (higher = more sensitive to predictor removal):\n",
+ " QRF: 1.020\n",
+ " OLS: 1.003\n",
+ " QuantReg: 1.006\n",
+ " Matching: 1.001\n",
+ " MDN: 1.006\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Running CIA sensitivity analysis...\")\n",
+ "news_cia_results = run_cia_analysis_for_dataset(\n",
+ " data=df_news,\n",
+ " predictors=news_predictors,\n",
+ " imputed_variables=[news_target],\n",
+ " ordering=\"mutual_info\",\n",
+ ")\n",
+ "\n",
+ "# Plot degradation curves\n",
+ "news_cia_fig = plot_cia_degradation_curves(\n",
+ " news_cia_results,\n",
+ " title=\"CIA Sensitivity: News\"\n",
+ ")\n",
+ "news_cia_fig.show()\n",
+ "\n",
+ "# Display sensitivity scores\n",
+ "print(\"\\nSensitivity Scores (higher = more sensitive to predictor removal):\")\n",
+ "for method, results in news_cia_results.items():\n",
+ " print(f\" {method}: {results['sensitivity_score']:.3f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## 5. Abalone Dataset\n",
+ "\n",
+ "Predicting the age of abalone from physical measurements."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Shape: (4177, 9)\n",
+ "\n",
+ "Column names:\n",
+ " 1. Sex\n",
+ " 2. Length\n",
+ " 3. Diameter\n",
+ " 4. Height\n",
+ " 5. Whole_weight\n",
+ " 6. Shucked_weight\n",
+ " 7. Viscera_weight\n",
+ " 8. Shell_weight\n",
+ " 9. Class_number_of_rings\n",
+ "\n",
+ "Data types:\n",
+ "Sex object\n",
+ "Length float64\n",
+ "Diameter float64\n",
+ "Height float64\n",
+ "Whole_weight float64\n",
+ "Shucked_weight float64\n",
+ "Viscera_weight float64\n",
+ "Shell_weight float64\n",
+ "Class_number_of_rings int64\n",
+ "dtype: object\n",
+ "\n",
+ "First few rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Sex | \n",
+ " Length | \n",
+ " Diameter | \n",
+ " Height | \n",
+ " Whole_weight | \n",
+ " Shucked_weight | \n",
+ " Viscera_weight | \n",
+ " Shell_weight | \n",
+ " Class_number_of_rings | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " M | \n",
+ " 0.455 | \n",
+ " 0.365 | \n",
+ " 0.095 | \n",
+ " 0.5140 | \n",
+ " 0.2245 | \n",
+ " 0.1010 | \n",
+ " 0.150 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " M | \n",
+ " 0.350 | \n",
+ " 0.265 | \n",
+ " 0.090 | \n",
+ " 0.2255 | \n",
+ " 0.0995 | \n",
+ " 0.0485 | \n",
+ " 0.070 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " F | \n",
+ " 0.530 | \n",
+ " 0.420 | \n",
+ " 0.135 | \n",
+ " 0.6770 | \n",
+ " 0.2565 | \n",
+ " 0.1415 | \n",
+ " 0.210 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " M | \n",
+ " 0.440 | \n",
+ " 0.365 | \n",
+ " 0.125 | \n",
+ " 0.5160 | \n",
+ " 0.2155 | \n",
+ " 0.1140 | \n",
+ " 0.155 | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " I | \n",
+ " 0.330 | \n",
+ " 0.255 | \n",
+ " 0.080 | \n",
+ " 0.2050 | \n",
+ " 0.0895 | \n",
+ " 0.0395 | \n",
+ " 0.055 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Sex Length Diameter Height Whole_weight Shucked_weight Viscera_weight \\\n",
+ "0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 \n",
+ "1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 \n",
+ "2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 \n",
+ "3 M 0.440 0.365 0.125 0.5160 0.2155 0.1140 \n",
+ "4 I 0.330 0.255 0.080 0.2050 0.0895 0.0395 \n",
+ "\n",
+ " Shell_weight Class_number_of_rings \n",
+ "0 0.150 15 \n",
+ "1 0.070 7 \n",
+ "2 0.210 9 \n",
+ "3 0.155 10 \n",
+ "4 0.055 7 "
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_abalone = load_dataset(\"abalone\")\n",
+ "print(f\"\\nShape: {df_abalone.shape}\")\n",
+ "print(f\"\\nColumn names:\")\n",
+ "for i, col in enumerate(df_abalone.columns):\n",
+ " print(f\" {i+1}. {col}\")\n",
+ "print(f\"\\nData types:\")\n",
+ "print(df_abalone.dtypes)\n",
+ "print(f\"\\nFirst few rows:\")\n",
+ "df_abalone.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Target variable: Shell_weight\n",
+ "Predictors (8): ['Sex', 'Length', 'Diameter', 'Height', 'Whole_weight', 'Shucked_weight', 'Viscera_weight', 'Class_number_of_rings']\n"
+ ]
+ }
+ ],
+ "source": [
+ "abalone_target = \"Shell_weight\" # Target variable to impute\n",
+ "abalone_predictors = [col for col in df_abalone.columns if col != abalone_target]\n",
+ "\n",
+ "print(f\"Target variable: {abalone_target}\")\n",
+ "print(f\"Predictors ({len(abalone_predictors)}): {abalone_predictors}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing predictor correlations...\n",
+ "\n",
+ "--- Pearson Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Sex | \n",
+ " Length | \n",
+ " Diameter | \n",
+ " Height | \n",
+ " Whole_weight | \n",
+ " Shucked_weight | \n",
+ " Viscera_weight | \n",
+ " Class_number_of_rings | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Sex | \n",
+ " 1.000000 | \n",
+ " -0.036066 | \n",
+ " -0.038874 | \n",
+ " -0.042077 | \n",
+ " -0.021391 | \n",
+ " -0.001373 | \n",
+ " -0.032067 | \n",
+ " -0.034627 | \n",
+ "
\n",
+ " \n",
+ " | Length | \n",
+ " -0.036066 | \n",
+ " 1.000000 | \n",
+ " 0.986812 | \n",
+ " 0.827554 | \n",
+ " 0.925261 | \n",
+ " 0.897914 | \n",
+ " 0.903018 | \n",
+ " 0.556720 | \n",
+ "
\n",
+ " \n",
+ " | Diameter | \n",
+ " -0.038874 | \n",
+ " 0.986812 | \n",
+ " 1.000000 | \n",
+ " 0.833684 | \n",
+ " 0.925452 | \n",
+ " 0.893162 | \n",
+ " 0.899724 | \n",
+ " 0.574660 | \n",
+ "
\n",
+ " \n",
+ " | Height | \n",
+ " -0.042077 | \n",
+ " 0.827554 | \n",
+ " 0.833684 | \n",
+ " 1.000000 | \n",
+ " 0.819221 | \n",
+ " 0.774972 | \n",
+ " 0.798319 | \n",
+ " 0.557467 | \n",
+ "
\n",
+ " \n",
+ " | Whole_weight | \n",
+ " -0.021391 | \n",
+ " 0.925261 | \n",
+ " 0.925452 | \n",
+ " 0.819221 | \n",
+ " 1.000000 | \n",
+ " 0.969405 | \n",
+ " 0.966375 | \n",
+ " 0.540390 | \n",
+ "
\n",
+ " \n",
+ " | Shucked_weight | \n",
+ " -0.001373 | \n",
+ " 0.897914 | \n",
+ " 0.893162 | \n",
+ " 0.774972 | \n",
+ " 0.969405 | \n",
+ " 1.000000 | \n",
+ " 0.931961 | \n",
+ " 0.420884 | \n",
+ "
\n",
+ " \n",
+ " | Viscera_weight | \n",
+ " -0.032067 | \n",
+ " 0.903018 | \n",
+ " 0.899724 | \n",
+ " 0.798319 | \n",
+ " 0.966375 | \n",
+ " 0.931961 | \n",
+ " 1.000000 | \n",
+ " 0.503819 | \n",
+ "
\n",
+ " \n",
+ " | Class_number_of_rings | \n",
+ " -0.034627 | \n",
+ " 0.556720 | \n",
+ " 0.574660 | \n",
+ " 0.557467 | \n",
+ " 0.540390 | \n",
+ " 0.420884 | \n",
+ " 0.503819 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Sex Length Diameter Height Whole_weight \\\n",
+ "Sex 1.000000 -0.036066 -0.038874 -0.042077 -0.021391 \n",
+ "Length -0.036066 1.000000 0.986812 0.827554 0.925261 \n",
+ "Diameter -0.038874 0.986812 1.000000 0.833684 0.925452 \n",
+ "Height -0.042077 0.827554 0.833684 1.000000 0.819221 \n",
+ "Whole_weight -0.021391 0.925261 0.925452 0.819221 1.000000 \n",
+ "Shucked_weight -0.001373 0.897914 0.893162 0.774972 0.969405 \n",
+ "Viscera_weight -0.032067 0.903018 0.899724 0.798319 0.966375 \n",
+ "Class_number_of_rings -0.034627 0.556720 0.574660 0.557467 0.540390 \n",
+ "\n",
+ " Shucked_weight Viscera_weight Class_number_of_rings \n",
+ "Sex -0.001373 -0.032067 -0.034627 \n",
+ "Length 0.897914 0.903018 0.556720 \n",
+ "Diameter 0.893162 0.899724 0.574660 \n",
+ "Height 0.774972 0.798319 0.557467 \n",
+ "Whole_weight 0.969405 0.966375 0.540390 \n",
+ "Shucked_weight 1.000000 0.931961 0.420884 \n",
+ "Viscera_weight 0.931961 1.000000 0.503819 \n",
+ "Class_number_of_rings 0.420884 0.503819 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Spearman Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Sex | \n",
+ " Length | \n",
+ " Diameter | \n",
+ " Height | \n",
+ " Whole_weight | \n",
+ " Shucked_weight | \n",
+ " Viscera_weight | \n",
+ " Class_number_of_rings | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Sex | \n",
+ " 1.000000 | \n",
+ " -0.021516 | \n",
+ " -0.022653 | \n",
+ " -0.029176 | \n",
+ " -0.013197 | \n",
+ " -0.000023 | \n",
+ " -0.021652 | \n",
+ " -0.020349 | \n",
+ "
\n",
+ " \n",
+ " | Length | \n",
+ " -0.021516 | \n",
+ " 1.000000 | \n",
+ " 0.983319 | \n",
+ " 0.888206 | \n",
+ " 0.972633 | \n",
+ " 0.956830 | \n",
+ " 0.952658 | \n",
+ " 0.604385 | \n",
+ "
\n",
+ " \n",
+ " | Diameter | \n",
+ " -0.022653 | \n",
+ " 0.983319 | \n",
+ " 1.000000 | \n",
+ " 0.895705 | \n",
+ " 0.971324 | \n",
+ " 0.950472 | \n",
+ " 0.948391 | \n",
+ " 0.622895 | \n",
+ "
\n",
+ " \n",
+ " | Height | \n",
+ " -0.029176 | \n",
+ " 0.888206 | \n",
+ " 0.895705 | \n",
+ " 1.000000 | \n",
+ " 0.915985 | \n",
+ " 0.874196 | \n",
+ " 0.900587 | \n",
+ " 0.657716 | \n",
+ "
\n",
+ " \n",
+ " | Whole_weight | \n",
+ " -0.013197 | \n",
+ " 0.972633 | \n",
+ " 0.971324 | \n",
+ " 0.915985 | \n",
+ " 1.000000 | \n",
+ " 0.977060 | \n",
+ " 0.975252 | \n",
+ " 0.630832 | \n",
+ "
\n",
+ " \n",
+ " | Shucked_weight | \n",
+ " -0.000023 | \n",
+ " 0.956830 | \n",
+ " 0.950472 | \n",
+ " 0.874196 | \n",
+ " 0.977060 | \n",
+ " 1.000000 | \n",
+ " 0.947635 | \n",
+ " 0.539420 | \n",
+ "
\n",
+ " \n",
+ " | Viscera_weight | \n",
+ " -0.021652 | \n",
+ " 0.952658 | \n",
+ " 0.948391 | \n",
+ " 0.900587 | \n",
+ " 0.975252 | \n",
+ " 0.947635 | \n",
+ " 1.000000 | \n",
+ " 0.614344 | \n",
+ "
\n",
+ " \n",
+ " | Class_number_of_rings | \n",
+ " -0.020349 | \n",
+ " 0.604385 | \n",
+ " 0.622895 | \n",
+ " 0.657716 | \n",
+ " 0.630832 | \n",
+ " 0.539420 | \n",
+ " 0.614344 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Sex Length Diameter Height Whole_weight \\\n",
+ "Sex 1.000000 -0.021516 -0.022653 -0.029176 -0.013197 \n",
+ "Length -0.021516 1.000000 0.983319 0.888206 0.972633 \n",
+ "Diameter -0.022653 0.983319 1.000000 0.895705 0.971324 \n",
+ "Height -0.029176 0.888206 0.895705 1.000000 0.915985 \n",
+ "Whole_weight -0.013197 0.972633 0.971324 0.915985 1.000000 \n",
+ "Shucked_weight -0.000023 0.956830 0.950472 0.874196 0.977060 \n",
+ "Viscera_weight -0.021652 0.952658 0.948391 0.900587 0.975252 \n",
+ "Class_number_of_rings -0.020349 0.604385 0.622895 0.657716 0.630832 \n",
+ "\n",
+ " Shucked_weight Viscera_weight Class_number_of_rings \n",
+ "Sex -0.000023 -0.021652 -0.020349 \n",
+ "Length 0.956830 0.952658 0.604385 \n",
+ "Diameter 0.950472 0.948391 0.622895 \n",
+ "Height 0.874196 0.900587 0.657716 \n",
+ "Whole_weight 0.977060 0.975252 0.630832 \n",
+ "Shucked_weight 1.000000 0.947635 0.539420 \n",
+ "Viscera_weight 0.947635 1.000000 0.614344 \n",
+ "Class_number_of_rings 0.539420 0.614344 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Mutual Information Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Sex | \n",
+ " Length | \n",
+ " Diameter | \n",
+ " Height | \n",
+ " Whole_weight | \n",
+ " Shucked_weight | \n",
+ " Viscera_weight | \n",
+ " Class_number_of_rings | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Sex | \n",
+ " 1.000000 | \n",
+ " 0.128015 | \n",
+ " 0.118749 | \n",
+ " 0.125884 | \n",
+ " 0.127697 | \n",
+ " 0.125722 | \n",
+ " 0.130254 | \n",
+ " 0.093153 | \n",
+ "
\n",
+ " \n",
+ " | Length | \n",
+ " 0.128015 | \n",
+ " 1.000000 | \n",
+ " 0.300949 | \n",
+ " 0.183178 | \n",
+ " 0.256054 | \n",
+ " 0.217821 | \n",
+ " 0.204559 | \n",
+ " 0.105884 | \n",
+ "
\n",
+ " \n",
+ " | Diameter | \n",
+ " 0.118749 | \n",
+ " 0.300949 | \n",
+ " 1.000000 | \n",
+ " 0.188396 | \n",
+ " 0.271122 | \n",
+ " 0.216334 | \n",
+ " 0.206302 | \n",
+ " 0.112585 | \n",
+ "
\n",
+ " \n",
+ " | Height | \n",
+ " 0.125884 | \n",
+ " 0.183178 | \n",
+ " 0.188396 | \n",
+ " 1.000000 | \n",
+ " 0.207010 | \n",
+ " 0.171274 | \n",
+ " 0.190995 | \n",
+ " 0.101486 | \n",
+ "
\n",
+ " \n",
+ " | Whole_weight | \n",
+ " 0.127697 | \n",
+ " 0.256054 | \n",
+ " 0.271122 | \n",
+ " 0.207010 | \n",
+ " 1.000000 | \n",
+ " 0.173732 | \n",
+ " 0.174160 | \n",
+ " 0.108976 | \n",
+ "
\n",
+ " \n",
+ " | Shucked_weight | \n",
+ " 0.125722 | \n",
+ " 0.217821 | \n",
+ " 0.216334 | \n",
+ " 0.171274 | \n",
+ " 0.173732 | \n",
+ " 1.000000 | \n",
+ " 0.134141 | \n",
+ " 0.091902 | \n",
+ "
\n",
+ " \n",
+ " | Viscera_weight | \n",
+ " 0.130254 | \n",
+ " 0.204559 | \n",
+ " 0.206302 | \n",
+ " 0.190995 | \n",
+ " 0.174160 | \n",
+ " 0.134141 | \n",
+ " 1.000000 | \n",
+ " 0.105824 | \n",
+ "
\n",
+ " \n",
+ " | Class_number_of_rings | \n",
+ " 0.093153 | \n",
+ " 0.105884 | \n",
+ " 0.112585 | \n",
+ " 0.101486 | \n",
+ " 0.108976 | \n",
+ " 0.091902 | \n",
+ " 0.105824 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Sex Length Diameter Height Whole_weight \\\n",
+ "Sex 1.000000 0.128015 0.118749 0.125884 0.127697 \n",
+ "Length 0.128015 1.000000 0.300949 0.183178 0.256054 \n",
+ "Diameter 0.118749 0.300949 1.000000 0.188396 0.271122 \n",
+ "Height 0.125884 0.183178 0.188396 1.000000 0.207010 \n",
+ "Whole_weight 0.127697 0.256054 0.271122 0.207010 1.000000 \n",
+ "Shucked_weight 0.125722 0.217821 0.216334 0.171274 0.173732 \n",
+ "Viscera_weight 0.130254 0.204559 0.206302 0.190995 0.174160 \n",
+ "Class_number_of_rings 0.093153 0.105884 0.112585 0.101486 0.108976 \n",
+ "\n",
+ " Shucked_weight Viscera_weight Class_number_of_rings \n",
+ "Sex 0.125722 0.130254 0.093153 \n",
+ "Length 0.217821 0.204559 0.105884 \n",
+ "Diameter 0.216334 0.206302 0.112585 \n",
+ "Height 0.171274 0.190995 0.101486 \n",
+ "Whole_weight 0.173732 0.174160 0.108976 \n",
+ "Shucked_weight 1.000000 0.134141 0.091902 \n",
+ "Viscera_weight 0.134141 1.000000 0.105824 \n",
+ "Class_number_of_rings 0.091902 0.105824 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Predictor-Target Mutual Information ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Shell_weight | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Whole_weight | \n",
+ " 0.186427 | \n",
+ "
\n",
+ " \n",
+ " | Diameter | \n",
+ " 0.167913 | \n",
+ "
\n",
+ " \n",
+ " | Length | \n",
+ " 0.159534 | \n",
+ "
\n",
+ " \n",
+ " | Viscera_weight | \n",
+ " 0.141901 | \n",
+ "
\n",
+ " \n",
+ " | Shucked_weight | \n",
+ " 0.127999 | \n",
+ "
\n",
+ " \n",
+ " | Height | \n",
+ " 0.118932 | \n",
+ "
\n",
+ " \n",
+ " | Class_number_of_rings | \n",
+ " 0.052894 | \n",
+ "
\n",
+ " \n",
+ " | Sex | \n",
+ " 0.024884 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Shell_weight\n",
+ "Whole_weight 0.186427\n",
+ "Diameter 0.167913\n",
+ "Length 0.159534\n",
+ "Viscera_weight 0.141901\n",
+ "Shucked_weight 0.127999\n",
+ "Height 0.118932\n",
+ "Class_number_of_rings 0.052894\n",
+ "Sex 0.024884"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Computing predictor correlations...\")\n",
+ "abalone_correlations = compute_predictor_correlations(\n",
+ " data=df_abalone,\n",
+ " predictors=abalone_predictors,\n",
+ " imputed_variables=[abalone_target],\n",
+ " method=\"all\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Pearson Correlation Matrix ---\")\n",
+ "display(abalone_correlations[\"pearson\"])\n",
+ "\n",
+ "print(\"\\n--- Spearman Correlation Matrix ---\")\n",
+ "display(abalone_correlations[\"spearman\"])\n",
+ "\n",
+ "print(\"\\n--- Mutual Information Matrix ---\")\n",
+ "display(abalone_correlations[\"mutual_info\"])\n",
+ "\n",
+ "print(\"\\n--- Predictor-Target Mutual Information ---\")\n",
+ "display(abalone_correlations[\"predictor_target_mi\"].sort_values(\n",
+ " by=abalone_target, ascending=False\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running leave-one-out analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d1f96f060e724322a7cea8f37c286482",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Leave-one-out analysis: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Leave-One-Out Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " predictor_removed | \n",
+ " avg_quantile_loss | \n",
+ " avg_log_loss | \n",
+ " loss_increase | \n",
+ " relative_impact | \n",
+ " baseline_quantile_loss | \n",
+ " baseline_log_loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 4 | \n",
+ " Whole_weight | \n",
+ " 0.011441 | \n",
+ " 0 | \n",
+ " 0.002679 | \n",
+ " 30.572962 | \n",
+ " 0.008762 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Shucked_weight | \n",
+ " 0.010085 | \n",
+ " 0 | \n",
+ " 0.001322 | \n",
+ " 15.091441 | \n",
+ " 0.008762 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Viscera_weight | \n",
+ " 0.009000 | \n",
+ " 0 | \n",
+ " 0.000237 | \n",
+ " 2.707903 | \n",
+ " 0.008762 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Height | \n",
+ " 0.008880 | \n",
+ " 0 | \n",
+ " 0.000118 | \n",
+ " 1.342525 | \n",
+ " 0.008762 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Diameter | \n",
+ " 0.008771 | \n",
+ " 0 | \n",
+ " 0.000009 | \n",
+ " 0.102489 | \n",
+ " 0.008762 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " Sex | \n",
+ " 0.008755 | \n",
+ " 0 | \n",
+ " -0.000007 | \n",
+ " -0.082935 | \n",
+ " 0.008762 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Length | \n",
+ " 0.008714 | \n",
+ " 0 | \n",
+ " -0.000048 | \n",
+ " -0.548617 | \n",
+ " 0.008762 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " Class_number_of_rings | \n",
+ " 0.008650 | \n",
+ " 0 | \n",
+ " -0.000112 | \n",
+ " -1.281598 | \n",
+ " 0.008762 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " predictor_removed avg_quantile_loss avg_log_loss loss_increase \\\n",
+ "4 Whole_weight 0.011441 0 0.002679 \n",
+ "5 Shucked_weight 0.010085 0 0.001322 \n",
+ "6 Viscera_weight 0.009000 0 0.000237 \n",
+ "3 Height 0.008880 0 0.000118 \n",
+ "2 Diameter 0.008771 0 0.000009 \n",
+ "0 Sex 0.008755 0 -0.000007 \n",
+ "1 Length 0.008714 0 -0.000048 \n",
+ "7 Class_number_of_rings 0.008650 0 -0.000112 \n",
+ "\n",
+ " relative_impact baseline_quantile_loss baseline_log_loss \n",
+ "4 30.572962 0.008762 0 \n",
+ "5 15.091441 0.008762 0 \n",
+ "6 2.707903 0.008762 0 \n",
+ "3 1.342525 0.008762 0 \n",
+ "2 0.102489 0.008762 0 \n",
+ "0 -0.082935 0.008762 0 \n",
+ "1 -0.548617 0.008762 0 \n",
+ "7 -1.281598 0.008762 0 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Running leave-one-out analysis...\")\n",
+ "abalone_loo = leave_one_out_analysis(\n",
+ " data=df_abalone,\n",
+ " predictors=abalone_predictors,\n",
+ " imputed_variables=[abalone_target],\n",
+ " model_class=QRF,\n",
+ " train_size=0.6,\n",
+ " n_jobs=1\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Leave-One-Out Results ---\")\n",
+ "display(abalone_loo)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating 60/40 donor/receiver split...\n",
+ "Donor size: 2506\n",
+ "Receiver size: 1671\n",
+ "\n",
+ "Running autoimpute...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "077feb4af4a6419da4569e772accd507",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "AutoImputation progress: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f1f4a5233f8e49c3ba536571372bc99b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Evaluating models: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 8.8s remaining: 13.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 8.8s remaining: 5.8s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 8.8s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.2s remaining: 0.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 0.2s remaining: 0.2s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.2s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.5s remaining: 0.8s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 0.6s remaining: 0.4s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.6s finished\n",
+ "[Parallel(n_jobs=1)]: Done 1 tasks | elapsed: 3.2s\n",
+ "[Parallel(n_jobs=1)]: Done 4 tasks | elapsed: 11.7s\n",
+ "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 14.4s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 2.1s remaining: 3.2s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 2.1s remaining: 1.4s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 2.2s finished\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.00024326965613388657,
+ 0.00027589472404169373,
+ 0.00048719321481228246,
+ 0.0001636908274257018,
+ 0.00021311962585539253,
+ 0.00047804171632852575,
+ 0.0006962692894726471,
+ 0.00029024551341653494,
+ 0.00029087801345889877,
+ 0.00045216813532945134,
+ 0.00020430631948203218,
+ 0.0006033058929973939,
+ 0.00040279946387063235,
+ 0.00045993561342650747,
+ 0.0002963424751161734,
+ 0.0003998795374206044,
+ 0.00015180504092043815,
+ 0.00023167787898070507,
+ 0.0003883306558155267
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.003171187505467154,
+ 0.005130387233501125,
+ 0.0070908502119267445,
+ 0.008298522317913974,
+ 0.00977020928262996,
+ 0.010720453793608002,
+ 0.011885963511224562,
+ 0.012002792423121883,
+ 0.012249910279043508,
+ 0.012238157748248522,
+ 0.012173357885821981,
+ 0.011792550357452424,
+ 0.011332565884167919,
+ 0.010413367488131308,
+ 0.009396204403941122,
+ 0.008270376378716669,
+ 0.007089934215234869,
+ 0.005423739532886418,
+ 0.0036155805719238826
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.000370609330897358,
+ 0.0003652745273775171,
+ 0.0003542015967681174,
+ 0.0003581402999814908,
+ 0.0003486765666195462,
+ 0.0003441475733918232,
+ 0.0003450233024532522,
+ 0.00036724649880935217,
+ 0.00039053815097184126,
+ 0.00041447525982789247,
+ 0.00045442297546678445,
+ 0.0004970591309255817,
+ 0.0005384563278486812,
+ 0.0005448871576265109,
+ 0.0005371124042098624,
+ 0.0005085526161979347,
+ 0.0004891847358885552,
+ 0.00047062940171582203,
+ 0.00045756947185525556
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.003640431971412939,
+ 0.0054201672745586415,
+ 0.006631847486593419,
+ 0.00746375357970813,
+ 0.007996822864203368,
+ 0.008308100389038897,
+ 0.008445008917186806,
+ 0.008491620640887173,
+ 0.00851848761584949,
+ 0.008581834515710474,
+ 0.008684830402663123,
+ 0.008760733491948555,
+ 0.00873043495812654,
+ 0.008578226744835217,
+ 0.008254303974197876,
+ 0.0076962383272222035,
+ 0.006860191812274187,
+ 0.005653547347567274,
+ 0.0038813771491098977
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.0003002957405243558,
+ 0.0003916647076410419,
+ 0.0004575980596868312,
+ 0.00048010456147152734,
+ 0.0004743316771156946,
+ 0.00046776978159706847,
+ 0.00046703103575669147,
+ 0.0004901431426791614,
+ 0.0005164720063186571,
+ 0.000565445067077025,
+ 0.0005982200297485073,
+ 0.0006221877855910843,
+ 0.0006330517928663067,
+ 0.00064919727119191,
+ 0.0006620420285619062,
+ 0.0006772332349061551,
+ 0.0006710739189733639,
+ 0.0006771438751547985,
+ 0.0006343251427314907
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.0025812372039487864,
+ 0.00415203322551073,
+ 0.00536652434879917,
+ 0.006295003165407054,
+ 0.0069760216580269764,
+ 0.007467090953206039,
+ 0.007856627683712415,
+ 0.008124981770825527,
+ 0.008272805362843293,
+ 0.008280681178693978,
+ 0.008220785772956412,
+ 0.008096860802368306,
+ 0.007803099764698377,
+ 0.00739903476423837,
+ 0.0068958004097455575,
+ 0.0062701701281025505,
+ 0.005462830005979865,
+ 0.0044332588930873825,
+ 0.003138288530458782
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.0020365104336603802,
+ 0.0019380406463349053,
+ 0.0018407949105962973,
+ 0.0017449778836709257,
+ 0.0016508383572344301,
+ 0.0015586803086344201,
+ 0.0014688767419103995,
+ 0.0013818867598028154,
+ 0.0012982760517331199,
+ 0.0012187403134718854,
+ 0.0011441296921467239,
+ 0.0010754697154333852,
+ 0.001013969930900011,
+ 0.0009610059772534084,
+ 0.0009180563770578484,
+ 0.0008865777406346185,
+ 0.0008678192349293623,
+ 0.0008626111026588879,
+ 0.0008711963957339234
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.014285100178129794,
+ 0.014380612201891041,
+ 0.014476124225652282,
+ 0.014571636249413525,
+ 0.014667148273174765,
+ 0.01476266029693601,
+ 0.014858172320697253,
+ 0.014953684344458493,
+ 0.015049196368219737,
+ 0.015144708391980977,
+ 0.015240220415742222,
+ 0.015335732439503463,
+ 0.015431244463264706,
+ 0.015526756487025947,
+ 0.015622268510787189,
+ 0.015717780534548434,
+ 0.015813292558309676,
+ 0.015908804582070918,
+ 0.01600431660583216
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 0.004933235896485864,
+ 0.0061496603531529725,
+ 0.007025759919520332,
+ 0.007787472230557316,
+ 0.008366206347179553,
+ 0.008728138651863643,
+ 0.00893846297741526,
+ 0.008968634550059822,
+ 0.008847267828909986,
+ 0.008667859487156317,
+ 0.008372227090893001,
+ 0.007942799639520893,
+ 0.007393834191791668,
+ 0.006813615496239677,
+ 0.006038886484706126,
+ 0.005016465552396074,
+ 0.003888893897177334,
+ 0.002629321426474522,
+ 0.0016051762364270791
+ ]
+ },
+ "hovertemplate": "Method=MDN
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "MDN",
+ "marker": {
+ "color": "#332288",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "MDN",
+ "offsetgroup": "MDN",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 0.012276574516477503,
+ 0.018432648819518734,
+ 0.02317406481400918,
+ 0.02672731973268183,
+ 0.029399918741729713,
+ 0.03134876719068555,
+ 0.03275305750018242,
+ 0.03354567398997228,
+ 0.03382556654362355,
+ 0.033535913691053534,
+ 0.03283750432650534,
+ 0.031678071755279626,
+ 0.030068237694899587,
+ 0.028108862716910155,
+ 0.02571725838163568,
+ 0.02272632121067506,
+ 0.01919042724359827,
+ 0.014768039973995072,
+ 0.009304198960612093
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.009056111106576949,
+ "y1": 0.009056111106576949
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.0073998926033207475,
+ "y1": 0.0073998926033207475
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.006478586085400504,
+ "y1": 0.006478586085400504
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.015144708391980977,
+ "y1": 0.015144708391980977
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "MDN Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 0.02575886462126553,
+ "y1": 0.02575886462126553
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "Abalone Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Creating 60/40 donor/receiver split...\")\n",
+ "abalone_donor, abalone_receiver = train_test_split(\n",
+ " df_abalone, train_size=0.6, random_state=42\n",
+ ")\n",
+ "\n",
+ "abalone_receiver_no_target = abalone_receiver.drop(columns=[abalone_target])\n",
+ "\n",
+ "print(f\"Donor size: {len(abalone_donor)}\")\n",
+ "print(f\"Receiver size: {len(abalone_receiver)}\")\n",
+ "\n",
+ "print(\"\\nRunning autoimpute...\")\n",
+ "abalone_result = autoimpute(\n",
+ " donor_data=abalone_donor,\n",
+ " receiver_data=abalone_receiver_no_target.copy(),\n",
+ " predictors=abalone_predictors,\n",
+ " imputed_variables=[abalone_target],\n",
+ " impute_all=True,\n",
+ " log_level=\"INFO\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Results ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=abalone_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"Abalone Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Distribution Comparison: Imputed vs Ground Truth (Abalone) ---\n",
+ "QuantReg: Wasserstein Distance = 0.0027\n",
+ "QRF: Wasserstein Distance = 0.0028\n",
+ "OLS: Wasserstein Distance = 0.0037\n",
+ "Matching: Wasserstein Distance = 0.0025\n",
+ "MDN: Wasserstein Distance = 0.0404\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Variable | \n",
+ " Metric | \n",
+ " Distance | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Shell_weight | \n",
+ " wasserstein_distance | \n",
+ " 0.002713 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Variable Metric Distance\n",
+ "0 Shell_weight wasserstein_distance 0.002713"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"\\n--- Distribution Comparison: Imputed vs Ground Truth (Abalone) ---\")\n",
+ "# Store Wasserstein distances for all methods\n",
+ "abalone_wasserstein = {}\n",
+ "\n",
+ "distribution_comparison_abalone = compare_distributions(\n",
+ " donor_data=abalone_receiver, # Ground truth test split\n",
+ " receiver_data=abalone_result.receiver_data, # Imputed values\n",
+ " imputed_variables=[abalone_target],\n",
+ ")\n",
+ "best_method_name = abalone_result.fitted_models[\"best_method\"].__class__.__name__ \n",
+ "best_method_name = best_method_name.replace(\"Results\", \"\")\n",
+ "abalone_wasserstein[best_method_name] = distribution_comparison_abalone[\n",
+ " distribution_comparison_abalone['Metric'] == 'wasserstein_distance'\n",
+ "]['Distance'].values[0]\n",
+ "print(f\"{best_method_name}: Wasserstein Distance = {abalone_wasserstein.get(best_method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "for method_name, imputations in abalone_result.imputations.items():\n",
+ " # Skip the 'best_method' (it's a duplicate)\n",
+ " if method_name == 'best_method': \n",
+ " continue\n",
+ " \n",
+ " # Create a copy of receiver data with this method's imputations \n",
+ " receiver_with_imputations = abalone_receiver_no_target.copy() \n",
+ " \n",
+ " # Handle both dict (quantile->DataFrame) and DataFrame formats \n",
+ " if isinstance(imputations, dict): \n",
+ " # Get median quantile (0.5) imputations \n",
+ " imp_df = imputations.get(0.5, list(imputations.values())[0]) \n",
+ " else: \n",
+ " imp_df = imputations\n",
+ " \n",
+ " # Add imputed values \n",
+ " for var in [abalone_target]:\n",
+ " if var in imp_df.columns: \n",
+ " receiver_with_imputations[var] = imp_df[var].values \n",
+ " \n",
+ " # Calculate distribution comparison \n",
+ " dist_comparison = compare_distributions( \n",
+ " donor_data=abalone_receiver, # Ground truth \n",
+ " receiver_data=receiver_with_imputations,\n",
+ " imputed_variables=[abalone_target], \n",
+ " )\n",
+ "\n",
+ " # Extract Wasserstein distance \n",
+ " wd = dist_comparison[dist_comparison['Metric'] == 'wasserstein_distance']['Distance'].values \n",
+ " abalone_wasserstein[method_name] = wd[0]\n",
+ " \n",
+ " print(f\"{method_name}: Wasserstein Distance = {abalone_wasserstein.get(method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "# Display full comparison for best method\n",
+ "display(distribution_comparison_abalone)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CIA Sensitivity Analysis\n",
+ "\n",
+ "Measuring sensitivity to the Conditional Independence Assumption by progressively removing predictors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running CIA sensitivity analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b4b2625d9d3f48199e2a8917540755cb",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QRF sensitivity score: 2.071\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "352b3daebeef4a27baa3badac85a323e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " OLS sensitivity score: 2.067\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9d63e8d35ae544859a427dbc601614d3",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QuantReg sensitivity score: 2.115\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3971d273c6fd43f3becb1fb68f181424",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Matching sensitivity score: 1.886\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "631e36996e5048fcb851292dbbf5964c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " MDN sensitivity score: 1.472\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "width": 2
+ },
+ "marker": {
+ "color": "#88CCEE",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QRF (baseline=0.0087)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.008693014354066986,
+ 0.011421351674641148,
+ 0.012103755980861244,
+ 0.013193911483253589,
+ 0.014684198564593302,
+ 0.018883708133971288,
+ 0.031053277511961724,
+ 0.040702093301435406
+ ]
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "width": 2
+ },
+ "marker": {
+ "color": "#CC6677",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "OLS (baseline=0.0078)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.007839771157254703,
+ 0.011548659677199213,
+ 0.01175994861659598,
+ 0.011969652307556065,
+ 0.013155691406633488,
+ 0.01786605615100704,
+ 0.027496513271988255,
+ 0.03142477967305995
+ ]
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "width": 2
+ },
+ "marker": {
+ "color": "#DDCC77",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QuantReg (baseline=0.0071)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.00710735783241504,
+ 0.010448574624384293,
+ 0.010626440020242458,
+ 0.01077682863240476,
+ 0.011957216167721843,
+ 0.016090966710173474,
+ 0.026312275154905133,
+ 0.030935669438922498
+ ]
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "width": 2
+ },
+ "marker": {
+ "color": "#117733",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "Matching (baseline=0.0145)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.014497906698564595,
+ 0.01839712918660287,
+ 0.018835825358851675,
+ 0.01921471291866029,
+ 0.020236543062200955,
+ 0.028187499999999997,
+ 0.04815041866028708,
+ 0.06218062200956938
+ ]
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "width": 2
+ },
+ "marker": {
+ "color": "#332288",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "MDN (baseline=0.0196)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "y": [
+ 0.019642647046887837,
+ 0.017146109203536826,
+ 0.03258478932891066,
+ 0.03449703167624112,
+ 0.03477899730387567,
+ 0.02083813833329961,
+ 0.034876956218558935,
+ 0.03565663817672674
+ ]
+ }
+ ],
+ "layout": {
+ "height": 600,
+ "hovermode": "x unified",
+ "legend": {
+ "x": 0.01,
+ "xanchor": "left",
+ "y": 0.99,
+ "yanchor": "top"
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "CIA Sensitivity: Abalone"
+ },
+ "width": 750,
+ "xaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "text": "Number of Predictors Removed"
+ }
+ },
+ "yaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "text": "Quantile Loss"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Sensitivity Scores (higher = more sensitive to predictor removal):\n",
+ " QRF: 2.071\n",
+ " OLS: 2.067\n",
+ " QuantReg: 2.115\n",
+ " Matching: 1.886\n",
+ " MDN: 1.472\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Running CIA sensitivity analysis...\")\n",
+ "abalone_cia_results = run_cia_analysis_for_dataset(\n",
+ " data=df_abalone,\n",
+ " predictors=abalone_predictors,\n",
+ " imputed_variables=[abalone_target],\n",
+ " ordering=\"mutual_info\",\n",
+ ")\n",
+ "\n",
+ "# Plot degradation curves\n",
+ "abalone_cia_fig = plot_cia_degradation_curves(\n",
+ " abalone_cia_results,\n",
+ " title=\"CIA Sensitivity: Abalone\"\n",
+ ")\n",
+ "abalone_cia_fig.show()\n",
+ "\n",
+ "# Display sensitivity scores\n",
+ "print(\"\\nSensitivity Scores (higher = more sensitive to predictor removal):\")\n",
+ "for method, results in abalone_cia_results.items():\n",
+ " print(f\" {method}: {results['sensitivity_score']:.3f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## 6. House Sales Dataset\n",
+ "\n",
+ "King County house sales data with property features and prices."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Shape: (21613, 22)\n",
+ "\n",
+ "Column names:\n",
+ " 1. bedrooms\n",
+ " 2. bathrooms\n",
+ " 3. sqft_living\n",
+ " 4. sqft_lot\n",
+ " 5. floors\n",
+ " 6. waterfront\n",
+ " 7. view\n",
+ " 8. condition\n",
+ " 9. grade\n",
+ " 10. sqft_above\n",
+ " 11. sqft_basement\n",
+ " 12. yr_built\n",
+ " 13. yr_renovated\n",
+ " 14. zipcode\n",
+ " 15. lat\n",
+ " 16. long\n",
+ " 17. sqft_living15\n",
+ " 18. sqft_lot15\n",
+ " 19. date_year\n",
+ " 20. date_month\n",
+ " 21. date_day\n",
+ " 22. price\n",
+ "\n",
+ "Data types:\n",
+ "bedrooms int64\n",
+ "bathrooms float64\n",
+ "sqft_living int64\n",
+ "sqft_lot int64\n",
+ "floors float64\n",
+ "waterfront int64\n",
+ "view int64\n",
+ "condition int64\n",
+ "grade int64\n",
+ "sqft_above int64\n",
+ "sqft_basement int64\n",
+ "yr_built int64\n",
+ "yr_renovated int64\n",
+ "zipcode int64\n",
+ "lat float64\n",
+ "long float64\n",
+ "sqft_living15 int64\n",
+ "sqft_lot15 int64\n",
+ "date_year int64\n",
+ "date_month int64\n",
+ "date_day int64\n",
+ "price float64\n",
+ "dtype: object\n",
+ "\n",
+ "First few rows:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " condition | \n",
+ " grade | \n",
+ " sqft_above | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " date_year | \n",
+ " date_month | \n",
+ " date_day | \n",
+ " price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 3 | \n",
+ " 1.00 | \n",
+ " 1180 | \n",
+ " 5650 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " 1180 | \n",
+ " 0 | \n",
+ " 1955 | \n",
+ " 0 | \n",
+ " 98178 | \n",
+ " 47.5112 | \n",
+ " -122.257 | \n",
+ " 1340 | \n",
+ " 5650 | \n",
+ " 2014 | \n",
+ " 10 | \n",
+ " 13 | \n",
+ " 221900.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " 2.25 | \n",
+ " 2570 | \n",
+ " 7242 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 7 | \n",
+ " 2170 | \n",
+ " 400 | \n",
+ " 1951 | \n",
+ " 1991 | \n",
+ " 98125 | \n",
+ " 47.7210 | \n",
+ " -122.319 | \n",
+ " 1690 | \n",
+ " 7639 | \n",
+ " 2014 | \n",
+ " 12 | \n",
+ " 9 | \n",
+ " 538000.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2 | \n",
+ " 1.00 | \n",
+ " 770 | \n",
+ " 10000 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ " 770 | \n",
+ " 0 | \n",
+ " 1933 | \n",
+ " 0 | \n",
+ " 98028 | \n",
+ " 47.7379 | \n",
+ " -122.233 | \n",
+ " 2720 | \n",
+ " 8062 | \n",
+ " 2015 | \n",
+ " 2 | \n",
+ " 25 | \n",
+ " 180000.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 3.00 | \n",
+ " 1960 | \n",
+ " 5000 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 7 | \n",
+ " 1050 | \n",
+ " 910 | \n",
+ " 1965 | \n",
+ " 0 | \n",
+ " 98136 | \n",
+ " 47.5208 | \n",
+ " -122.393 | \n",
+ " 1360 | \n",
+ " 5000 | \n",
+ " 2014 | \n",
+ " 12 | \n",
+ " 9 | \n",
+ " 604000.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 3 | \n",
+ " 2.00 | \n",
+ " 1680 | \n",
+ " 8080 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 8 | \n",
+ " 1680 | \n",
+ " 0 | \n",
+ " 1987 | \n",
+ " 0 | \n",
+ " 98074 | \n",
+ " 47.6168 | \n",
+ " -122.045 | \n",
+ " 1800 | \n",
+ " 7503 | \n",
+ " 2015 | \n",
+ " 2 | \n",
+ " 18 | \n",
+ " 510000.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bedrooms bathrooms sqft_living sqft_lot floors waterfront view \\\n",
+ "0 3 1.00 1180 5650 1.0 0 0 \n",
+ "1 3 2.25 2570 7242 2.0 0 0 \n",
+ "2 2 1.00 770 10000 1.0 0 0 \n",
+ "3 4 3.00 1960 5000 1.0 0 0 \n",
+ "4 3 2.00 1680 8080 1.0 0 0 \n",
+ "\n",
+ " condition grade sqft_above sqft_basement yr_built yr_renovated \\\n",
+ "0 3 7 1180 0 1955 0 \n",
+ "1 3 7 2170 400 1951 1991 \n",
+ "2 3 6 770 0 1933 0 \n",
+ "3 5 7 1050 910 1965 0 \n",
+ "4 3 8 1680 0 1987 0 \n",
+ "\n",
+ " zipcode lat long sqft_living15 sqft_lot15 date_year \\\n",
+ "0 98178 47.5112 -122.257 1340 5650 2014 \n",
+ "1 98125 47.7210 -122.319 1690 7639 2014 \n",
+ "2 98028 47.7379 -122.233 2720 8062 2015 \n",
+ "3 98136 47.5208 -122.393 1360 5000 2014 \n",
+ "4 98074 47.6168 -122.045 1800 7503 2015 \n",
+ "\n",
+ " date_month date_day price \n",
+ "0 10 13 221900.0 \n",
+ "1 12 9 538000.0 \n",
+ "2 2 25 180000.0 \n",
+ "3 12 9 604000.0 \n",
+ "4 2 18 510000.0 "
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_house = load_dataset(\"house_sales\")\n",
+ "print(f\"\\nShape: {df_house.shape}\")\n",
+ "print(f\"\\nColumn names:\")\n",
+ "for i, col in enumerate(df_house.columns):\n",
+ " print(f\" {i+1}. {col}\")\n",
+ "print(f\"\\nData types:\")\n",
+ "print(df_house.dtypes)\n",
+ "print(f\"\\nFirst few rows:\")\n",
+ "df_house.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Target variable: price\n",
+ "Predictors (10): ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'waterfront', 'view', 'condition', 'grade', 'sqft_above', 'sqft_basement']\n"
+ ]
+ }
+ ],
+ "source": [
+ "house_target = \"price\" # Target variable to impute\n",
+ "house_predictors = [\"bedrooms\", \"bathrooms\", \"sqft_living\", \"sqft_lot\", \"waterfront\", \"view\", \"condition\", \"grade\", \"sqft_above\", \"sqft_basement\"]\n",
+ "\n",
+ "print(f\"Target variable: {house_target}\")\n",
+ "print(f\"Predictors ({len(house_predictors)}): {house_predictors}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing predictor correlations...\n",
+ "\n",
+ "--- Pearson Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " waterfront | \n",
+ " view | \n",
+ " condition | \n",
+ " grade | \n",
+ " sqft_above | \n",
+ " sqft_basement | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | bedrooms | \n",
+ " 1.000000 | \n",
+ " 0.515884 | \n",
+ " 0.576671 | \n",
+ " 0.031703 | \n",
+ " -0.006582 | \n",
+ " 0.079532 | \n",
+ " 0.028472 | \n",
+ " 0.356967 | \n",
+ " 0.477600 | \n",
+ " 0.303093 | \n",
+ "
\n",
+ " \n",
+ " | bathrooms | \n",
+ " 0.515884 | \n",
+ " 1.000000 | \n",
+ " 0.754665 | \n",
+ " 0.087740 | \n",
+ " 0.063744 | \n",
+ " 0.187737 | \n",
+ " -0.124982 | \n",
+ " 0.664983 | \n",
+ " 0.685342 | \n",
+ " 0.283770 | \n",
+ "
\n",
+ " \n",
+ " | sqft_living | \n",
+ " 0.576671 | \n",
+ " 0.754665 | \n",
+ " 1.000000 | \n",
+ " 0.172826 | \n",
+ " 0.103818 | \n",
+ " 0.284611 | \n",
+ " -0.058753 | \n",
+ " 0.762704 | \n",
+ " 0.876597 | \n",
+ " 0.435043 | \n",
+ "
\n",
+ " \n",
+ " | sqft_lot | \n",
+ " 0.031703 | \n",
+ " 0.087740 | \n",
+ " 0.172826 | \n",
+ " 1.000000 | \n",
+ " 0.021604 | \n",
+ " 0.074710 | \n",
+ " -0.008958 | \n",
+ " 0.113621 | \n",
+ " 0.183512 | \n",
+ " 0.015286 | \n",
+ "
\n",
+ " \n",
+ " | waterfront | \n",
+ " -0.006582 | \n",
+ " 0.063744 | \n",
+ " 0.103818 | \n",
+ " 0.021604 | \n",
+ " 1.000000 | \n",
+ " 0.401857 | \n",
+ " 0.016653 | \n",
+ " 0.082775 | \n",
+ " 0.072075 | \n",
+ " 0.080588 | \n",
+ "
\n",
+ " \n",
+ " | view | \n",
+ " 0.079532 | \n",
+ " 0.187737 | \n",
+ " 0.284611 | \n",
+ " 0.074710 | \n",
+ " 0.401857 | \n",
+ " 1.000000 | \n",
+ " 0.045990 | \n",
+ " 0.251321 | \n",
+ " 0.167649 | \n",
+ " 0.276947 | \n",
+ "
\n",
+ " \n",
+ " | condition | \n",
+ " 0.028472 | \n",
+ " -0.124982 | \n",
+ " -0.058753 | \n",
+ " -0.008958 | \n",
+ " 0.016653 | \n",
+ " 0.045990 | \n",
+ " 1.000000 | \n",
+ " -0.144674 | \n",
+ " -0.158214 | \n",
+ " 0.174105 | \n",
+ "
\n",
+ " \n",
+ " | grade | \n",
+ " 0.356967 | \n",
+ " 0.664983 | \n",
+ " 0.762704 | \n",
+ " 0.113621 | \n",
+ " 0.082775 | \n",
+ " 0.251321 | \n",
+ " -0.144674 | \n",
+ " 1.000000 | \n",
+ " 0.755923 | \n",
+ " 0.168392 | \n",
+ "
\n",
+ " \n",
+ " | sqft_above | \n",
+ " 0.477600 | \n",
+ " 0.685342 | \n",
+ " 0.876597 | \n",
+ " 0.183512 | \n",
+ " 0.072075 | \n",
+ " 0.167649 | \n",
+ " -0.158214 | \n",
+ " 0.755923 | \n",
+ " 1.000000 | \n",
+ " -0.051943 | \n",
+ "
\n",
+ " \n",
+ " | sqft_basement | \n",
+ " 0.303093 | \n",
+ " 0.283770 | \n",
+ " 0.435043 | \n",
+ " 0.015286 | \n",
+ " 0.080588 | \n",
+ " 0.276947 | \n",
+ " 0.174105 | \n",
+ " 0.168392 | \n",
+ " -0.051943 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bedrooms bathrooms sqft_living sqft_lot waterfront \\\n",
+ "bedrooms 1.000000 0.515884 0.576671 0.031703 -0.006582 \n",
+ "bathrooms 0.515884 1.000000 0.754665 0.087740 0.063744 \n",
+ "sqft_living 0.576671 0.754665 1.000000 0.172826 0.103818 \n",
+ "sqft_lot 0.031703 0.087740 0.172826 1.000000 0.021604 \n",
+ "waterfront -0.006582 0.063744 0.103818 0.021604 1.000000 \n",
+ "view 0.079532 0.187737 0.284611 0.074710 0.401857 \n",
+ "condition 0.028472 -0.124982 -0.058753 -0.008958 0.016653 \n",
+ "grade 0.356967 0.664983 0.762704 0.113621 0.082775 \n",
+ "sqft_above 0.477600 0.685342 0.876597 0.183512 0.072075 \n",
+ "sqft_basement 0.303093 0.283770 0.435043 0.015286 0.080588 \n",
+ "\n",
+ " view condition grade sqft_above sqft_basement \n",
+ "bedrooms 0.079532 0.028472 0.356967 0.477600 0.303093 \n",
+ "bathrooms 0.187737 -0.124982 0.664983 0.685342 0.283770 \n",
+ "sqft_living 0.284611 -0.058753 0.762704 0.876597 0.435043 \n",
+ "sqft_lot 0.074710 -0.008958 0.113621 0.183512 0.015286 \n",
+ "waterfront 0.401857 0.016653 0.082775 0.072075 0.080588 \n",
+ "view 1.000000 0.045990 0.251321 0.167649 0.276947 \n",
+ "condition 0.045990 1.000000 -0.144674 -0.158214 0.174105 \n",
+ "grade 0.251321 -0.144674 1.000000 0.755923 0.168392 \n",
+ "sqft_above 0.167649 -0.158214 0.755923 1.000000 -0.051943 \n",
+ "sqft_basement 0.276947 0.174105 0.168392 -0.051943 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Spearman Correlation Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " waterfront | \n",
+ " view | \n",
+ " condition | \n",
+ " grade | \n",
+ " sqft_above | \n",
+ " sqft_basement | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | bedrooms | \n",
+ " 1.000000 | \n",
+ " 0.521406 | \n",
+ " 0.647366 | \n",
+ " 0.216531 | \n",
+ " -0.007598 | \n",
+ " 0.081421 | \n",
+ " 0.012660 | \n",
+ " 0.380534 | \n",
+ " 0.539704 | \n",
+ " 0.230496 | \n",
+ "
\n",
+ " \n",
+ " | bathrooms | \n",
+ " 0.521406 | \n",
+ " 1.000000 | \n",
+ " 0.745526 | \n",
+ " 0.068805 | \n",
+ " 0.049522 | \n",
+ " 0.155549 | \n",
+ " -0.162891 | \n",
+ " 0.658194 | \n",
+ " 0.691006 | \n",
+ " 0.191848 | \n",
+ "
\n",
+ " \n",
+ " | sqft_living | \n",
+ " 0.647366 | \n",
+ " 0.745526 | \n",
+ " 1.000000 | \n",
+ " 0.304159 | \n",
+ " 0.070326 | \n",
+ " 0.232994 | \n",
+ " -0.062638 | \n",
+ " 0.716400 | \n",
+ " 0.843504 | \n",
+ " 0.327878 | \n",
+ "
\n",
+ " \n",
+ " | sqft_lot | \n",
+ " 0.216531 | \n",
+ " 0.068805 | \n",
+ " 0.304159 | \n",
+ " 1.000000 | \n",
+ " 0.085601 | \n",
+ " 0.117033 | \n",
+ " 0.114724 | \n",
+ " 0.152049 | \n",
+ " 0.272408 | \n",
+ " 0.036624 | \n",
+ "
\n",
+ " \n",
+ " | waterfront | \n",
+ " -0.007598 | \n",
+ " 0.049522 | \n",
+ " 0.070326 | \n",
+ " 0.085601 | \n",
+ " 1.000000 | \n",
+ " 0.284924 | \n",
+ " 0.016744 | \n",
+ " 0.062189 | \n",
+ " 0.054490 | \n",
+ " 0.051969 | \n",
+ "
\n",
+ " \n",
+ " | view | \n",
+ " 0.081421 | \n",
+ " 0.155549 | \n",
+ " 0.232994 | \n",
+ " 0.117033 | \n",
+ " 0.284924 | \n",
+ " 1.000000 | \n",
+ " 0.046000 | \n",
+ " 0.217044 | \n",
+ " 0.144394 | \n",
+ " 0.236525 | \n",
+ "
\n",
+ " \n",
+ " | condition | \n",
+ " 0.012660 | \n",
+ " -0.162891 | \n",
+ " -0.062638 | \n",
+ " 0.114724 | \n",
+ " 0.016744 | \n",
+ " 0.046000 | \n",
+ " 1.000000 | \n",
+ " -0.167374 | \n",
+ " -0.158126 | \n",
+ " 0.161623 | \n",
+ "
\n",
+ " \n",
+ " | grade | \n",
+ " 0.380534 | \n",
+ " 0.658194 | \n",
+ " 0.716400 | \n",
+ " 0.152049 | \n",
+ " 0.062189 | \n",
+ " 0.217044 | \n",
+ " -0.167374 | \n",
+ " 1.000000 | \n",
+ " 0.711815 | \n",
+ " 0.092927 | \n",
+ "
\n",
+ " \n",
+ " | sqft_above | \n",
+ " 0.539704 | \n",
+ " 0.691006 | \n",
+ " 0.843504 | \n",
+ " 0.272408 | \n",
+ " 0.054490 | \n",
+ " 0.144394 | \n",
+ " -0.158126 | \n",
+ " 0.711815 | \n",
+ " 1.000000 | \n",
+ " -0.165644 | \n",
+ "
\n",
+ " \n",
+ " | sqft_basement | \n",
+ " 0.230496 | \n",
+ " 0.191848 | \n",
+ " 0.327878 | \n",
+ " 0.036624 | \n",
+ " 0.051969 | \n",
+ " 0.236525 | \n",
+ " 0.161623 | \n",
+ " 0.092927 | \n",
+ " -0.165644 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bedrooms bathrooms sqft_living sqft_lot waterfront \\\n",
+ "bedrooms 1.000000 0.521406 0.647366 0.216531 -0.007598 \n",
+ "bathrooms 0.521406 1.000000 0.745526 0.068805 0.049522 \n",
+ "sqft_living 0.647366 0.745526 1.000000 0.304159 0.070326 \n",
+ "sqft_lot 0.216531 0.068805 0.304159 1.000000 0.085601 \n",
+ "waterfront -0.007598 0.049522 0.070326 0.085601 1.000000 \n",
+ "view 0.081421 0.155549 0.232994 0.117033 0.284924 \n",
+ "condition 0.012660 -0.162891 -0.062638 0.114724 0.016744 \n",
+ "grade 0.380534 0.658194 0.716400 0.152049 0.062189 \n",
+ "sqft_above 0.539704 0.691006 0.843504 0.272408 0.054490 \n",
+ "sqft_basement 0.230496 0.191848 0.327878 0.036624 0.051969 \n",
+ "\n",
+ " view condition grade sqft_above sqft_basement \n",
+ "bedrooms 0.081421 0.012660 0.380534 0.539704 0.230496 \n",
+ "bathrooms 0.155549 -0.162891 0.658194 0.691006 0.191848 \n",
+ "sqft_living 0.232994 -0.062638 0.716400 0.843504 0.327878 \n",
+ "sqft_lot 0.117033 0.114724 0.152049 0.272408 0.036624 \n",
+ "waterfront 0.284924 0.016744 0.062189 0.054490 0.051969 \n",
+ "view 1.000000 0.046000 0.217044 0.144394 0.236525 \n",
+ "condition 0.046000 1.000000 -0.167374 -0.158126 0.161623 \n",
+ "grade 0.217044 -0.167374 1.000000 0.711815 0.092927 \n",
+ "sqft_above 0.144394 -0.158126 0.711815 1.000000 -0.165644 \n",
+ "sqft_basement 0.236525 0.161623 0.092927 -0.165644 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Mutual Information Matrix ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " waterfront | \n",
+ " view | \n",
+ " condition | \n",
+ " grade | \n",
+ " sqft_above | \n",
+ " sqft_basement | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | bedrooms | \n",
+ " 1.000000 | \n",
+ " 0.098199 | \n",
+ " 0.159197 | \n",
+ " 0.038320 | \n",
+ " 0.037347 | \n",
+ " 0.007649 | \n",
+ " 0.006323 | \n",
+ " 0.049392 | \n",
+ " 0.112457 | \n",
+ " 0.034205 | \n",
+ "
\n",
+ " \n",
+ " | bathrooms | \n",
+ " 0.098199 | \n",
+ " 1.000000 | \n",
+ " 0.151131 | \n",
+ " 0.028646 | \n",
+ " 0.056060 | \n",
+ " 0.029864 | \n",
+ " 0.034050 | \n",
+ " 0.148806 | \n",
+ " 0.117633 | \n",
+ " 0.030131 | \n",
+ "
\n",
+ " \n",
+ " | sqft_living | \n",
+ " 0.159197 | \n",
+ " 0.151131 | \n",
+ " 1.000000 | \n",
+ " 0.015266 | \n",
+ " 0.049643 | \n",
+ " 0.047129 | \n",
+ " 0.010742 | \n",
+ " 0.200484 | \n",
+ " 0.405167 | \n",
+ " 0.087963 | \n",
+ "
\n",
+ " \n",
+ " | sqft_lot | \n",
+ " 0.038320 | \n",
+ " 0.028646 | \n",
+ " 0.015266 | \n",
+ " 1.000000 | \n",
+ " 0.081274 | \n",
+ " 0.031370 | \n",
+ " 0.033865 | \n",
+ " 0.043008 | \n",
+ " 0.016837 | \n",
+ " 0.019837 | \n",
+ "
\n",
+ " \n",
+ " | waterfront | \n",
+ " 0.037347 | \n",
+ " 0.056060 | \n",
+ " 0.049643 | \n",
+ " 0.081274 | \n",
+ " 1.000000 | \n",
+ " 0.418037 | \n",
+ " 0.000000 | \n",
+ " 0.050705 | \n",
+ " 0.028753 | \n",
+ " 0.215099 | \n",
+ "
\n",
+ " \n",
+ " | view | \n",
+ " 0.007649 | \n",
+ " 0.029864 | \n",
+ " 0.047129 | \n",
+ " 0.031370 | \n",
+ " 0.418037 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 0.043477 | \n",
+ " 0.023920 | \n",
+ " 0.074452 | \n",
+ "
\n",
+ " \n",
+ " | condition | \n",
+ " 0.006323 | \n",
+ " 0.034050 | \n",
+ " 0.010742 | \n",
+ " 0.033865 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 0.020681 | \n",
+ " 0.020348 | \n",
+ " 0.018175 | \n",
+ "
\n",
+ " \n",
+ " | grade | \n",
+ " 0.049392 | \n",
+ " 0.148806 | \n",
+ " 0.200484 | \n",
+ " 0.043008 | \n",
+ " 0.050705 | \n",
+ " 0.043477 | \n",
+ " 0.020681 | \n",
+ " 1.000000 | \n",
+ " 0.193197 | \n",
+ " 0.020057 | \n",
+ "
\n",
+ " \n",
+ " | sqft_above | \n",
+ " 0.112457 | \n",
+ " 0.117633 | \n",
+ " 0.405167 | \n",
+ " 0.016837 | \n",
+ " 0.028753 | \n",
+ " 0.023920 | \n",
+ " 0.020348 | \n",
+ " 0.193197 | \n",
+ " 1.000000 | \n",
+ " 0.043748 | \n",
+ "
\n",
+ " \n",
+ " | sqft_basement | \n",
+ " 0.034205 | \n",
+ " 0.030131 | \n",
+ " 0.087963 | \n",
+ " 0.019837 | \n",
+ " 0.215099 | \n",
+ " 0.074452 | \n",
+ " 0.018175 | \n",
+ " 0.020057 | \n",
+ " 0.043748 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " bedrooms bathrooms sqft_living sqft_lot waterfront \\\n",
+ "bedrooms 1.000000 0.098199 0.159197 0.038320 0.037347 \n",
+ "bathrooms 0.098199 1.000000 0.151131 0.028646 0.056060 \n",
+ "sqft_living 0.159197 0.151131 1.000000 0.015266 0.049643 \n",
+ "sqft_lot 0.038320 0.028646 0.015266 1.000000 0.081274 \n",
+ "waterfront 0.037347 0.056060 0.049643 0.081274 1.000000 \n",
+ "view 0.007649 0.029864 0.047129 0.031370 0.418037 \n",
+ "condition 0.006323 0.034050 0.010742 0.033865 0.000000 \n",
+ "grade 0.049392 0.148806 0.200484 0.043008 0.050705 \n",
+ "sqft_above 0.112457 0.117633 0.405167 0.016837 0.028753 \n",
+ "sqft_basement 0.034205 0.030131 0.087963 0.019837 0.215099 \n",
+ "\n",
+ " view condition grade sqft_above sqft_basement \n",
+ "bedrooms 0.007649 0.006323 0.049392 0.112457 0.034205 \n",
+ "bathrooms 0.029864 0.034050 0.148806 0.117633 0.030131 \n",
+ "sqft_living 0.047129 0.010742 0.200484 0.405167 0.087963 \n",
+ "sqft_lot 0.031370 0.033865 0.043008 0.016837 0.019837 \n",
+ "waterfront 0.418037 0.000000 0.050705 0.028753 0.215099 \n",
+ "view 1.000000 0.000000 0.043477 0.023920 0.074452 \n",
+ "condition 0.000000 1.000000 0.020681 0.020348 0.018175 \n",
+ "grade 0.043477 0.020681 1.000000 0.193197 0.020057 \n",
+ "sqft_above 0.023920 0.020348 0.193197 1.000000 0.043748 \n",
+ "sqft_basement 0.074452 0.018175 0.020057 0.043748 1.000000 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Predictor-Target Mutual Information ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | sqft_living | \n",
+ " 0.034919 | \n",
+ "
\n",
+ " \n",
+ " | grade | \n",
+ " 0.033545 | \n",
+ "
\n",
+ " \n",
+ " | sqft_above | \n",
+ " 0.025859 | \n",
+ "
\n",
+ " \n",
+ " | bathrooms | \n",
+ " 0.020110 | \n",
+ "
\n",
+ " \n",
+ " | bedrooms | \n",
+ " 0.007915 | \n",
+ "
\n",
+ " \n",
+ " | sqft_basement | \n",
+ " 0.006930 | \n",
+ "
\n",
+ " \n",
+ " | sqft_lot | \n",
+ " 0.006063 | \n",
+ "
\n",
+ " \n",
+ " | view | \n",
+ " 0.005654 | \n",
+ "
\n",
+ " \n",
+ " | condition | \n",
+ " 0.001584 | \n",
+ "
\n",
+ " \n",
+ " | waterfront | \n",
+ " 0.001340 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " price\n",
+ "sqft_living 0.034919\n",
+ "grade 0.033545\n",
+ "sqft_above 0.025859\n",
+ "bathrooms 0.020110\n",
+ "bedrooms 0.007915\n",
+ "sqft_basement 0.006930\n",
+ "sqft_lot 0.006063\n",
+ "view 0.005654\n",
+ "condition 0.001584\n",
+ "waterfront 0.001340"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Computing predictor correlations...\")\n",
+ "house_correlations = compute_predictor_correlations(\n",
+ " data=df_house,\n",
+ " predictors=house_predictors,\n",
+ " imputed_variables=[house_target],\n",
+ " method=\"all\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Pearson Correlation Matrix ---\")\n",
+ "display(house_correlations[\"pearson\"])\n",
+ "\n",
+ "print(\"\\n--- Spearman Correlation Matrix ---\")\n",
+ "display(house_correlations[\"spearman\"])\n",
+ "\n",
+ "print(\"\\n--- Mutual Information Matrix ---\")\n",
+ "display(house_correlations[\"mutual_info\"])\n",
+ "\n",
+ "print(\"\\n--- Predictor-Target Mutual Information ---\")\n",
+ "display(house_correlations[\"predictor_target_mi\"].sort_values(\n",
+ " by=house_target, ascending=False\n",
+ "))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running leave-one-out analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9424f4e64a4c41d4b1c132f0ae055ee0",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Leave-one-out analysis: 0%| | 0/10 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Leave-One-Out Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " predictor_removed | \n",
+ " avg_quantile_loss | \n",
+ " avg_log_loss | \n",
+ " loss_increase | \n",
+ " relative_impact | \n",
+ " baseline_quantile_loss | \n",
+ " baseline_log_loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 7 | \n",
+ " grade | \n",
+ " 69216.847912 | \n",
+ " 0 | \n",
+ " 6095.055599 | \n",
+ " 9.656024 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " sqft_lot | \n",
+ " 68609.389908 | \n",
+ " 0 | \n",
+ " 5487.597595 | \n",
+ " 8.693666 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " condition | \n",
+ " 64704.662524 | \n",
+ " 0 | \n",
+ " 1582.870212 | \n",
+ " 2.507645 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " bathrooms | \n",
+ " 64190.973160 | \n",
+ " 0 | \n",
+ " 1069.180848 | \n",
+ " 1.693838 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " view | \n",
+ " 63788.472535 | \n",
+ " 0 | \n",
+ " 666.680222 | \n",
+ " 1.056181 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " bedrooms | \n",
+ " 63643.886195 | \n",
+ " 0 | \n",
+ " 522.093882 | \n",
+ " 0.827121 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " sqft_living | \n",
+ " 63438.111978 | \n",
+ " 0 | \n",
+ " 316.319666 | \n",
+ " 0.501126 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " waterfront | \n",
+ " 63387.546779 | \n",
+ " 0 | \n",
+ " 265.754467 | \n",
+ " 0.421019 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " sqft_basement | \n",
+ " 63230.876154 | \n",
+ " 0 | \n",
+ " 109.083842 | \n",
+ " 0.172815 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " sqft_above | \n",
+ " 63195.338181 | \n",
+ " 0 | \n",
+ " 73.545868 | \n",
+ " 0.116514 | \n",
+ " 63121.792313 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " predictor_removed avg_quantile_loss avg_log_loss loss_increase \\\n",
+ "7 grade 69216.847912 0 6095.055599 \n",
+ "3 sqft_lot 68609.389908 0 5487.597595 \n",
+ "6 condition 64704.662524 0 1582.870212 \n",
+ "1 bathrooms 64190.973160 0 1069.180848 \n",
+ "5 view 63788.472535 0 666.680222 \n",
+ "0 bedrooms 63643.886195 0 522.093882 \n",
+ "2 sqft_living 63438.111978 0 316.319666 \n",
+ "4 waterfront 63387.546779 0 265.754467 \n",
+ "9 sqft_basement 63230.876154 0 109.083842 \n",
+ "8 sqft_above 63195.338181 0 73.545868 \n",
+ "\n",
+ " relative_impact baseline_quantile_loss baseline_log_loss \n",
+ "7 9.656024 63121.792313 0 \n",
+ "3 8.693666 63121.792313 0 \n",
+ "6 2.507645 63121.792313 0 \n",
+ "1 1.693838 63121.792313 0 \n",
+ "5 1.056181 63121.792313 0 \n",
+ "0 0.827121 63121.792313 0 \n",
+ "2 0.501126 63121.792313 0 \n",
+ "4 0.421019 63121.792313 0 \n",
+ "9 0.172815 63121.792313 0 \n",
+ "8 0.116514 63121.792313 0 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Running leave-one-out analysis...\")\n",
+ "house_loo = leave_one_out_analysis(\n",
+ " data=df_house,\n",
+ " predictors=house_predictors,\n",
+ " imputed_variables=[house_target],\n",
+ " model_class=QRF,\n",
+ " train_size=0.6,\n",
+ " n_jobs=1\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Leave-One-Out Results ---\")\n",
+ "display(house_loo)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating 60/40 donor/receiver split...\n",
+ "Donor size: 12967\n",
+ "Receiver size: 8646\n",
+ "\n",
+ "Running autoimpute...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ff390d112d1f439eb79aa91fc5a0cd3d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "AutoImputation progress: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "20857f3176104b3d9e739695c02ebcec",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Evaluating models: 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 10.5s remaining: 15.8s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 10.5s remaining: 7.0s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 10.6s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 0.3s remaining: 0.4s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 0.3s remaining: 0.2s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 0.3s finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 19.3s remaining: 29.0s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 20.0s remaining: 13.3s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 20.2s finished\n",
+ "[Parallel(n_jobs=1)]: Done 1 tasks | elapsed: 16.8s\n",
+ "[Parallel(n_jobs=1)]: Done 4 tasks | elapsed: 1.1min\n",
+ "[Parallel(n_jobs=1)]: Done 5 out of 5 | elapsed: 1.4min finished\n",
+ "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
+ "[Parallel(n_jobs=-1)]: Done 2 out of 5 | elapsed: 11.5s remaining: 17.2s\n",
+ "[Parallel(n_jobs=-1)]: Done 3 out of 5 | elapsed: 12.2s remaining: 8.1s\n",
+ "[Parallel(n_jobs=-1)]: Done 5 out of 5 | elapsed: 12.3s finished\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 719.328189753778,
+ 940.3832731105417,
+ 1482.848168298151,
+ 981.7573126166058,
+ 893.1098804459698,
+ 890.1490739996166,
+ 918.5323509728148,
+ 1588.7521448227917,
+ 1201.8109300236083,
+ 1336.585972042465,
+ 666.4767580846618,
+ 758.9475683984272,
+ 1596.3561562877353,
+ 1413.758832331186,
+ 1812.523234457851,
+ 1962.6534657951277,
+ 1475.01168353744,
+ 1950.596397145942,
+ 1745.9008891730946
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 20709.781289760314,
+ 32711.011025779328,
+ 44535.18256502814,
+ 53703.12974447841,
+ 62417.06348529387,
+ 68618.35321517424,
+ 75277.79127833492,
+ 78690.66612006526,
+ 81692.4921553432,
+ 82387.89194252898,
+ 82190.59544011946,
+ 80668.23840043221,
+ 76139.77789636323,
+ 71953.78781378965,
+ 66902.27469114999,
+ 59408.259501290624,
+ 51162.40596579338,
+ 40201.64217328191,
+ 28726.94975485272
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 194.028361567845,
+ 270.40611376212814,
+ 424.35766919963396,
+ 655.5625728964469,
+ 859.3968486998843,
+ 1081.5312202798207,
+ 1290.8863186105052,
+ 1397.2462643130293,
+ 1540.027505583281,
+ 1619.1472241056717,
+ 1689.5007427358473,
+ 1708.6528162176992,
+ 1692.2362737940518,
+ 1724.0607813481622,
+ 1740.5827151189064,
+ 1841.4550330271604,
+ 1938.1655872462368,
+ 2037.0636749212763,
+ 2092.754289131688
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 20473.589998960033,
+ 33432.25165704795,
+ 43320.90225158931,
+ 51196.22021063475,
+ 57569.70124450637,
+ 62801.63397552215,
+ 67045.07770711477,
+ 70407.69702574884,
+ 72927.84248013695,
+ 74521.04034750645,
+ 75226.70101567755,
+ 75023.80226665984,
+ 73828.67718656275,
+ 71527.86883635657,
+ 67858.72913281275,
+ 62484.32419192977,
+ 54953.553220199516,
+ 44734.247783678955,
+ 30447.100712490104
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 182.3720945383094,
+ 472.3398849856401,
+ 524.3005985089421,
+ 971.9646125139536,
+ 1224.9792826088424,
+ 2115.933780108482,
+ 1056.3153214213212,
+ 1829.0166203736255,
+ 2127.8709907744037,
+ 1718.448175433048,
+ 1352.7976079507,
+ 1559.8561647881027,
+ 1275.6384343952695,
+ 2065.063847283456,
+ 1573.1974067010156,
+ 1647.5556756086503,
+ 1912.659852465349,
+ 1602.9127058010229,
+ 1420.088344050314
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 14181.66310174412,
+ 25755.579151254104,
+ 35955.151863432664,
+ 44566.35401926348,
+ 52056.62402430021,
+ 58865.9289442663,
+ 63653.54425385364,
+ 67731.13211630892,
+ 70620.50999472741,
+ 72451.6790063794,
+ 73581.67597367859,
+ 72803.27067734861,
+ 72027.60910654339,
+ 69655.74924359658,
+ 65282.60359285939,
+ 60793.11896628221,
+ 53597.61821135791,
+ 42022.121677457835,
+ 27454.727576089623
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 1980.2054753191617,
+ 1703.6432646631138,
+ 1455.5825320961067,
+ 1253.0658913312486,
+ 1121.0536267527482,
+ 1085.579027918468,
+ 1155.5673513130084,
+ 1314.2767846796703,
+ 1534.41992812871,
+ 1793.515835082672,
+ 2077.0380524461475,
+ 2376.259366670552,
+ 2685.938117275419,
+ 3002.840663062175,
+ 3324.9021055377766,
+ 3650.75738181033,
+ 3979.474638195215,
+ 4310.39914822265,
+ 4643.05898344198
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 101408.0160260291,
+ 101571.7732345491,
+ 101735.5304430691,
+ 101899.2876515891,
+ 102063.0448601091,
+ 102226.80206862911,
+ 102390.5592771491,
+ 102554.31648566911,
+ 102718.07369418911,
+ 102881.83090270912,
+ 103045.58811122912,
+ 103209.34531974913,
+ 103373.10252826913,
+ 103536.85973678913,
+ 103700.61694530913,
+ 103864.37415382915,
+ 104028.13136234913,
+ 104191.88857086914,
+ 104355.64577938915
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 284.7326924435361,
+ 569.2486180173503,
+ 853.6520482378114,
+ 1137.9652075982785,
+ 1422.2045123242642,
+ 1706.368936967098,
+ 1990.47200208671,
+ 2274.5075204441405,
+ 2558.4736744376673,
+ 2842.3662015453397,
+ 3126.183792588692,
+ 3409.9218109892086,
+ 3693.5727944433816,
+ 3977.115932312908,
+ 4260.527678263926,
+ 4543.760410967098,
+ 4826.740852245481,
+ 5109.3345422247,
+ 5391.098581809968
+ ]
+ },
+ "hovertemplate": "Method=MDN
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "MDN",
+ "marker": {
+ "color": "#332288",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "MDN",
+ "offsetgroup": "MDN",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 26871.173484862265,
+ 53740.50929097005,
+ 80608.89541027459,
+ 107476.54579793694,
+ 134343.55265796493,
+ 161209.97167135123,
+ 188075.82470534535,
+ 214941.1126265573,
+ 241805.8201917059,
+ 268669.9238017856,
+ 295533.39161048527,
+ 322396.1651101296,
+ 349258.13342021353,
+ 376119.19087407645,
+ 402979.11892909755,
+ 429837.5833290928,
+ 456694.0042487571,
+ 483547.1238196845,
+ 510393.3069950899
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 60952.489182045254,
+ "y1": 60952.489182045254
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 58409.52427605975,
+ "y1": 58409.52427605975
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 54897.71902635497,
+ "y1": 54897.71902635497
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 102881.83090270912,
+ "y1": 102881.83090270912
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "MDN Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 268657.9656829148,
+ "y1": 268657.9656829148
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "House Sales Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Creating 60/40 donor/receiver split...\")\n",
+ "house_donor, house_receiver = train_test_split(\n",
+ " df_house, train_size=0.6, random_state=42\n",
+ ")\n",
+ "\n",
+ "house_receiver_no_target = house_receiver.drop(columns=[house_target])\n",
+ "\n",
+ "print(f\"Donor size: {len(house_donor)}\")\n",
+ "print(f\"Receiver size: {len(house_receiver)}\")\n",
+ "\n",
+ "print(\"\\nRunning autoimpute...\")\n",
+ "house_result = autoimpute(\n",
+ " donor_data=house_donor,\n",
+ " receiver_data=house_receiver_no_target.copy(),\n",
+ " predictors=house_predictors,\n",
+ " imputed_variables=[house_target],\n",
+ " impute_all=True,\n",
+ " log_level=\"INFO\"\n",
+ ")\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Results ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=house_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"House Sales Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Autoimpute CV Results ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 719.328189753778,
+ 940.3832731105417,
+ 1482.848168298151,
+ 981.7573126166058,
+ 893.1098804459698,
+ 890.1490739996166,
+ 918.5323509728148,
+ 1588.7521448227917,
+ 1201.8109300236083,
+ 1336.585972042465,
+ 666.4767580846618,
+ 758.9475683984272,
+ 1596.3561562877353,
+ 1413.758832331186,
+ 1812.523234457851,
+ 1962.6534657951277,
+ 1475.01168353744,
+ 1950.596397145942,
+ 1745.9008891730946
+ ]
+ },
+ "hovertemplate": "Method=QRF
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QRF",
+ "marker": {
+ "color": "#88CCEE",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QRF",
+ "offsetgroup": "QRF",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 20709.781289760314,
+ 32711.011025779328,
+ 44535.18256502814,
+ 53703.12974447841,
+ 62417.06348529387,
+ 68618.35321517424,
+ 75277.79127833492,
+ 78690.66612006526,
+ 81692.4921553432,
+ 82387.89194252898,
+ 82190.59544011946,
+ 80668.23840043221,
+ 76139.77789636323,
+ 71953.78781378965,
+ 66902.27469114999,
+ 59408.259501290624,
+ 51162.40596579338,
+ 40201.64217328191,
+ 28726.94975485272
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 194.028361567845,
+ 270.40611376212814,
+ 424.35766919963396,
+ 655.5625728964469,
+ 859.3968486998843,
+ 1081.5312202798207,
+ 1290.8863186105052,
+ 1397.2462643130293,
+ 1540.027505583281,
+ 1619.1472241056717,
+ 1689.5007427358473,
+ 1708.6528162176992,
+ 1692.2362737940518,
+ 1724.0607813481622,
+ 1740.5827151189064,
+ 1841.4550330271604,
+ 1938.1655872462368,
+ 2037.0636749212763,
+ 2092.754289131688
+ ]
+ },
+ "hovertemplate": "Method=OLS
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "OLS",
+ "marker": {
+ "color": "#CC6677",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "OLS",
+ "offsetgroup": "OLS",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 20473.589998960033,
+ 33432.25165704795,
+ 43320.90225158931,
+ 51196.22021063475,
+ 57569.70124450637,
+ 62801.63397552215,
+ 67045.07770711477,
+ 70407.69702574884,
+ 72927.84248013695,
+ 74521.04034750645,
+ 75226.70101567755,
+ 75023.80226665984,
+ 73828.67718656275,
+ 71527.86883635657,
+ 67858.72913281275,
+ 62484.32419192977,
+ 54953.553220199516,
+ 44734.247783678955,
+ 30447.100712490104
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 182.3720945383094,
+ 472.3398849856401,
+ 524.3005985089421,
+ 971.9646125139536,
+ 1224.9792826088424,
+ 2115.933780108482,
+ 1056.3153214213212,
+ 1829.0166203736255,
+ 2127.8709907744037,
+ 1718.448175433048,
+ 1352.7976079507,
+ 1559.8561647881027,
+ 1275.6384343952695,
+ 2065.063847283456,
+ 1573.1974067010156,
+ 1647.5556756086503,
+ 1912.659852465349,
+ 1602.9127058010229,
+ 1420.088344050314
+ ]
+ },
+ "hovertemplate": "Method=QuantReg
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "QuantReg",
+ "marker": {
+ "color": "#DDCC77",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "QuantReg",
+ "offsetgroup": "QuantReg",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 14181.66310174412,
+ 25755.579151254104,
+ 35955.151863432664,
+ 44566.35401926348,
+ 52056.62402430021,
+ 58865.9289442663,
+ 63653.54425385364,
+ 67731.13211630892,
+ 70620.50999472741,
+ 72451.6790063794,
+ 73581.67597367859,
+ 72803.27067734861,
+ 72027.60910654339,
+ 69655.74924359658,
+ 65282.60359285939,
+ 60793.11896628221,
+ 53597.61821135791,
+ 42022.121677457835,
+ 27454.727576089623
+ ],
+ "yaxis": "y"
+ },
+ {
+ "alignmentgroup": "True",
+ "error_y": {
+ "array": [
+ 1980.2054753191617,
+ 1703.6432646631138,
+ 1455.5825320961067,
+ 1253.0658913312486,
+ 1121.0536267527482,
+ 1085.579027918468,
+ 1155.5673513130084,
+ 1314.2767846796703,
+ 1534.41992812871,
+ 1793.515835082672,
+ 2077.0380524461475,
+ 2376.259366670552,
+ 2685.938117275419,
+ 3002.840663062175,
+ 3324.9021055377766,
+ 3650.75738181033,
+ 3979.474638195215,
+ 4310.39914822265,
+ 4643.05898344198
+ ]
+ },
+ "hovertemplate": "Method=Matching
Quantiles=%{x}
Quantile loss=%{y}",
+ "legendgroup": "Matching",
+ "marker": {
+ "color": "#117733",
+ "pattern": {
+ "shape": ""
+ }
+ },
+ "name": "Matching",
+ "offsetgroup": "Matching",
+ "orientation": "v",
+ "showlegend": true,
+ "textposition": "auto",
+ "type": "bar",
+ "x": [
+ "0.05",
+ "0.1",
+ "0.15",
+ "0.2",
+ "0.25",
+ "0.3",
+ "0.35",
+ "0.4",
+ "0.45",
+ "0.5",
+ "0.55",
+ "0.6",
+ "0.65",
+ "0.7",
+ "0.75",
+ "0.8",
+ "0.85",
+ "0.9",
+ "0.95"
+ ],
+ "xaxis": "x",
+ "y": [
+ 101408.0160260291,
+ 101571.7732345491,
+ 101735.5304430691,
+ 101899.2876515891,
+ 102063.0448601091,
+ 102226.80206862911,
+ 102390.5592771491,
+ 102554.31648566911,
+ 102718.07369418911,
+ 102881.83090270912,
+ 103045.58811122912,
+ 103209.34531974913,
+ 103373.10252826913,
+ 103536.85973678913,
+ 103700.61694530913,
+ 103864.37415382915,
+ 104028.13136234913,
+ 104191.88857086914,
+ 104355.64577938915
+ ],
+ "yaxis": "y"
+ }
+ ],
+ "layout": {
+ "barmode": "group",
+ "height": 600,
+ "legend": {
+ "title": {
+ "text": "Method"
+ },
+ "tracegroupgap": 0
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QRF Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 60952.489182045254,
+ "y1": 60952.489182045254
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "OLS Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 58409.52427605975,
+ "y1": 58409.52427605975
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "QuantReg Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 54897.71902635497,
+ "y1": 54897.71902635497
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "dash": "dot",
+ "width": 2
+ },
+ "name": "Matching Mean",
+ "type": "line",
+ "x0": -0.5,
+ "x1": 18.5,
+ "y0": 102881.83090270912,
+ "y1": 102881.83090270912
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "House Sales Dataset Benchmarking Results"
+ },
+ "width": 750,
+ "xaxis": {
+ "anchor": "y",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantiles"
+ },
+ "zeroline": false
+ },
+ "yaxis": {
+ "anchor": "x",
+ "domain": [
+ 0,
+ 1
+ ],
+ "gridcolor": "#E5E5E5",
+ "gridwidth": 1,
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "font": {
+ "size": 12
+ },
+ "text": "Quantile loss"
+ },
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "house_result.cv_results.pop('MDN', None)\n",
+ "\n",
+ "print(\"\\n--- Autoimpute CV Results ---\")\n",
+ "comparison_viz = method_comparison_results(\n",
+ " data=house_result.cv_results,\n",
+ " metric=\"quantile_loss\",\n",
+ ")\n",
+ "fig = comparison_viz.plot(\n",
+ " title=\"House Sales Dataset Benchmarking Results\",\n",
+ " show_mean=True,\n",
+ ")\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Distribution Comparison: Imputed vs Ground Truth (House Sales) ---\n",
+ "QuantReg: Wasserstein Distance = 64725.2760\n",
+ "QRF: Wasserstein Distance = 12576.6113\n",
+ "OLS: Wasserstein Distance = 41862.2873\n",
+ "Matching: Wasserstein Distance = 8797.8007\n",
+ "MDN: Wasserstein Distance = 544730.3917\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Variable | \n",
+ " Metric | \n",
+ " Distance | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " price | \n",
+ " wasserstein_distance | \n",
+ " 64725.276019 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Variable Metric Distance\n",
+ "0 price wasserstein_distance 64725.276019"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"\\n--- Distribution Comparison: Imputed vs Ground Truth (House Sales) ---\")\n",
+ "# Store Wasserstein distances for all methods\n",
+ "house_wasserstein = {}\n",
+ "\n",
+ "distribution_comparison_house = compare_distributions(\n",
+ " donor_data=house_receiver, # Ground truth test split\n",
+ " receiver_data=house_result.receiver_data, # Imputed values\n",
+ " imputed_variables=[house_target],\n",
+ ")\n",
+ "best_method_name = house_result.fitted_models[\"best_method\"].__class__.__name__ \n",
+ "best_method_name = best_method_name.replace(\"Results\", \"\")\n",
+ "house_wasserstein[best_method_name] = distribution_comparison_house[\n",
+ " distribution_comparison_house['Metric'] == 'wasserstein_distance'\n",
+ "]['Distance'].values[0]\n",
+ "print(f\"{best_method_name}: Wasserstein Distance = {house_wasserstein.get(best_method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "for method_name, imputations in house_result.imputations.items():\n",
+ " # Skip the 'best_method' (it's a duplicate)\n",
+ " if method_name == 'best_method': \n",
+ " continue\n",
+ " \n",
+ " # Create a copy of receiver data with this method's imputations \n",
+ " receiver_with_imputations = house_receiver_no_target.copy() \n",
+ " \n",
+ " # Handle both dict (quantile->DataFrame) and DataFrame formats \n",
+ " if isinstance(imputations, dict): \n",
+ " # Get median quantile (0.5) imputations \n",
+ " imp_df = imputations.get(0.5, list(imputations.values())[0]) \n",
+ " else: \n",
+ " imp_df = imputations\n",
+ " \n",
+ " # Add imputed values \n",
+ " for var in [house_target]:\n",
+ " if var in imp_df.columns: \n",
+ " receiver_with_imputations[var] = imp_df[var].values \n",
+ " \n",
+ " # Calculate distribution comparison \n",
+ " dist_comparison = compare_distributions( \n",
+ " donor_data=house_receiver, # Ground truth \n",
+ " receiver_data=receiver_with_imputations,\n",
+ " imputed_variables=[house_target], \n",
+ " )\n",
+ "\n",
+ " # Extract Wasserstein distance \n",
+ " wd = dist_comparison[dist_comparison['Metric'] == 'wasserstein_distance']['Distance'].values \n",
+ " house_wasserstein[method_name] = wd[0]\n",
+ " \n",
+ " print(f\"{method_name}: Wasserstein Distance = {house_wasserstein.get(method_name, 'N/A'):.4f}\")\n",
+ " \n",
+ "# Display full comparison for best method\n",
+ "display(distribution_comparison_house)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### CIA Sensitivity Analysis\n",
+ "\n",
+ "Measuring sensitivity to the Conditional Independence Assumption by progressively removing predictors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running CIA sensitivity analysis...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a5c135c318a642e389e2e575613cb093",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/10 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QRF sensitivity score: 1.460\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "bde4d049dd5748c4be33331c4a5d6e29",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/10 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " OLS sensitivity score: 1.298\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "84d20bc46b824db7be0696208c691ac7",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/10 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " QuantReg sensitivity score: 1.326\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "630cfba0fe3f406bada87a9a81e5e9dc",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/10 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Matching sensitivity score: 1.277\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "6659ca2be34b4e06a67522980ed9b241",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Progressive exclusion: 0%| | 0/10 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " MDN sensitivity score: 0.982\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "line": {
+ "color": "#88CCEE",
+ "width": 2
+ },
+ "marker": {
+ "color": "#88CCEE",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QRF (baseline=61633.9454)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9
+ ],
+ "y": [
+ 61633.94540828127,
+ 67598.47328244275,
+ 68254.115715938,
+ 84176.27122831368,
+ 95291.8059356928,
+ 101646.55424473745,
+ 109849.02169326856,
+ 95995.41586398333,
+ 105091.3555817719,
+ 102283.51127920425
+ ]
+ },
+ {
+ "line": {
+ "color": "#CC6677",
+ "width": 2
+ },
+ "marker": {
+ "color": "#CC6677",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "OLS (baseline=60040.2811)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9
+ ],
+ "y": [
+ 60040.28109972512,
+ 63890.34783466803,
+ 63890.34783466738,
+ 74721.62640107218,
+ 80485.27208784976,
+ 82995.34892891458,
+ 84974.7763763976,
+ 85183.47568120067,
+ 89951.50817774664,
+ 90481.03740947727
+ ]
+ },
+ {
+ "line": {
+ "color": "#DDCC77",
+ "width": 2
+ },
+ "marker": {
+ "color": "#DDCC77",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "QuantReg (baseline=55367.6792)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9
+ ],
+ "y": [
+ 55367.67917555805,
+ 59759.35808903482,
+ 59025.69807488604,
+ 69744.27234363477,
+ 75877.37240229812,
+ 78496.44703226347,
+ 83354.18609273966,
+ 79879.42635194567,
+ 84545.07320458148,
+ 85164.14797218003
+ ]
+ },
+ {
+ "line": {
+ "color": "#117733",
+ "width": 2
+ },
+ "marker": {
+ "color": "#117733",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "Matching (baseline=108071.4749)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9
+ ],
+ "y": [
+ 108071.47490168863,
+ 108118.89174184595,
+ 108526.14018043026,
+ 139644.67545685868,
+ 143404.15105250984,
+ 146769.88850335416,
+ 150017.4509599815,
+ 150702.08177191764,
+ 159016.97351376357,
+ 163693.8600508906
+ ]
+ },
+ {
+ "line": {
+ "color": "#332288",
+ "width": 2
+ },
+ "marker": {
+ "color": "#332288",
+ "size": 8
+ },
+ "mode": "lines+markers",
+ "name": "MDN (baseline=271993.5970)",
+ "type": "scatter",
+ "x": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9
+ ],
+ "y": [
+ 271993.5969515431,
+ 271312.6096568421,
+ 269513.8677565154,
+ 273001.0907624635,
+ 272094.2579712954,
+ 266735.8778402246,
+ 271132.3673983646,
+ 249485.3727024148,
+ 264785.5808723055,
+ 260448.8833700815
+ ]
+ }
+ ],
+ "layout": {
+ "height": 600,
+ "hovermode": "x unified",
+ "legend": {
+ "x": 0.01,
+ "xanchor": "left",
+ "y": 0.99,
+ "yanchor": "top"
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "CIA Sensitivity: House"
+ },
+ "width": 750,
+ "xaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": false,
+ "showline": true,
+ "title": {
+ "text": "Number of Predictors Removed"
+ }
+ },
+ "yaxis": {
+ "gridcolor": "#E5E5E5",
+ "linecolor": "#CCCCCC",
+ "showgrid": true,
+ "showline": true,
+ "title": {
+ "text": "Quantile Loss"
+ }
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Sensitivity Scores (higher = more sensitive to predictor removal):\n",
+ " QRF: 1.460\n",
+ " OLS: 1.298\n",
+ " QuantReg: 1.326\n",
+ " Matching: 1.277\n",
+ " MDN: 0.982\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Running CIA sensitivity analysis...\")\n",
+ "house_cia_results = run_cia_analysis_for_dataset(\n",
+ " data=df_house,\n",
+ " predictors=house_predictors,\n",
+ " imputed_variables=[house_target],\n",
+ " ordering=\"mutual_info\",\n",
+ ")\n",
+ "\n",
+ "# Plot degradation curves\n",
+ "house_cia_fig = plot_cia_degradation_curves(\n",
+ " house_cia_results,\n",
+ " title=\"CIA Sensitivity: House\"\n",
+ ")\n",
+ "house_cia_fig.show()\n",
+ "\n",
+ "# Display sensitivity scores\n",
+ "print(\"\\nSensitivity Scores (higher = more sensitive to predictor removal):\")\n",
+ "for method, results in house_cia_results.items():\n",
+ " print(f\" {method}: {results['sensitivity_score']:.3f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## Cross-Dataset Results Summary\n",
+ "\n",
+ "Aggregating results across all benchmarking datasets."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "=== Cross-Dataset Benchmark Summary Table ===\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Dataset | \n",
+ " Best Method | \n",
+ " QRF Rank | \n",
+ " QRF Loss | \n",
+ " OLS Rank | \n",
+ " OLS Loss | \n",
+ " QuantReg Rank | \n",
+ " QuantReg Loss | \n",
+ " Matching Rank | \n",
+ " Matching Loss | \n",
+ " MDN Rank | \n",
+ " MDN Loss | \n",
+ " QRF CIA | \n",
+ " OLS CIA | \n",
+ " QuantReg CIA | \n",
+ " Matching CIA | \n",
+ " MDN CIA | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " space_ga | \n",
+ " OLS | \n",
+ " 2.0 | \n",
+ " 0.038926 | \n",
+ " 1.000000 | \n",
+ " 0.036167 | \n",
+ " 5.000000 | \n",
+ " 0.084055 | \n",
+ " 4.000000 | \n",
+ " 0.062758 | \n",
+ " 3.000000 | \n",
+ " 0.050411 | \n",
+ " 1.442547 | \n",
+ " 1.191864 | \n",
+ " 0.687691 | \n",
+ " 1.267156 | \n",
+ " 1.312439 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " elevators | \n",
+ " QuantReg | \n",
+ " 3.0 | \n",
+ " 0.001321 | \n",
+ " 2.000000 | \n",
+ " 0.001283 | \n",
+ " 1.000000 | \n",
+ " 0.001181 | \n",
+ " 4.000000 | \n",
+ " 0.002634 | \n",
+ " 5.000000 | \n",
+ " NaN | \n",
+ " 1.292881 | \n",
+ " 1.180257 | \n",
+ " 1.173917 | \n",
+ " 1.050390 | \n",
+ " 2.055043 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " brazilian_houses | \n",
+ " QRF | \n",
+ " 1.0 | \n",
+ " 52.164667 | \n",
+ " 3.000000 | \n",
+ " 126.926767 | \n",
+ " 2.000000 | \n",
+ " 104.936007 | \n",
+ " 4.000000 | \n",
+ " 398.728371 | \n",
+ " 5.000000 | \n",
+ " NaN | \n",
+ " 14.691979 | \n",
+ " 5.601620 | \n",
+ " 7.267652 | \n",
+ " 3.059647 | \n",
+ " 1.004445 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " onlinenewspopularity | \n",
+ " QuantReg | \n",
+ " 3.0 | \n",
+ " 1357.210240 | \n",
+ " 4.000000 | \n",
+ " 1800.669662 | \n",
+ " 1.000000 | \n",
+ " 1045.319334 | \n",
+ " 5.000000 | \n",
+ " 1984.738231 | \n",
+ " 2.000000 | \n",
+ " 1227.693208 | \n",
+ " 1.019769 | \n",
+ " 1.002792 | \n",
+ " 1.006457 | \n",
+ " 1.001464 | \n",
+ " 1.006107 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " abalone | \n",
+ " QuantReg | \n",
+ " 3.0 | \n",
+ " 0.009056 | \n",
+ " 2.000000 | \n",
+ " 0.007400 | \n",
+ " 1.000000 | \n",
+ " 0.006479 | \n",
+ " 4.000000 | \n",
+ " 0.015145 | \n",
+ " 5.000000 | \n",
+ " 0.025759 | \n",
+ " 2.071249 | \n",
+ " 2.066912 | \n",
+ " 2.115189 | \n",
+ " 1.885606 | \n",
+ " 1.471810 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " house_sales | \n",
+ " QuantReg | \n",
+ " 3.0 | \n",
+ " 60952.489182 | \n",
+ " 2.000000 | \n",
+ " 58409.524276 | \n",
+ " 1.000000 | \n",
+ " 54897.719026 | \n",
+ " 4.000000 | \n",
+ " 102881.830903 | \n",
+ " 5.000000 | \n",
+ " NaN | \n",
+ " 1.459985 | \n",
+ " 1.297931 | \n",
+ " 1.326381 | \n",
+ " 1.277018 | \n",
+ " 0.982165 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Mean Rank | \n",
+ " - | \n",
+ " 2.5 | \n",
+ " 10393.652232 | \n",
+ " 2.333333 | \n",
+ " 10056.194259 | \n",
+ " 1.833333 | \n",
+ " 9341.344347 | \n",
+ " 4.166667 | \n",
+ " 17544.229674 | \n",
+ " 4.166667 | \n",
+ " 409.256459 | \n",
+ " 3.663068 | \n",
+ " 2.056896 | \n",
+ " 2.262881 | \n",
+ " 1.590214 | \n",
+ " 1.305335 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Dataset Best Method QRF Rank QRF Loss OLS Rank \\\n",
+ "0 space_ga OLS 2.0 0.038926 1.000000 \n",
+ "1 elevators QuantReg 3.0 0.001321 2.000000 \n",
+ "2 brazilian_houses QRF 1.0 52.164667 3.000000 \n",
+ "3 onlinenewspopularity QuantReg 3.0 1357.210240 4.000000 \n",
+ "4 abalone QuantReg 3.0 0.009056 2.000000 \n",
+ "5 house_sales QuantReg 3.0 60952.489182 2.000000 \n",
+ "6 Mean Rank - 2.5 10393.652232 2.333333 \n",
+ "\n",
+ " OLS Loss QuantReg Rank QuantReg Loss Matching Rank Matching Loss \\\n",
+ "0 0.036167 5.000000 0.084055 4.000000 0.062758 \n",
+ "1 0.001283 1.000000 0.001181 4.000000 0.002634 \n",
+ "2 126.926767 2.000000 104.936007 4.000000 398.728371 \n",
+ "3 1800.669662 1.000000 1045.319334 5.000000 1984.738231 \n",
+ "4 0.007400 1.000000 0.006479 4.000000 0.015145 \n",
+ "5 58409.524276 1.000000 54897.719026 4.000000 102881.830903 \n",
+ "6 10056.194259 1.833333 9341.344347 4.166667 17544.229674 \n",
+ "\n",
+ " MDN Rank MDN Loss QRF CIA OLS CIA QuantReg CIA Matching CIA \\\n",
+ "0 3.000000 0.050411 1.442547 1.191864 0.687691 1.267156 \n",
+ "1 5.000000 NaN 1.292881 1.180257 1.173917 1.050390 \n",
+ "2 5.000000 NaN 14.691979 5.601620 7.267652 3.059647 \n",
+ "3 2.000000 1227.693208 1.019769 1.002792 1.006457 1.001464 \n",
+ "4 5.000000 0.025759 2.071249 2.066912 2.115189 1.885606 \n",
+ "5 5.000000 NaN 1.459985 1.297931 1.326381 1.277018 \n",
+ "6 4.166667 409.256459 3.663068 2.056896 2.262881 1.590214 \n",
+ "\n",
+ " MDN CIA \n",
+ "0 1.312439 \n",
+ "1 2.055043 \n",
+ "2 1.004445 \n",
+ "3 1.006107 \n",
+ "4 1.471810 \n",
+ "5 0.982165 \n",
+ "6 1.305335 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Collect all CV results\n",
+ "all_cv_results = {\n",
+ " \"space_ga\": space_ga_result.cv_results,\n",
+ " \"elevators\": elevators_result.cv_results,\n",
+ " \"brazilian_houses\": brazilian_result.cv_results,\n",
+ " \"onlinenewspopularity\": news_result.cv_results,\n",
+ " \"abalone\": abalone_result.cv_results,\n",
+ " \"house_sales\": house_result.cv_results,\n",
+ "}\n",
+ "all_cv_results_df = pd.DataFrame(all_cv_results).T\n",
+ "all_cv_results_df.to_csv(\"benchmark_cv_results.csv\")\n",
+ "\n",
+ "# Collect all Wasserstein results \n",
+ "all_wasserstein_results = {\n",
+ " \"space_ga\": space_ga_wasserstein, \n",
+ " \"elevators\": elevators_wasserstein, \n",
+ " \"brazilian_houses\": brazilian_wasserstein, \n",
+ " \"onlinenewspopularity\": news_wasserstein, \n",
+ " \"abalone\": abalone_wasserstein, \n",
+ " \"house_sales\": house_wasserstein, \n",
+ "} \n",
+ "all_wasserstein_results_df = pd.DataFrame(all_wasserstein_results).T\n",
+ "all_wasserstein_results_df.to_csv(\"benchmark_wasserstein_results.csv\")\n",
+ "\n",
+ "# Collect all CIA results\n",
+ "all_cia_results = {\n",
+ " \"space_ga\": space_ga_cia_results,\n",
+ " \"elevators\": elevators_cia_results,\n",
+ " \"brazilian_houses\": brazilian_cia_results,\n",
+ " \"onlinenewspopularity\": news_cia_results,\n",
+ " \"abalone\": abalone_cia_results,\n",
+ " \"house_sales\": house_cia_results,\n",
+ "}\n",
+ "all_cia_results_df = pd.DataFrame(all_cia_results).T\n",
+ "all_cia_results_df.to_csv(\"benchmark_cia_results.csv\")\n",
+ "\n",
+ "# Create summary table\n",
+ "print(\"\\n=== Cross-Dataset Benchmark Summary Table ===\\n\")\n",
+ "summary_table = create_benchmark_summary_table(all_cv_results, all_cia_results)\n",
+ "display(summary_table)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "all_cv_results = pd.read_csv(\"benchmark_cv_results.csv\", index_col=0).to_dict()\n",
+ "all_wasserstein_results = pd.read_csv(\"benchmark_wasserstein_results.csv\", index_col=0).to_dict()\n",
+ "all_cia_results = pd.read_csv(\"benchmark_cia_results.csv\", index_col=0).to_dict()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "=== Cross-Dataset Benchmark Heatmap ===\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "marker": {
+ "color": "#88CCEE",
+ "size": 12
+ },
+ "mode": "markers",
+ "name": "QRF",
+ "type": "scatter",
+ "x": [
+ null
+ ],
+ "y": [
+ null
+ ]
+ },
+ {
+ "marker": {
+ "color": "#CC6677",
+ "size": 12
+ },
+ "mode": "markers",
+ "name": "OLS",
+ "type": "scatter",
+ "x": [
+ null
+ ],
+ "y": [
+ null
+ ]
+ },
+ {
+ "marker": {
+ "color": "#DDCC77",
+ "size": 12
+ },
+ "mode": "markers",
+ "name": "QuantReg",
+ "type": "scatter",
+ "x": [
+ null
+ ],
+ "y": [
+ null
+ ]
+ },
+ {
+ "marker": {
+ "color": "#117733",
+ "size": 12
+ },
+ "mode": "markers",
+ "name": "Matching",
+ "type": "scatter",
+ "x": [
+ null
+ ],
+ "y": [
+ null
+ ]
+ },
+ {
+ "marker": {
+ "color": "#332288",
+ "size": 12
+ },
+ "mode": "markers",
+ "name": "MDN",
+ "type": "scatter",
+ "x": [
+ null
+ ],
+ "y": [
+ null
+ ]
+ }
+ ],
+ "layout": {
+ "annotations": [
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0389",
+ "x": 0,
+ "xanchor": "center",
+ "y": 0,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "0.0362",
+ "x": 1,
+ "xanchor": "center",
+ "y": 0,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0841",
+ "x": 2,
+ "xanchor": "center",
+ "y": 0,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0628",
+ "x": 3,
+ "xanchor": "center",
+ "y": 0,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0504",
+ "x": 4,
+ "xanchor": "center",
+ "y": 0,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "0.0101",
+ "x": 0,
+ "xanchor": "center",
+ "y": 1,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0272",
+ "x": 1,
+ "xanchor": "center",
+ "y": 1,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0981",
+ "x": 2,
+ "xanchor": "center",
+ "y": 1,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0111",
+ "x": 3,
+ "xanchor": "center",
+ "y": 1,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0504",
+ "x": 4,
+ "xanchor": "center",
+ "y": 1,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0013",
+ "x": 0,
+ "xanchor": "center",
+ "y": 2,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0013",
+ "x": 1,
+ "xanchor": "center",
+ "y": 2,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "0.0012",
+ "x": 2,
+ "xanchor": "center",
+ "y": 2,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0026",
+ "x": 3,
+ "xanchor": "center",
+ "y": 2,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "N/A",
+ "x": 4,
+ "xanchor": "center",
+ "y": 2,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0003",
+ "x": 0,
+ "xanchor": "center",
+ "y": 3,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0014",
+ "x": 1,
+ "xanchor": "center",
+ "y": 3,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0015",
+ "x": 2,
+ "xanchor": "center",
+ "y": 3,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "0.0002",
+ "x": 3,
+ "xanchor": "center",
+ "y": 3,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0200",
+ "x": 4,
+ "xanchor": "center",
+ "y": 3,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "52.1647",
+ "x": 0,
+ "xanchor": "center",
+ "y": 4,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "126.9268",
+ "x": 1,
+ "xanchor": "center",
+ "y": 4,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "104.9360",
+ "x": 2,
+ "xanchor": "center",
+ "y": 4,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "398.7284",
+ "x": 3,
+ "xanchor": "center",
+ "y": 4,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "N/A",
+ "x": 4,
+ "xanchor": "center",
+ "y": 4,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "27.1747",
+ "x": 0,
+ "xanchor": "center",
+ "y": 5,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "99.3468",
+ "x": 1,
+ "xanchor": "center",
+ "y": 5,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "112.3643",
+ "x": 2,
+ "xanchor": "center",
+ "y": 5,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "120.4604",
+ "x": 3,
+ "xanchor": "center",
+ "y": 5,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "3378.6172",
+ "x": 4,
+ "xanchor": "center",
+ "y": 5,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "1357.2102",
+ "x": 0,
+ "xanchor": "center",
+ "y": 6,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "1800.6697",
+ "x": 1,
+ "xanchor": "center",
+ "y": 6,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "1045.3193",
+ "x": 2,
+ "xanchor": "center",
+ "y": 6,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "1984.7382",
+ "x": 3,
+ "xanchor": "center",
+ "y": 6,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "1227.6932",
+ "x": 4,
+ "xanchor": "center",
+ "y": 6,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "783.7171",
+ "x": 0,
+ "xanchor": "center",
+ "y": 7,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "2777.1152",
+ "x": 1,
+ "xanchor": "center",
+ "y": 7,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "2376.8148",
+ "x": 2,
+ "xanchor": "center",
+ "y": 7,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "239.8596",
+ "x": 3,
+ "xanchor": "center",
+ "y": 7,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "2038.0328",
+ "x": 4,
+ "xanchor": "center",
+ "y": 7,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0091",
+ "x": 0,
+ "xanchor": "center",
+ "y": 8,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0074",
+ "x": 1,
+ "xanchor": "center",
+ "y": 8,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "0.0065",
+ "x": 2,
+ "xanchor": "center",
+ "y": 8,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0151",
+ "x": 3,
+ "xanchor": "center",
+ "y": 8,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0258",
+ "x": 4,
+ "xanchor": "center",
+ "y": 8,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0028",
+ "x": 0,
+ "xanchor": "center",
+ "y": 9,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0037",
+ "x": 1,
+ "xanchor": "center",
+ "y": 9,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0027",
+ "x": 2,
+ "xanchor": "center",
+ "y": 9,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "0.0025",
+ "x": 3,
+ "xanchor": "center",
+ "y": 9,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "0.0404",
+ "x": 4,
+ "xanchor": "center",
+ "y": 9,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "60952.4892",
+ "x": 0,
+ "xanchor": "center",
+ "y": 10,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "58409.5243",
+ "x": 1,
+ "xanchor": "center",
+ "y": 10,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "54897.7190",
+ "x": 2,
+ "xanchor": "center",
+ "y": 10,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "102881.8309",
+ "x": 3,
+ "xanchor": "center",
+ "y": 10,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "N/A",
+ "x": 4,
+ "xanchor": "center",
+ "y": 10,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "12576.6113",
+ "x": 0,
+ "xanchor": "center",
+ "y": 11,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "41862.2873",
+ "x": 1,
+ "xanchor": "center",
+ "y": 11,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "64725.2760",
+ "x": 2,
+ "xanchor": "center",
+ "y": 11,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "black",
+ "size": 12
+ },
+ "showarrow": false,
+ "text": "8797.8007",
+ "x": 3,
+ "xanchor": "center",
+ "y": 11,
+ "yanchor": "middle"
+ },
+ {
+ "font": {
+ "color": "white",
+ "size": 10
+ },
+ "showarrow": false,
+ "text": "544730.3917",
+ "x": 4,
+ "xanchor": "center",
+ "y": 11,
+ "yanchor": "middle"
+ }
+ ],
+ "height": 580,
+ "legend": {
+ "orientation": "h",
+ "x": 0.5,
+ "xanchor": "center",
+ "y": -0.02,
+ "yanchor": "top"
+ },
+ "margin": {
+ "b": 40,
+ "l": 180,
+ "r": 20,
+ "t": 60
+ },
+ "paper_bgcolor": "#FAFAFA",
+ "plot_bgcolor": "#FAFAFA",
+ "shapes": [
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": -0.5,
+ "y1": 0.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": -0.5,
+ "y1": 0.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": -0.5,
+ "y1": 0.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": -0.5,
+ "y1": 0.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": -0.5,
+ "y1": 0.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 0.5,
+ "y1": 1.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 0.5,
+ "y1": 1.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 0.5,
+ "y1": 1.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 0.5,
+ "y1": 1.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 0.5,
+ "y1": 1.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 1.5,
+ "y1": 2.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 1.5,
+ "y1": 2.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 1.5,
+ "y1": 2.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 1.5,
+ "y1": 2.5
+ },
+ {
+ "fillcolor": "rgba(200, 200, 200, 0.3)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 1.5,
+ "y1": 2.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 2.5,
+ "y1": 3.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 2.5,
+ "y1": 3.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 2.5,
+ "y1": 3.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 2.5,
+ "y1": 3.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 2.5,
+ "y1": 3.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 3.5,
+ "y1": 4.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 3.5,
+ "y1": 4.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 3.5,
+ "y1": 4.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 3.5,
+ "y1": 4.5
+ },
+ {
+ "fillcolor": "rgba(200, 200, 200, 0.3)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 3.5,
+ "y1": 4.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 4.5,
+ "y1": 5.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 4.5,
+ "y1": 5.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 4.5,
+ "y1": 5.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 4.5,
+ "y1": 5.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 4.5,
+ "y1": 5.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 5.5,
+ "y1": 6.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 5.5,
+ "y1": 6.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 5.5,
+ "y1": 6.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 5.5,
+ "y1": 6.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 5.5,
+ "y1": 6.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 6.5,
+ "y1": 7.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 6.5,
+ "y1": 7.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 6.5,
+ "y1": 7.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 6.5,
+ "y1": 7.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 6.5,
+ "y1": 7.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 7.5,
+ "y1": 8.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 7.5,
+ "y1": 8.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 7.5,
+ "y1": 8.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 7.5,
+ "y1": 8.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 7.5,
+ "y1": 8.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 8.5,
+ "y1": 9.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 8.5,
+ "y1": 9.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 8.5,
+ "y1": 9.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 8.5,
+ "y1": 9.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 8.5,
+ "y1": 9.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 9.5,
+ "y1": 10.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 9.5,
+ "y1": 10.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 9.5,
+ "y1": 10.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.80)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 9.5,
+ "y1": 10.5
+ },
+ {
+ "fillcolor": "rgba(200, 200, 200, 0.3)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 9.5,
+ "y1": 10.5
+ },
+ {
+ "fillcolor": "rgba(136, 204, 238, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": -0.5,
+ "x1": 0.5,
+ "y0": 10.5,
+ "y1": 11.5
+ },
+ {
+ "fillcolor": "rgba(204, 102, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 0.5,
+ "x1": 1.5,
+ "y0": 10.5,
+ "y1": 11.5
+ },
+ {
+ "fillcolor": "rgba(221, 204, 119, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 1.5,
+ "x1": 2.5,
+ "y0": 10.5,
+ "y1": 11.5
+ },
+ {
+ "fillcolor": "rgba(17, 119, 51, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 2.5,
+ "x1": 3.5,
+ "y0": 10.5,
+ "y1": 11.5
+ },
+ {
+ "fillcolor": "rgba(51, 34, 136, 0.50)",
+ "line": {
+ "color": "white",
+ "width": 1
+ },
+ "type": "rect",
+ "x0": 3.5,
+ "x1": 4.5,
+ "y0": 10.5,
+ "y1": 11.5
+ }
+ ],
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ },
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "pattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "fillpattern": {
+ "fillmode": "overlay",
+ "size": 10,
+ "solidity": 0.2
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "font": {
+ "size": 14
+ },
+ "text": "Cross-Dataset Performance Comparison"
+ },
+ "width": 700,
+ "xaxis": {
+ "range": [
+ -0.5,
+ 4.5
+ ],
+ "showgrid": false,
+ "side": "top",
+ "tickmode": "array",
+ "ticktext": [
+ "QRF",
+ "OLS",
+ "QuantReg",
+ "Matching",
+ "MDN"
+ ],
+ "tickvals": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4
+ ],
+ "zeroline": false
+ },
+ "yaxis": {
+ "autorange": "reversed",
+ "range": [
+ -0.5,
+ 11.5
+ ],
+ "showgrid": false,
+ "tickmode": "array",
+ "ticktext": [
+ "space_ga (Q-Loss)",
+ "space_ga (W-Dist)",
+ "elevators (Q-Loss)",
+ "elevators (W-Dist)",
+ "brazilian_houses (Q-Loss)",
+ "brazilian_houses (W-Dist)",
+ "onlinenewspopularity (Q-Loss)",
+ "onlinenewspopularity (W-Dist)",
+ "abalone (Q-Loss)",
+ "abalone (W-Dist)",
+ "house_sales (Q-Loss)",
+ "house_sales (W-Dist)"
+ ],
+ "tickvals": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11
+ ],
+ "zeroline": false
+ }
+ }
+ }
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Create benchmark heatmap\n",
+ "print(\"\\n=== Cross-Dataset Benchmark Heatmap ===\\n\")\n",
+ "heatmap_fig = create_benchmark_heatmap(\n",
+ " all_cv_results,\n",
+ " wasserstein_results=all_wasserstein_results,\n",
+ " title=\"Cross-Dataset Performance Comparison\"\n",
+ ")\n",
+ "heatmap_fig.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## Summary\n",
+ "\n",
+ "This notebook has analyzed 6 benchmarking datasets:\n",
+ "\n",
+ "1. **space_ga** - Georgia voting data (3,107 rows, 7 cols)\n",
+ "2. **elevators** - Aircraft control data (16,599 rows, 19 cols)\n",
+ "3. **brazilian_houses** - Real estate data (10,692 rows, 13 cols)\n",
+ "4. **onlinenewspopularity** - News article data (39,644 rows, 60 cols)\n",
+ "5. **abalone** - Marine biology data (4,177 rows, 9 cols)\n",
+ "6. **house_sales** - King County real estate (21,613 rows, 22 cols)\n",
+ "\n",
+ "For each dataset:\n",
+ "- Computed predictor correlations (Pearson, Spearman, Mutual Information)\n",
+ "- Performed leave-one-out analysis to identify important predictors\n",
+ "- Split data into donor/receiver sets (60/40)\n",
+ "- Ran autoimpute to compare QRF, OLS, QuantReg, Matching, and MDN models\n",
+ "- Compared imputed vs ground truth distributions"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "pe3.13",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/tests/test_autoimpute.py b/tests/test_autoimpute.py
index c58ef5f..8b2ceb8 100644
--- a/tests/test_autoimpute.py
+++ b/tests/test_autoimpute.py
@@ -448,4 +448,4 @@ def test_autoimpute_consistency(simple_data: tuple) -> None:
"mean_test"
]
if not np.isnan(loss1) and not np.isnan(loss2):
- np.testing.assert_allclose(loss1, loss2, rtol=0.05)
+ np.testing.assert_allclose(loss1, loss2, rtol=0.10)
diff --git a/tests/test_dashboard_formatter.py b/tests/test_dashboard_formatter.py
index 9cc49d4..f42b3d4 100644
--- a/tests/test_dashboard_formatter.py
+++ b/tests/test_dashboard_formatter.py
@@ -14,7 +14,6 @@
from microimpute.utils.dashboard_formatter import format_csv
-
# Valid type values that should appear in the output
VALID_TYPES = {
"benchmark_loss",
@@ -34,6 +33,7 @@
"quantile",
"metric_name",
"metric_value",
+ "metric_std",
"split",
"additional_info",
]
@@ -57,8 +57,18 @@ def sample_autoimpute_result():
},
index=["train", "test"],
),
+ "results_std": pd.DataFrame(
+ {
+ 0.1: [0.001, 0.002],
+ 0.5: [0.0015, 0.0025],
+ 0.9: [0.002, 0.003],
+ },
+ index=["train", "test"],
+ ),
"mean_train": 0.015,
"mean_test": 0.025,
+ "std_train": 0.0015,
+ "std_test": 0.0025,
"variables": ["var1", "var2"],
},
"log_loss": {
@@ -68,8 +78,16 @@ def sample_autoimpute_result():
},
index=["train", "test"],
),
+ "results_std": pd.DataFrame(
+ {
+ 0.5: [0.01, 0.015],
+ },
+ index=["train", "test"],
+ ),
"mean_train": 0.1,
"mean_test": 0.15,
+ "std_train": 0.01,
+ "std_test": 0.015,
"variables": ["cat_var"],
},
},
@@ -83,8 +101,18 @@ def sample_autoimpute_result():
},
index=["train", "test"],
),
+ "results_std": pd.DataFrame(
+ {
+ 0.1: [0.0012, 0.0022],
+ 0.5: [0.0017, 0.0027],
+ 0.9: [0.0022, 0.0032],
+ },
+ index=["train", "test"],
+ ),
"mean_train": 0.017,
"mean_test": 0.027,
+ "std_train": 0.0017,
+ "std_test": 0.0027,
"variables": ["var1", "var2"],
},
},
@@ -350,6 +378,60 @@ def test_best_method_marked_correctly(self, sample_autoimpute_result):
finally:
Path(output_path).unlink()
+ def test_metric_std_column_populated(self, sample_autoimpute_result):
+ """Test that metric_std column is populated from CV results."""
+ with tempfile.NamedTemporaryFile(
+ mode="w", delete=False, suffix=".csv"
+ ) as f:
+ output_path = f.name
+
+ try:
+ result = format_csv(
+ output_path=output_path,
+ autoimpute_result=sample_autoimpute_result,
+ )
+
+ # Check that metric_std column exists
+ assert "metric_std" in result.columns
+
+ benchmark_rows = result[result["type"] == "benchmark_loss"]
+
+ # Check per-quantile rows have std values
+ ols_quantile_rows = benchmark_rows[
+ (benchmark_rows["method"] == "OLS")
+ & (benchmark_rows["quantile"] != "mean")
+ & (benchmark_rows["metric_name"] == "quantile_loss")
+ ]
+
+ # All per-quantile rows should have non-null std
+ assert not ols_quantile_rows["metric_std"].isna().any()
+ assert (ols_quantile_rows["metric_std"] >= 0).all()
+
+ # Check mean rows have std values
+ ols_mean_rows = benchmark_rows[
+ (benchmark_rows["method"] == "OLS")
+ & (benchmark_rows["quantile"] == "mean")
+ & (benchmark_rows["metric_name"] == "quantile_loss")
+ ]
+
+ assert not ols_mean_rows["metric_std"].isna().any()
+
+ # Verify specific std value matches fixture
+ # Test split should have std_test = 0.0025 for OLS quantile_loss
+ test_mean_row = benchmark_rows[
+ (benchmark_rows["method"] == "OLS")
+ & (benchmark_rows["quantile"] == "mean")
+ & (benchmark_rows["metric_name"] == "quantile_loss")
+ & (benchmark_rows["split"] == "test")
+ ]
+ assert len(test_mean_row) == 1
+ assert np.isclose(
+ test_mean_row.iloc[0]["metric_std"], 0.0025, rtol=1e-10
+ )
+
+ finally:
+ Path(output_path).unlink()
+
class TestFormatCSVDistributionDistance:
"""Tests for distribution_distance type formatting."""
diff --git a/tests/test_models/test_imputers.py b/tests/test_models/test_imputers.py
index 18548b8..762d45d 100644
--- a/tests/test_models/test_imputers.py
+++ b/tests/test_models/test_imputers.py
@@ -17,7 +17,6 @@
from microimpute.models import *
from microimpute.utils.data import preprocess_data
-
# === Fixtures ===
diff --git a/tests/test_models/test_mdn.py b/tests/test_models/test_mdn.py
index ea74842..9e76bc6 100644
--- a/tests/test_models/test_mdn.py
+++ b/tests/test_models/test_mdn.py
@@ -20,7 +20,6 @@
_generate_data_hash,
)
-
# === Fixtures ===
diff --git a/tests/test_models/test_ols.py b/tests/test_models/test_ols.py
index 94adfbb..4db9686 100644
--- a/tests/test_models/test_ols.py
+++ b/tests/test_models/test_ols.py
@@ -13,7 +13,6 @@
from microimpute.utils.data import preprocess_data
from microimpute.visualizations import *
-
# === Fixtures ===
diff --git a/tests/test_models/test_qrf.py b/tests/test_models/test_qrf.py
index 2f26c52..04d740a 100644
--- a/tests/test_models/test_qrf.py
+++ b/tests/test_models/test_qrf.py
@@ -21,7 +21,6 @@
from microimpute.utils.data import preprocess_data
from microimpute.visualizations import *
-
# === Fixtures and Test Data ===
diff --git a/tests/test_models/test_quantreg.py b/tests/test_models/test_quantreg.py
index 5caee74..74e241f 100644
--- a/tests/test_models/test_quantreg.py
+++ b/tests/test_models/test_quantreg.py
@@ -13,7 +13,6 @@
from microimpute.utils.data import preprocess_data
from microimpute.visualizations import *
-
# === Fixtures ===
diff --git a/tests/test_visualizations.py b/tests/test_visualizations.py
index df0e5c3..ad1475d 100644
--- a/tests/test_visualizations.py
+++ b/tests/test_visualizations.py
@@ -661,3 +661,124 @@ def test_nan_handling(self):
# Should handle NaNs gracefully
fig = viz.plot()
assert fig is not None
+
+
+class TestErrorBars:
+ """Test error bar functionality in visualizations."""
+
+ def test_performance_results_with_error_bars(self):
+ """Test PerformanceResults displays error bars when std data is present."""
+ np.random.seed(42)
+ quantiles = [0.1, 0.5, 0.9]
+
+ results_df = pd.DataFrame(
+ {q: np.random.uniform(0.1, 0.5, 2) for q in quantiles},
+ index=["train", "test"],
+ )
+ std_df = pd.DataFrame(
+ {q: np.random.uniform(0.01, 0.05, 2) for q in quantiles},
+ index=["train", "test"],
+ )
+
+ results = {
+ "quantile_loss": {
+ "results": results_df,
+ "results_std": std_df,
+ "mean_train": results_df.loc["train"].mean(),
+ "mean_test": results_df.loc["test"].mean(),
+ "std_train": std_df.loc["train"].mean(),
+ "std_test": std_df.loc["test"].mean(),
+ "variables": ["var1"],
+ }
+ }
+
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+
+ viz = PerformanceResults(
+ results=results,
+ metric="quantile_loss",
+ model_name="TestModel",
+ )
+
+ fig = viz.plot(show_error_bars=True)
+ assert fig is not None
+
+ # Check that error bars are present
+ has_error_bars = any(
+ trace.error_y is not None and trace.error_y.array is not None
+ for trace in fig.data
+ )
+ assert has_error_bars
+
+ def test_comparison_results_with_error_bars(self):
+ """Test MethodComparisonResults displays error bars when std is present."""
+ np.random.seed(42)
+ quantiles = [0.1, 0.5, 0.9]
+
+ results_df = pd.DataFrame(
+ {q: np.random.uniform(0.1, 0.5, 2) for q in quantiles},
+ index=["train", "test"],
+ )
+ std_df = pd.DataFrame(
+ {q: np.random.uniform(0.01, 0.05, 2) for q in quantiles},
+ index=["train", "test"],
+ )
+
+ comparison_results = {
+ "OLS": {
+ "quantile_loss": {
+ "results": results_df,
+ "results_std": std_df,
+ "mean_test": results_df.loc["test"].mean(),
+ "std_test": std_df.loc["test"].mean(),
+ "variables": ["var1"],
+ }
+ },
+ "QRF": {
+ "quantile_loss": {
+ "results": results_df * 0.9,
+ "results_std": std_df * 1.1,
+ "mean_test": (results_df * 0.9).loc["test"].mean(),
+ "std_test": (std_df * 1.1).loc["test"].mean(),
+ "variables": ["var1"],
+ }
+ },
+ }
+
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+
+ viz = MethodComparisonResults(
+ comparison_results, metric="quantile_loss"
+ )
+
+ fig = viz.plot(show_error_bars=True)
+ assert fig is not None
+
+ # Check that Std column is in the data
+ assert "Std" in viz.comparison_data.columns
+ assert not viz.comparison_data["Std"].isna().all()
+
+ def test_cv_results_contain_std(self, diabetes_data):
+ """Test that cross_validate_model returns std information."""
+ predictors = ["age", "sex", "bmi", "bp"]
+ imputed_variables = ["s1"]
+
+ data = diabetes_data[predictors + imputed_variables]
+ data = preprocess_data(data, full_data=True)
+
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+
+ results = cross_validate_model(
+ OLS, data, predictors, imputed_variables, n_splits=3
+ )
+
+ ql = results["quantile_loss"]
+ assert "results_std" in ql
+ assert "std_train" in ql
+ assert "std_test" in ql
+ assert ql["std_train"] >= 0
+ assert ql["std_test"] >= 0
+ assert (ql["results_std"] >= 0).all().all()