fix

soodoku · soodoku · commit 8b5cef542786 · 2025-12-26T00:33:38.000-08:00
diff --git a/docs/conf.py b/docs/conf.py
@@ -55,7 +55,7 @@
 ]
 
 templates_path = ["_templates"]
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "examples/*.ipynb"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
 # -- nbsphinx configuration --------------------------------------------------
 nbsphinx_execute = 'always'  # Force execution of notebooks
@@ -67,10 +67,9 @@
     "--InlineBackend.rc={'figure.dpi': 96}",
 ]
 
-# Remove custom formats that might interfere
-# nbsphinx_custom_formats = {
-#     '.ipynb': ['nbsphinx', 'Jupyter Notebook'],
-# }
+# NOTE(review): `nbsphinx_orphan_path` is not a documented nbsphinx option, so
+# this assignment is most likely a no-op. Use `exclude_patterns` to skip notebooks.
+nbsphinx_orphan_path = []  # TODO: confirm this has any effect; otherwise remove
 
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
diff --git a/docs/examples/04_interactive_demo.ipynb b/docs/examples/04_interactive_demo.ipynb
@@ -58,70 +58,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "def create_interactive_demo():\n",
-    "    \"\"\"Create an interactive widget for exploring piecewise-constant behavior.\"\"\"\n",
-    "\n",
-    "    # Create sliders for data generation\n",
-    "    n_samples_slider = widgets.IntSlider(\n",
-    "        value=10, min=5, max=20, step=1,\n",
-    "        description='N Samples:'\n",
-    "    )\n",
-    "\n",
-    "    pos_ratio_slider = widgets.FloatSlider(\n",
-    "        value=0.5, min=0.1, max=0.9, step=0.1,\n",
-    "        description='Pos Ratio:'\n",
-    "    )\n",
-    "\n",
-    "    seed_slider = widgets.IntSlider(\n",
-    "        value=42, min=0, max=100, step=1,\n",
-    "        description='Random Seed:'\n",
-    "    )\n",
-    "\n",
-    "    metric_dropdown = widgets.Dropdown(\n",
-    "        options=['f1', 'accuracy', 'precision', 'recall'],\n",
-    "        value='f1',\n",
-    "        description='Metric:'\n",
-    "    )\n",
-    "\n",
-    "    def update_plot(n_samples, pos_ratio, seed, metric):\n",
-    "        # Generate random data\n",
-    "        np.random.seed(seed)\n",
-    "        n_pos = int(n_samples * pos_ratio)\n",
-    "        n_neg = n_samples - n_pos\n",
-    "\n",
-    "        y_true = np.concatenate([np.zeros(n_neg), np.ones(n_pos)])\n",
-    "        y_prob = np.random.beta(2, 2, n_samples)  # Bell-shaped distribution\n",
-    "\n",
-    "        # Sort by probability for cleaner visualization\n",
-    "        sort_idx = np.argsort(y_prob)\n",
-    "        y_true = y_true[sort_idx]\n",
-    "        y_prob = y_prob[sort_idx]\n",
-    "\n",
-    "        # Plot\n",
-    "        plt.clf()\n",
-    "        fig, opt_thresh, opt_score = plot_piecewise_metric(\n",
-    "            y_true, y_prob, metric,\n",
-    "            title_suffix=f'\\n{n_samples} samples, {len(np.unique(y_prob))} unique probabilities'\n",
-    "        )\n",
-    "\n",
-    "        print(f\"Generated {n_samples} samples ({n_pos} positive, {n_neg} negative)\")\n",
-    "        print(f\"Optimal {metric} threshold: {opt_thresh:.3f} (score = {opt_score:.3f})\")\n",
-    "        print(f\"Number of breakpoints: {len(np.unique(y_prob))}\")\n",
-    "\n",
-    "    # Create interactive widget\n",
-    "    interactive_plot = widgets.interactive(\n",
-    "        update_plot,\n",
-    "        n_samples=n_samples_slider,\n",
-    "        pos_ratio=pos_ratio_slider,\n",
-    "        seed=seed_slider,\n",
-    "        metric=metric_dropdown\n",
-    "    )\n",
-    "\n",
-    "    display(interactive_plot)\n",
-    "\n",
-    "create_interactive_demo()"
-   ]
+   "source": "def create_static_demo():\n    \"\"\"Create static examples showing piecewise-constant behavior with different data characteristics.\"\"\"\n    \n    print(\"📊 STATIC EXAMPLES: Different Data Characteristics\")\n    print(\"=\" * 55)\n    \n    # Example 1: Small imbalanced dataset  \n    print(\"\\n1️⃣ Small Imbalanced Dataset (5 samples, 40% positive)\")\n    np.random.seed(42)\n    y_ex1 = np.array([0, 0, 0, 1, 1])\n    p_ex1 = np.array([0.1, 0.3, 0.4, 0.7, 0.9])\n    fig1, opt1, score1 = plot_piecewise_metric(y_ex1, p_ex1, 'f1', \n                                               title_suffix='\\nSmall Imbalanced Dataset')\n    print(f\"   → Optimal F1: {opt1:.3f} (score = {score1:.3f})\")\n    print(f\"   → Breakpoints: {len(np.unique(p_ex1))} unique probabilities\")\n    \n    # Example 2: Larger balanced dataset\n    print(\"\\n2️⃣ Larger Balanced Dataset (20 samples, ~50% positive)\")\n    np.random.seed(123)\n    y_ex2 = np.random.randint(0, 2, 20)\n    p_ex2 = np.random.beta(2, 2, 20)  # Bell-shaped distribution\n    # Sort for cleaner visualization\n    sort_idx = np.argsort(p_ex2)\n    y_ex2, p_ex2 = y_ex2[sort_idx], p_ex2[sort_idx]\n    \n    fig2, opt2, score2 = plot_piecewise_metric(y_ex2, p_ex2, 'f1', \n                                               title_suffix='\\nLarger Balanced Dataset')\n    print(f\"   → Optimal F1: {opt2:.3f} (score = {score2:.3f})\")\n    print(f\"   → Breakpoints: {len(np.unique(p_ex2))} unique probabilities\")\n    \n    # Example 3: Precision vs Recall trade-off\n    print(\"\\n3️⃣ Precision vs Recall Comparison\")\n    y_ex3 = np.array([0, 0, 1, 1, 0, 1, 0, 1])\n    p_ex3 = np.array([0.1, 0.3, 0.4, 0.6, 0.65, 0.8, 0.85, 0.9])\n    \n    # Compare different metrics on same data\n    metrics_to_compare = ['precision', 'recall', 'f1']\n    print(f\"   Data: {len(y_ex3)} samples, {y_ex3.sum()} positive\")\n    \n    for metric in metrics_to_compare:\n        result = optimize_thresholds(y_ex3, p_ex3, metric=metric)\n        optimal_thresh = result.thresholds[0]\n        optimal_score = _metric_score(y_ex3, p_ex3, optimal_thresh, metric)\n        print(f\"   → {metric.capitalize()}: t={optimal_thresh:.3f}, score={optimal_score:.3f}\")\n        \n    # Plot the trade-off\n    thresholds = np.linspace(0.05, 0.95, 100)\n    precision_scores = [_metric_score(y_ex3, p_ex3, t, 'precision') for t in thresholds]\n    recall_scores = [_metric_score(y_ex3, p_ex3, t, 'recall') for t in thresholds]\n    f1_scores = [_metric_score(y_ex3, p_ex3, t, 'f1') for t in thresholds]\n    \n    fig, ax = plt.subplots(1, 1, figsize=(12, 6))\n    ax.plot(thresholds, precision_scores, 'g-', linewidth=2, label='Precision')\n    ax.plot(thresholds, recall_scores, 'r-', linewidth=2, label='Recall') \n    ax.plot(thresholds, f1_scores, 'b-', linewidth=2, label='F1 Score')\n    \n    # Mark optimal points\n    for metric, color in zip(['precision', 'recall', 'f1'], ['green', 'red', 'blue']):\n        result = optimize_thresholds(y_ex3, p_ex3, metric=metric)\n        opt_t = result.thresholds[0]\n        opt_s = _metric_score(y_ex3, p_ex3, opt_t, metric)\n        ax.scatter([opt_t], [opt_s], color=color, s=150, marker='*', \n                  edgecolors='black', zorder=5)\n    \n    ax.set_xlabel('Decision Threshold')\n    ax.set_ylabel('Metric Score')\n    ax.set_title('Precision vs Recall Trade-off\\nStars show optimal thresholds for each metric')\n    ax.grid(True, alpha=0.3)\n    ax.legend()\n    ax.set_ylim(0, 1.05)\n    \n    plt.tight_layout()\n    plt.show()\n    \n    print(\"\\n💡 Key Insights:\")\n    print(\"   • Precision optimal: High threshold (fewer false positives)\")\n    print(\"   • Recall optimal: Low threshold (fewer false negatives)\")  \n    print(\"   • F1 optimal: Balanced trade-off between precision and recall\")\n\n# Run the static demo\ncreate_static_demo()"
   },
   {
    "cell_type": "markdown",