From 4aee02b6585e30ff3909722f09ffea59799f66ec Mon Sep 17 00:00:00 2001
From: Staging Server <tim@alloralabs.xyz>
Date: Wed, 10 Jun 2026 19:21:47 +0000
Subject: [PATCH] feat: volatility targets, CZAR loss, and testnet topic
 examples
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Core library changes:
- workflow.py: Add target_type parameter (log_return | volatility) and
  compute_volatility_target_polars() for std of 1-min log returns
- czar_loss.py (new): CZAR directional loss with gradient/hessian for
  custom LightGBM training — penalizes wrong-sign predictions, softens
  near-zero returns, normalizes by local volatility
- __init__.py: Export czar_loss, czar_gradient, czar_hessian,
  make_czar_objective

Tests:
- test_volatility_target.py: 8 tests for volatility target computation

Notebooks (example/illustration code):
- Reorganize all topic scripts under notebooks/testnet/topic_*/
- Add example scripts for all testnet topics: 38, 41, 42 (8h price),
  57, 83, 84 (8h log-return), 61, 62, 63 (24h log-return),
  71 (NEAR 8h), 79-82 (15m volatility)
- Add CZAR V1 model scripts for 8h price topics (38, 41, 42)
- Add dashboard.sh convenience script
- Remove notebooks/shared/ — keep deploy scripts at top level

Docs:
- README: Add topic reference tables, volatility workflow example
- AGENTS.md: Fix paths for testnet/ subfolder
- .gitignore: Add **/runs/ for training artifacts
---
 .gitignore                                    |   1 +
 AGENTS.md                                     |  16 +-
 README.md                                     |  64 +-
 allora_forge_builder_kit/__init__.py          |   5 +
 allora_forge_builder_kit/czar_loss.py         | 167 +++++
 allora_forge_builder_kit/workflow.py          |  76 ++-
 notebooks/dashboard.sh                        |  13 +
 ...ple_topic_79_btc_volatility_walkthrough.py | 547 +++++++++++++++
 .../testnet/topic_38_sol_8h_price/example.py  | 462 +++++++++++++
 .../topic_38_sol_8h_price/model_czar.py       | 339 ++++++++++
 .../model_v3_methodology.py                   | 316 +++++++++
 .../testnet/topic_41_eth_8h_price/example.py  | 625 ++++++++++++++++++
 .../topic_41_eth_8h_price/model_czar.py       | 339 ++++++++++
 .../testnet/topic_42_btc_8h_price/example.py  | 625 ++++++++++++++++++
 .../model_v2_directional.py                   | 370 +++++++++++
 .../topic_42_btc_8h_price/model_v3_czar.py    | 338 ++++++++++
 .../topic_57_sol_8h_logreturn/example.py      | 427 ++++++++++++
 .../topic_61_btc_24h_logreturn/example.py     | 427 ++++++++++++
 .../topic_62_sol_24h_logreturn/example.py     | 427 ++++++++++++
 .../topic_63_eth_24h_logreturn/example.py     | 427 ++++++++++++
 .../topic_71_near_8h_logreturn/example.py     | 427 ++++++++++++
 ...ple_topic_79_btc_volatility_walkthrough.py | 547 +++++++++++++++
 .../topic_79_model_a_deep_lookback.py         | 258 ++++++++
 .../topic_79_model_b_multiscale.py            | 280 ++++++++
 .../topic_79_model_c_colab.ipynb              | 525 +++++++++++++++
 .../topic_79_model_c_neural.py                | 377 +++++++++++
 .../topic_79_model_d_iterative.py             | 373 +++++++++++
 .../topic_79_model_e_calibrated.py            | 433 ++++++++++++
 ...ple_topic_80_eth_volatility_walkthrough.py | 547 +++++++++++++++
 .../topic_80_model_a_deep_lookback.py         | 258 ++++++++
 .../topic_80_model_b_multiscale.py            | 280 ++++++++
 .../topic_80_model_d_iterative.py             | 373 +++++++++++
 .../topic_81_model_e_calibrated.py            | 433 ++++++++++++
 .../topic_82_model_e_calibrated.py            | 433 ++++++++++++
 .../topic_83_btc_8h_logreturn/example.py      | 427 ++++++++++++
 .../topic_84_eth_8h_logreturn/example.py      | 427 ++++++++++++
 skills/allora-model-builder/SKILL.md          |  58 ++
 tests/README.md                               |   2 +-
 tests/test_volatility_target.py               | 145 ++++
 39 files changed, 12593 insertions(+), 21 deletions(-)
 create mode 100644 allora_forge_builder_kit/czar_loss.py
 create mode 100755 notebooks/dashboard.sh
 create mode 100644 notebooks/example_topic_79_btc_volatility_walkthrough.py
 create mode 100644 notebooks/testnet/topic_38_sol_8h_price/example.py
 create mode 100644 notebooks/testnet/topic_38_sol_8h_price/model_czar.py
 create mode 100644 notebooks/testnet/topic_38_sol_8h_price/model_v3_methodology.py
 create mode 100644 notebooks/testnet/topic_41_eth_8h_price/example.py
 create mode 100644 notebooks/testnet/topic_41_eth_8h_price/model_czar.py
 create mode 100644 notebooks/testnet/topic_42_btc_8h_price/example.py
 create mode 100644 notebooks/testnet/topic_42_btc_8h_price/model_v2_directional.py
 create mode 100644 notebooks/testnet/topic_42_btc_8h_price/model_v3_czar.py
 create mode 100644 notebooks/testnet/topic_57_sol_8h_logreturn/example.py
 create mode 100644 notebooks/testnet/topic_61_btc_24h_logreturn/example.py
 create mode 100644 notebooks/testnet/topic_62_sol_24h_logreturn/example.py
 create mode 100644 notebooks/testnet/topic_63_eth_24h_logreturn/example.py
 create mode 100644 notebooks/testnet/topic_71_near_8h_logreturn/example.py
 create mode 100644 notebooks/testnet/topic_79_btc_vol/example_topic_79_btc_volatility_walkthrough.py
 create mode 100644 notebooks/testnet/topic_79_btc_vol/topic_79_model_a_deep_lookback.py
 create mode 100644 notebooks/testnet/topic_79_btc_vol/topic_79_model_b_multiscale.py
 create mode 100644 notebooks/testnet/topic_79_btc_vol/topic_79_model_c_colab.ipynb
 create mode 100644 notebooks/testnet/topic_79_btc_vol/topic_79_model_c_neural.py
 create mode 100644 notebooks/testnet/topic_79_btc_vol/topic_79_model_d_iterative.py
 create mode 100644 notebooks/testnet/topic_79_btc_vol/topic_79_model_e_calibrated.py
 create mode 100644 notebooks/testnet/topic_80_eth_vol/example_topic_80_eth_volatility_walkthrough.py
 create mode 100644 notebooks/testnet/topic_80_eth_vol/topic_80_model_a_deep_lookback.py
 create mode 100644 notebooks/testnet/topic_80_eth_vol/topic_80_model_b_multiscale.py
 create mode 100644 notebooks/testnet/topic_80_eth_vol/topic_80_model_d_iterative.py
 create mode 100644 notebooks/testnet/topic_81_xrp_vol/topic_81_model_e_calibrated.py
 create mode 100644 notebooks/testnet/topic_82_sol_vol/topic_82_model_e_calibrated.py
 create mode 100644 notebooks/testnet/topic_83_btc_8h_logreturn/example.py
 create mode 100644 notebooks/testnet/topic_84_eth_8h_logreturn/example.py
 create mode 100644 tests/test_volatility_target.py

diff --git a/.gitignore b/.gitignore
index b78a9f2..1fe5542 100644
--- a/.gitignore
+++ b/.gitignore
@@ -75,3 +75,4 @@ managed_artifacts/
 artifacts/
 notebooks/artifacts/
 notebooks/runs/
+**/runs/
diff --git a/AGENTS.md b/AGENTS.md
index 2f2a41d..bc2ea19 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -39,8 +39,14 @@ In one working session, get a user from clone to live worker submissions with vi
 Run one of the whitelist-free examples to train + evaluate + save `predict.pkl`:
 
 ```bash
-python notebooks/example_topic_69_bitcoin_walkthrough.py
-python notebooks/example_topic_77_bitcoin_5min_walkthrough.py
+python notebooks/example_topic_69_bitcoin_walkthrough.py   # price prediction (1h bars)
+python notebooks/example_topic_77_bitcoin_5min_walkthrough.py  # price prediction (5m bars)
+
+# Volatility topics (1m bars, std of log returns over 15-min horizon)
+python notebooks/testnet/topic_79_btc_vol/topic_79_model_e_calibrated.py  # BTC best model
+python notebooks/testnet/topic_80_eth_vol/topic_80_model_e_calibrated.py  # ETH
+python notebooks/testnet/topic_81_xrp_vol/topic_81_model_e_calibrated.py  # XRP
+python notebooks/testnet/topic_82_sol_vol/topic_82_model_e_calibrated.py  # SOL
 ```
 
 Then deploy. The deploy scripts use `WorkerManager` internally — wallet creation,
@@ -108,6 +114,12 @@ All base features are therefore **ratios**, not raw prices. Any additional engin
 Before deployment, verify topic prediction format:
 - **Price topic** → absolute price prediction
 - **Log-return topic** → `log(future/current)` prediction
+- **Volatility topic** → std of 1-minute log returns over the horizon (non-negative float)
+
+## Volatility topics
+Topics 79–82 predict 15-minute realised volatility for BTC, ETH, XRP, SOL.
+Best model architecture: Model E (log-space prediction + bias correction + GARCH features).
+Scripts are organized in `notebooks/testnet/topic_{id}_{asset}_vol/` subdirectories.
 
 ## Repo hygiene rules
 - Never commit secrets or keys.
diff --git a/README.md b/README.md
index 094efcc..5af772b 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,9 @@ python example_topic_69_bitcoin_walkthrough.py
 
 # Topic 77 — 5-min BTC/USD price prediction (5m bars, ~2 min)
 python example_topic_77_bitcoin_5min_walkthrough.py
+
+# Topic 79 — 15-min BTC/USD volatility prediction (1m bars, ~5 min)
+python example_topic_79_btc_volatility_walkthrough.py
 ```
 
 Each script backfills historical data, engineers features, trains and evaluates a model, and saves a `predict.pkl` artifact.
@@ -182,18 +185,24 @@ for t in d.get_all_topics():
 
 Playground topics (testnet only) are the recommended starting point — no whitelist required.
 
-| Testnet ID | Name | Notes |
-|-----------|------|-------|
-| **69** | BTC/USD - 1 Day Price Prediction | Playground — example walkthroughs use this |
-| **77** | BTC/USD - 5 Min Price Prediction | Playground Fast |
+| Testnet ID | Name | Target type | Notes |
+|-----------|------|-------------|-------|
+| **69** | BTC/USD - 1 Day Price Prediction | Price | Playground — example walkthroughs use this |
+| **77** | BTC/USD - 5 Min Price Prediction | Price | Playground Fast |
+| **79** | BTC/USD - 15 Min Volatility Prediction | Volatility | Std of 1-min log returns over 15-min horizon |
+| **80** | ETH/USD - 15 Min Volatility Prediction | Volatility | Same definition as 79, ETH pair |
+| **81** | XRP/USD - 15 Min Volatility Prediction | Volatility | Same definition as 79, XRP pair |
+| **82** | SOL/USD - 15 Min Volatility Prediction | Volatility | Same definition as 79, SOL pair |
+| **83** | BTC/USD - 8h Log-Return Prediction | Log-return | 8h BTC/USD log return — testnet equivalent of mainnet topic 1 |
+| **84** | ETH/USD - 8h Log-Return Prediction | Log-return | 8h ETH/USD log return — testnet equivalent of mainnet topic 2 |
 
 Mainnet topics and their testnet equivalents:
 
 | Mainnet ID | Mainnet Name | Testnet ID | Testnet Name |
 |-----------|-------------|-----------|-------------|
-| 1  | BTC/USD - Log Returns - 8h  | 64 | 8h BTC/USD Log-Return (5min updates) |
-| 2  | ETH/USD - Log Returns - 8h  | — | Missing |
-| 3  | SOL/USD - Log Returns - 8h  | 57 | 8h SOL/USD Log-Return *(inactive)* |
+| 1  | BTC/USD - Log Returns - 8h  | 83 | BTC/USD - 8h Log-Return Prediction |
+| 2  | ETH/USD - Log Returns - 8h  | 84 | ETH/USD - 8h Log-Return Prediction |
+| 3  | SOL/USD - Log Returns - 8h  | 57 | 8h SOL/USD Log-Return Prediction |
 | 9  | ETH/USD - Price Prediction - 8h | 41 | ETH/USD - 8h Price Prediction |
 | 10 | SOL/USD - Price Prediction - 8h | 38 | SOL/USD - 8h Price Prediction |
 | 14 | BTC/USD - Price Prediction - 8h | 42 | BTC/USD - 8h Price Prediction |
@@ -210,21 +219,33 @@ Mainnet topics and their testnet equivalents:
 ```python
 from allora_forge_builder_kit import AlloraMLWorkflow
 
-# Build a training dataset
+# Build a training dataset (log-return target — default)
 workflow = AlloraMLWorkflow(
     tickers=["btcusd"],
-    topic_id=69,
+    number_of_input_bars=48,
+    target_bars=24,
     interval="1h",
-    n_input_bars=48,
-    n_target_bars=24,
+    data_source="allora",
+    api_key="UP-...",
 )
 workflow.backfill(days=500)
 df = workflow.get_full_feature_target_dataframe()
 
+# Volatility target (std of 1-min log returns over the horizon)
+vol_workflow = AlloraMLWorkflow(
+    tickers=["btcusd"],
+    number_of_input_bars=15,
+    target_bars=15,           # 15-minute volatility window
+    interval="1m",
+    target_type="volatility", # NEW: "log_return" (default) or "volatility"
+    data_source="allora",
+    api_key="UP-...",
+)
+
 # Evaluate a predict function
 from allora_forge_builder_kit import PerformanceEvaluator
-evaluator = PerformanceEvaluator(workflow)
-grade = evaluator.evaluate(predict_fn)
+evaluator = PerformanceEvaluator()
+report = evaluator.evaluate(y_true, y_pred)
 ```
 
 ---
@@ -233,7 +254,12 @@ grade = evaluator.evaluate(predict_fn)
 
 ### Framing forecasting as supervised learning
 
-At any point in time $t$, the model observes a window of $N$ past bars as input features $\mathbf{x} \in \mathbb{R}^d$ and predicts a future outcome $y$ — a price or log return over the next $H$ bars. By sliding this window across the full history, a single time series becomes thousands of labeled examples $(\mathbf{x}_i, y_i)$, turning forecasting into a standard supervised learning problem.
+At any point in time $t$, the model observes a window of $N$ past bars as input features $\mathbf{x} \in \mathbb{R}^d$ and predicts a future outcome $y$ over the next $H$ bars. The target $y$ depends on the topic type:
+
+- **Price / log-return topics** — the absolute price $y = p_{t+H}$ or the log return $y = \log(p_{t+H} / p_t)$
+- **Volatility topics** — $y = \text{std}(r_1, \ldots, r_H)$ where $r_i = \log(p_{t+i} / p_{t+i-1})$ are consecutive 1-minute log returns over the horizon
+
+By sliding this window across the full history, a single time series becomes thousands of labeled examples $(\mathbf{x}_i, y_i)$, turning forecasting into a standard supervised learning problem.
 
 The `AlloraMLWorkflow` handles this construction: `backfill()` fetches historical data, `get_full_feature_target_dataframe()` builds the feature matrix and target vector, ready for any scikit-learn compatible model.
 
@@ -321,9 +347,13 @@ All three produce a complete, runnable pipeline and satisfy the same nine method
 |------|---------|
 | `notebooks/example_topic_69_bitcoin_walkthrough.py` | End-to-end example for topic 69: data → features → model → artifact |
 | `notebooks/example_topic_77_bitcoin_5min_walkthrough.py` | End-to-end example for topic 77: 5-min BTC prediction |
-| `notebooks/deploy_worker.py` | Deploy any topic with WorkerManager (`TOPIC_ID=N python deploy_worker.py`) |
-| `notebooks/deploy_worker_raw.py` | Minimal SDK-only deployment reference (no WorkerManager) |
-| `notebooks/feature_engineering_example.py` | Standalone feature engineering reference |
+| `notebooks/testnet/topic_79_btc_vol/` | Topic 79 BTC/USD volatility: example + models A/B/C/D/E |
+| `notebooks/testnet/topic_80_eth_vol/` | Topic 80 ETH/USD volatility: models A/B/D/E |
+| `notebooks/testnet/topic_81_xrp_vol/` | Topic 81 XRP/USD volatility: model E |
+| `notebooks/testnet/topic_82_sol_vol/` | Topic 82 SOL/USD volatility: model E |
+| `notebooks/deploy_worker.py` | Deploy any topic with WorkerManager (`TOPIC_ID=N python deploy_worker.py`) |
+| `notebooks/deploy_worker_raw.py` | Minimal SDK-only deployment reference (no WorkerManager) |
+| `notebooks/feature_engineering_example.py` | Standalone feature engineering reference |
 | `allora_forge_builder_kit/workflow.py` | Data + feature pipeline |
 | `allora_forge_builder_kit/evaluation.py` | Model scoring (7 primary metrics + grading) |
 | `allora_forge_builder_kit/topic_discovery.py` | Query live topics on testnet/mainnet |
diff --git a/allora_forge_builder_kit/__init__.py b/allora_forge_builder_kit/__init__.py
index 9c6b015..672e4c5 100644
--- a/allora_forge_builder_kit/__init__.py
+++ b/allora_forge_builder_kit/__init__.py
@@ -10,6 +10,7 @@
 from .topic_discovery import AlloraTopicDiscovery, TopicInfo
 from .worker_manager import WorkerManager, WorkerSpec, DeployResult, Identity, build_topic_desc_resolver
 from .worker_monitor import WorkerMonitor, MonitorTarget, AlloraSDKEventFetcher
+from .czar_loss import czar_loss, czar_gradient, czar_hessian, make_czar_objective
 
 __all__ = [
     "__version__",
@@ -31,6 +32,10 @@
     "WorkerMonitor",
     "MonitorTarget",
     "AlloraSDKEventFetcher",
+    "czar_loss",
+    "czar_gradient",
+    "czar_hessian",
+    "make_czar_objective",
 ]
 
 
diff --git a/allora_forge_builder_kit/czar_loss.py b/allora_forge_builder_kit/czar_loss.py
new file mode 100644
index 0000000..f450663
--- /dev/null
+++ b/allora_forge_builder_kit/czar_loss.py
@@ -0,0 +1,167 @@
+"""
+CZAR Loss (Composite Zero-Agnostic Returns)
+============================================
+
+A piecewise loss built on the Cauchy kernel that:
+- Z-scores by local volatility
+- Applies steep wrong-sign penalties
+- Uses bounded arctan transitions for same-sign predictions
+- Smoothly reduces loss near zero returns
+
+Provides gradient and hessian for use as a custom LightGBM objective.
+"""
+
+import numpy as np
+
+
+def derivative(x):
+    return 1.0 / (1.0 + x**2)
+
+
+def antiderivative(x):
+    return np.arctan(x)
+
+
+def double_derivative(x):
+    return 2.0 * np.abs(x) / (1.0 + x**2)**2
+
+
+def eps_effective(eps, delta):
+    if abs(delta) == 0:
+        return np.arctan(eps)
+    A = (1 + delta**2) * (antiderivative(eps + delta) - antiderivative(delta))
+    beta = delta / (1 + delta**2)
+    return (-1 + np.sqrt(1 + 4 * beta * A)) / (2 * beta)
+
+
+def softplus(x):
+    return np.maximum(x, 0.0) + np.log1p(np.exp(-np.abs(x)))
+
+
+def norm_smooth(z_true, eps, delta, tau):
+    a = np.abs(z_true)
+    d2p1 = delta**2 + 1
+    num = d2p1 * (antiderivative(a + delta) - antiderivative(delta))
+    denom = eps + delta / d2p1 * eps**2
+    norm_min = 1.0 - num / denom
+
+    if tau <= 0:
+        return np.maximum(norm_min, 0.0)
+
+    num_inf = d2p1 * (0.5 * np.pi - antiderivative(delta))
+    norm_inf = 1.0 - num_inf / denom
+    tau_eff = np.abs(tau) * np.abs(norm_inf)
+    return softplus(norm_min / tau_eff) / softplus(1 / tau_eff)
+
+
+def czar_loss(y_true, y_pred, std, mean=0, alpha=1, epsilon=1, tau=0.05):
+    if alpha < 0 or alpha > 1:
+        raise ValueError(f"alpha must be between 0 and 1, got {alpha}")
+
+    z_true = (y_true - mean) / std
+    z_pred = (y_pred - mean) / std
+
+    s = np.where(z_true == 0, 1, np.sign(z_true))
+    s_pred = np.where(z_pred == 0, 1, np.sign(z_pred))
+    a = np.abs(z_true)
+    u = s * z_pred
+
+    delta = alpha / np.sqrt(3)
+    d2p1 = delta**2 + 1
+
+    d_true = z_true + s * delta
+    d_pred = z_pred + s_pred * delta
+
+    h1 = d2p1 * double_derivative(delta)
+    h3 = d2p1 * double_derivative(d_true)
+
+    C = s * d2p1 * (antiderivative(d_true) - antiderivative(s * delta))
+    L1 = 0.5 * h1 * z_pred**2 - s * z_pred + C
+    L2 = s * d2p1 * (antiderivative(d_true) - antiderivative(d_pred))
+    dz = z_pred - z_true
+    L3 = 0.5 * np.minimum(h3, h1) * dz**2 + s * d2p1 * derivative(d_true) * dz
+
+    if epsilon > 0:
+        eps_eff = eps_effective(epsilon, delta)
+        softening_0 = czar_loss(0, eps_eff, 1.0, epsilon=0, alpha=alpha)
+        norm = norm_smooth(z_true, eps_eff, delta, tau)
+        Lsoft = norm * softening_0
+    else:
+        Lsoft = 0
+
+    return np.where(u <= 0, L1, np.where(u <= a, L2, L3)) + Lsoft
+
+
+def czar_gradient(y_true, y_pred, std, mean=0, alpha=1):
+    z_true = (y_true - mean) / std
+    z_pred = (y_pred - mean) / std
+
+    s = np.where(z_true == 0, 1, np.sign(z_true))
+    s_pred = np.where(z_pred == 0, 1, np.sign(z_pred))
+    a = np.abs(z_true)
+    u = s * z_pred
+
+    delta = alpha / np.sqrt(3)
+    d2p1 = delta**2 + 1
+
+    d_true = z_true + s * delta
+    d_pred = z_pred + s_pred * delta
+
+    h1 = d2p1 * double_derivative(delta)
+    h3 = d2p1 * double_derivative(d_true)
+
+    G1 = h1 * z_pred - np.sign(z_true)
+    G2 = -s * d2p1 * derivative(d_pred)
+    G3 = np.minimum(h3, h1) * (z_pred - z_true)
+
+    return np.where(u <= 0, G1, np.where(u <= a, G2, G3)) / std
+
+
+def czar_hessian(y_true, y_pred, std, mean=0, alpha=1):
+    z_true = (y_true - mean) / std
+    z_pred = (y_pred - mean) / std
+
+    s = np.where(z_true == 0, 1.0, np.sign(z_true))
+    s_pred = np.where(z_pred == 0, 1.0, np.sign(z_pred))
+    a = np.abs(z_true)
+    u = s * z_pred
+
+    delta = alpha / np.sqrt(3)
+    d2p1 = delta**2 + 1
+
+    d_true = s * (np.abs(z_true) + delta)
+    d_pred = s_pred * (np.abs(z_pred) + delta)
+
+    h1 = d2p1 * double_derivative(delta)
+    H1 = np.full_like(d_pred, h1)
+    H2 = (1.0 + d_pred**2) * double_derivative(d_pred)
+    h3 = (1.0 + d_true**2) * double_derivative(d_true)
+    H3 = np.full_like(d_pred, np.minimum(h1, h3))
+
+    return np.where(u <= 0, H1, np.where(u <= a, H2, H3)) / std**2
+
+
+def make_czar_objective(std, mean=0, alpha=1):
+    """
+    Create a LightGBM-compatible custom objective using CZAR loss.
+    
+    Args:
+        std: Rolling volatility for z-scoring (scalar or array matching training data)
+        mean: Mean for z-scoring (usually 0 for returns)
+        alpha: CZAR alpha parameter (0-1, controls MSE curvature)
+    
+    Returns:
+        objective callable returning (grad, hess); pass as ``fobj`` (LightGBM < 4.0) or as the ``objective`` parameter (LightGBM >= 4.0)
+    """
+    def objective(y_true_or_dataset, y_pred):
+        # Handle both LightGBM Dataset objects and raw arrays
+        if hasattr(y_true_or_dataset, 'get_label'):
+            y_true = y_true_or_dataset.get_label()
+        else:
+            y_true = np.asarray(y_true_or_dataset)
+        grad = czar_gradient(y_true, y_pred, std=std, mean=mean, alpha=alpha)
+        hess = czar_hessian(y_true, y_pred, std=std, mean=mean, alpha=alpha)
+        # Clip hessian to avoid numerical issues
+        hess = np.maximum(hess, 1e-6)
+        return grad, hess
+    return objective
diff --git a/allora_forge_builder_kit/workflow.py b/allora_forge_builder_kit/workflow.py
index c74b5dc..9034af7 100644
--- a/allora_forge_builder_kit/workflow.py
+++ b/allora_forge_builder_kit/workflow.py
@@ -81,6 +81,7 @@ def __init__(
         number_of_input_bars,
         target_bars,
         interval="5m",
+        target_type="log_return",
         data_source="binance",  # Simple string API
         data_manager=None,  # Advanced: explicit instance
         **data_manager_kwargs  # Pass through to data manager (market, api_key, etc.)
@@ -93,6 +94,11 @@ def __init__(
             number_of_input_bars: Number of resampled bars to use as features (at the specified interval)
             target_bars: Number of bars ahead to predict (at the specified interval)
             interval: Bar interval (e.g. "5m", "1h")
+            target_type: Type of prediction target. One of:
+                - "log_return" (default): log(close[t+H] / close[t])
+                - "volatility": std of 1-minute log returns over the target horizon.
+                  For volatility targets, interval should be "1m" and target_bars
+                  defines the horizon window in minutes.
             data_source: Data source string ("binance" or "allora") - simple API
             data_manager: Optional pre-configured data manager instance - advanced API
             **data_manager_kwargs: Arguments passed to DataManager factory:
@@ -120,13 +126,31 @@ def __init__(
                 api_key="your-key"  # Allora-specific param
             )
             
+            # Volatility target - 15-minute BTC/USD volatility (Topic 79)
+            workflow = AlloraMLWorkflow(
+                tickers=["btcusd"],
+                number_of_input_bars=15,
+                target_bars=15,  # 15-minute horizon
+                interval="1m",
+                target_type="volatility",
+                data_source="allora",
+                api_key="your-key"
+            )
+            
             # Advanced API - explicit instance
             dm = DataManager(source="binance", interval="5m", market="futures")
             workflow = AlloraMLWorkflow(..., data_manager=dm)
         """
+        _valid_target_types = ("log_return", "volatility")
+        if target_type not in _valid_target_types:
+            raise ValueError(
+                f"target_type must be one of {_valid_target_types}, got {target_type!r}"
+            )
+
         self.tickers = tickers
         self.number_of_input_bars = number_of_input_bars
         self.target_bars = target_bars
+        self.target_type = target_type
         self.test_targets = None
         self.interval = interval
 
@@ -328,7 +352,12 @@ def get_full_feature_target_dataframe(self, start_date=None, end_date=None) -> p
                 continue
 
             df = self.stand_alone_features_from_1min_bars(df, live_mode=False)
-            df = self.compute_target_polars(df, self.target_bars)
+
+            if self.target_type == "volatility":
+                df = self.compute_volatility_target_polars(df, self.target_bars)
+            else:
+                df = self.compute_target_polars(df, self.target_bars)
+
             df = df.with_columns([pl.lit(t).alias("ticker")])
             datasets.append(df)
 
@@ -440,6 +469,51 @@ def compute_target_polars(self, df: pl.DataFrame, target_bars: int) -> pl.DataFr
             (pl.col("future_close").log() - pl.col("close").log()).alias("target")
         ])
         return df
+
+    def compute_volatility_target_polars(
+        self, df: pl.DataFrame, target_bars: int
+    ) -> pl.DataFrame:
+        """
+        Compute realised volatility target: the standard deviation of consecutive
+        1-minute log returns over the next *target_bars* bars.
+
+        Definition:
+            For each row at time t, let r_i = log(close[t+i] / close[t+i-1])
+            for i in 1..target_bars.  target[t] = std(r_1, ..., r_{target_bars}).
+
+        This matches the ground-truth definition used by the Allora volatility
+        reputer (allora-reputer-volatility-prediction).
+
+        Args:
+            df: Polars DataFrame with OHLCV data sorted by time.  Must be at
+                1-minute resolution for the result to be meaningful.
+            target_bars: Number of forward bars defining the volatility window.
+
+        Returns:
+            Polars DataFrame with 'target' column added.  Rows where the full
+            forward window is unavailable will have null targets.
+        """
+        # Compute per-bar log returns: log(close[t] / close[t-1])
+        log_returns = (pl.col("close").log() - pl.col("close").shift(1).log()).alias(
+            "_log_return"
+        )
+        df = df.with_columns([log_returns])
+
+        # Rolling std over the *next* target_bars log returns.
+        # Strategy: shift the log_return column backwards by 1 so that row t
+        # sees returns from t+1..t+target_bars, then apply a forward-looking
+        # rolling window.  Polars rolling_std is backward-looking, so we reverse
+        # the column, apply rolling_std, then reverse back.
+        lr = df["_log_return"].shift(-1)  # align: row t now holds return at t+1
+
+        # Reverse, apply backward rolling std, reverse back → forward rolling std
+        lr_reversed = lr.reverse()
+        vol_reversed = lr_reversed.rolling_std(window_size=target_bars, min_samples=target_bars)
+        vol = vol_reversed.reverse()
+
+        df = df.with_columns([vol.alias("target")])
+        df = df.drop("_log_return")
+        return df
     
     def extract_features_polars(
         self,
diff --git a/notebooks/dashboard.sh b/notebooks/dashboard.sh
new file mode 100755
index 0000000..d0a7b78
--- /dev/null
+++ b/notebooks/dashboard.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Worker dashboard — runnable from any directory (the script cd's into its own notebooks/ directory)
+# Usage: ./dashboard.sh          (full dashboard with on-chain data)
+#        ./dashboard.sh --fast   (local-only, skip on-chain sync)
+
+cd "$(dirname "$0")"
+source ../.venv/bin/activate
+
+if [[ "$1" == "--fast" ]]; then
+    python -m allora_forge_builder_kit.workerctl dashboard --no-monitor
+else
+    python -m allora_forge_builder_kit.workerctl dashboard
+fi
diff --git a/notebooks/example_topic_79_btc_volatility_walkthrough.py b/notebooks/example_topic_79_btc_volatility_walkthrough.py
new file mode 100644
index 0000000..c8c5569
--- /dev/null
+++ b/notebooks/example_topic_79_btc_volatility_walkthrough.py
@@ -0,0 +1,547 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 79 BTC/USD 15-Minute Volatility Prediction
+================================================================================
+
+This walkthrough demonstrates 15-minute realised volatility prediction for
+BTC/USD using the Allora ML Workflow Kit with base features and LightGBM.
+
+Target definition:
+    The standard deviation of consecutive 1-minute log returns over the next
+    15 minutes.  Formally, for each timestamp t:
+
+        r_i = log(close[t+i] / close[t+i-1])   for i in 1..15
+        target[t] = std(r_1, r_2, ..., r_15)
+
+    This matches the ground-truth definition used by the Allora volatility
+    reputer (allora-reputer-volatility-prediction).
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+from scipy.stats import pearsonr, spearmanr
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 60
+INTERVAL = "1m"  # 1-minute base interval for volatility
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 15  # 15 minutes of 1-minute bars for input features
+TARGET_BARS = 15           # 15-minute volatility horizon
+
+# Target type: volatility (std of 1-min log returns over the horizon)
+TARGET_TYPE = "volatility"
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("=" * 80)
+print("Allora Forge Builder Kit v3.0 - Topic 79 Walkthrough")
+print("BTC/USD 15-Minute Volatility Prediction")
+print("=" * 80)
+
+
+def _to_serializable(obj):
+    """Recursively turn numpy/pandas values into plain JSON-friendly Python objects."""
+    # Containers first: rebuild them with every element converted.
+    if isinstance(obj, dict):
+        return {key: _to_serializable(val) for key, val in obj.items()}
+    if isinstance(obj, list):
+        return [_to_serializable(item) for item in obj]
+    # numpy scalars unwrap to their native Python equivalents.
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()
+    if isinstance(obj, np.bool_):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    return obj.isoformat() if isinstance(obj, (pd.Timestamp, datetime)) else obj
+
+
+# =============================================================================
+# VOLATILITY-SPECIFIC METRICS
+# =============================================================================
+def vol_metrics(y_true, y_pred):
+    """
+    Compute volatility-specific evaluation metrics.
+
+    These replace the standard log-return metrics (DA, CZAR) which are not
+    meaningful for volatility prediction.  Returns a dict of named metrics.
+    """
+    y_true = np.asarray(y_true, dtype=float)
+    y_pred = np.asarray(y_pred, dtype=float)
+    r, _ = pearsonr(y_true, y_pred)
+    rho, _ = spearmanr(y_true, y_pred)
+    err = y_true - y_pred
+    ss_res = np.sum(err ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    # R² is undefined for a constant target; report NaN instead of dividing by 0.
+    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float("nan")
+    mae = np.mean(np.abs(err))
+    # QLIKE: quasi-likelihood loss (standard for volatility forecasting).
+    # Both sides must be > 0: a zero target gives log(0) and an infinite mean.
+    mask = (y_pred > 0) & (y_true > 0)
+    if mask.any():
+        ratio = y_true[mask] / y_pred[mask]
+        qlike = np.mean(ratio - np.log(ratio) - 1)
+    else:
+        qlike = float("inf")
+    return {
+        "pearson_r": r,
+        "spearman_rho": rho,
+        "r2": r2,
+        "rmse": np.sqrt(np.mean(err ** 2)),
+        "mae": mae,
+        "rel_mae": mae / np.mean(y_true),
+        "qlike": qlike,
+    }
+
+
+def print_vol_metrics(metrics, label=""):
+    """Print a volatility-metrics dict as a ruled table, with an optional heading."""
+    rule = f"  {'─' * 50}"
+    print(f"\n{rule}")
+    if label:
+        print(f"  {label}\n{rule}")
+    print(f"  Pearson r:   {metrics['pearson_r']:.4f}")
+    print(f"  Spearman ρ:  {metrics['spearman_rho']:.4f}")
+    print(f"  R²:          {metrics['r2']:.4f}")
+    print(f"  RMSE:        {metrics['rmse']:.6f}")
+    print(f"  MAE:         {metrics['mae']:.6f}")
+    print(f"  Rel MAE:     {metrics['rel_mae']*100:.2f}%")
+    print(f"  QLIKE:       {metrics['qlike']:.6f}")
+    print(rule)
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Write config.json, metrics.json, predictions.csv, a scatter PNG and report.txt into run_dir; return a dict of paths."""
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config: module-level experiment constants plus the winning params
+    config = {
+        "topic_id": 79,
+        "target_type": TARGET_TYPE,
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics: every best_result entry except the "predictions" series
+    metrics_payload = {k: v for k, v in best_result.items() if k != "predictions"}
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table: attach predictions positionally, keep rows that have one
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x reference line
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (realised volatility)")
+    plt.ylabel("Prediction (realised volatility)")
+    plt.title("Predictions vs Target — 15-min BTC Volatility")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report (only metrics present in best_result are written)
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 79 Run Report\n")
+        f.write("BTC/USD 15-Minute Volatility Prediction\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Volatility Metrics:\n")
+        for key in ["pearson_r", "spearman_rho", "r2", "rmse", "mae", "rel_mae", "qlike"]:
+            if key in best_result:
+                f.write(f"  {key}: {best_result[key]:.6f}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), ".allora_api_key")
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+
+print("✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS * 5}")
+print(f"   Target: {TARGET_TYPE} over {TARGET_BARS}-minute horizon")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(
+        start_date=start_date
+    ).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+
+# Feature Engineering: Add volatility-relevant features from the lookback window
+def engineer_vol_features(row):
+    """Engineer volatility-predictive features (no data leakage — same row only).
+
+    Args:
+        row: Series-like with feature_close_i / feature_high_i / feature_low_i
+            for i in range(NUMBER_OF_INPUT_BARS).
+    Returns:
+        pd.Series of the engineered features, keyed by feature name.
+    """
+    closes = np.array([row[f"feature_close_{i}"] for i in range(NUMBER_OF_INPUT_BARS)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(NUMBER_OF_INPUT_BARS)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(NUMBER_OF_INPUT_BARS)])
+
+    features = {}
+
+    # Realised volatility of the lookback window (std of 1-min log returns)
+    log_returns = np.diff(np.log(closes + 1e-12))
+    features["hist_vol_full"] = np.std(log_returns, ddof=1) if len(log_returns) > 1 else 0.0
+
+    # Short-term vs long-term vol ratio (regime detection)
+    if len(log_returns) >= 5:
+        features["hist_vol_5m"] = np.std(log_returns[-5:], ddof=1)
+        features["vol_ratio_5_full"] = (
+            features["hist_vol_5m"] / (features["hist_vol_full"] + 1e-12)
+        )
+    else:
+        features["hist_vol_5m"] = features["hist_vol_full"]
+        features["vol_ratio_5_full"] = 1.0
+
+    # High-low range (Parkinson-style proxy)
+    hl_range = highs - lows
+    features["hl_range_mean"] = np.mean(hl_range)
+    features["hl_range_recent"] = np.mean(hl_range[-3:]) if len(hl_range) >= 3 else hl_range[-1]
+    features["hl_range_ratio"] = features["hl_range_recent"] / (features["hl_range_mean"] + 1e-12)
+
+    # Absolute return (magnitude of recent move).  The 5-minute move sums the last
+    # five 1-minute returns (same window as hist_vol_5m); closes[-5] spans only four.
+    features["abs_return_1m"] = abs(log_returns[-1]) if len(log_returns) > 0 else 0.0
+    features["abs_return_5m"] = abs(np.sum(log_returns[-5:])) if len(log_returns) >= 5 else 0.0
+
+    return pd.Series(features)
+
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith("feature_")]
+
+# Apply feature engineering
+print("   Engineering volatility-predictive features...")
+engineered_features = df_all.apply(engineer_vol_features, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered volatility features
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+
+print(
+    f"✅ Dataset: {len(df_all):,} samples "
+    f"({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})"
+)
+print(
+    f"   Features: {len(base_feature_cols)} base + "
+    f"{len(engineered_features.columns)} vol = {len(feature_cols)} total"
+)
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS,
+    gap=TARGET_BARS,
+    max_train_size=MAX_TRAIN_SIZE,
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx + 1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+
+            # Train once with max trees, evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]["target"]
+
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1,
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))
+
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all["pred"] = np.nan
+
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                # Evaluate with volatility-specific metrics
+                valid_mask = ~df_all["pred"].isna()
+                y_true_cv = df_all.loc[valid_mask, "target"].values
+                y_pred_cv = np.maximum(df_all.loc[valid_mask, "pred"].values, 0)
+                metrics = vol_metrics(y_true_cv, y_pred_cv)
+
+                # Store results
+                results.append(
+                    {
+                        "config_num": config_num,
+                        "n_estimators": n_est,
+                        "learning_rate": lr,
+                        "max_depth": depth,
+                        "num_leaves": leaves,
+                        "predictions": df_all["pred"].copy(),
+                        **metrics,
+                    }
+                )
+
+                print(
+                    f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, "
+                    f"d={depth}, l={leaves:2d} -> "
+                    f"r={metrics['pearson_r']:.4f} R²={metrics['r2']:.4f} "
+                    f"QLIKE={metrics['qlike']:.4f}"
+                )
+
+# Analyze results — rank by R² (primary), then QLIKE (secondary, lower=better)
+results_df = pd.DataFrame(
+    [{k: v for k, v in r.items() if k != "predictions"} for r in results]
+)
+results_df = results_df.sort_values(["r2", "qlike"], ascending=[False, True])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print("\n   Top 5 models:")
+top5_cols = [
+    "config_num",
+    "n_estimators",
+    "learning_rate",
+    "max_depth",
+    "num_leaves",
+    "pearson_r",
+    "r2",
+    "qlike",
+]
+print(results_df[top5_cols].head().to_string(index=False, float_format="%.4f"))
+
+# Select best model (index the column first: a row Series would upcast config_num to float)
+best_result = results[int(results_df["config_num"].iloc[0]) - 1]
+best_params = {
+    k: best_result[k]
+    for k in ["n_estimators", "learning_rate", "max_depth", "num_leaves"]
+}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(
+    f"   r={best_result['pearson_r']:.4f} R²={best_result['r2']:.4f} "
+    f"QLIKE={best_result['qlike']:.4f} | "
+    f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, "
+    f"d={best_params['max_depth']}, l={best_params['num_leaves']}"
+)
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print_vol_metrics(best_result, "BEST MODEL — Volatility Metrics")
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model & Create Predict Function
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params["n_estimators"],
+    learning_rate=best_params["learning_rate"],
+    max_depth=best_params["max_depth"],
+    num_leaves=best_params["num_leaves"],
+    random_state=42,
+    verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all["target"])
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+
+def predict(nonce: int = None) -> float:
+    """
+    Return the model's forecast of BTC/USD 15-minute realised volatility.
+
+    Submitted to the Allora network for Topic 79.  The value is the predicted
+    standard deviation of 1-minute log returns over the next 15 minutes,
+    floored at zero.
+
+    Args:
+        nonce: Block nonce from Allora SDK (unused).
+
+    Returns:
+        float: Predicted 15-minute realised volatility.
+
+    Raises:
+        ValueError: If the workflow returns no live feature row.
+    """
+    # Latest 1-minute feature row for the configured ticker
+    latest = workflow.get_live_features(ticker=TICKERS[0])
+    if latest is None or len(latest) == 0:
+        raise ValueError("Could not get live features")
+
+    # Same feature pipeline as training: base columns + engineered vol features
+    first_row = latest.iloc[0]
+    base_part = latest[base_feature_cols].iloc[0]
+    combined = pd.concat([base_part, engineer_vol_features(first_row)])
+
+    # Single-sample matrix in training column order; the output is already a
+    # volatility, so no price conversion is needed.
+    model_input = combined[feature_cols].values.reshape(1, -1)
+    raw_prediction = final_model.predict(model_input)[0]
+
+    # Volatility must be non-negative
+    predicted_volatility = max(0.0, float(raw_prediction))
+
+    print(f"\nLive Prediction: {predicted_volatility:.6f} (15-min realised vol)")
+    return predicted_volatility
+
+
+# Test and save
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+
+with open("predict.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n" + "=" * 80)
+print("COMPLETE!")
+print("=" * 80)
+print(
+    f"{len(feature_cols)} features | "
+    f"r={best_result['pearson_r']:.4f} | R²={best_result['r2']:.4f} | "
+    f"QLIKE={best_result['qlike']:.4f}"
+)
+print(f"\nTo deploy this worker:")
+print(f"  TOPIC_ID=79 python notebooks/deploy_worker_raw.py")
diff --git a/notebooks/testnet/topic_38_sol_8h_price/example.py b/notebooks/testnet/topic_38_sol_8h_price/example.py
new file mode 100644
index 0000000..ccf8f82
--- /dev/null
+++ b/notebooks/testnet/topic_38_sol_8h_price/example.py
@@ -0,0 +1,462 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 38 SOL/USD Price Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 8-hour SOL/USD price prediction using the 
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["solusd"]
+DAYS_OF_HISTORY = 1825     # ~5 years
+INTERVAL = "1h"            # 1-hour bars
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 48   # 2 days of hourly bars (48h lookback)
+TARGET_BARS = 8             # Predict 8 bars (8 hours) ahead
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 38 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively turn numpy/pandas values into plain JSON-friendly Python objects."""
+    # Containers first: rebuild them with every element converted.
+    if isinstance(obj, dict):
+        return {key: _to_serializable(val) for key, val in obj.items()}
+    if isinstance(obj, list):
+        return [_to_serializable(item) for item in obj]
+    # numpy scalars unwrap to their native Python equivalents.
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()
+    if isinstance(obj, np.bool_):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    return obj.isoformat() if isinstance(obj, (pd.Timestamp, datetime)) else obj
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Write config.json, metrics.json, predictions.csv, a scatter PNG and report.txt into run_dir; return a dict of paths."""
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config: module-level experiment constants plus the winning params
+    config = {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics: score/grade/num_passed are required keys; the rest default when absent
+    metrics_payload = {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table: attach predictions positionally, keep rows that have one
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x reference line
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report: score, grade, params and per-metric PASS/FAIL
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 38 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")  # NOTE(review): "/7" is hard-coded — confirm it matches num_primary_metrics
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Derive return, volatility, momentum, range, volume and trend features from one row's lookback bars; returns a pd.Series."""
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f'feature_close_{i}'] for i in range(n)])
+    highs = np.array([row[f'feature_high_{i}'] for i in range(n)])
+    lows = np.array([row[f'feature_low_{i}'] for i in range(n)])
+    volumes = np.array([row[f'feature_volume_{i}'] for i in range(n)])
+    
+    log_rets = np.diff(np.log(closes + 1e-12))
+    features = {}
+    
+    # Log returns at multiple horizons (labels assume 1h bars ordered oldest→newest — TODO confirm)
+    features['ret_1h'] = log_rets[-1] if len(log_rets) >= 1 else 0
+    features['ret_4h'] = np.sum(log_rets[-4:]) if len(log_rets) >= 4 else 0
+    features['ret_8h'] = np.sum(log_rets[-8:]) if len(log_rets) >= 8 else 0
+    features['ret_24h'] = np.sum(log_rets[-24:]) if len(log_rets) >= 24 else 0
+    features['ret_48h'] = np.sum(log_rets) if len(log_rets) >= 2 else 0
+    
+    # Realised volatility at multiple horizons (sample std, ddof=1; 0 when the window is too short)
+    features['vol_8h'] = np.std(log_rets[-8:], ddof=1) if len(log_rets) >= 8 else 0
+    features['vol_24h'] = np.std(log_rets[-24:], ddof=1) if len(log_rets) >= 24 else 0
+    features['vol_48h'] = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 0
+    
+    # Momentum: short vs long return (1e-12 guards against a zero denominator)
+    features['momentum_ratio'] = features['ret_8h'] / (abs(features['ret_48h']) + 1e-12)
+    
+    # Mean reversion signal: z-score of the last close against the last 24 closes
+    features['mean_reversion'] = (closes[-1] - np.mean(closes[-24:])) / (np.std(closes[-24:]) + 1e-12) if n >= 24 else 0
+    
+    # High-low range (proxy for intraday vol): recent 8-bar mean, and its ratio to the full-window mean
+    hl_range = highs - lows
+    features['hl_range_8h'] = np.mean(hl_range[-8:])
+    features['hl_range_ratio'] = np.mean(hl_range[-8:]) / (np.mean(hl_range) + 1e-12)
+    
+    # Volume trend: recent 8-bar mean volume relative to the full-window mean
+    features['volume_ratio'] = np.mean(volumes[-8:]) / (np.mean(volumes) + 1e-12)
+    
+    # Trend strength (efficiency ratio): |net move| / total path length over the last 8 returns
+    net_move = abs(np.sum(log_rets[-8:]))
+    total_path = np.sum(np.abs(log_rets[-8:]))
+    features['efficiency_8h'] = net_move / (total_path + 1e-12)
+    
+    return pd.Series(features)
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            # One fit per fold serves every checkpoint: predict(num_iteration=...) below uses only the first n_est trees.
+            # Train once with max trees, evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                # Sampling/regularisation settings are fixed; only lr, depth and leaves vary across the grid.
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    subsample=0.8,
+                    colsample_bytree=0.7,
+                    min_child_samples=50,
+                    reg_alpha=0.1,
+                    reg_lambda=1.0,
+                    random_state=42,
+                    verbose=-1
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))  # keep test_idx so predictions land on the right rows
+            
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all['pred'] = np.nan
+                # Rows that never fall in a test fold keep NaN and are masked out before scoring.
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+                
+                # Score the pooled out-of-fold predictions from all folds together
+                valid_mask = ~df_all['pred'].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, 'target'],
+                    y_pred=df_all.loc[valid_mask, 'pred']
+                )
+                
+                # Store results (config_num is 1-based, so entry i lives at results[i - 1])
+                results.append({
+                    'config_num': config_num,
+                    'n_estimators': n_est,
+                    'learning_rate': lr,
+                    'max_depth': depth,
+                    'num_leaves': leaves,
+                    'predictions': df_all['pred'].copy(),
+                    **metrics
+                })
+                
+                print(f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, d={depth}, l={leaves:2d} -> "
+                      f"{metrics['num_passed']}/7 ({metrics['score']:.1%} - {metrics['grade']})")
+
+# Analyze results
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+results_df = results_df.sort_values(['num_passed', 'score'], ascending=[False, False])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print(f"\n   Top 5 models:")
+top5_cols = ['config_num', 'n_estimators', 'learning_rate', 'max_depth', 'num_leaves', 'num_passed', 'score']
+print(results_df[top5_cols].head().to_string(index=False))
+
+# Select best model: config_num is 1-based; int() keeps the list index valid even if the row Series is upcast to float
+best_result = results[int(results_df.iloc[0]['config_num']) - 1]
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(f"   {best_result['num_passed']}/7 points ({best_result['score']:.1%}) | "
+      f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, d={best_params['max_depth']}, l={best_params['num_leaves']}")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params['n_estimators'],
+    learning_rate=best_params['learning_rate'],
+    max_depth=best_params['max_depth'], num_leaves=best_params['num_leaves'],
+    # Same sampling/regularisation as every CV fit in the grid search, so the model that ships is the one that was scored.
+    subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+    reg_alpha=0.1, reg_lambda=1.0, random_state=42, verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all['target'])
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict SOL/USD price 8 hours into the future.
+
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+    Returns:
+        float: Predicted SOL price in USD, i.e. current price * exp(predicted log return)
+    Raises: ValueError if live features or a finite, positive current price cannot be obtained
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+    
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+    
+    # Get current price from live feature context (remote-only path)
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        # Fallback to live snapshot (still remote API; no local parquet)
+        snap = workflow._dm.get_live_snapshot(TICKERS)
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+    
+    # Predict log return (features are selected in the training column order, feature_cols)
+    predicted_log_return = final_model.predict(live_features[feature_cols].values.reshape(1, -1))[0]
+    
+    # Convert log return to price
+    predicted_price = current_price * np.exp(predicted_log_return)
+    
+    print(f"\nLive Prediction: ${predicted_price:,.2f} ({predicted_log_return:+.4f} log return)")
+    
+    return float(predicted_price)
+
+# Test and save
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+
+with open("predict_38.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {best_result['num_passed']}/7 points ({best_result['score']:.1%})")
+print("Saved to predict_38.pkl")
+print(f"Run artifacts: {artifacts['run_dir']}")
+print(f"- Predictions: {artifacts['predictions_csv']}")
+print(f"- Scatter plot: {artifacts['scatter_png']}")
+print("="*80)
+print("\nDeploy: python deploy_worker.py")
+
diff --git a/notebooks/testnet/topic_38_sol_8h_price/model_czar.py b/notebooks/testnet/topic_38_sol_8h_price/model_czar.py
new file mode 100644
index 0000000..94b817c
--- /dev/null
+++ b/notebooks/testnet/topic_38_sol_8h_price/model_czar.py
@@ -0,0 +1,339 @@
+#!/usr/bin/env python3
+"""
+Topic 38 — SOL/USD 8h Price — v3 (CZAR Loss)
+=============================================
+
+Uses CZAR loss instead of MSE/Huber. CZAR penalizes wrong-sign predictions
+heavily, softens near-zero returns, and normalizes by local volatility.
+This should help SOL where the signal is weak — CZAR won't waste capacity
+fitting noise on near-zero returns.
+
+Combined with the directional features from v2.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+from scipy.stats import pearsonr
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator, make_czar_objective
+from allora_forge_builder_kit.utils import get_api_key
+
+# =============================================================================
+# CONFIG
+# =============================================================================
+TICKERS = ["solusd"]
+DAYS_OF_HISTORY = 1825
+INTERVAL = "1h"
+NUMBER_OF_INPUT_BARS = 48
+TARGET_BARS = 8
+
+N_SPLITS = 3
+# Smaller grid — focus on CZAR-specific params
+N_ESTIMATORS_MAX = 600
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 600]
+LEARNING_RATES = [0.01, 0.03, 0.07]
+MAX_DEPTHS = [3, 5]
+NUM_LEAVES = [15, 31]
+CZAR_ALPHAS = [0.3, 0.5, 0.7, 1.0]  # CZAR alpha param (MSE curvature)
+
+print("=" * 70)
+print("Topic 38 — SOL/USD 8h Price — v3 (CZAR Loss)")
+print("=" * 70)
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/5] Loading data...")
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key")
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS, number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS, interval=INTERVAL,
+    data_source="allora", api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+workflow.backfill(start=start_date)
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df_all.columns if c.startswith("feature_")]
+df_all = df_all.dropna(subset=base_feature_cols + ["target"])
+print(f"✅ {len(df_all):,} samples")
+
+# =============================================================================
+# DIRECTIONAL FEATURES (same as v2)
+# =============================================================================
+print("\n[2/5] Engineering directional features...")
+
+
+def engineer_directional_features(row):  # one row of lagged OHLCV columns -> pd.Series of named features
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+    # Bar-to-bar log returns (the epsilon guards log(0)); np.diff leaves one fewer element than closes.
+    log_rets = np.diff(np.log(closes + 1e-12))
+    abs_rets = np.abs(log_rets)
+    f = {}
+    # Cumulative log returns over the trailing 1/4/8/24 returns and over the whole window.
+    f["ret_1h"] = log_rets[-1] if len(log_rets) >= 1 else 0
+    f["ret_4h"] = np.sum(log_rets[-4:]) if len(log_rets) >= 4 else 0
+    f["ret_8h"] = np.sum(log_rets[-8:]) if len(log_rets) >= 8 else 0
+    f["ret_24h"] = np.sum(log_rets[-24:]) if len(log_rets) >= 24 else 0
+    f["ret_48h"] = np.sum(log_rets) if len(log_rets) >= 2 else 0
+    # Sample std (ddof=1) of returns; falls back to 1e-6 when the window is too short.
+    vol_8h = np.std(log_rets[-8:], ddof=1) if len(log_rets) >= 8 else 1e-6
+    vol_24h = np.std(log_rets[-24:], ddof=1) if len(log_rets) >= 24 else 1e-6
+    vol_48h = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 1e-6
+    f["vol_8h"] = vol_8h
+    f["vol_24h"] = vol_24h
+    f["vol_48h"] = vol_48h
+    # Returns scaled by the 8-return vol; the factors 2 and sqrt(8) are the square roots of the 4- and 8-return window lengths.
+    f["znorm_ret_1h"] = log_rets[-1] / (vol_8h + 1e-12) if len(log_rets) >= 1 else 0
+    f["znorm_ret_4h"] = np.sum(log_rets[-4:]) / (vol_8h * 2 + 1e-12) if len(log_rets) >= 4 else 0
+    f["znorm_ret_8h"] = np.sum(log_rets[-8:]) / (vol_8h * np.sqrt(8) + 1e-12) if len(log_rets) >= 8 else 0
+    # Up/down asymmetry over the last 24 returns: dispersion skew and share of positive returns.
+    if len(log_rets) >= 24:
+        up_rets = log_rets[-24:][log_rets[-24:] > 0]
+        dn_rets = log_rets[-24:][log_rets[-24:] < 0]
+        up_vol = np.std(up_rets, ddof=1) if len(up_rets) > 1 else 1e-6
+        dn_vol = np.std(np.abs(dn_rets), ddof=1) if len(dn_rets) > 1 else 1e-6
+        f["vol_skew_24h"] = (up_vol - dn_vol) / (up_vol + dn_vol + 1e-12)
+        f["up_fraction_24h"] = np.mean(log_rets[-24:] > 0)
+    else:
+        f["vol_skew_24h"] = 0
+        f["up_fraction_24h"] = 0.5
+    # Share of positive returns over the last 8 returns (0.5 when unavailable).
+    f["up_fraction_8h"] = np.mean(log_rets[-8:] > 0) if len(log_rets) >= 8 else 0.5
+    # Correlation of the last 9 returns with the same series shifted back by one; a non-finite result becomes 0.
+    if len(log_rets) >= 10:
+        f["ret_autocorr"] = np.corrcoef(log_rets[-9:], log_rets[-10:-1])[0, 1]
+        if not np.isfinite(f["ret_autocorr"]):
+            f["ret_autocorr"] = 0
+    else:
+        f["ret_autocorr"] = 0
+    # Short-vs-long vol ratio and a 0/1 flag for the 8-return vol exceeding the 24-return vol.
+    f["vol_ratio_8_48"] = vol_8h / (vol_48h + 1e-12)
+    f["vol_expanding"] = 1.0 if vol_8h > vol_24h else 0.0
+    # Mean volume of the last 4 bars relative to the last 24; the divergence term carries the opposite sign of ret_4h.
+    vol_trend = np.mean(volumes[-4:]) / (np.mean(volumes[-24:]) + 1e-12) if len(volumes) >= 24 else 1
+    f["vol_price_divergence"] = vol_trend * np.sign(-f["ret_4h"])
+    f["volume_ratio"] = vol_trend
+    # Z-score of the latest close against the trailing 24 closes.
+    if len(closes) >= 24:
+        f["zscore_24h"] = (closes[-1] - np.mean(closes[-24:])) / (np.std(closes[-24:], ddof=1) + 1e-12)
+    else:
+        f["zscore_24h"] = 0
+    # Efficiency ratio over the last 8 returns: |net move| divided by the sum of absolute moves.
+    if len(log_rets) >= 8:
+        net = abs(np.sum(log_rets[-8:]))
+        path = np.sum(abs_rets[-8:])
+        f["efficiency_8h"] = net / (path + 1e-12)
+    else:
+        f["efficiency_8h"] = 0
+    # Mean high-low range over the last 8 bars, raw and relative to the full-window mean.
+    hl = highs - lows
+    f["hl_range_8h"] = np.mean(hl[-8:])
+    f["hl_range_ratio"] = np.mean(hl[-8:]) / (np.mean(hl) + 1e-12)
+
+    return pd.Series(f)
+
+
+engineered = df_all.apply(engineer_directional_features, axis=1)
+df_all = pd.concat([df_all, engineered], axis=1)
+feature_cols = list(engineered.columns)
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+print(f"✅ {len(feature_cols)} features")
+
+# =============================================================================
+# COMPUTE ROLLING VOL FOR CZAR (needed for the loss)
+# =============================================================================
+# Use 8h rolling std of target as the vol normalization for CZAR
+targets = df_all["target"].values
+rolling_std = pd.Series(targets).rolling(8, min_periods=2).std().fillna(targets.std()).values
+df_all["_rolling_std"] = rolling_std
+
+# =============================================================================
+# GRID SEARCH WITH CZAR LOSS
+# =============================================================================
+print(f"\n[3/5] Grid search with CZAR loss...")
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=TARGET_BARS)
+evaluator = PerformanceEvaluator()
+results = []
+config_num = 0
+
+total = len(LEARNING_RATES) * len(MAX_DEPTHS) * len(NUM_LEAVES) * len(CZAR_ALPHAS) * len(N_ESTIMATORS_CHECKPOINTS)
+print(f"   {total} configs (CZAR alpha × LR × depth × leaves × checkpoints)")
+
+for czar_alpha in CZAR_ALPHAS:
+    for lr in LEARNING_RATES:
+        for depth in MAX_DEPTHS:
+            for leaves in NUM_LEAVES:
+                    fold_models = []
+                    for train_idx, test_idx in tscv.split(df_all):
+                        y_train = df_all.iloc[train_idx]["target"].values
+                        std_train = df_all.iloc[train_idx]["_rolling_std"].values
+                        # std_train is sliced with the same train_idx as y_train, so the two arrays stay row-aligned.
+                        # Create CZAR objective for this fold
+                        czar_obj = make_czar_objective(
+                            std=std_train, alpha=czar_alpha
+                        )
+                        # The CZAR objective is passed to LightGBM in place of a built-in loss for this fit.
+                        lgb = LGBMRegressor(
+                            objective=czar_obj,
+                            n_estimators=N_ESTIMATORS_MAX,
+                            learning_rate=lr,
+                            max_depth=depth,
+                            num_leaves=leaves,
+                            subsample=0.8,
+                            colsample_bytree=0.7,
+                            min_child_samples=50,
+                            reg_alpha=0.1,
+                            reg_lambda=1.0,
+                            random_state=42,
+                            verbose=-1,
+                        )
+                        lgb.fit(
+                            df_all.iloc[train_idx][feature_cols],
+                            y_train,
+                        )
+                        fold_models.append((lgb, test_idx))
+                    # Score each tree-count checkpoint from the same fitted fold models (first n_est trees only).
+                    for n_est in N_ESTIMATORS_CHECKPOINTS:
+                        config_num += 1
+                        df_all["pred"] = np.nan
+                        for lgb, test_idx in fold_models:
+                            preds = lgb.predict(
+                                df_all.iloc[test_idx][feature_cols],
+                                num_iteration=n_est,
+                            )
+                            df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+                        # Pool out-of-fold rows only; rows never in a test fold keep NaN and are excluded.
+                        valid = ~df_all["pred"].isna()
+                        y_t = df_all.loc[valid, "target"].values
+                        y_p = df_all.loc[valid, "pred"].values
+                        metrics = evaluator.evaluate(y_true=y_t, y_pred=y_p)
+                        r_val, _ = pearsonr(y_t, y_p)
+                        cal = np.std(y_p) / (np.std(y_t) + 1e-12)  # prediction spread relative to target spread
+                        # Evaluator output may nest its numbers under "metrics"; otherwise read the top-level dict.
+                        m = metrics.get("metrics", metrics)
+                        results.append({
+                            "config_num": config_num,
+                            "n_estimators": n_est,
+                            "learning_rate": lr,
+                            "max_depth": depth,
+                            "num_leaves": leaves,
+                            "czar_alpha": czar_alpha,
+                            "da": m.get("directional_accuracy", 0),
+                            "da_ci": m.get("da_ci_lower", 0),
+                            "da_pval": m.get("da_pvalue", 1),
+                            "pearson": r_val,
+                            "wrmse_imp": m.get("wrmse_improvement", 0),
+                            "czar_imp": m.get("czar_improvement", 0),
+                            "cal_ratio": cal,
+                            "num_passed": metrics["num_passed"],
+                        })
+                        # Log the first three configs, then every tenth.
+                        if config_num % 10 == 0 or config_num <= 3:
+                            print(
+                                f"   [{config_num:3d}/{total}] a={czar_alpha:.1f} "
+                                f"lr={lr:.2f} d={depth} l={leaves:2d} n={n_est:3d} "
+                                f"→ {metrics['num_passed']}/7 r={r_val:+.4f} DA={m.get('directional_accuracy',0):.3f}"
+                            )
+
+# =============================================================================
+# RANK & SELECT
+# =============================================================================
+results_df = pd.DataFrame(results)
+results_df = results_df.sort_values(["num_passed", "pearson"], ascending=[False, False])
+# Leaderboard: one heading per printed value, with widths matching the row format specs below.
+print(f"\n✅ Tested {len(results)} configs")
+print(f"\n   Top 10:")
+print(f"   {'#':>3} {'a':>3} {'n':>4} {'lr':>5} {'d':>2} {'l':>3} │ {'DA':>5} {'r':>7} {'WRMSE':>7} {'CZAR':>7} {'cal':>5} │ {'pts':>3}")
+print(f"   {'─'*70}")
+for _, row in results_df.head(10).iterrows():
+    print(
+        f"   {int(row['config_num']):3d} {row['czar_alpha']:.1f} "
+        f"{int(row['n_estimators']):4d} {row['learning_rate']:5.2f} "
+        f"{int(row['max_depth']):2d} {int(row['num_leaves']):3d} │ "
+        f"{row['da']:.3f} {row['pearson']:+.4f} "
+        f"{row['wrmse_imp']:+.4f} {row['czar_imp']:+.4f} {row['cal_ratio']:.3f} │ "
+        f"{int(row['num_passed']):3d}"
+    )
+# results_df is sorted best-first, so its first row names the winner; fetch that config's dict by config_num.
+best_cfg = int(results_df.iloc[0]["config_num"])
+best = next(r for r in results if r["config_num"] == best_cfg)
+print(f"\n   Best: #{best_cfg} r={best['pearson']:+.4f} DA={best['da']:.3f} ({best['num_passed']}/7)")
+
+# =============================================================================
+# TRAIN & SAVE TOP 3
+# =============================================================================
+print(f"\n[4/5] Training top 3...")
+top3 = results_df.head(3)
+trained = []
+for rank_idx, (_, row) in enumerate(top3.iterrows()):
+    std_all = df_all["_rolling_std"].values
+    czar_obj = make_czar_objective(
+        std=std_all, alpha=row["czar_alpha"]
+    )
+    model = LGBMRegressor(
+        objective=czar_obj,
+        n_estimators=int(row["n_estimators"]),
+        learning_rate=row["learning_rate"],
+        max_depth=int(row["max_depth"]),
+        num_leaves=int(row["num_leaves"]),
+        subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+        reg_alpha=0.1, reg_lambda=1.0, random_state=42, verbose=-1,
+    )
+    model.fit(df_all[feature_cols], df_all["target"])
+    trained.append((int(row["config_num"]), model, row))
+    print(f"   Model {rank_idx+1}: #{int(row['config_num'])} "
+          f"(a={row['czar_alpha']:.1f} r={row['pearson']:+.4f} DA={row['da']:.3f})")
+
+print(f"\n[5/5] Saving...")
+for rank_idx, (cfg, model, row) in enumerate(trained):
+    def _make_predict(m):  # build a self-contained predict() closure around fitted model m
+        # Serialize booster to string — avoids pickling czar_loss module
+        _model_str = m.booster_.model_to_string()
+        _feature_cols = feature_cols[:]
+        _tickers = TICKERS[:]
+        _eng_fn = engineer_directional_features  # capture feature engineering as a standalone function
+        _wf = workflow
+        def predict(nonce=None):  # nonce is accepted but not used
+            import lightgbm as lgb
+            import numpy as np
+            booster = lgb.Booster(model_str=_model_str)
+            live_row = _wf.get_live_features(ticker=_tickers[0])
+            if live_row is None or len(live_row) == 0:
+                raise ValueError("No live features")
+            live_eng = _eng_fn(live_row.iloc[0])
+            current_price = float(live_row.attrs.get("current_price", float("nan")))
+            if not np.isfinite(current_price) or current_price <= 0:
+                snap = _wf._dm.get_live_snapshot(_tickers)
+                if snap is not None and len(snap) > 0:
+                    current_price = float(snap["close"].iloc[-1])
+            if not np.isfinite(current_price) or current_price <= 0:  # fail loudly instead of returning a NaN price
+                raise ValueError(f"Invalid current price for inference: {current_price}")
+            log_ret = booster.predict(live_eng[_feature_cols].values.reshape(1, -1))[0]
+            return float(current_price * np.exp(log_ret))
+        return predict
+    # Smoke-test the closure once; it is pickled below whether or not the live call succeeds.
+    fn = _make_predict(model)
+    pkl = f"predict_38_czar_rank{rank_idx+1}.pkl"
+    try:
+        price = fn()
+        print(f"   Model {rank_idx+1} (#{cfg}): ${price:,.2f} → {pkl}")
+    except Exception as e:
+        print(f"   Model {rank_idx+1} (#{cfg}): FAILED ({e}) → {pkl}")
+    with open(pkl, "wb") as f:
+        cloudpickle.dump(fn, f)
+
+print("\n" + "=" * 70)
+print("COMPLETE!")
+print("=" * 70)
diff --git a/notebooks/testnet/topic_38_sol_8h_price/model_v3_methodology.py b/notebooks/testnet/topic_38_sol_8h_price/model_v3_methodology.py
new file mode 100644
index 0000000..2df3973
--- /dev/null
+++ b/notebooks/testnet/topic_38_sol_8h_price/model_v3_methodology.py
@@ -0,0 +1,316 @@
+#!/usr/bin/env python3
+"""
+Topic 38 — SOL/USD 8h Price Prediction — v3 (Methodology-Driven)
+================================================================
+
+Follows the 9 Principles from allora_research_model_skills:
+- Principle 1: Every feature answers "what economic quantity does this estimate?"
+- Principle 2: All windows derived from H=8 (horizon-adaptive)
+- Principle 3: No lookahead — trailing windows only
+- Principle 5: Huber loss (robust to fat tails)
+- Principle 6: Purged walk-forward CV with gap >= max_feature_window
+
+Key changes from v1/v2:
+- DROP all 240 raw OHLCV base features (overfitting trap)
+- Use ONLY ~25 engineered features organized by estimation goal
+- Horizon-adaptive windows: [H/4, H/2, H, 2H, 5H, 10H] = [2, 4, 8, 16, 40, 80]
+- All features normalized by rolling volatility
+- Gap buffer = 80 bars (= max feature window)
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+from scipy.stats import pearsonr
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["solusd"]
+DAYS_OF_HISTORY = 1825      # 5 years
+INTERVAL = "1h"
+NUMBER_OF_INPUT_BARS = 80   # 80h lookback (= 10H, max feature window)
+TARGET_BARS = 8             # 8h ahead
+H = TARGET_BARS             # horizon shorthand
+
+# CV config
+N_SPLITS = 5
+GAP_BARS = 80               # purged gap = max feature window
+
+# Grid search (smaller, focused)
+N_ESTIMATORS_CHECKPOINTS = [200, 500]
+LEARNING_RATES = [0.01, 0.03]
+MAX_DEPTHS = [3, 5]
+NUM_LEAVES = [15, 31]
+
+print("=" * 70)
+print("Topic 38 — SOL/USD 8h Price — v3 (Methodology-Driven)")
+print("=" * 70)
+
+# =============================================================================
+# STEP 1: Load Data
+# =============================================================================
+print("\n[1/5] Loading data...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key")
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+workflow.backfill(start=start_date)
+
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df_all.columns if c.startswith("feature_")]
+df_all = df_all.dropna(subset=base_feature_cols + ["target"])
+print(f"✅ Raw dataset: {len(df_all):,} samples")
+
+# =============================================================================
+# STEP 2: Horizon-Adaptive Feature Engineering
+# =============================================================================
+print(f"\n[2/5] Engineering horizon-adaptive features (H={H})...")
+
+# Windows derived from H=8: [H/4, H/2, H, 2H, 5H, 10H]
+WINDOWS = [2, 4, 8, 16, 40, 80]
+
+
+def engineer_methodology_features(row):
+    """
+    Compute horizon-adaptive features from one row of lagged OHLCV columns and
+    return them as a pd.Series. Trend, z-score, volatility and efficiency
+    entries are added only when enough bars exist for their window.
+    """
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+    # NOTE(review): np.diff yields n - 1 returns, so a window w == n never passes len(log_rets) >= w below — confirm intended.
+    log_rets = np.diff(np.log(closes + 1e-12))
+    abs_rets = np.abs(log_rets)
+    f = {}
+
+    # === ESTIMATION GOAL 1: Trend / Momentum ===
+    # "What is the recent directional move at each timescale?"
+    for w in WINDOWS:
+        if len(log_rets) >= w:
+            raw_ret = np.sum(log_rets[-w:])
+            # Normalize by rolling vol at same window
+            vol_w = np.std(log_rets[-w:], ddof=1) if w > 1 else abs_rets[-1]
+            f[f"trend_{w}h"] = raw_ret / (vol_w * np.sqrt(w) + 1e-12)
+
+    # === ESTIMATION GOAL 2: Mean Reversion ===
+    # "How far is price from its recent average, in vol units?"
+    for w in [8, 40, 80]:
+        if len(closes) >= w:
+            mean_w = np.mean(closes[-w:])
+            std_w = np.std(closes[-w:], ddof=1)
+            f[f"zscore_{w}h"] = (closes[-1] - mean_w) / (std_w + 1e-12)
+
+    # === ESTIMATION GOAL 3: Volatility Regime ===
+    # "What is the current vol level and is it expanding or contracting?"
+    for w in [8, 40, 80]:
+        if len(log_rets) >= w:
+            f[f"vol_{w}h"] = np.std(log_rets[-w:], ddof=1)
+
+    # Vol ratios (regime transitions); test key presence so a vol of exactly 0.0 still yields a ratio
+    if "vol_8h" in f and "vol_40h" in f:
+        f["vol_ratio_8_40"] = f["vol_8h"] / (f["vol_40h"] + 1e-12)
+    if "vol_8h" in f and "vol_80h" in f:
+        f["vol_ratio_8_80"] = f["vol_8h"] / (f["vol_80h"] + 1e-12)
+
+    # === ESTIMATION GOAL 4: Market Microstructure ===
+    # "What does the bid-ask spread proxy (HL range) tell us?"
+    hl_range = highs - lows
+    f["hl_range_8h"] = np.mean(hl_range[-8:])
+    f["hl_range_ratio"] = np.mean(hl_range[-8:]) / (np.mean(hl_range[-40:]) + 1e-12) if len(hl_range) >= 40 else 1.0
+
+    # === ESTIMATION GOAL 5: Volume Dynamics ===
+    # "Is attention/liquidity increasing or decreasing?"
+    f["vol_flow_ratio"] = np.mean(volumes[-8:]) / (np.mean(volumes[-40:]) + 1e-12) if len(volumes) >= 40 else 1.0
+    f["vol_spike"] = np.max(volumes[-8:]) / (np.mean(volumes[-40:]) + 1e-12) if len(volumes) >= 40 else 1.0
+
+    # === ESTIMATION GOAL 6: Trend Quality ===
+    # "Is the move directional (trending) or choppy (mean-reverting)?"
+    if len(log_rets) >= 8:
+        net_move = abs(np.sum(log_rets[-8:]))
+        total_path = np.sum(abs_rets[-8:])
+        f["efficiency_8h"] = net_move / (total_path + 1e-12)
+
+    if len(log_rets) >= 40:
+        net_move = abs(np.sum(log_rets[-40:]))
+        total_path = np.sum(abs_rets[-40:])
+        f["efficiency_40h"] = net_move / (total_path + 1e-12)
+
+    return pd.Series(f)
+
+
+print("   Computing features...")
+engineered = df_all.apply(engineer_methodology_features, axis=1)
+df_all = pd.concat([df_all, engineered], axis=1)
+
+# USE ONLY ENGINEERED FEATURES — drop raw base features
+eng_cols = list(engineered.columns)
+feature_cols = eng_cols  # NOT base_feature_cols + eng_cols
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+
+print(f"✅ {len(df_all):,} samples | {len(feature_cols)} engineered features (no raw OHLCV)")
+print(f"   Features: {feature_cols}")
+
+# =============================================================================
+# STEP 3: Purged Walk-Forward CV with Grid Search
+# =============================================================================
+print(f"\n[3/5] Grid search (purged CV, gap={GAP_BARS} bars)...")
+
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=GAP_BARS)
+evaluator = PerformanceEvaluator()
+results = []
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            fold_models = []
+            for train_idx, test_idx in tscv.split(df_all):
+                lgb = LGBMRegressor(
+                    objective="huber",
+                    alpha=0.9,
+                    n_estimators=max(N_ESTIMATORS_CHECKPOINTS),  # fit to the largest checkpoint so every checkpoint is a prefix of the ensemble
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    subsample=0.8,
+                    colsample_bytree=0.8,
+                    min_child_samples=100,
+                    reg_alpha=0.5,
+                    reg_lambda=2.0,
+                    random_state=42,
+                    verbose=-1,
+                )
+                lgb.fit(
+                    df_all.iloc[train_idx][feature_cols],
+                    df_all.iloc[train_idx]["target"],
+                )
+                fold_models.append((lgb, test_idx))
+            # Evaluate every checkpoint from the same fitted fold models, using only the first n_est trees.
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all["pred"] = np.nan
+                for lgb, test_idx in fold_models:
+                    preds = lgb.predict(
+                        df_all.iloc[test_idx][feature_cols], num_iteration=n_est
+                    )
+                    df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+                # Pool out-of-fold rows only; rows never in a test fold keep NaN and are excluded.
+                valid = ~df_all["pred"].isna()
+                y_true = df_all.loc[valid, "target"].values
+                y_pred = df_all.loc[valid, "pred"].values
+
+                # Compute our own metrics (Pearson, RMSE)
+                r, _ = pearsonr(y_true, y_pred)
+                rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
+
+                # Also get the standard evaluator metrics
+                eval_metrics = evaluator.evaluate(y_true=y_true, y_pred=y_pred)
+                # config_num is 1-based, so entry i lives at results[i - 1].
+                results.append({
+                    "config_num": config_num,
+                    "n_est": n_est,
+                    "lr": lr,
+                    "depth": depth,
+                    "leaves": leaves,
+                    "pearson_r": r,
+                    "rmse": rmse,
+                    **eval_metrics,
+                })
+
+                print(
+                    f"   [{config_num:2d}] n={n_est:3d} lr={lr:.2f} d={depth} l={leaves:2d} "
+                    f"→ r={r:+.4f} RMSE={rmse:.6f} "
+                    f"DA={eval_metrics.get('metrics', eval_metrics).get('da', 0):.3f} "  # NOTE(review): model_czar.py reads 'directional_accuracy' — confirm the key
+                    f"({eval_metrics['num_passed']}/7)"
+                )
+
+# Rank by Pearson r (primary for log-return prediction)
+results_df = pd.DataFrame(results).sort_values("pearson_r", ascending=False)
+best = results_df.iloc[0]
+
+print(f"\n✅ Best: r={best['pearson_r']:+.4f} RMSE={best['rmse']:.6f} "
+      f"({best['num_passed']}/7) — n={int(best['n_est'])}, lr={best['lr']}, "
+      f"d={int(best['depth'])}, l={int(best['leaves'])}")
+
+# =============================================================================
+# STEP 4: Detailed Evaluation
+# =============================================================================
+print("\n[4/5] Detailed evaluation...")
+best_result = results[int(best["config_num"]) - 1]
+evaluator.print_report(best_result, detailed=False)
+
+# =============================================================================
+# STEP 5: Train Final Model & Save
+# =============================================================================
+print("\n[5/5] Training final model...")
+
+final_model = LGBMRegressor(
+    objective="huber",
+    alpha=0.9,
+    n_estimators=int(best["n_est"]),
+    learning_rate=best["lr"],
+    max_depth=int(best["depth"]),
+    num_leaves=int(best["leaves"]),
+    subsample=0.8,
+    colsample_bytree=0.8,
+    min_child_samples=100,
+    reg_alpha=0.5,
+    reg_lambda=2.0,
+    random_state=42,
+    verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all["target"])
+print(f"✅ Trained on {len(df_all):,} samples, {len(feature_cols)} features")
+
+
+def predict(nonce=None):
+    """Predict SOL/USD price 8 hours ahead; raises ValueError without live features or a usable price."""
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+
+    live_eng = engineer_methodology_features(live_row.iloc[0])
+    x = live_eng[feature_cols].values.reshape(1, -1)
+    predicted_log_return = final_model.predict(x)[0]
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        snap = workflow._dm.get_live_snapshot(TICKERS)
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+
+    predicted_price = current_price * np.exp(predicted_log_return)
+    print(f"\nPrediction: ${predicted_price:,.2f} ({predicted_log_return:+.6f} log return)")
+    return float(predicted_price)
+
+
+print("\n🧪 Testing prediction...")
+test_pred = predict()
+
+with open("predict_38.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print(f"\n✅ Saved predict_38.pkl")
+print(f"   Pearson r: {best['pearson_r']:+.4f} | RMSE: {best['rmse']:.6f}")
+print(f"   Features: {len(feature_cols)} (engineered only, no raw OHLCV)")
diff --git a/notebooks/testnet/topic_41_eth_8h_price/example.py b/notebooks/testnet/topic_41_eth_8h_price/example.py
new file mode 100644
index 0000000..c9d3a42
--- /dev/null
+++ b/notebooks/testnet/topic_41_eth_8h_price/example.py
@@ -0,0 +1,625 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 41 ETH/USD Price Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 8-hour ETH/USD price prediction using the 
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["ethusd"]
+DAYS_OF_HISTORY = 1825     # ~5 years
+INTERVAL = "1h"            # 1-hour bars
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 48   # 2 days of hourly bars (48h lookback)
+TARGET_BARS = 8             # Predict 8 bars (8 hours) ahead
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 800
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 600]
+LEARNING_RATES = [0.01, 0.03, 0.07, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31]
+TOP_K_FEATURES_GRID = [5, 10, 25, 50]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 41 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Convert numpy/pandas objects into JSON-serializable Python types."""
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()
+    if isinstance(obj, (np.bool_,)):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_to_serializable(v) for v in obj]
+    return obj
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Persist config/metrics/predictions and basic diagnostic plots for reproducibility."""
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config
+    config = {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics
+    metrics_payload = {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 41 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Add return, momentum, and volatility features (no data leakage - same row only)"""
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f'feature_close_{i}'] for i in range(n)])
+    highs = np.array([row[f'feature_high_{i}'] for i in range(n)])
+    lows = np.array([row[f'feature_low_{i}'] for i in range(n)])
+    volumes = np.array([row[f'feature_volume_{i}'] for i in range(n)])
+    
+    log_rets = np.diff(np.log(closes + 1e-12))
+    features = {}
+    
+    # Log returns at multiple horizons
+    features['ret_1h'] = log_rets[-1] if len(log_rets) >= 1 else 0
+    features['ret_4h'] = np.sum(log_rets[-4:]) if len(log_rets) >= 4 else 0
+    features['ret_8h'] = np.sum(log_rets[-8:]) if len(log_rets) >= 8 else 0
+    features['ret_24h'] = np.sum(log_rets[-24:]) if len(log_rets) >= 24 else 0
+    features['ret_48h'] = np.sum(log_rets) if len(log_rets) >= 2 else 0
+    
+    # Realised volatility at multiple horizons
+    features['vol_8h'] = np.std(log_rets[-8:], ddof=1) if len(log_rets) >= 8 else 0
+    features['vol_24h'] = np.std(log_rets[-24:], ddof=1) if len(log_rets) >= 24 else 0
+    features['vol_48h'] = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 0
+    
+    # Momentum: short vs long return
+    features['momentum_ratio'] = features['ret_8h'] / (abs(features['ret_48h']) + 1e-12)
+    
+    # Mean reversion signal: distance from recent mean
+    features['mean_reversion'] = (closes[-1] - np.mean(closes[-24:])) / (np.std(closes[-24:]) + 1e-12) if n >= 24 else 0
+    
+    # High-low range (proxy for intraday vol)
+    hl_range = highs - lows
+    features['hl_range_8h'] = np.mean(hl_range[-8:])
+    features['hl_range_ratio'] = np.mean(hl_range[-8:]) / (np.mean(hl_range) + 1e-12)
+    
+    # Volume trend
+    features['volume_ratio'] = np.mean(volumes[-8:]) / (np.mean(volumes) + 1e-12)
+    
+    # Trend strength (efficiency ratio)
+    net_move = abs(np.sum(log_rets[-8:]))
+    total_path = np.sum(np.abs(log_rets[-8:]))
+    features['efficiency_8h'] = net_move / (total_path + 1e-12)
+    
+    return pd.Series(features)
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+# Track feature importance across all runs
+all_feature_importances = {}
+
+total_configs = len(LEARNING_RATES) * len(MAX_DEPTHS) * len(NUM_LEAVES) * len(TOP_K_FEATURES_GRID) * len(N_ESTIMATORS_CHECKPOINTS)
+print(f"   Grid: {len(LEARNING_RATES)} lr × {len(MAX_DEPTHS)} depth × {len(NUM_LEAVES)} leaves × {len(TOP_K_FEATURES_GRID)} topK × {len(N_ESTIMATORS_CHECKPOINTS)} checkpoints = {total_configs} configs")
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            
+            # Stage 1: Get feature importances ONCE per (lr, depth, leaves) combo
+            # (shared across TOP_K values to save compute)
+            fold_importances = []
+            fold_selectors = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train_full = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                
+                selector = LGBMRegressor(
+                    n_estimators=200,
+                    learning_rate=0.05,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    subsample=0.8,
+                    colsample_bytree=0.7,
+                    min_child_samples=50,
+                    random_state=42,
+                    verbose=-1
+                )
+                selector.fit(X_train_full, y_train)
+                fold_importances.append(selector.feature_importances_)
+                fold_selectors.append((train_idx, test_idx, y_train))
+            
+            # Log top features (averaged across folds)
+            avg_imp = np.mean(fold_importances, axis=0)
+            for feat_idx in np.argsort(avg_imp)[-10:]:
+                fname = feature_cols[feat_idx]
+                all_feature_importances[fname] = all_feature_importances.get(fname, 0) + avg_imp[feat_idx]
+            
+            # Stage 2: For each TOP_K, select features and retrain
+            for top_k in TOP_K_FEATURES_GRID:
+                fold_models = []
+                for fold_idx, (train_idx, test_idx, y_train) in enumerate(fold_selectors):
+                    importances = fold_importances[fold_idx]
+                    top_idx = np.argsort(importances)[-top_k:]
+                    selected = [feature_cols[i] for i in top_idx]
+                    
+                    X_train_sel = df_all.iloc[train_idx][selected]
+                    lgb = LGBMRegressor(
+                        n_estimators=N_ESTIMATORS_MAX,
+                        learning_rate=lr,
+                        max_depth=depth,
+                        num_leaves=leaves,
+                        subsample=0.8,
+                        colsample_bytree=0.7,
+                        min_child_samples=50,
+                        reg_alpha=0.1,
+                        reg_lambda=1.0,
+                        random_state=42,
+                        verbose=-1
+                    )
+                    lgb.fit(X_train_sel, y_train)
+                    fold_models.append((lgb, test_idx, selected))
+                
+                # Evaluate at tree count checkpoints
+                for n_est in N_ESTIMATORS_CHECKPOINTS:
+                    config_num += 1
+                    df_all['pred'] = np.nan
+
+                    for lgb, test_idx, selected in fold_models:
+                        X_test = df_all.iloc[test_idx][selected]
+                        preds = lgb.predict(X_test, num_iteration=n_est)
+                        df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+
+                    # Evaluate this checkpoint (kept inside the n_est loop so every checkpoint is scored and stored)
+                    valid_mask = ~df_all['pred'].isna()
+                    metrics = evaluator.evaluate(
+                        y_true=df_all.loc[valid_mask, 'target'],
+                        y_pred=df_all.loc[valid_mask, 'pred']
+                    )
+
+                    # Store results
+                    results.append({
+                        'config_num': config_num,
+                        'n_estimators': n_est,
+                        'learning_rate': lr,
+                        'max_depth': depth,
+                        'num_leaves': leaves,
+                        'top_k': top_k,
+                        'predictions': df_all['pred'].copy(),
+                        **metrics
+                    })
+
+                    # Track calibration: std(pred) / std(target) — want ~1.0
+                    y_t = df_all.loc[valid_mask, 'target'].values
+                    y_p = df_all.loc[valid_mask, 'pred'].values
+                    cal_ratio = np.std(y_p) / (np.std(y_t) + 1e-12)
+                    from scipy.stats import pearsonr as _pr
+                    r_val, _ = _pr(y_t, y_p)
+                    results[-1]['cal_ratio'] = cal_ratio
+                    results[-1]['pearson_r_raw'] = r_val
+
+                    if config_num % 10 == 0 or config_num <= 3:
+                        print(f"   [{config_num:3d}/{total_configs}] n={n_est:3d} lr={lr:.2f} d={depth} l={leaves:2d} k={top_k:2d} "
+                              f"→ {metrics['num_passed']}/7 r={r_val:+.4f} cal={cal_ratio:.3f}")
+
+# Analyze results — rank by AVERAGE RANK across ALL 7 core metrics + calibration
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+
+# Extract the 7 core metrics from the nested 'metrics' dict
+for r_idx, r in enumerate(results):
+    m = r.get('metrics', r)
+    results_df.loc[r_idx, 'da'] = m.get('directional_accuracy', 0)
+    results_df.loc[r_idx, 'da_ci'] = m.get('da_ci_lower', 0)
+    results_df.loc[r_idx, 'da_pval'] = m.get('da_pvalue', 1)
+    results_df.loc[r_idx, 'pearson'] = m.get('pearson_r', 0)
+    results_df.loc[r_idx, 'pearson_pval'] = m.get('pearson_pvalue', 1)
+    results_df.loc[r_idx, 'wrmse_imp'] = m.get('wrmse_improvement', 0)
+    results_df.loc[r_idx, 'czar_imp'] = m.get('czar_improvement', 0)
+
+# Compute ranks for each metric (lower rank = better)
+rank_cols = {}
+rank_cols['rk_da'] = results_df['da'].rank(ascending=False)
+rank_cols['rk_da_ci'] = results_df['da_ci'].rank(ascending=False)
+rank_cols['rk_da_pval'] = results_df['da_pval'].rank(ascending=True)       # lower p = better
+rank_cols['rk_pearson'] = results_df['pearson'].rank(ascending=False)
+rank_cols['rk_pear_pval'] = results_df['pearson_pval'].rank(ascending=True) # lower p = better
+rank_cols['rk_wrmse'] = results_df['wrmse_imp'].rank(ascending=False)
+rank_cols['rk_czar'] = results_df['czar_imp'].rank(ascending=False)
+rank_cols['rk_cal'] = (1 - results_df['cal_ratio']).abs().rank(ascending=True)  # closer to 1.0 = better
+
+for col, vals in rank_cols.items():
+    results_df[col] = vals
+
+# Average rank of core 7 metrics (primary) and calibration (secondary)
+core_rank_cols = [c for c in rank_cols if c != 'rk_cal']
+results_df['core_avg_rank'] = results_df[core_rank_cols].mean(axis=1)
+results_df['avg_rank'] = results_df[list(rank_cols.keys())].mean(axis=1)
+
+# Sort: num_passed DESC (most important), then core_avg_rank ASC (tiebreaker)
+results_df = results_df.sort_values(['num_passed', 'core_avg_rank'], ascending=[False, True])
+
+print(f"\n✅ Tested {len(results)} configurations")
+
+# Feature importance report
+print(f"\n   Top 20 most important features (aggregated across all configs):")
+sorted_feats = sorted(all_feature_importances.items(), key=lambda x: x[1], reverse=True)[:20]
+for i, (fname, imp) in enumerate(sorted_feats):
+    print(f"   {i+1:2d}. {fname:<35s} {imp:>10.1f}")
+
+print(f"\n   Top 10 models (ranked by avg rank across 7 metrics + calibration):")
+print(f"   {'#':>3} {'n':>4} {'lr':>5} {'d':>2} {'l':>3} {'k':>3} │ {'DA':>5} {'CI':>5} {'pval':>5} {'r':>6} {'WRMSE':>6} {'CZAR':>6} {'cal':>5} │ {'pts':>3} {'rk':>5}")
+print(f"   {'─'*85}")
+for _, row in results_df.head(10).iterrows():
+    print(f"   {int(row['config_num']):3d} {int(row['n_estimators']):4d} {row['learning_rate']:5.2f} "
+          f"{int(row['max_depth']):2d} {int(row['num_leaves']):3d} {int(row['top_k']):3d} │ "
+          f"{row['da']:.3f} {row['da_ci']:.3f} {row['da_pval']:.3f} {row['pearson']:+.4f} "
+          f"{row['wrmse_imp']:+.4f} {row['czar_imp']:+.4f} {row['cal_ratio']:.3f} │ "
+          f"{int(row['num_passed']):3d} {row['avg_rank']:5.1f}")
+
+# Select the top row of the sorted table (num_passed first, then core_avg_rank) — look up by config_num in the results list
+best_cfg_num = int(results_df.iloc[0]['config_num'])
+best_result = next(r for r in results if r['config_num'] == best_cfg_num)
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\n   Best (avg rank): Config #{best_result['config_num']}")
+print(f"   r={results_df.iloc[0]['pearson']:+.4f} cal={results_df.iloc[0]['cal_ratio']:.3f} "
+      f"DA={results_df.iloc[0]['da']:.3f} WRMSE={results_df.iloc[0]['wrmse_imp']:+.4f} "
+      f"({best_result['num_passed']}/7)")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Models (top 3 diverse configs)
+# =============================================================================
+print("\n[6/6] Training production models from top cohort...")
+
+# Pick top 3 by average rank for deployment diversity
+top_configs = results_df.head(3)
+trained_models = []
+
+for rank_idx, (_, row) in enumerate(top_configs.iterrows()):
+    params = {
+        'n_estimators': int(row['n_estimators']),
+        'learning_rate': row['learning_rate'],
+        'max_depth': int(row['max_depth']),
+        'num_leaves': int(row['num_leaves']),
+    }
+    
+    # Feature selection on full training data (same approach as CV)
+    k = int(row['top_k'])
+    selector = LGBMRegressor(
+        n_estimators=200, learning_rate=0.05,
+        max_depth=int(row['max_depth']), num_leaves=int(row['num_leaves']),
+        subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+        random_state=42, verbose=-1,
+    )
+    selector.fit(df_all[feature_cols], df_all['target'])
+    top_idx = np.argsort(selector.feature_importances_)[-k:]
+    selected = [feature_cols[i] for i in top_idx]
+    
+    # Train final model on selected features
+    model = LGBMRegressor(
+        **params,
+        subsample=0.8,
+        colsample_bytree=0.7,
+        min_child_samples=50,
+        reg_alpha=0.1,
+        reg_lambda=1.0,
+        random_state=42,
+        verbose=-1,
+    )
+    model.fit(df_all[selected], df_all['target'])
+    trained_models.append((int(row['config_num']), params, model, selected, row))
+    print(f"   Model {rank_idx+1}: Config #{int(row['config_num'])} "
+          f"(k={k}, r={row['pearson']:+.4f} cal={row['cal_ratio']:.3f} pts={int(row['num_passed'])})")
+
+# Use the best (rank 1) as the primary
+best_config_num, best_params, final_model, best_selected, best_row = trained_models[0]
+best_result = next(r for r in results if r['config_num'] == best_config_num)
+print(f"\n✅ Trained {len(trained_models)} models from top cohort")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict ETH/USD price 8 hours into the future.
+    
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+    
+    Returns:
+        float: Predicted ETH price in USD
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+    
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+    
+    # Get current price from live feature context (remote-only path)
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        # Fallback to live snapshot (still remote API; no local parquet)
+        snap = workflow._dm.get_live_snapshot(TICKERS)
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+    
+    # Predict log return using only the columns final_model was fitted on (best_selected, not the full feature_cols)
+    predicted_log_return = final_model.predict(live_features[best_selected].values.reshape(1, -1))[0]
+    
+    # Convert log return to price
+    predicted_price = current_price * np.exp(predicted_log_return)
+    
+    print(f"\nLive Prediction: ${predicted_price:,.2f} ({predicted_log_return:+.4f} log return)")
+    
+    return float(predicted_price)
+
+# Test and save ALL models from the cohort
+print("\n🧪 Testing and saving models...")
+
+for rank_idx, (cfg_num, params, model, selected, row) in enumerate(trained_models):
+    # Create a predict function that captures this specific model + its selected features
+    def _make_predict(m, sel):
+        def predict(nonce=None):
+            live_row = workflow.get_live_features(ticker=TICKERS[0])
+            if live_row is None or len(live_row) == 0:
+                raise ValueError("Could not get live features")
+            live_returns = engineer_returns(live_row.iloc[0])
+            live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+            current_price = float(live_row.attrs.get("current_price", np.nan))
+            if not np.isfinite(current_price) or current_price <= 0:
+                snap = workflow._dm.get_live_snapshot(TICKERS)
+                if snap is not None and len(snap) > 0 and "close" in snap.columns:
+                    current_price = float(snap["close"].iloc[-1])
+            if not np.isfinite(current_price) or current_price <= 0:
+                raise ValueError(f"Invalid current price: {current_price}")
+            predicted_log_return = m.predict(live_features[sel].values.reshape(1, -1))[0]
+            predicted_price = current_price * np.exp(predicted_log_return)
+            return float(predicted_price)
+        return predict
+    
+    predict_fn = _make_predict(model, selected)
+    pkl_name = f"predict_41_rank{rank_idx+1}.pkl"
+    
+    try:
+        price = predict_fn()
+        r_val = row.get('pearson_r_raw', row.get('pearson', 0))
+        cal = row.get('cal_ratio', 0)
+        pts = int(row.get('num_passed', 0))
+        print(f"   Model {rank_idx+1} (#{cfg_num}): ${price:,.2f} "
+              f"(r={r_val:+.4f} cal={cal:.3f} pts={pts}) → {pkl_name}")
+    except Exception as e:
+        print(f"   Model {rank_idx+1} (#{cfg_num}): FAILED ({e}) → {pkl_name}")
+    
+    with open(pkl_name, "wb") as f:
+        cloudpickle.dump(predict_fn, f)
+
+# Also save rank1 as the default predict_41.pkl
+with open("predict_41.pkl", "wb") as f:
+    cloudpickle.dump(_make_predict(trained_models[0][2], trained_models[0][3]), f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {len(trained_models)} models saved")
+print(f"Pickles: predict_41.pkl (best), predict_41_rank1/2/3.pkl (cohort)")
+print("="*80)
+
diff --git a/notebooks/testnet/topic_41_eth_8h_price/model_czar.py b/notebooks/testnet/topic_41_eth_8h_price/model_czar.py
new file mode 100644
index 0000000..483f9b4
--- /dev/null
+++ b/notebooks/testnet/topic_41_eth_8h_price/model_czar.py
@@ -0,0 +1,339 @@
+#!/usr/bin/env python3
+"""
+Topic 41 — ETH/USD 8h Price — v3 (CZAR Loss)
+=============================================
+
+Uses CZAR loss instead of MSE/Huber. CZAR penalizes wrong-sign predictions
+heavily, softens near-zero returns, and normalizes by local volatility.
+This should help ETH where the signal is weak — CZAR won't waste capacity
+fitting noise on near-zero returns.
+
+Combined with the directional features from v2.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+from scipy.stats import pearsonr
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator, make_czar_objective
+from allora_forge_builder_kit.utils import get_api_key
+
+# =============================================================================
+# CONFIG
+# =============================================================================
+TICKERS = ["ethusd"]
+DAYS_OF_HISTORY = 1825
+INTERVAL = "1h"
+NUMBER_OF_INPUT_BARS = 48
+TARGET_BARS = 8
+
+N_SPLITS = 3
+# Smaller grid — focus on CZAR-specific params
+N_ESTIMATORS_MAX = 600
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 600]
+LEARNING_RATES = [0.01, 0.03, 0.07]
+MAX_DEPTHS = [3, 5]
+NUM_LEAVES = [15, 31]
+CZAR_ALPHAS = [0.3, 0.5, 0.7, 1.0]  # CZAR alpha param (MSE curvature)
+
+print("=" * 70)
+print("Topic 41 — ETH/USD 8h Price — v3 (CZAR Loss)")
+print("=" * 70)
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/5] Loading data...")
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key")
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS, number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS, interval=INTERVAL,
+    data_source="allora", api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+workflow.backfill(start=start_date)
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df_all.columns if c.startswith("feature_")]
+df_all = df_all.dropna(subset=base_feature_cols + ["target"])
+print(f"✅ {len(df_all):,} samples")
+
+# =============================================================================
+# DIRECTIONAL FEATURES (same as v2)
+# =============================================================================
+print("\n[2/5] Engineering directional features...")
+
+
+def engineer_directional_features(row):  # row: one sample with feature_{close,high,low,volume}_{i} columns
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+    # Per-bar log returns over the lookback window (the epsilon guards log(0)).
+    log_rets = np.diff(np.log(closes + 1e-12))
+    abs_rets = np.abs(log_rets)
+    f = {}
+    # Cumulative log returns over the trailing 1/4/8/24 bars and the full window.
+    f["ret_1h"] = log_rets[-1] if len(log_rets) >= 1 else 0
+    f["ret_4h"] = np.sum(log_rets[-4:]) if len(log_rets) >= 4 else 0
+    f["ret_8h"] = np.sum(log_rets[-8:]) if len(log_rets) >= 8 else 0
+    f["ret_24h"] = np.sum(log_rets[-24:]) if len(log_rets) >= 24 else 0
+    f["ret_48h"] = np.sum(log_rets) if len(log_rets) >= 2 else 0
+    # Sample std (ddof=1) of per-bar returns; 1e-6 fallback when there are too few bars.
+    vol_8h = np.std(log_rets[-8:], ddof=1) if len(log_rets) >= 8 else 1e-6
+    vol_24h = np.std(log_rets[-24:], ddof=1) if len(log_rets) >= 24 else 1e-6
+    vol_48h = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 1e-6
+    f["vol_8h"] = vol_8h
+    f["vol_24h"] = vol_24h
+    f["vol_48h"] = vol_48h
+    # Returns scaled by 8-bar volatility times sqrt(horizon): 1, 2 (= sqrt(4)), sqrt(8).
+    f["znorm_ret_1h"] = log_rets[-1] / (vol_8h + 1e-12) if len(log_rets) >= 1 else 0
+    f["znorm_ret_4h"] = np.sum(log_rets[-4:]) / (vol_8h * 2 + 1e-12) if len(log_rets) >= 4 else 0
+    f["znorm_ret_8h"] = np.sum(log_rets[-8:]) / (vol_8h * np.sqrt(8) + 1e-12) if len(log_rets) >= 8 else 0
+    # Up-move vs down-move volatility asymmetry and share of positive bars (last 24 bars).
+    if len(log_rets) >= 24:
+        up_rets = log_rets[-24:][log_rets[-24:] > 0]
+        dn_rets = log_rets[-24:][log_rets[-24:] < 0]
+        up_vol = np.std(up_rets, ddof=1) if len(up_rets) > 1 else 1e-6
+        dn_vol = np.std(np.abs(dn_rets), ddof=1) if len(dn_rets) > 1 else 1e-6
+        f["vol_skew_24h"] = (up_vol - dn_vol) / (up_vol + dn_vol + 1e-12)
+        f["up_fraction_24h"] = np.mean(log_rets[-24:] > 0)
+    else:
+        f["vol_skew_24h"] = 0
+        f["up_fraction_24h"] = 0.5
+    # Share of positive bars over the last 8 bars (0.5 when the window is too short).
+    f["up_fraction_8h"] = np.mean(log_rets[-8:] > 0) if len(log_rets) >= 8 else 0.5
+    # Lag-1 autocorrelation of the last 9 returns; a non-finite result is replaced by 0.
+    if len(log_rets) >= 10:
+        f["ret_autocorr"] = np.corrcoef(log_rets[-9:], log_rets[-10:-1])[0, 1]
+        if not np.isfinite(f["ret_autocorr"]):
+            f["ret_autocorr"] = 0
+    else:
+        f["ret_autocorr"] = 0
+    # Volatility regime: short/long vol ratio and a flag for 8-bar vol above 24-bar vol.
+    f["vol_ratio_8_48"] = vol_8h / (vol_48h + 1e-12)
+    f["vol_expanding"] = 1.0 if vol_8h > vol_24h else 0.0
+    # Mean volume of the last 4 bars relative to the last 24, signed against the 4-bar return.
+    vol_trend = np.mean(volumes[-4:]) / (np.mean(volumes[-24:]) + 1e-12) if len(volumes) >= 24 else 1
+    f["vol_price_divergence"] = vol_trend * np.sign(-f["ret_4h"])
+    f["volume_ratio"] = vol_trend
+    # Z-score of the latest close against the trailing 24 closes.
+    if len(closes) >= 24:
+        f["zscore_24h"] = (closes[-1] - np.mean(closes[-24:])) / (np.std(closes[-24:], ddof=1) + 1e-12)
+    else:
+        f["zscore_24h"] = 0
+    # Efficiency ratio: |net 8-bar move| divided by the sum of absolute per-bar moves.
+    if len(log_rets) >= 8:
+        net = abs(np.sum(log_rets[-8:]))
+        path = np.sum(abs_rets[-8:])
+        f["efficiency_8h"] = net / (path + 1e-12)
+    else:
+        f["efficiency_8h"] = 0
+    # High-low range: 8-bar mean, and that mean relative to the full-window mean.
+    hl = highs - lows
+    f["hl_range_8h"] = np.mean(hl[-8:])
+    f["hl_range_ratio"] = np.mean(hl[-8:]) / (np.mean(hl) + 1e-12)
+    # Returned as a Series so DataFrame.apply(axis=1) expands the keys into columns.
+    return pd.Series(f)
+
+
+engineered = df_all.apply(engineer_directional_features, axis=1)
+df_all = pd.concat([df_all, engineered], axis=1)
+feature_cols = list(engineered.columns)
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+print(f"✅ {len(feature_cols)} features")
+
+# =============================================================================
+# COMPUTE ROLLING VOL FOR CZAR (needed for the loss)
+# =============================================================================
+# Use 8h rolling std of target as the vol normalization for CZAR
+targets = df_all["target"].values
+rolling_std = pd.Series(targets).rolling(8, min_periods=2).std().fillna(targets.std()).values
+df_all["_rolling_std"] = rolling_std
+
+# =============================================================================
+# GRID SEARCH WITH CZAR LOSS
+# =============================================================================
+print(f"\n[3/5] Grid search with CZAR loss...")
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=TARGET_BARS)
+evaluator = PerformanceEvaluator()
+results = []
+config_num = 0
+
+total = len(LEARNING_RATES) * len(MAX_DEPTHS) * len(NUM_LEAVES) * len(CZAR_ALPHAS) * len(N_ESTIMATORS_CHECKPOINTS)
+print(f"   {total} configs (CZAR alpha × LR × depth × leaves × checkpoints)")
+
+for czar_alpha in CZAR_ALPHAS:
+    for lr in LEARNING_RATES:
+        for depth in MAX_DEPTHS:
+            for leaves in NUM_LEAVES:
+                    fold_models = []
+                    for train_idx, test_idx in tscv.split(df_all):
+                        y_train = df_all.iloc[train_idx]["target"].values
+                        std_train = df_all.iloc[train_idx]["_rolling_std"].values
+
+                        # Create CZAR objective for this fold
+                        czar_obj = make_czar_objective(
+                            std=std_train, alpha=czar_alpha
+                        )
+
+                        lgb = LGBMRegressor(
+                            objective=czar_obj,
+                            n_estimators=N_ESTIMATORS_MAX,
+                            learning_rate=lr,
+                            max_depth=depth,
+                            num_leaves=leaves,
+                            subsample=0.8,
+                            colsample_bytree=0.7,
+                            min_child_samples=50,
+                            reg_alpha=0.1,
+                            reg_lambda=1.0,
+                            random_state=42,
+                            verbose=-1,
+                        )
+                        lgb.fit(
+                            df_all.iloc[train_idx][feature_cols],
+                            y_train,
+                        )
+                        fold_models.append((lgb, test_idx))
+
+                    for n_est in N_ESTIMATORS_CHECKPOINTS:
+                        config_num += 1
+                        df_all["pred"] = np.nan
+                        for lgb, test_idx in fold_models:
+                            preds = lgb.predict(
+                                df_all.iloc[test_idx][feature_cols],
+                                num_iteration=n_est,
+                            )
+                            df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                        valid = ~df_all["pred"].isna()
+                        y_t = df_all.loc[valid, "target"].values
+                        y_p = df_all.loc[valid, "pred"].values
+                        metrics = evaluator.evaluate(y_true=y_t, y_pred=y_p)
+                        r_val, _ = pearsonr(y_t, y_p)
+                        cal = np.std(y_p) / (np.std(y_t) + 1e-12)
+
+                        m = metrics.get("metrics", metrics)
+                        results.append({
+                            "config_num": config_num,
+                            "n_estimators": n_est,
+                            "learning_rate": lr,
+                            "max_depth": depth,
+                            "num_leaves": leaves,
+                            "czar_alpha": czar_alpha,
+                            "da": m.get("directional_accuracy", 0),
+                            "da_ci": m.get("da_ci_lower", 0),
+                            "da_pval": m.get("da_pvalue", 1),
+                            "pearson": r_val,
+                            "wrmse_imp": m.get("wrmse_improvement", 0),
+                            "czar_imp": m.get("czar_improvement", 0),
+                            "cal_ratio": cal,
+                            "num_passed": metrics["num_passed"],
+                        })
+
+                        if config_num % 10 == 0 or config_num <= 3:
+                            print(
+                                f"   [{config_num:3d}/{total}] a={czar_alpha:.1f} "
+                                f"lr={lr:.2f} d={depth} l={leaves:2d} n={n_est:3d} "
+                                f"→ {metrics['num_passed']}/7 r={r_val:+.4f} DA={m.get('directional_accuracy',0):.3f}"
+                            )
+
+# =============================================================================
+# RANK & SELECT
+# =============================================================================
+results_df = pd.DataFrame(results)
+results_df = results_df.sort_values(["num_passed", "pearson"], ascending=[False, False])
+# Best config first: most evaluator checks passed, ties broken by higher Pearson r.
+print(f"\n✅ Tested {len(results)} configs")
+print(f"\n   Top 10:")
+print(f"   {'#':>3} {'a':>3} {'n':>4} {'lr':>5} {'d':>2} {'l':>3} │ {'DA':>5} {'r':>7} {'WRMSE':>7} {'CZAR':>7} {'cal':>5} │ {'pts':>3}")
+print(f"   {'─'*70}")
+for _, row in results_df.head(10).iterrows():
+    print(  # one value per header column above; widths match the header widths
+        f"   {int(row['config_num']):3d} {row['czar_alpha']:.1f} "
+        f"{int(row['n_estimators']):4d} {row['learning_rate']:5.2f} "
+        f"{int(row['max_depth']):2d} {int(row['num_leaves']):3d} │ "
+        f"{row['da']:.3f} {row['pearson']:+.4f} "
+        f"{row['wrmse_imp']:+.4f} {row['czar_imp']:+.4f} {row['cal_ratio']:.3f} │ "
+        f"{int(row['num_passed']):3d}"
+    )
+# Look the winner up in the raw results list by its config number.
+best_cfg = int(results_df.iloc[0]["config_num"])
+best = next(r for r in results if r["config_num"] == best_cfg)
+print(f"\n   Best: #{best_cfg} r={best['pearson']:+.4f} DA={best['da']:.3f} ({best['num_passed']}/7)")
+
+# =============================================================================
+# TRAIN & SAVE TOP 3
+# =============================================================================
+print(f"\n[4/5] Training top 3...")
+top3 = results_df.head(3)
+trained = []
+for rank_idx, (_, row) in enumerate(top3.iterrows()):
+    std_all = df_all["_rolling_std"].values
+    czar_obj = make_czar_objective(
+        std=std_all, alpha=row["czar_alpha"]
+    )
+    model = LGBMRegressor(
+        objective=czar_obj,
+        n_estimators=int(row["n_estimators"]),
+        learning_rate=row["learning_rate"],
+        max_depth=int(row["max_depth"]),
+        num_leaves=int(row["num_leaves"]),
+        subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+        reg_alpha=0.1, reg_lambda=1.0, random_state=42, verbose=-1,
+    )
+    model.fit(df_all[feature_cols], df_all["target"])
+    trained.append((int(row["config_num"]), model, row))
+    print(f"   Model {rank_idx+1}: #{int(row['config_num'])} "
+          f"(a={row['czar_alpha']:.1f} r={row['pearson']:+.4f} DA={row['da']:.3f})")
+
+print(f"\n[5/5] Saving...")
+for rank_idx, (cfg, model, row) in enumerate(trained):
+    def _make_predict(m):  # builds a self-contained predict() closure around fitted model m
+        # Serialize booster to string — avoids pickling czar_loss module
+        _model_str = m.booster_.model_to_string()
+        _feature_cols = feature_cols[:]
+        _tickers = TICKERS[:]
+        _eng_fn = engineer_directional_features  # feature engineering captured as a standalone function
+        _wf = workflow
+        def predict(nonce=None):  # nonce is accepted but not used; returns the predicted price as float
+            import lightgbm as lgb
+            import numpy as np
+            booster = lgb.Booster(model_str=_model_str)
+            live_row = _wf.get_live_features(ticker=_tickers[0])
+            if live_row is None or len(live_row) == 0:
+                raise ValueError("No live features")
+            live_eng = _eng_fn(live_row.iloc[0])
+            current_price = float(live_row.attrs.get("current_price", float("nan")))
+            if not np.isfinite(current_price) or current_price <= 0:
+                snap = _wf._dm.get_live_snapshot(_tickers)  # fallback: latest close from the live snapshot
+                if snap is not None and len(snap) > 0 and "close" in snap.columns:
+                    current_price = float(snap["close"].iloc[-1])
+            if not np.isfinite(current_price) or current_price <= 0:
+                raise ValueError(f"Invalid current price: {current_price}")  # never emit a NaN/<=0 price
+            log_ret = booster.predict(live_eng[_feature_cols].values.reshape(1, -1))[0]
+            return float(current_price * np.exp(log_ret))
+        return predict
+
+    fn = _make_predict(model)
+    pkl = f"predict_41_czar_rank{rank_idx+1}.pkl"
+    try:  # best-effort smoke test: a failed live call is reported but the pickle is still written
+        price = fn()
+        print(f"   Model {rank_idx+1} (#{cfg}): ${price:,.2f} → {pkl}")
+    except Exception as e:
+        print(f"   Model {rank_idx+1} (#{cfg}): FAILED ({e}) → {pkl}")
+    with open(pkl, "wb") as f:
+        cloudpickle.dump(fn, f)
+
+print("\n" + "=" * 70)
+print("COMPLETE!")
+print("=" * 70)
diff --git a/notebooks/testnet/topic_42_btc_8h_price/example.py b/notebooks/testnet/topic_42_btc_8h_price/example.py
new file mode 100644
index 0000000..7991e74
--- /dev/null
+++ b/notebooks/testnet/topic_42_btc_8h_price/example.py
@@ -0,0 +1,625 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 42 BTC/USD Price Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 8-hour BTC/USD price prediction using the 
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 1825     # ~5 years
+INTERVAL = "1h"            # 1-hour bars
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 48   # 2 days of hourly bars (48h lookback)
+TARGET_BARS = 8             # Predict 8 bars (8 hours) ahead
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 800
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 600]
+LEARNING_RATES = [0.01, 0.03, 0.07, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31]
+TOP_K_FEATURES_GRID = [5, 10, 25, 50]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 42 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas objects into JSON-serializable Python types.
+    Dicts, lists and tuples are walked (tuples become lists); other values pass through.
+    """
+    if isinstance(obj, (np.floating, np.integer, np.bool_)):
+        return obj.item()  # numpy scalar -> native float / int / bool
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [_to_serializable(v) for v in obj]
+    return obj
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Persist config/metrics/predictions and basic diagnostic plots for reproducibility."""
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config
+    config = {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics
+    metrics_payload = {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 42 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Add return, momentum, and volatility features (no data leakage - same row only)"""
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f'feature_close_{i}'] for i in range(n)])
+    highs = np.array([row[f'feature_high_{i}'] for i in range(n)])
+    lows = np.array([row[f'feature_low_{i}'] for i in range(n)])
+    volumes = np.array([row[f'feature_volume_{i}'] for i in range(n)])
+    
+    log_rets = np.diff(np.log(closes + 1e-12))
+    features = {}
+    
+    # Log returns at multiple horizons
+    features['ret_1h'] = log_rets[-1] if len(log_rets) >= 1 else 0
+    features['ret_4h'] = np.sum(log_rets[-4:]) if len(log_rets) >= 4 else 0
+    features['ret_8h'] = np.sum(log_rets[-8:]) if len(log_rets) >= 8 else 0
+    features['ret_24h'] = np.sum(log_rets[-24:]) if len(log_rets) >= 24 else 0
+    features['ret_48h'] = np.sum(log_rets) if len(log_rets) >= 2 else 0
+    
+    # Realised volatility at multiple horizons
+    features['vol_8h'] = np.std(log_rets[-8:], ddof=1) if len(log_rets) >= 8 else 0
+    features['vol_24h'] = np.std(log_rets[-24:], ddof=1) if len(log_rets) >= 24 else 0
+    features['vol_48h'] = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 0
+    
+    # Momentum: short vs long return
+    features['momentum_ratio'] = features['ret_8h'] / (abs(features['ret_48h']) + 1e-12)
+    
+    # Mean reversion signal: distance from recent mean
+    features['mean_reversion'] = (closes[-1] - np.mean(closes[-24:])) / (np.std(closes[-24:]) + 1e-12) if n >= 24 else 0
+    
+    # High-low range (proxy for intraday vol)
+    hl_range = highs - lows
+    features['hl_range_8h'] = np.mean(hl_range[-8:])
+    features['hl_range_ratio'] = np.mean(hl_range[-8:]) / (np.mean(hl_range) + 1e-12)
+    
+    # Volume trend
+    features['volume_ratio'] = np.mean(volumes[-8:]) / (np.mean(volumes) + 1e-12)
+    
+    # Trend strength (efficiency ratio)
+    net_move = abs(np.sum(log_rets[-8:]))
+    total_path = np.sum(np.abs(log_rets[-8:]))
+    features['efficiency_8h'] = net_move / (total_path + 1e-12)
+    
+    return pd.Series(features)
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+# Track feature importance across all runs
+all_feature_importances = {}
+
+total_configs = len(LEARNING_RATES) * len(MAX_DEPTHS) * len(NUM_LEAVES) * len(TOP_K_FEATURES_GRID) * len(N_ESTIMATORS_CHECKPOINTS)
+print(f"   Grid: {len(LEARNING_RATES)} lr × {len(MAX_DEPTHS)} depth × {len(NUM_LEAVES)} leaves × {len(TOP_K_FEATURES_GRID)} topK × {len(N_ESTIMATORS_CHECKPOINTS)} checkpoints = {total_configs} configs")
+
+from scipy.stats import pearsonr as _pr  # hoisted: was re-imported on every evaluation
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+
+            # Stage 1: Get feature importances ONCE per (lr, depth, leaves) combo
+            # (shared across TOP_K values to save compute)
+            fold_importances = []
+            fold_selectors = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train_full = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+
+                selector = LGBMRegressor(
+                    n_estimators=200,
+                    learning_rate=0.05,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    subsample=0.8,
+                    colsample_bytree=0.7,
+                    min_child_samples=50,
+                    random_state=42,
+                    verbose=-1
+                )
+                selector.fit(X_train_full, y_train)
+                fold_importances.append(selector.feature_importances_)
+                fold_selectors.append((train_idx, test_idx, y_train))
+
+            # Log top features (averaged across folds)
+            avg_imp = np.mean(fold_importances, axis=0)
+            for feat_idx in np.argsort(avg_imp)[-10:]:
+                fname = feature_cols[feat_idx]
+                all_feature_importances[fname] = all_feature_importances.get(fname, 0) + avg_imp[feat_idx]
+
+            # Stage 2: For each TOP_K, select features and retrain
+            for top_k in TOP_K_FEATURES_GRID:
+                fold_models = []
+                for fold_idx, (train_idx, test_idx, y_train) in enumerate(fold_selectors):
+                    importances = fold_importances[fold_idx]
+                    top_idx = np.argsort(importances)[-top_k:]
+                    selected = [feature_cols[i] for i in top_idx]
+
+                    X_train_sel = df_all.iloc[train_idx][selected]
+                    lgb = LGBMRegressor(
+                        n_estimators=N_ESTIMATORS_MAX,
+                        learning_rate=lr,
+                        max_depth=depth,
+                        num_leaves=leaves,
+                        subsample=0.8,
+                        colsample_bytree=0.7,
+                        min_child_samples=50,
+                        reg_alpha=0.1,
+                        reg_lambda=1.0,
+                        random_state=42,
+                        verbose=-1
+                    )
+                    lgb.fit(X_train_sel, y_train)
+                    fold_models.append((lgb, test_idx, selected))
+
+                # Evaluate at tree count checkpoints
+                for n_est in N_ESTIMATORS_CHECKPOINTS:
+                    config_num += 1
+                    df_all['pred'] = np.nan
+
+                    for lgb, test_idx, selected in fold_models:
+                        X_test = df_all.iloc[test_idx][selected]
+                        preds = lgb.predict(X_test, num_iteration=n_est)
+                        df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+
+                    # Evaluate this checkpoint — must stay inside the n_est loop so every config is scored
+                    valid_mask = ~df_all['pred'].isna()
+                    metrics = evaluator.evaluate(
+                        y_true=df_all.loc[valid_mask, 'target'],
+                        y_pred=df_all.loc[valid_mask, 'pred']
+                    )
+
+                    # Store results
+                    results.append({
+                        'config_num': config_num,
+                        'n_estimators': n_est,
+                        'learning_rate': lr,
+                        'max_depth': depth,
+                        'num_leaves': leaves,
+                        'top_k': top_k,
+                        'predictions': df_all['pred'].copy(),
+                        **metrics
+                    })
+
+                    # Track calibration: std(pred) / std(target) — want ~1.0
+                    y_t = df_all.loc[valid_mask, 'target'].values
+                    y_p = df_all.loc[valid_mask, 'pred'].values
+                    cal_ratio = np.std(y_p) / (np.std(y_t) + 1e-12)
+                    r_val, _ = _pr(y_t, y_p)
+                    results[-1]['cal_ratio'] = cal_ratio
+                    results[-1]['pearson_r_raw'] = r_val
+
+                    if config_num % 10 == 0 or config_num <= 3:
+                        print(f"   [{config_num:3d}/{total_configs}] n={n_est:3d} lr={lr:.2f} d={depth} l={leaves:2d} k={top_k:2d} "
+                              f"→ {metrics['num_passed']}/7 r={r_val:+.4f} cal={cal_ratio:.3f}")
+
+# Analyze results — rank every config on the 7 core metrics plus calibration, then sort by num_passed and core_avg_rank
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+
+# Extract the 7 core metrics (from the nested 'metrics' dict when present, otherwise from the flat result dict)
+for r_idx, r in enumerate(results):
+    m = r.get('metrics', r)
+    results_df.loc[r_idx, 'da'] = m.get('directional_accuracy', 0)
+    results_df.loc[r_idx, 'da_ci'] = m.get('da_ci_lower', 0)
+    results_df.loc[r_idx, 'da_pval'] = m.get('da_pvalue', 1)
+    results_df.loc[r_idx, 'pearson'] = m.get('pearson_r', 0)
+    results_df.loc[r_idx, 'pearson_pval'] = m.get('pearson_pvalue', 1)
+    results_df.loc[r_idx, 'wrmse_imp'] = m.get('wrmse_improvement', 0)
+    results_df.loc[r_idx, 'czar_imp'] = m.get('czar_improvement', 0)
+
+# Compute ranks for each metric (lower rank = better)
+rank_cols = {}
+rank_cols['rk_da'] = results_df['da'].rank(ascending=False)
+rank_cols['rk_da_ci'] = results_df['da_ci'].rank(ascending=False)
+rank_cols['rk_da_pval'] = results_df['da_pval'].rank(ascending=True)       # lower p = better
+rank_cols['rk_pearson'] = results_df['pearson'].rank(ascending=False)
+rank_cols['rk_pear_pval'] = results_df['pearson_pval'].rank(ascending=True) # lower p = better
+rank_cols['rk_wrmse'] = results_df['wrmse_imp'].rank(ascending=False)
+rank_cols['rk_czar'] = results_df['czar_imp'].rank(ascending=False)
+rank_cols['rk_cal'] = (1 - results_df['cal_ratio']).abs().rank(ascending=True)  # closer to 1.0 = better
+
+for col, vals in rank_cols.items():
+    results_df[col] = vals
+
+# core_avg_rank averages the 7 core-metric ranks; avg_rank additionally includes the calibration rank
+core_rank_cols = [c for c in rank_cols if c != 'rk_cal']
+results_df['core_avg_rank'] = results_df[core_rank_cols].mean(axis=1)
+results_df['avg_rank'] = results_df[list(rank_cols.keys())].mean(axis=1)
+
+# Sort: num_passed DESC (most important), then core_avg_rank ASC (tiebreaker); avg_rank is only displayed
+results_df = results_df.sort_values(['num_passed', 'core_avg_rank'], ascending=[False, True])
+
+print(f"\n✅ Tested {len(results)} configurations")
+
+# Feature importance report
+print(f"\n   Top 20 most important features (aggregated across all configs):")
+sorted_feats = sorted(all_feature_importances.items(), key=lambda x: x[1], reverse=True)[:20]
+for i, (fname, imp) in enumerate(sorted_feats):
+    print(f"   {i+1:2d}. {fname:<35s} {imp:>10.1f}")
+
+print(f"\n   Top 10 models (ranked by avg rank across 7 metrics + calibration):")
+print(f"   {'#':>3} {'n':>4} {'lr':>5} {'d':>2} {'l':>3} {'k':>3} │ {'DA':>5} {'CI':>5} {'pval':>5} {'r':>6} {'WRMSE':>6} {'CZAR':>6} {'cal':>5} │ {'pts':>3} {'rk':>5}")
+print(f"   {'─'*85}")
+for _, row in results_df.head(10).iterrows():
+    print(f"   {int(row['config_num']):3d} {int(row['n_estimators']):4d} {row['learning_rate']:5.2f} "
+          f"{int(row['max_depth']):2d} {int(row['num_leaves']):3d} {int(row['top_k']):3d} │ "
+          f"{row['da']:.3f} {row['da_ci']:.3f} {row['da_pval']:.3f} {row['pearson']:+.4f} "
+          f"{row['wrmse_imp']:+.4f} {row['czar_imp']:+.4f} {row['cal_ratio']:.3f} │ "
+          f"{int(row['num_passed']):3d} {row['avg_rank']:5.1f}")
+
+# Select best = first row after the (num_passed DESC, core_avg_rank ASC) sort — look up by config_num in the results list
+best_cfg_num = int(results_df.iloc[0]['config_num'])
+best_result = next(r for r in results if r['config_num'] == best_cfg_num)
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\n   Best (avg rank): Config #{best_result['config_num']}")
+print(f"   r={results_df.iloc[0]['pearson']:+.4f} cal={results_df.iloc[0]['cal_ratio']:.3f} "
+      f"DA={results_df.iloc[0]['da']:.3f} WRMSE={results_df.iloc[0]['wrmse_imp']:+.4f} "
+      f"({best_result['num_passed']}/7)")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Models (top 3 diverse configs)
+# =============================================================================
+print("\n[6/6] Training production models from top cohort...")
+
+# Pick top 3 by average rank for deployment diversity
+top_configs = results_df.head(3)
+trained_models = []
+
+for rank_idx, (_, row) in enumerate(top_configs.iterrows()):
+    params = {
+        'n_estimators': int(row['n_estimators']),
+        'learning_rate': row['learning_rate'],
+        'max_depth': int(row['max_depth']),
+        'num_leaves': int(row['num_leaves']),
+    }
+    
+    # Feature selection on full training data (same approach as CV)
+    k = int(row['top_k'])
+    selector = LGBMRegressor(
+        n_estimators=200, learning_rate=0.05,
+        max_depth=int(row['max_depth']), num_leaves=int(row['num_leaves']),
+        subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+        random_state=42, verbose=-1,
+    )
+    selector.fit(df_all[feature_cols], df_all['target'])
+    top_idx = np.argsort(selector.feature_importances_)[-k:]
+    selected = [feature_cols[i] for i in top_idx]
+    
+    # Train final model on selected features
+    model = LGBMRegressor(
+        **params,
+        subsample=0.8,
+        colsample_bytree=0.7,
+        min_child_samples=50,
+        reg_alpha=0.1,
+        reg_lambda=1.0,
+        random_state=42,
+        verbose=-1,
+    )
+    model.fit(df_all[selected], df_all['target'])
+    trained_models.append((int(row['config_num']), params, model, selected, row))
+    print(f"   Model {rank_idx+1}: Config #{int(row['config_num'])} "
+          f"(k={k}, r={row['pearson']:+.4f} cal={row['cal_ratio']:.3f} pts={int(row['num_passed'])})")
+
+# Use the best (rank 1) as the primary
+best_config_num, best_params, final_model, best_selected, best_row = trained_models[0]
+best_result = next(r for r in results if r['config_num'] == best_config_num)
+print(f"\n✅ Trained {len(trained_models)} models from top cohort")
+
+def predict(nonce: int = None) -> float:
+    """Predict BTC/USD price 8 hours into the future with the rank-1 model.
+
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+    Returns:
+        float: Predicted BTC price in USD (current price * exp(predicted log return))
+    Raises:
+        ValueError: If live features or a finite, positive current price cannot be obtained
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+
+    # Get current price from live feature context (remote-only path)
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        # Fallback to live snapshot (still remote API; no local parquet)
+        snap = workflow._dm.get_live_snapshot(TICKERS)
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+
+    # Predict log return — final_model was fit on best_selected, so feed exactly those columns in that order
+    predicted_log_return = final_model.predict(live_features[best_selected].values.reshape(1, -1))[0]
+
+    # Convert log return to price
+    predicted_price = current_price * np.exp(predicted_log_return)
+
+    print(f"\nLive Prediction: ${predicted_price:,.2f} ({predicted_log_return:+.4f} log return)")
+
+    return float(predicted_price)
+
+# Test and save ALL models from the cohort
+print("\n🧪 Testing and saving models...")
+
+for rank_idx, (cfg_num, params, model, selected, row) in enumerate(trained_models):
+    # Create a predict function that captures this specific model + its selected features
+    def _make_predict(m, sel):
+        def predict(nonce=None):
+            live_row = workflow.get_live_features(ticker=TICKERS[0])
+            if live_row is None or len(live_row) == 0:
+                raise ValueError("Could not get live features")
+            live_returns = engineer_returns(live_row.iloc[0])
+            live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+            current_price = float(live_row.attrs.get("current_price", np.nan))
+            if not np.isfinite(current_price) or current_price <= 0:
+                snap = workflow._dm.get_live_snapshot(TICKERS)
+                if snap is not None and len(snap) > 0 and "close" in snap.columns:
+                    current_price = float(snap["close"].iloc[-1])
+            if not np.isfinite(current_price) or current_price <= 0:
+                raise ValueError(f"Invalid current price: {current_price}")
+            predicted_log_return = m.predict(live_features[sel].values.reshape(1, -1))[0]
+            predicted_price = current_price * np.exp(predicted_log_return)
+            return float(predicted_price)
+        return predict
+    
+    predict_fn = _make_predict(model, selected)
+    pkl_name = f"predict_42_rank{rank_idx+1}.pkl"
+    
+    try:
+        price = predict_fn()
+        r_val = row.get('pearson_r_raw', row.get('pearson', 0))
+        cal = row.get('cal_ratio', 0)
+        pts = int(row.get('num_passed', 0))
+        print(f"   Model {rank_idx+1} (#{cfg_num}): ${price:,.2f} "
+              f"(r={r_val:+.4f} cal={cal:.3f} pts={pts}) → {pkl_name}")
+    except Exception as e:
+        print(f"   Model {rank_idx+1} (#{cfg_num}): FAILED ({e}) → {pkl_name}")
+    
+    with open(pkl_name, "wb") as f:
+        cloudpickle.dump(predict_fn, f)
+
+# Also save rank1 as the default predict_42.pkl
+with open("predict_42.pkl", "wb") as f:
+    cloudpickle.dump(_make_predict(trained_models[0][2], trained_models[0][3]), f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {len(trained_models)} models saved")
+print(f"Pickles: predict_42.pkl (best), predict_42_rank1/2/3.pkl (cohort)")
+print("="*80)
+
diff --git a/notebooks/testnet/topic_42_btc_8h_price/model_v2_directional.py b/notebooks/testnet/topic_42_btc_8h_price/model_v2_directional.py
new file mode 100644
index 0000000..074c7a6
--- /dev/null
+++ b/notebooks/testnet/topic_42_btc_8h_price/model_v2_directional.py
@@ -0,0 +1,370 @@
+#!/usr/bin/env python3
+"""
+Topic 42 — BTC/USD 8h Price — v2 (Directional Signals Conditioned on Vol)
+=========================================================================
+
+Key insight: Vol features alone predict magnitude, not direction.
+BTC is efficient — we need directional signals conditioned on vol regime.
+
+Features:
+- Vol-normalized momentum (signed returns / vol)
+- Upside vs downside vol asymmetry (skew)
+- Return autocorrelation (trending vs mean-reverting regime)
+- Vol regime indicator (high/low vol → different dynamics)
+- Volume-price divergence (volume up + price down = bearish)
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+from scipy.stats import pearsonr
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+from allora_forge_builder_kit.utils import get_api_key
+
+# =============================================================================
+# CONFIG
+# =============================================================================
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 1825
+INTERVAL = "1h"
+NUMBER_OF_INPUT_BARS = 48
+TARGET_BARS = 8
+H = TARGET_BARS
+
+N_SPLITS = 3
+N_ESTIMATORS_MAX = 800
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 600]
+LEARNING_RATES = [0.01, 0.03, 0.07, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31]
+TOP_K_FEATURES_GRID = [5, 10, 25, 50]
+
+print("=" * 70)
+print("Topic 42 — BTC/USD 8h Price — v2 (Directional + Vol-Conditioned)")
+print("=" * 70)
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/6] Loading data...")
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key")
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS, number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS, interval=INTERVAL,
+    data_source="allora", api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+workflow.backfill(start=start_date)
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df_all.columns if c.startswith("feature_")]
+df_all = df_all.dropna(subset=base_feature_cols + ["target"])
+print(f"✅ {len(df_all):,} samples")
+
+# =============================================================================
+# DIRECTIONAL FEATURE ENGINEERING
+# =============================================================================
+print("\n[2/6] Engineering directional features...")
+
+
+def engineer_directional_features(row):
+    """Compute directional, volatility-conditioned features from one row of raw bar features; returns a pd.Series keyed by feature name."""
+    n = NUMBER_OF_INPUT_BARS  # bars per row; NOTE(review): assumes index n-1 is the most recent bar — confirm against the workflow
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))  # n-1 bar-to-bar log returns; 1e-12 guards log(0)
+    abs_rets = np.abs(log_rets)
+    f = {}
+
+    # === RAW RETURNS (for baseline) ===
+    f["ret_1h"] = log_rets[-1] if len(log_rets) >= 1 else 0
+    f["ret_4h"] = np.sum(log_rets[-4:]) if len(log_rets) >= 4 else 0
+    f["ret_8h"] = np.sum(log_rets[-8:]) if len(log_rets) >= 8 else 0
+    f["ret_24h"] = np.sum(log_rets[-24:]) if len(log_rets) >= 24 else 0
+    f["ret_48h"] = np.sum(log_rets) if len(log_rets) >= 2 else 0  # sum over the whole window (n-1 returns)
+
+    # === VOLATILITY (for conditioning) ===
+    vol_8h = np.std(log_rets[-8:], ddof=1) if len(log_rets) >= 8 else 1e-6  # sample std; 1e-6 fallback if the window is too short
+    vol_24h = np.std(log_rets[-24:], ddof=1) if len(log_rets) >= 24 else 1e-6
+    vol_48h = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 1e-6
+    f["vol_8h"] = vol_8h
+    f["vol_24h"] = vol_24h
+    f["vol_48h"] = vol_48h
+
+    # === VOL-NORMALIZED MOMENTUM (the key directional signal) ===
+    # "How many sigmas has price moved?" — direction + magnitude in vol context
+    f["znorm_ret_1h"] = log_rets[-1] / (vol_8h + 1e-12) if len(log_rets) >= 1 else 0
+    f["znorm_ret_4h"] = np.sum(log_rets[-4:]) / (vol_8h * 2 + 1e-12) if len(log_rets) >= 4 else 0  # vol_8h * 2 == vol_8h * sqrt(4)
+    f["znorm_ret_8h"] = np.sum(log_rets[-8:]) / (vol_8h * np.sqrt(8) + 1e-12) if len(log_rets) >= 8 else 0
+    f["znorm_ret_24h"] = np.sum(log_rets[-24:]) / (vol_24h * np.sqrt(24) + 1e-12) if len(log_rets) >= 24 else 0
+
+    # === UPSIDE vs DOWNSIDE VOL (skew — directional asymmetry) ===
+    if len(log_rets) >= 24:
+        up_rets = log_rets[-24:][log_rets[-24:] > 0]
+        dn_rets = log_rets[-24:][log_rets[-24:] < 0]
+        up_vol = np.std(up_rets, ddof=1) if len(up_rets) > 1 else 1e-6
+        dn_vol = np.std(np.abs(dn_rets), ddof=1) if len(dn_rets) > 1 else 1e-6
+        f["vol_skew_24h"] = (up_vol - dn_vol) / (up_vol + dn_vol + 1e-12)
+        # Fraction of positive returns (directional bias)
+        f["up_fraction_24h"] = np.mean(log_rets[-24:] > 0)
+    else:
+        f["vol_skew_24h"] = 0
+        f["up_fraction_24h"] = 0.5
+
+    if len(log_rets) >= 8:
+        f["up_fraction_8h"] = np.mean(log_rets[-8:] > 0)
+    else:
+        f["up_fraction_8h"] = 0.5
+
+    # === RETURN AUTOCORRELATION (trending vs mean-reverting) ===
+    if len(log_rets) >= 10:
+        f["ret_autocorr"] = np.corrcoef(log_rets[-9:], log_rets[-10:-1])[0, 1]
+        if not np.isfinite(f["ret_autocorr"]):
+            f["ret_autocorr"] = 0
+    else:
+        f["ret_autocorr"] = 0
+
+    # Lag-1 autocorrelation of absolute returns over the last 10 returns (volatility clustering)
+    if len(abs_rets) >= 10:
+        f["absret_autocorr"] = np.corrcoef(abs_rets[-9:], abs_rets[-10:-1])[0, 1]
+        if not np.isfinite(f["absret_autocorr"]):
+            f["absret_autocorr"] = 0
+    else:
+        f["absret_autocorr"] = 0
+
+    # === VOL REGIME (high vol vs low vol — different dynamics) ===
+    f["vol_ratio_8_48"] = vol_8h / (vol_48h + 1e-12)
+    f["vol_expanding"] = 1.0 if vol_8h > vol_24h else 0.0
+
+    # === VOLUME-PRICE DIVERGENCE ===
+    # Volume up + price down = bearish divergence
+    vol_trend = np.mean(volumes[-4:]) / (np.mean(volumes[-24:]) + 1e-12) if len(volumes) >= 24 else 1
+    price_trend = f["ret_4h"]
+    f["vol_price_divergence"] = vol_trend * np.sign(-price_trend)  # positive = bearish divergence
+    f["volume_ratio"] = vol_trend
+
+    # === MEAN REVERSION SIGNAL ===
+    if len(closes) >= 24:
+        f["zscore_24h"] = (closes[-1] - np.mean(closes[-24:])) / (np.std(closes[-24:], ddof=1) + 1e-12)
+    else:
+        f["zscore_24h"] = 0
+
+    # === EFFICIENCY RATIO (trending vs choppy) ===
+    if len(log_rets) >= 8:
+        net = abs(np.sum(log_rets[-8:]))
+        path = np.sum(abs_rets[-8:])
+        f["efficiency_8h"] = net / (path + 1e-12)
+    else:
+        f["efficiency_8h"] = 0
+
+    # === HIGH-LOW RANGE ===
+    hl = highs - lows  # per-bar high-low range in raw price units (not normalized)
+    f["hl_range_8h"] = np.mean(hl[-8:])
+    f["hl_range_ratio"] = np.mean(hl[-8:]) / (np.mean(hl) + 1e-12)
+
+    return pd.Series(f)
+
+
+engineered = df_all.apply(engineer_directional_features, axis=1)
+df_all = pd.concat([df_all, engineered], axis=1)
+
+# Use ONLY engineered features — no raw OHLCV base features
+feature_cols = list(engineered.columns)
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+print(f"✅ {len(feature_cols)} directional features (no raw OHLCV)")
+
+# =============================================================================
+# GRID SEARCH
+# =============================================================================
+print(f"\n[3/6] Grid search...")
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=TARGET_BARS)
+evaluator = PerformanceEvaluator()
+results = []
+config_num = 0
+all_feature_importances = {}
+
+total_configs = len(LEARNING_RATES) * len(MAX_DEPTHS) * len(NUM_LEAVES) * len(TOP_K_FEATURES_GRID) * len(N_ESTIMATORS_CHECKPOINTS)
+print(f"   {total_configs} configs")
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            fold_importances = []  # per-fold feature importances from the selector model
+            fold_selectors = []  # per-fold (train_idx, test_idx, y_train), reused for every top_k
+            for train_idx, test_idx in tscv.split(df_all):
+                selector = LGBMRegressor(
+                    n_estimators=200, learning_rate=0.05,
+                    max_depth=depth, num_leaves=leaves,
+                    subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+                    random_state=42, verbose=-1,
+                )
+                selector.fit(df_all.iloc[train_idx][feature_cols], df_all.iloc[train_idx]["target"])
+                fold_importances.append(selector.feature_importances_)
+                fold_selectors.append((train_idx, test_idx, df_all.iloc[train_idx]["target"]))
+
+            avg_imp = np.mean(fold_importances, axis=0)
+            for fi in np.argsort(avg_imp)[-10:]:  # accumulate the 10 highest fold-averaged importances for the report
+                fn = feature_cols[fi]
+                all_feature_importances[fn] = all_feature_importances.get(fn, 0) + avg_imp[fi]
+
+            for top_k in TOP_K_FEATURES_GRID:  # NOTE: any top_k >= len(feature_cols) keeps every feature, so such configs coincide
+                fold_models = []
+                for fold_idx, (train_idx, test_idx, y_train) in enumerate(fold_selectors):
+                    top_idx = np.argsort(fold_importances[fold_idx])[-top_k:]  # rank features using this fold's training data only
+                    selected = [feature_cols[i] for i in top_idx]
+                    lgb = LGBMRegressor(
+                        n_estimators=N_ESTIMATORS_MAX, learning_rate=lr,  # fit once at the max tree count; checkpoints truncate via num_iteration
+                        max_depth=depth, num_leaves=leaves,
+                        subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+                        reg_alpha=0.1, reg_lambda=1.0, random_state=42, verbose=-1,
+                    )
+                    lgb.fit(df_all.iloc[train_idx][selected], y_train)
+                    fold_models.append((lgb, test_idx, selected))
+
+                for n_est in N_ESTIMATORS_CHECKPOINTS:
+                    config_num += 1
+                    df_all["pred"] = np.nan  # reset out-of-fold predictions for this config
+                    for lgb, test_idx, selected in fold_models:
+                        preds = lgb.predict(df_all.iloc[test_idx][selected], num_iteration=n_est)
+                        df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                    valid = ~df_all["pred"].isna()  # rows never used as a test fold stay NaN and are excluded
+                    y_t = df_all.loc[valid, "target"].values
+                    y_p = df_all.loc[valid, "pred"].values
+                    metrics = evaluator.evaluate(y_true=y_t, y_pred=y_p)
+                    r_val, _ = pearsonr(y_t, y_p)
+                    cal = np.std(y_p) / (np.std(y_t) + 1e-12)  # calibration ratio: std(pred) / std(target)
+
+                    m = metrics.get("metrics", metrics)  # use the nested "metrics" dict when present, else the flat result
+                    results.append({
+                        "config_num": config_num, "n_estimators": n_est,
+                        "learning_rate": lr, "max_depth": depth,
+                        "num_leaves": leaves, "top_k": top_k,
+                        "da": m.get("directional_accuracy", 0),
+                        "da_ci": m.get("da_ci_lower", 0),
+                        "da_pval": m.get("da_pvalue", 1),
+                        "pearson": r_val,
+                        "pearson_pval": m.get("pearson_pvalue", 1),
+                        "wrmse_imp": m.get("wrmse_improvement", 0),
+                        "czar_imp": m.get("czar_improvement", 0),
+                        "cal_ratio": cal,
+                        "num_passed": metrics["num_passed"],
+                    })
+
+                    if config_num % 20 == 0 or config_num <= 3:
+                        print(f"   [{config_num:3d}/{total_configs}] n={n_est:3d} lr={lr:.2f} d={depth} l={leaves:2d} k={top_k:2d} "
+                              f"→ {metrics['num_passed']}/7 r={r_val:+.4f} cal={cal:.3f}")
+
+# =============================================================================
+# RANK & SELECT
+# =============================================================================
+results_df = pd.DataFrame(results)
+# Per-metric ranks: rank 1 is best, so "higher is better" metrics rank descending and p-values rank ascending
+rank_cols = {}
+rank_cols["rk_da"] = results_df["da"].rank(ascending=False)
+rank_cols["rk_da_ci"] = results_df["da_ci"].rank(ascending=False)
+rank_cols["rk_da_pval"] = results_df["da_pval"].rank(ascending=True)
+rank_cols["rk_pearson"] = results_df["pearson"].rank(ascending=False)
+rank_cols["rk_pear_pval"] = results_df["pearson_pval"].rank(ascending=True)
+rank_cols["rk_wrmse"] = results_df["wrmse_imp"].rank(ascending=False)
+rank_cols["rk_czar"] = results_df["czar_imp"].rank(ascending=False)
+rank_cols["rk_cal"] = (1 - results_df["cal_ratio"]).abs().rank(ascending=True)  # closer to 1.0 = better
+
+for col, vals in rank_cols.items():
+    results_df[col] = vals
+# core_avg_rank excludes the calibration rank; sort by num_passed DESC, then core_avg_rank ASC
+core_rank_cols = [c for c in rank_cols if c != "rk_cal"]
+results_df["core_avg_rank"] = results_df[core_rank_cols].mean(axis=1)
+results_df = results_df.sort_values(["num_passed", "core_avg_rank"], ascending=[False, True])
+
+print(f"\n✅ Tested {len(results)} configs")
+
+print(f"\n   Top 20 features:")
+for i, (fn, imp) in enumerate(sorted(all_feature_importances.items(), key=lambda x: x[1], reverse=True)[:20]):
+    print(f"   {i+1:2d}. {fn:<30s} {imp:>10.1f}")
+
+print(f"\n   Top 10 models:")
+print(f"   {'#':>3} {'n':>4} {'lr':>5} {'d':>2} {'l':>3} {'k':>3} │ {'DA':>5} {'CI':>5} {'pval':>5} {'r':>6} {'WRMSE':>6} {'CZAR':>6} {'cal':>5} │ {'pts':>3}")
+print(f"   {'─'*80}")
+for _, row in results_df.head(10).iterrows():
+    print(f"   {int(row['config_num']):3d} {int(row['n_estimators']):4d} {row['learning_rate']:5.2f} "
+          f"{int(row['max_depth']):2d} {int(row['num_leaves']):3d} {int(row['top_k']):3d} │ "
+          f"{row['da']:.3f} {row['da_ci']:.3f} {row['da_pval']:.3f} {row['pearson']:+.4f} "
+          f"{row['wrmse_imp']:+.4f} {row['czar_imp']:+.4f} {row['cal_ratio']:.3f} │ "
+          f"{int(row['num_passed']):3d}")
+
+best_cfg = int(results_df.iloc[0]["config_num"])
+best = next(r for r in results if r["config_num"] == best_cfg)
+print(f"\n   Best: #{best_cfg} r={best['pearson']:+.4f} DA={best['da']:.3f} ({best['num_passed']}/7)")
+
+# =============================================================================
+# TRAIN & DEPLOY TOP 3
+# =============================================================================
+print(f"\n[4/6] Training top 3...")
+top3 = results_df.head(3)  # results_df is already sorted best-first
+trained = []  # (config_num, model, selected feature names, results row) per deployed model
+for rank_idx, (_, row) in enumerate(top3.iterrows()):
+    k = int(row["top_k"])
+    sel_model = LGBMRegressor(n_estimators=200, learning_rate=0.05,
+        max_depth=int(row["max_depth"]), num_leaves=int(row["num_leaves"]),
+        subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+        random_state=42, verbose=-1)
+    sel_model.fit(df_all[feature_cols], df_all["target"])  # feature selection refit on all rows, same selector settings as the grid search
+    top_idx = np.argsort(sel_model.feature_importances_)[-k:]
+    selected = [feature_cols[i] for i in top_idx]  # column order here is the order the model must receive at inference
+
+    model = LGBMRegressor(
+        n_estimators=int(row["n_estimators"]), learning_rate=row["learning_rate"],
+        max_depth=int(row["max_depth"]), num_leaves=int(row["num_leaves"]),
+        subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+        reg_alpha=0.1, reg_lambda=1.0, random_state=42, verbose=-1)
+    model.fit(df_all[selected], df_all["target"])
+    trained.append((int(row["config_num"]), model, selected, row))
+    print(f"   Model {rank_idx+1}: #{int(row['config_num'])} (k={k}, r={row['pearson']:+.4f} DA={row['da']:.3f})")
+
+# =============================================================================
+# SAVE PICKLES
+# =============================================================================
+print(f"\n[5/6] Testing & saving...")
+for rank_idx, (cfg, model, selected, row) in enumerate(trained):
+    def _make_predict(m, sel):  # bind model `m` and its feature list `sel` into the closure that is cloudpickled below
+        def predict(nonce=None):  # nonce is accepted but unused
+            live_row = workflow.get_live_features(ticker=TICKERS[0])
+            if live_row is None or len(live_row) == 0:
+                raise ValueError("No live features")
+            live_eng = engineer_directional_features(live_row.iloc[0])
+            current_price = float(live_row.attrs.get("current_price", np.nan))
+            if not np.isfinite(current_price) or current_price <= 0:
+                snap = workflow._dm.get_live_snapshot(TICKERS)  # fall back to the live snapshot close
+                if snap is not None and len(snap) > 0 and "close" in snap.columns:
+                    current_price = float(snap["close"].iloc[-1])
+            if not np.isfinite(current_price) or current_price <= 0:
+                raise ValueError(f"Invalid current price: {current_price}")  # never return a NaN or non-positive price
+            return float(current_price * np.exp(m.predict(live_eng[sel].values.reshape(1, -1))[0]))  # log return -> price
+        return predict
+    fn = _make_predict(model, selected)
+    pkl = f"predict_42_rank{rank_idx+1}.pkl"
+    try:
+        price = fn()
+        print(f"   Model {rank_idx+1} (#{cfg}): ${price:,.2f} → {pkl}")
+    except Exception as e:
+        print(f"   Model {rank_idx+1} (#{cfg}): FAILED ({e}) → {pkl}")
+    with open(pkl, "wb") as f:
+        cloudpickle.dump(fn, f)
+
+with open("predict_42.pkl", "wb") as fout:
+    cloudpickle.dump(_make_predict(trained[0][1], trained[0][2]), fout)
+
+print("\n" + "=" * 70)
+print("COMPLETE!")
+print("=" * 70)
diff --git a/notebooks/testnet/topic_42_btc_8h_price/model_v3_czar.py b/notebooks/testnet/topic_42_btc_8h_price/model_v3_czar.py
new file mode 100644
index 0000000..c0db20b
--- /dev/null
+++ b/notebooks/testnet/topic_42_btc_8h_price/model_v3_czar.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python3
+"""
+Topic 42 — BTC/USD 8h Price — v3 (CZAR Loss)
+=============================================
+
+Uses CZAR loss instead of MSE/Huber. CZAR penalizes wrong-sign predictions
+heavily, softens near-zero returns, and normalizes by local volatility.
+This should help BTC where the signal is weak — CZAR won't waste capacity
+fitting noise on near-zero returns.
+
+Combined with the directional features from v2.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+from scipy.stats import pearsonr
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator, make_czar_objective
+from allora_forge_builder_kit.utils import get_api_key
+
+# =============================================================================
+# CONFIG
+# =============================================================================
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 1825
+INTERVAL = "1h"
+NUMBER_OF_INPUT_BARS = 48
+TARGET_BARS = 8
+
+N_SPLITS = 3
+# Smaller grid — focus on CZAR-specific params
+N_ESTIMATORS_MAX = 600
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 600]
+LEARNING_RATES = [0.01, 0.03, 0.07]
+MAX_DEPTHS = [3, 5]
+NUM_LEAVES = [15, 31]
+CZAR_ALPHAS = [0.3, 0.5, 0.7, 1.0]  # CZAR alpha param (MSE curvature)
+
+print("=" * 70)
+print("Topic 42 — BTC/USD 8h Price — v3 (CZAR Loss)")
+print("=" * 70)
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/5] Loading data...")
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key")
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS, number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS, interval=INTERVAL,
+    data_source="allora", api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+workflow.backfill(start=start_date)
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df_all.columns if c.startswith("feature_")]
+df_all = df_all.dropna(subset=base_feature_cols + ["target"])
+print(f"✅ {len(df_all):,} samples")
+
+# =============================================================================
+# DIRECTIONAL FEATURES (same as v2)
+# =============================================================================
+print("\n[2/5] Engineering directional features...")
+
+
+def engineer_directional_features(row):  # row: mapping with feature_{close,high,low,volume}_{i} keys -> pd.Series of derived features
+    n = NUMBER_OF_INPUT_BARS  # number of lagged bars read from `row`
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])  # assumes index n-1 is the most recent bar — TODO confirm
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))  # n-1 bar-to-bar log returns; the epsilon guards log(0)
+    abs_rets = np.abs(log_rets)
+    f = {}  # feature name -> value; becomes the returned Series
+
+    f["ret_1h"] = log_rets[-1] if len(log_rets) >= 1 else 0  # cumulative log return over the last 1/4/8/24 bars
+    f["ret_4h"] = np.sum(log_rets[-4:]) if len(log_rets) >= 4 else 0
+    f["ret_8h"] = np.sum(log_rets[-8:]) if len(log_rets) >= 8 else 0
+    f["ret_24h"] = np.sum(log_rets[-24:]) if len(log_rets) >= 24 else 0
+    f["ret_48h"] = np.sum(log_rets) if len(log_rets) >= 2 else 0  # whole window (n-1 returns)
+
+    vol_8h = np.std(log_rets[-8:], ddof=1) if len(log_rets) >= 8 else 1e-6  # sample std of bar returns; 1e-6 fallback when too few points
+    vol_24h = np.std(log_rets[-24:], ddof=1) if len(log_rets) >= 24 else 1e-6
+    vol_48h = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 1e-6
+    f["vol_8h"] = vol_8h
+    f["vol_24h"] = vol_24h
+    f["vol_48h"] = vol_48h
+
+    f["znorm_ret_1h"] = log_rets[-1] / (vol_8h + 1e-12) if len(log_rets) >= 1 else 0  # returns scaled by vol_8h * sqrt(number of bars)
+    f["znorm_ret_4h"] = np.sum(log_rets[-4:]) / (vol_8h * 2 + 1e-12) if len(log_rets) >= 4 else 0  # 2 == sqrt(4)
+    f["znorm_ret_8h"] = np.sum(log_rets[-8:]) / (vol_8h * np.sqrt(8) + 1e-12) if len(log_rets) >= 8 else 0
+
+    if len(log_rets) >= 24:
+        up_rets = log_rets[-24:][log_rets[-24:] > 0]  # positive returns in the last 24 bars
+        dn_rets = log_rets[-24:][log_rets[-24:] < 0]  # negative returns in the last 24 bars
+        up_vol = np.std(up_rets, ddof=1) if len(up_rets) > 1 else 1e-6
+        dn_vol = np.std(np.abs(dn_rets), ddof=1) if len(dn_rets) > 1 else 1e-6
+        f["vol_skew_24h"] = (up_vol - dn_vol) / (up_vol + dn_vol + 1e-12)  # > 0 when up-moves are more dispersed than down-moves
+        f["up_fraction_24h"] = np.mean(log_rets[-24:] > 0)  # share of bars with a positive return
+    else:
+        f["vol_skew_24h"] = 0  # neutral defaults when the window is too short
+        f["up_fraction_24h"] = 0.5
+
+    f["up_fraction_8h"] = np.mean(log_rets[-8:] > 0) if len(log_rets) >= 8 else 0.5
+
+    if len(log_rets) >= 10:
+        f["ret_autocorr"] = np.corrcoef(log_rets[-9:], log_rets[-10:-1])[0, 1]  # correlation of the last 9 returns with their predecessors (lag 1)
+        if not np.isfinite(f["ret_autocorr"]):  # non-finite correlation is replaced with the neutral value 0
+            f["ret_autocorr"] = 0
+    else:
+        f["ret_autocorr"] = 0
+
+    f["vol_ratio_8_48"] = vol_8h / (vol_48h + 1e-12)  # short-window vs whole-window volatility
+    f["vol_expanding"] = 1.0 if vol_8h > vol_24h else 0.0  # 1.0 when the 8-bar vol exceeds the 24-bar vol
+
+    vol_trend = np.mean(volumes[-4:]) / (np.mean(volumes[-24:]) + 1e-12) if len(volumes) >= 24 else 1  # traded volume: last 4 bars vs last 24
+    f["vol_price_divergence"] = vol_trend * np.sign(-f["ret_4h"])  # volume ratio signed opposite to the 4-bar return
+    f["volume_ratio"] = vol_trend
+
+    if len(closes) >= 24:
+        f["zscore_24h"] = (closes[-1] - np.mean(closes[-24:])) / (np.std(closes[-24:], ddof=1) + 1e-12)  # last close vs the last 24 closes
+    else:
+        f["zscore_24h"] = 0
+
+    if len(log_rets) >= 8:
+        net = abs(np.sum(log_rets[-8:]))  # absolute net move over 8 bars
+        path = np.sum(abs_rets[-8:])  # total distance travelled over the same bars
+        f["efficiency_8h"] = net / (path + 1e-12)  # in [0, 1]; 1 means every bar moved in the same direction
+    else:
+        f["efficiency_8h"] = 0
+
+    hl = highs - lows  # per-bar high-low range
+    f["hl_range_8h"] = np.mean(hl[-8:])
+    f["hl_range_ratio"] = np.mean(hl[-8:]) / (np.mean(hl) + 1e-12)  # recent range relative to the whole window
+
+    return pd.Series(f)
+
+
+engineered = df_all.apply(engineer_directional_features, axis=1)
+df_all = pd.concat([df_all, engineered], axis=1)
+feature_cols = list(engineered.columns)
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+print(f"✅ {len(feature_cols)} features")
+
+# =============================================================================
+# COMPUTE ROLLING VOL FOR CZAR (needed for the loss)
+# =============================================================================
+# Use 8h rolling std of target as the vol normalization for CZAR
+targets = df_all["target"].values
+rolling_std = pd.Series(targets).rolling(8, min_periods=2).std().fillna(targets.std()).values
+df_all["_rolling_std"] = rolling_std
+
+# =============================================================================
+# GRID SEARCH WITH CZAR LOSS
+# =============================================================================
+print(f"\n[3/5] Grid search with CZAR loss...")
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=TARGET_BARS)
+evaluator = PerformanceEvaluator()
+results = []
+config_num = 0
+
+total = len(LEARNING_RATES) * len(MAX_DEPTHS) * len(NUM_LEAVES) * len(CZAR_ALPHAS) * len(N_ESTIMATORS_CHECKPOINTS)
+print(f"   {total} configs (CZAR alpha × LR × depth × leaves × checkpoints)")
+
+for czar_alpha in CZAR_ALPHAS:
+    for lr in LEARNING_RATES:
+        for depth in MAX_DEPTHS:
+            for leaves in NUM_LEAVES:
+                    fold_models = []
+                    for train_idx, test_idx in tscv.split(df_all):
+                        y_train = df_all.iloc[train_idx]["target"].values
+                        std_train = df_all.iloc[train_idx]["_rolling_std"].values
+
+                        # Create CZAR objective for this fold
+                        czar_obj = make_czar_objective(
+                            std=std_train, alpha=czar_alpha
+                        )
+
+                        lgb = LGBMRegressor(
+                            objective=czar_obj,
+                            n_estimators=N_ESTIMATORS_MAX,
+                            learning_rate=lr,
+                            max_depth=depth,
+                            num_leaves=leaves,
+                            subsample=0.8,
+                            colsample_bytree=0.7,
+                            min_child_samples=50,
+                            reg_alpha=0.1,
+                            reg_lambda=1.0,
+                            random_state=42,
+                            verbose=-1,
+                        )
+                        lgb.fit(
+                            df_all.iloc[train_idx][feature_cols],
+                            y_train,
+                        )
+                        fold_models.append((lgb, test_idx))
+
+                    for n_est in N_ESTIMATORS_CHECKPOINTS:
+                        config_num += 1
+                        df_all["pred"] = np.nan
+                        for lgb, test_idx in fold_models:
+                            preds = lgb.predict(
+                                df_all.iloc[test_idx][feature_cols],
+                                num_iteration=n_est,
+                            )
+                            df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                        valid = ~df_all["pred"].isna()
+                        y_t = df_all.loc[valid, "target"].values
+                        y_p = df_all.loc[valid, "pred"].values
+                        metrics = evaluator.evaluate(y_true=y_t, y_pred=y_p)
+                        r_val, _ = pearsonr(y_t, y_p)
+                        cal = np.std(y_p) / (np.std(y_t) + 1e-12)
+
+                        m = metrics.get("metrics", metrics)
+                        results.append({
+                            "config_num": config_num,
+                            "n_estimators": n_est,
+                            "learning_rate": lr,
+                            "max_depth": depth,
+                            "num_leaves": leaves,
+                            "czar_alpha": czar_alpha,
+                            "da": m.get("directional_accuracy", 0),
+                            "da_ci": m.get("da_ci_lower", 0),
+                            "da_pval": m.get("da_pvalue", 1),
+                            "pearson": r_val,
+                            "wrmse_imp": m.get("wrmse_improvement", 0),
+                            "czar_imp": m.get("czar_improvement", 0),
+                            "cal_ratio": cal,
+                            "num_passed": metrics["num_passed"],
+                        })
+
+                        if config_num % 10 == 0 or config_num <= 3:
+                            print(
+                                f"   [{config_num:3d}/{total}] a={czar_alpha:.1f} "
+                                f"lr={lr:.2f} d={depth} l={leaves:2d} n={n_est:3d} "
+                                f"→ {metrics['num_passed']}/7 r={r_val:+.4f} DA={m.get('directional_accuracy',0):.3f}"
+                            )
+
+# =============================================================================
+# RANK & SELECT
+# =============================================================================
+results_df = pd.DataFrame(results)
+results_df = results_df.sort_values(["num_passed", "pearson"], ascending=[False, False])  # most metrics passed first, ties broken by Pearson r
+
+print(f"\n✅ Tested {len(results)} configs")
+print(f"\n   Top 10:")
+print(f"   {'#':>3} {'a':>3} {'n':>4} {'lr':>5} {'d':>2} {'l':>3} │ {'DA':>5} {'r':>7} {'WRMSE':>7} {'CZAR':>7} {'cal':>5} │ {'pts':>3}")  # one label per printed column, widths match the row format below
+print(f"   {'─'*70}")
+for _, row in results_df.head(10).iterrows():
+    print(
+        f"   {int(row['config_num']):3d} {row['czar_alpha']:.1f} "
+        f"{int(row['n_estimators']):4d} {row['learning_rate']:5.2f} "
+        f"{int(row['max_depth']):2d} {int(row['num_leaves']):3d} │ "
+        f"{row['da']:.3f} {row['pearson']:+.4f} "
+        f"{row['wrmse_imp']:+.4f} {row['czar_imp']:+.4f} {row['cal_ratio']:.3f} │ "
+        f"{int(row['num_passed']):3d}"
+    )
+
+best_cfg = int(results_df.iloc[0]["config_num"])  # top-ranked row after sorting
+best = next(r for r in results if r["config_num"] == best_cfg)  # look the record up by id, not by list position
+print(f"\n   Best: #{best_cfg} r={best['pearson']:+.4f} DA={best['da']:.3f} ({best['num_passed']}/7)")
+
+# =============================================================================
+# TRAIN & SAVE TOP 3
+# =============================================================================
+print(f"\n[4/5] Training top 3...")
+top3 = results_df.head(3)
+trained = []
+for rank_idx, (_, row) in enumerate(top3.iterrows()):
+    std_all = df_all["_rolling_std"].values
+    czar_obj = make_czar_objective(
+        std=std_all, alpha=row["czar_alpha"]
+    )
+    model = LGBMRegressor(
+        objective=czar_obj,
+        n_estimators=int(row["n_estimators"]),
+        learning_rate=row["learning_rate"],
+        max_depth=int(row["max_depth"]),
+        num_leaves=int(row["num_leaves"]),
+        subsample=0.8, colsample_bytree=0.7, min_child_samples=50,
+        reg_alpha=0.1, reg_lambda=1.0, random_state=42, verbose=-1,
+    )
+    model.fit(df_all[feature_cols], df_all["target"])
+    trained.append((int(row["config_num"]), model, row))
+    print(f"   Model {rank_idx+1}: #{int(row['config_num'])} "
+          f"(a={row['czar_alpha']:.1f} r={row['pearson']:+.4f} DA={row['da']:.3f})")
+
+print(f"\n[5/5] Saving...")
+# For pickling: use the raw booster for prediction (avoids serializing CZAR objective)
+for rank_idx, (cfg, model, row) in enumerate(trained):
+    def _make_predict(m):
+        # Serialize booster to string — avoids pickling czar_loss module
+        _model_str = m.booster_.model_to_string()
+        _feature_cols, _tickers = feature_cols[:], TICKERS[:]  # copies, so later edits to the globals cannot affect the closure
+        _eng_fn, _wf = engineer_directional_features, workflow
+        def predict(nonce=None):  # returns the predicted price as a float; `nonce` is accepted but unused
+            import lightgbm as lgb
+            import numpy as np
+            booster = lgb.Booster(model_str=_model_str)
+            live_row = _wf.get_live_features(ticker=_tickers[0])
+            if live_row is None or len(live_row) == 0:
+                raise ValueError("No live features")
+            live_eng = _eng_fn(live_row.iloc[0])
+            current_price = float(live_row.attrs.get("current_price", float("nan")))
+            if not np.isfinite(current_price) or current_price <= 0:  # fall back to the live snapshot close
+                snap = _wf._dm.get_live_snapshot(_tickers)
+                if snap is not None and len(snap) > 0:
+                    current_price = float(snap["close"].iloc[-1])
+            if not np.isfinite(current_price) or current_price <= 0:  # both sources failed: raise rather than return NaN
+                raise ValueError(f"Invalid current price for inference: {current_price}")
+            log_ret = booster.predict(live_eng[_feature_cols].values.reshape(1, -1))[0]
+            return float(current_price * np.exp(log_ret))  # convert the predicted log return into a price
+        return predict
+
+    fn = _make_predict(model)
+    pkl = f"predict_42_czar_rank{rank_idx+1}.pkl"
+    try:
+        price = fn()
+        print(f"   Model {rank_idx+1} (#{cfg}): ${price:,.2f} → {pkl}")
+    except Exception as e:
+        print(f"   Model {rank_idx+1} (#{cfg}): FAILED ({e}) → {pkl}")
+    with open(pkl, "wb") as f:
+        cloudpickle.dump(fn, f)
+
+print("\n" + "=" * 70)
+print("COMPLETE!")
+print("=" * 70)
diff --git a/notebooks/testnet/topic_57_sol_8h_logreturn/example.py b/notebooks/testnet/topic_57_sol_8h_logreturn/example.py
new file mode 100644
index 0000000..5620a5e
--- /dev/null
+++ b/notebooks/testnet/topic_57_sol_8h_logreturn/example.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 57 SOL/USD Log-Return Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 8-hour SOL/USD log-return prediction using the
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["solusd"]
+DAYS_OF_HISTORY = 500
+INTERVAL = "5m"
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 288  # Number of 5-minute bars for input features (288 x 5m = 24h)
+TARGET_BARS = 96           # Predict 96 bars ahead (96 x 5m = 8h)
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 57 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas/datetime values (also inside dicts, lists and tuples) into JSON-serializable Python types."""
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()  # numpy scalar -> native Python number
+    if isinstance(obj, (np.bool_,)):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}  # values are converted; keys are left as-is
+    if isinstance(obj, (list, tuple)):  # tuples are returned as lists, which is how json encodes them anyway
+        return [_to_serializable(v) for v in obj]
+    return obj  # anything else is passed through unchanged
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Write config.json, metrics.json, predictions.csv, a pred-vs-target scatter PNG and report.txt into run_dir; return a dict of key artifact paths."""
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config
+    config = {  # snapshot of the module-level experiment constants plus the winning hyper-parameters
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),  # only the count is stored, not the column names
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics
+    metrics_payload = {  # score/grade/num_passed are required keys; the rest are optional
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table
+    export_df = df_eval.copy()  # work on a copy so the caller's frame is not modified
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values  # positional assignment; assumes same row order as df_eval — TODO confirm
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]  # keep only the columns that exist
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()  # rows without a prediction are dropped
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)  # y = x reference line
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()  # close the figure once it has been saved
+
+    # 5) Human-readable report
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 57 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")  # the denominator 7 is hard-coded
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {  # config.json, metrics.json and report.txt also live in run_dir but are not listed individually
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Return a pd.Series of log-return features computed from one row's close columns (same row only, so no leakage across rows)."""
+    # NOTE: Base features are already normalized (z-scored) by the workflow
+    closes = np.array([row[f'feature_close_{i}'] for i in range(NUMBER_OF_INPUT_BARS)])
+    
+    # Log returns over 1/6/12/24-bar lookbacks. NOTE(review): keys say "h", but INTERVAL is "5m" in this script, so these span 5/30/60/120 minutes — confirm intent
+    returns = {}
+    returns['log_return_1h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-2] + 1e-8) if NUMBER_OF_INPUT_BARS >= 2 else 0  # 1 bar back
+    returns['log_return_6h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-7] + 1e-8) if NUMBER_OF_INPUT_BARS >= 7 else 0  # 6 bars back
+    returns['log_return_12h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-13] + 1e-8) if NUMBER_OF_INPUT_BARS >= 13 else 0  # 12 bars back
+    returns['log_return_24h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-25] + 1e-8) if NUMBER_OF_INPUT_BARS >= 25 else 0  # 24 bars back
+    
+    return pd.Series(returns)
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            
+            # Train once with max trees, evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))
+            
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all['pred'] = np.nan
+                
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+                
+                # Evaluate
+                valid_mask = ~df_all['pred'].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, 'target'],
+                    y_pred=df_all.loc[valid_mask, 'pred']
+                )
+                
+                # Store results
+                results.append({
+                    'config_num': config_num,
+                    'n_estimators': n_est,
+                    'learning_rate': lr,
+                    'max_depth': depth,
+                    'num_leaves': leaves,
+                    'predictions': df_all['pred'].copy(),
+                    **metrics
+                })
+                
+                print(f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, d={depth}, l={leaves:2d} -> "
+                      f"{metrics['num_passed']}/7 ({metrics['score']:.1%} - {metrics['grade']})")
+
+# Analyze results
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+results_df = results_df.sort_values(['num_passed', 'score'], ascending=[False, False])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print(f"\n   Top 5 models:")
+top5_cols = ['config_num', 'n_estimators', 'learning_rate', 'max_depth', 'num_leaves', 'num_passed', 'score']
+print(results_df[top5_cols].head().to_string(index=False))
+
+# Select best model
+best_result = results[results_df.iloc[0]['config_num'] - 1]
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(f"   {best_result['num_passed']}/7 points ({best_result['score']:.1%}) | "
+      f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, d={best_params['max_depth']}, l={best_params['num_leaves']}")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params['n_estimators'],
+    learning_rate=best_params['learning_rate'],
+    max_depth=best_params['max_depth'],
+    num_leaves=best_params['num_leaves'],
+    random_state=42,
+    verbose=-1
+)
+final_model.fit(df_all[feature_cols], df_all['target'])
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict the SOL/USD log return over the next TARGET_BARS bars (8 hours).
+    
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+    
+    Returns:
+        float: Predicted log return (not a price)
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+    
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+    
+    # Get current price from live feature context (remote-only path)
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        # Fallback to live snapshot (still remote API; no local parquet)
+        snap = workflow._dm.get_live_snapshot(TICKERS)
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+    
+    # Predict log return
+    predicted_log_return = final_model.predict(live_features[feature_cols].values.reshape(1, -1))[0]
+    
+    # Log-return topic: the predicted log return is returned directly, with no conversion to a price.
+    # current_price is validated above but does not enter the returned value.
+    
+    print(f"\nLive Prediction: {predicted_log_return:+.6f} ({predicted_log_return:+.4f} log return)")
+    
+    return float(predicted_log_return)
+
+# Test and save
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+
+with open("predict_57.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {best_result['num_passed']}/7 points ({best_result['score']:.1%})")
+print("Saved to predict_57.pkl")
+print(f"Run artifacts: {artifacts['run_dir']}")
+print(f"- Predictions: {artifacts['predictions_csv']}")
+print(f"- Scatter plot: {artifacts['scatter_png']}")
+print("="*80)
+print("\nDeploy: python deploy_worker.py")
+
diff --git a/notebooks/testnet/topic_61_btc_24h_logreturn/example.py b/notebooks/testnet/topic_61_btc_24h_logreturn/example.py
new file mode 100644
index 0000000..405aae9
--- /dev/null
+++ b/notebooks/testnet/topic_61_btc_24h_logreturn/example.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 61 BTC/USD Log-Return Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 24-hour BTC/USD log-return prediction using the
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["btcusd"]  # predict() requests live features for TICKERS[0]
+DAYS_OF_HISTORY = 500  # backfill window: start_date = now(UTC) - this many days
+INTERVAL = "1h"  # bar size passed to AlloraMLWorkflow
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 48  # Number of hourly bars for input features
+TARGET_BARS = 24           # Predict 24 bars (hours) ahead; also used as the CV gap
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold; presumably far above the row count for 500 days of hourly bars, i.e. effectively uncapped
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]  # tree counts scored from each fitted model via predict(num_iteration=...)
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]  # NOTE(review): a depth-d tree has at most 2**d leaves, so at max_depth=3 all three values behave alike — confirm the grid is intended
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 61 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas values (including those nested in dicts and lists) into JSON-serializable Python types; unrecognised objects are returned unchanged."""
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()  # numpy scalar -> native Python number
+    if isinstance(obj, (np.bool_,)):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}  # only values are converted; keys are kept as-is
+    if isinstance(obj, list):
+        return [_to_serializable(v) for v in obj]  # only list is walked; a tuple falls through to the final return
+    return obj  # anything else is handed back untouched
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Write config.json, metrics.json, predictions.csv, a pred-vs-target scatter PNG and report.txt into run_dir; return a dict with run_dir and the CSV/PNG paths."""
+    os.makedirs(run_dir, exist_ok=True)  # an existing directory is reused rather than raising
+
+    # 1) Run config: the module-level experiment constants plus the winning hyper-parameters
+    config = {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics: score/grade/num_passed must be present; the remaining keys fall back to defaults
+    metrics_payload = {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table: work on a copy so the caller's frame is not modified
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values  # positional assignment: assumes same length/row order as df_eval — TODO confirm
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()  # keep only rows that actually have a prediction
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x line spanning the joint min/max
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 61 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")  # NOTE(review): "/7" is hard-coded although num_primary_metrics is read above
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))  # key file is looked up two directories above this script's folder
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")  # "*5" presumably means five base series per bar — TODO confirm against the workflow
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:  # deliberate best-effort: the failure is reported and the script continues
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:  # re-raised below with remediation hints, chained to the original error
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Return a pd.Series of 1h/6h/12h/24h log-return features computed only from this row's feature_close_* columns (no cross-row leakage)."""
+    # NOTE: Base features are stated to be normalized (z-scored) by the workflow. NOTE(review): z-scores can be <= 0, where np.log(x + 1e-8) is NaN — confirm the scaling.
+    closes = np.array([row[f'feature_close_{i}'] for i in range(NUMBER_OF_INPUT_BARS)])  # the code below treats index -1 as the latest bar — TODO confirm ordering
+    
+    # Log returns over different time horizons; 1e-8 offsets the log argument, and a horizon is 0 when there are too few input bars
+    returns = {}
+    returns['log_return_1h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-2] + 1e-8) if NUMBER_OF_INPUT_BARS >= 2 else 0
+    returns['log_return_6h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-7] + 1e-8) if NUMBER_OF_INPUT_BARS >= 7 else 0
+    returns['log_return_12h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-13] + 1e-8) if NUMBER_OF_INPUT_BARS >= 13 else 0
+    returns['log_return_24h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-25] + 1e-8) if NUMBER_OF_INPUT_BARS >= 25 else 0
+    
+    return pd.Series(returns)
+
+# Get base features: every column whose name starts with "feature_"
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering row by row (axis=1)
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])  # index is not reset afterwards; later code addresses rows positionally via iloc
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation; gap=TARGET_BARS skips that many rows between each train and test block (the "embargo" printed below)
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            
+            # Train once with max trees (one model per CV fold), evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))  # keep each fold's test positions next to its model
+            
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all['pred'] = np.nan  # reset so rows outside every test fold stay NaN
+                
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+                
+                # Evaluate only the rows that received a prediction, pooled across folds
+                valid_mask = ~df_all['pred'].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, 'target'],
+                    y_pred=df_all.loc[valid_mask, 'pred']
+                )
+                
+                # Store results
+                results.append({
+                    'config_num': config_num,
+                    'n_estimators': n_est,
+                    'learning_rate': lr,
+                    'max_depth': depth,
+                    'num_leaves': leaves,
+                    'predictions': df_all['pred'].copy(),  # snapshot: df_all['pred'] is overwritten at the next checkpoint
+                    **metrics
+                })
+                
+                print(f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, d={depth}, l={leaves:2d} -> "
+                      f"{metrics['num_passed']}/7 ({metrics['score']:.1%} - {metrics['grade']})")
+
+# Analyze results: tabulate everything except the per-row predictions, best first
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+results_df = results_df.sort_values(['num_passed', 'score'], ascending=[False, False])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print(f"\n   Top 5 models:")
+top5_cols = ['config_num', 'n_estimators', 'learning_rate', 'max_depth', 'num_leaves', 'num_passed', 'score']
+print(results_df[top5_cols].head().to_string(index=False))
+
+# Select best model: config_num is 1-based, so it maps to position config_num - 1 in `results`
+best_result = results[results_df.iloc[0]['config_num'] - 1]  # NOTE(review): list indexing needs an int — confirm iloc[0] does not upcast config_num to float
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(f"   {best_result['num_passed']}/7 points ({best_result['score']:.1%}) | "
+      f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, d={best_params['max_depth']}, l={best_params['num_leaves']}")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)  # runs/<UTC timestamp>/ next to this script
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params['n_estimators'],
+    learning_rate=best_params['learning_rate'],
+    max_depth=best_params['max_depth'],
+    num_leaves=best_params['num_leaves'],
+    random_state=42,
+    verbose=-1
+)
+final_model.fit(df_all[feature_cols], df_all['target'])  # refit on every row, including rows used as CV test folds
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict the BTC/USD log return over the next 24 hourly bars.
+    
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+    
+    Returns:
+        float: Predicted log return (not a USD price); raises ValueError if live features or a valid current price are unavailable
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+    
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+    
+    # Get current price from live feature context (remote-only path). NOTE(review): the value is only validated, never used in the returned log return.
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        # Fallback to live snapshot (still remote API; no local parquet)
+        snap = workflow._dm.get_live_snapshot(TICKERS)  # reaches into the workflow's private _dm attribute
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+    
+    # Predict log return
+    predicted_log_return = final_model.predict(live_features[feature_cols].values.reshape(1, -1))[0]  # select in training column order, as a single-row 2-D array
+    
+    # No price conversion is applied here.
+    # Log-return topic: return the log return directly
+    
+    print(f"\nLive Prediction: {predicted_log_return:+.6f} ({predicted_log_return:+.4f} log return)")
+    
+    return float(predicted_log_return)
+
+# Smoke-test predict() once, then serialize it with cloudpickle
+print("\n🧪 Testing prediction...")
+test_prediction = predict()  # result is not used below; the call just exercises predict() end to end
+
+with open("predict_61.pkl", "wb") as f:  # relative path: written to the current working directory
+    cloudpickle.dump(predict, f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {best_result['num_passed']}/7 points ({best_result['score']:.1%})")
+print("Saved to predict_61.pkl")
+print(f"Run artifacts: {artifacts['run_dir']}")
+print(f"- Predictions: {artifacts['predictions_csv']}")
+print(f"- Scatter plot: {artifacts['scatter_png']}")
+print("="*80)
+print("\nDeploy: python deploy_worker.py")
+
diff --git a/notebooks/testnet/topic_62_sol_24h_logreturn/example.py b/notebooks/testnet/topic_62_sol_24h_logreturn/example.py
new file mode 100644
index 0000000..e96bff5
--- /dev/null
+++ b/notebooks/testnet/topic_62_sol_24h_logreturn/example.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 62 SOL/USD Log-Return Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 24-hour SOL/USD log-return prediction using the
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["solusd"]  # predict() requests live features for TICKERS[0]
+DAYS_OF_HISTORY = 500  # backfill window: start_date = now(UTC) - this many days
+INTERVAL = "1h"  # bar size passed to AlloraMLWorkflow
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 48  # Number of hourly bars for input features
+TARGET_BARS = 24           # Predict 24 bars (hours) ahead; also used as the CV gap
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold; presumably far above the row count for 500 days of hourly bars, i.e. effectively uncapped
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]  # tree counts scored from each fitted model via predict(num_iteration=...)
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]  # NOTE(review): a depth-d tree has at most 2**d leaves, so at max_depth=3 all three values behave alike — confirm the grid is intended
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 62 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas values (including those nested in dicts and lists) into JSON-serializable Python types; unrecognised objects are returned unchanged."""
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()  # numpy scalar -> native Python number
+    if isinstance(obj, (np.bool_,)):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}  # only values are converted; keys are kept as-is
+    if isinstance(obj, list):
+        return [_to_serializable(v) for v in obj]  # only list is walked; a tuple falls through to the final return
+    return obj  # anything else is handed back untouched
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Write config.json, metrics.json, predictions.csv, a pred-vs-target scatter PNG and report.txt into run_dir; return a dict with run_dir and the CSV/PNG paths."""
+    os.makedirs(run_dir, exist_ok=True)  # an existing directory is reused rather than raising
+
+    # 1) Run config: the module-level experiment constants plus the winning hyper-parameters
+    config = {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics: score/grade/num_passed must be present; the remaining keys fall back to defaults
+    metrics_payload = {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table: work on a copy so the caller's frame is not modified
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values  # positional assignment: assumes same length/row order as df_eval — TODO confirm
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()  # keep only rows that actually have a prediction
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x line spanning the joint min/max
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 62 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")  # NOTE(review): "/7" is hard-coded although num_primary_metrics is read above
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))  # key file is looked up two directories above this script's folder
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")  # "*5" presumably means five base series per bar — TODO confirm against the workflow
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:  # deliberate best-effort: the failure is reported and the script continues
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:  # re-raised below with remediation hints, chained to the original error
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Return a pd.Series of 1h/6h/12h/24h log-return features computed only from this row's feature_close_* columns (no cross-row leakage)."""
+    # NOTE: Base features are stated to be normalized (z-scored) by the workflow. NOTE(review): z-scores can be <= 0, where np.log(x + 1e-8) is NaN — confirm the scaling.
+    closes = np.array([row[f'feature_close_{i}'] for i in range(NUMBER_OF_INPUT_BARS)])  # the code below treats index -1 as the latest bar — TODO confirm ordering
+    
+    # Log returns over different time horizons; 1e-8 offsets the log argument, and a horizon is 0 when there are too few input bars
+    returns = {}
+    returns['log_return_1h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-2] + 1e-8) if NUMBER_OF_INPUT_BARS >= 2 else 0
+    returns['log_return_6h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-7] + 1e-8) if NUMBER_OF_INPUT_BARS >= 7 else 0
+    returns['log_return_12h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-13] + 1e-8) if NUMBER_OF_INPUT_BARS >= 13 else 0
+    returns['log_return_24h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-25] + 1e-8) if NUMBER_OF_INPUT_BARS >= 25 else 0
+    
+    return pd.Series(returns)
+
+# Get base features: every column whose name starts with "feature_"
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering row by row (axis=1)
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])  # index is not reset afterwards; later code addresses rows positionally via iloc
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation; gap=TARGET_BARS skips that many rows between each train and test block (the "embargo" printed below)
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            
+            # Train once with max trees (one model per CV fold), evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))  # keep each fold's test positions next to its model
+            
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all['pred'] = np.nan  # reset so rows outside every test fold stay NaN
+                
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+                
+                # Evaluate only the rows that received a prediction, pooled across folds
+                valid_mask = ~df_all['pred'].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, 'target'],
+                    y_pred=df_all.loc[valid_mask, 'pred']
+                )
+                
+                # Store results
+                results.append({
+                    'config_num': config_num,
+                    'n_estimators': n_est,
+                    'learning_rate': lr,
+                    'max_depth': depth,
+                    'num_leaves': leaves,
+                    'predictions': df_all['pred'].copy(),  # snapshot: df_all['pred'] is overwritten at the next checkpoint
+                    **metrics
+                })
+                
+                print(f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, d={depth}, l={leaves:2d} -> "
+                      f"{metrics['num_passed']}/7 ({metrics['score']:.1%} - {metrics['grade']})")
+
+# Analyze results: tabulate everything except the per-row predictions, best first
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+results_df = results_df.sort_values(['num_passed', 'score'], ascending=[False, False])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print(f"\n   Top 5 models:")
+top5_cols = ['config_num', 'n_estimators', 'learning_rate', 'max_depth', 'num_leaves', 'num_passed', 'score']
+print(results_df[top5_cols].head().to_string(index=False))
+
+# Select best model: config_num is 1-based, so it maps to position config_num - 1 in `results`
+best_result = results[results_df.iloc[0]['config_num'] - 1]  # NOTE(review): list indexing needs an int — confirm iloc[0] does not upcast config_num to float
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(f"   {best_result['num_passed']}/7 points ({best_result['score']:.1%}) | "
+      f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, d={best_params['max_depth']}, l={best_params['num_leaves']}")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)  # runs/<UTC timestamp>/ next to this script
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params['n_estimators'],
+    learning_rate=best_params['learning_rate'],
+    max_depth=best_params['max_depth'],
+    num_leaves=best_params['num_leaves'],
+    random_state=42,
+    verbose=-1
+)
+final_model.fit(df_all[feature_cols], df_all['target'])  # refit on every row, including rows used as CV test folds
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict the SOL/USD log return over the next 24 hourly bars.
+    
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+    
+    Returns:
+        float: Predicted log return (not a USD price); raises ValueError if live features or a valid current price are unavailable
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+    
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+    
+    # Get current price from live feature context (remote-only path). NOTE(review): the value is only validated, never used in the returned log return.
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        # Fallback to live snapshot (still remote API; no local parquet)
+        snap = workflow._dm.get_live_snapshot(TICKERS)  # reaches into the workflow's private _dm attribute
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+    
+    # Predict log return
+    predicted_log_return = final_model.predict(live_features[feature_cols].values.reshape(1, -1))[0]  # select in training column order, as a single-row 2-D array
+    
+    # No price conversion is applied here.
+    # Log-return topic: return the log return directly
+    
+    print(f"\nLive Prediction: {predicted_log_return:+.6f} ({predicted_log_return:+.4f} log return)")
+    
+    return float(predicted_log_return)
+
+# Smoke-test predict() once, then serialize it with cloudpickle
+print("\n🧪 Testing prediction...")
+test_prediction = predict()  # result is not used below; the call just exercises predict() end to end
+
+with open("predict_62.pkl", "wb") as f:  # relative path: written to the current working directory
+    cloudpickle.dump(predict, f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {best_result['num_passed']}/7 points ({best_result['score']:.1%})")
+print("Saved to predict_62.pkl")
+print(f"Run artifacts: {artifacts['run_dir']}")
+print(f"- Predictions: {artifacts['predictions_csv']}")
+print(f"- Scatter plot: {artifacts['scatter_png']}")
+print("="*80)
+print("\nDeploy: python deploy_worker.py")
+
diff --git a/notebooks/testnet/topic_63_eth_24h_logreturn/example.py b/notebooks/testnet/topic_63_eth_24h_logreturn/example.py
new file mode 100644
index 0000000..f2fa820
--- /dev/null
+++ b/notebooks/testnet/topic_63_eth_24h_logreturn/example.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 63 ETH/USD Log-Return Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 24-hour ETH/USD log-return prediction using the
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["ethusd"]
+DAYS_OF_HISTORY = 500
+INTERVAL = "1h"
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 48  # Number of hourly bars for input features
+TARGET_BARS = 24           # Predict 24 bars (hours) ahead
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 63 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas values into JSON-serializable Python types.
+
+    Walks dict/list/tuple/ndarray/Series containers; other objects pass through."""
+    if isinstance(obj, (np.floating, np.integer, np.bool_)):
+        return obj.item()  # numpy scalar -> builtin float/int/bool
+    if isinstance(obj, (np.ndarray, pd.Series)):
+        return _to_serializable(obj.tolist())  # recurse: elements may need converting
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [_to_serializable(v) for v in obj]
+    return obj
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Persist config/metrics/predictions, a scatter plot and a text report in run_dir.
+
+    df_eval supplies the "open_time"/"target"/"pred" columns (those present);
+    best_result is the winning grid-search record, whose "predictions" (if any)
+    override "pred". Returns the run_dir, predictions CSV and scatter PNG paths.
+    """
+    os.makedirs(run_dir, exist_ok=True)
+
+    def _dump_json(filename, payload):
+        with open(os.path.join(run_dir, filename), "w") as f:
+            json.dump(_to_serializable(payload), f, indent=2)
+
+    # 1) Run config
+    _dump_json("config.json", {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    })
+
+    # 2) Metrics
+    _dump_json("metrics.json", {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    })
+
+    # 3) Predictions table. Select the export columns *before* copying: the frame
+    #    passed by the caller also carries the feature columns, which are not exported.
+    wanted = ["open_time", "target", "pred"]
+    preds_df = df_eval[[c for c in wanted if c in df_eval.columns]].copy()
+    if "predictions" in best_result:
+        preds_df["pred"] = best_result["predictions"].values
+    preds_df = preds_df.dropna(subset=["pred"])
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x reference line
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 63 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {"run_dir": run_dir, "predictions_csv": preds_csv_path, "scatter_png": scatter_path}
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Derive trailing log-return features from one row's feature_close_* values.
+
+    Each horizon is log(last close) - log(close `lag` bars earlier), with a 1e-8
+    offset inside the log; a horizon needing more bars than configured yields 0.
+    """
+    # NOTE: Base features are already normalized (z-scored) by the workflow
+    closes = np.array([row[f'feature_close_{i}'] for i in range(NUMBER_OF_INPUT_BARS)])
+    features = {}
+    for label, lag in (('1h', 1), ('6h', 6), ('12h', 12), ('24h', 24)):
+        enough_bars = NUMBER_OF_INPUT_BARS >= lag + 1
+        features[f'log_return_{label}'] = np.log(closes[-1] + 1e-8) - np.log(closes[-1 - lag] + 1e-8) if enough_bars else 0
+    return pd.Series(features)
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            
+            # Train once with max trees, evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))
+            
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all['pred'] = np.nan
+                
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+                
+                # Evaluate
+                valid_mask = ~df_all['pred'].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, 'target'],
+                    y_pred=df_all.loc[valid_mask, 'pred']
+                )
+                
+                # Store results
+                results.append({
+                    'config_num': config_num,
+                    'n_estimators': n_est,
+                    'learning_rate': lr,
+                    'max_depth': depth,
+                    'num_leaves': leaves,
+                    'predictions': df_all['pred'].copy(),
+                    **metrics
+                })
+                
+                print(f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, d={depth}, l={leaves:2d} -> "
+                      f"{metrics['num_passed']}/7 ({metrics['score']:.1%} - {metrics['grade']})")
+
+# Analyze results
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+results_df = results_df.sort_values(['num_passed', 'score'], ascending=[False, False])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print(f"\n   Top 5 models:")
+top5_cols = ['config_num', 'n_estimators', 'learning_rate', 'max_depth', 'num_leaves', 'num_passed', 'score']
+print(results_df[top5_cols].head().to_string(index=False))
+
+# Select best model (read config_num from its column so it stays an integer list index)
+best_result = results[int(results_df['config_num'].iloc[0]) - 1]
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(f"   {best_result['num_passed']}/7 points ({best_result['score']:.1%}) | "
+      f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, d={best_params['max_depth']}, l={best_params['num_leaves']}")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params['n_estimators'],
+    learning_rate=best_params['learning_rate'],
+    max_depth=best_params['max_depth'],
+    num_leaves=best_params['num_leaves'],
+    random_state=42,
+    verbose=-1
+)
+final_model.fit(df_all[feature_cols], df_all['target'])
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict the ETH/USD log return over the next 24 hours (TARGET_BARS bars).
+
+    This is a log-return topic: the raw model output is returned directly, so
+    no current price is needed and none is fetched.
+
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+
+    Returns:
+        float: Predicted log return (not a price)
+
+    Raises:
+        ValueError: If the workflow returns no live features.
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+
+    # NOTE: there is deliberately no price lookup or price sanity check here.
+    # The output is a log return, so a failed price fetch must not block an
+    # otherwise valid inference.
+
+    # Order the values like the training columns and shape them as one sample
+    model_input = live_features[feature_cols].values.reshape(1, -1)
+
+    # Predict log return
+    predicted_log_return = final_model.predict(model_input)[0]
+
+    # Log-return topic: return the log return directly
+    print(f"\nLive Prediction: {predicted_log_return:+.6f} ({predicted_log_return:+.4f} log return)")
+
+    return float(predicted_log_return)
+
+# Test and save
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+
+with open("predict_63.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {best_result['num_passed']}/7 points ({best_result['score']:.1%})")
+print("Saved to predict_63.pkl")
+print(f"Run artifacts: {artifacts['run_dir']}")
+print(f"- Predictions: {artifacts['predictions_csv']}")
+print(f"- Scatter plot: {artifacts['scatter_png']}")
+print("="*80)
+print("\nDeploy: python deploy_worker.py")
+
diff --git a/notebooks/testnet/topic_71_near_8h_logreturn/example.py b/notebooks/testnet/topic_71_near_8h_logreturn/example.py
new file mode 100644
index 0000000..c86ba63
--- /dev/null
+++ b/notebooks/testnet/topic_71_near_8h_logreturn/example.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 71 NEAR/USD Log-Return Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 8-hour NEAR/USD log-return prediction using the
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["nearusd"]
+DAYS_OF_HISTORY = 500
+INTERVAL = "5m"
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 288  # Number of 5-minute bars for input features (288 x 5m = 24h)
+TARGET_BARS = 96           # Predict 96 bars ahead (96 x 5m = 8h)
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 71 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas values into JSON-serializable Python types.
+
+    Walks dict/list/tuple/ndarray/Series containers; other objects pass through."""
+    if isinstance(obj, (np.floating, np.integer, np.bool_)):
+        return obj.item()  # numpy scalar -> builtin float/int/bool
+    if isinstance(obj, (np.ndarray, pd.Series)):
+        return _to_serializable(obj.tolist())  # recurse: elements may need converting
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [_to_serializable(v) for v in obj]
+    return obj
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Persist config/metrics/predictions, a scatter plot and a text report in run_dir.
+
+    df_eval supplies the "open_time"/"target"/"pred" columns (those present);
+    best_result is the winning grid-search record, whose "predictions" (if any)
+    override "pred". Returns the run_dir, predictions CSV and scatter PNG paths.
+    """
+    os.makedirs(run_dir, exist_ok=True)
+
+    def _dump_json(filename, payload):
+        with open(os.path.join(run_dir, filename), "w") as f:
+            json.dump(_to_serializable(payload), f, indent=2)
+
+    # 1) Run config
+    _dump_json("config.json", {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    })
+
+    # 2) Metrics
+    _dump_json("metrics.json", {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    })
+
+    # 3) Predictions table. Select the export columns *before* copying: the frame
+    #    passed by the caller also carries the feature columns, which are not exported.
+    wanted = ["open_time", "target", "pred"]
+    preds_df = df_eval[[c for c in wanted if c in df_eval.columns]].copy()
+    if "predictions" in best_result:
+        preds_df["pred"] = best_result["predictions"].values
+    preds_df = preds_df.dropna(subset=["pred"])
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x reference line
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 71 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {"run_dir": run_dir, "predictions_csv": preds_csv_path, "scatter_png": scatter_path}
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Derive trailing log-return features from one row's feature_close_* values.
+
+    Keys are named by lag in bars (log_return_<n>bar) rather than in hours: with INTERVAL
+    = "5m", lags of 1/6/12/24 bars presumably span 5m/30m/1h/2h, not 1h-24h (confirm).
+    """
+    # NOTE: Base features are already normalized (z-scored) by the workflow
+    closes = np.array([row[f'feature_close_{i}'] for i in range(NUMBER_OF_INPUT_BARS)])
+    features = {}
+    for lag in (1, 6, 12, 24):
+        enough_bars = NUMBER_OF_INPUT_BARS >= lag + 1  # a lag needing more bars yields 0
+        features[f'log_return_{lag}bar'] = np.log(closes[-1] + 1e-8) - np.log(closes[-1 - lag] + 1e-8) if enough_bars else 0
+    return pd.Series(features)
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            
+            # Train once with max trees, evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))
+            
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all['pred'] = np.nan
+                
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+                
+                # Evaluate
+                valid_mask = ~df_all['pred'].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, 'target'],
+                    y_pred=df_all.loc[valid_mask, 'pred']
+                )
+                
+                # Store results
+                results.append({
+                    'config_num': config_num,
+                    'n_estimators': n_est,
+                    'learning_rate': lr,
+                    'max_depth': depth,
+                    'num_leaves': leaves,
+                    'predictions': df_all['pred'].copy(),
+                    **metrics
+                })
+                
+                print(f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, d={depth}, l={leaves:2d} -> "
+                      f"{metrics['num_passed']}/7 ({metrics['score']:.1%} - {metrics['grade']})")
+
+# Analyze results
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+results_df = results_df.sort_values(['num_passed', 'score'], ascending=[False, False])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print(f"\n   Top 5 models:")
+top5_cols = ['config_num', 'n_estimators', 'learning_rate', 'max_depth', 'num_leaves', 'num_passed', 'score']
+print(results_df[top5_cols].head().to_string(index=False))
+
+# Select best model (read config_num from its column so it stays an integer list index)
+best_result = results[int(results_df['config_num'].iloc[0]) - 1]
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(f"   {best_result['num_passed']}/7 points ({best_result['score']:.1%}) | "
+      f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, d={best_params['max_depth']}, l={best_params['num_leaves']}")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params['n_estimators'],
+    learning_rate=best_params['learning_rate'],
+    max_depth=best_params['max_depth'],
+    num_leaves=best_params['num_leaves'],
+    random_state=42,
+    verbose=-1
+)
+final_model.fit(df_all[feature_cols], df_all['target'])
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict the NEAR/USD log return over the next 8 hours (TARGET_BARS bars).
+
+    This is a log-return topic: the raw model output is returned directly, so
+    no current price is needed and none is fetched.
+
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+
+    Returns:
+        float: Predicted log return (not a price)
+
+    Raises:
+        ValueError: If the workflow returns no live features.
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+
+    # NOTE: there is deliberately no price lookup or price sanity check here.
+    # The output is a log return, so a failed price fetch must not block an
+    # otherwise valid inference.
+
+    # Order the values like the training columns and shape them as one sample
+    model_input = live_features[feature_cols].values.reshape(1, -1)
+
+    # Predict log return
+    predicted_log_return = final_model.predict(model_input)[0]
+
+    # Log-return topic: return the log return directly
+    print(f"\nLive Prediction: {predicted_log_return:+.6f} ({predicted_log_return:+.4f} log return)")
+
+    return float(predicted_log_return)
+
+# Test and save
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+
+with open("predict_71.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {best_result['num_passed']}/7 points ({best_result['score']:.1%})")
+print("Saved to predict_71.pkl")
+print(f"Run artifacts: {artifacts['run_dir']}")
+print(f"- Predictions: {artifacts['predictions_csv']}")
+print(f"- Scatter plot: {artifacts['scatter_png']}")
+print("="*80)
+print("\nDeploy: python deploy_worker.py")
+
diff --git a/notebooks/testnet/topic_79_btc_vol/example_topic_79_btc_volatility_walkthrough.py b/notebooks/testnet/topic_79_btc_vol/example_topic_79_btc_volatility_walkthrough.py
new file mode 100644
index 0000000..0323059
--- /dev/null
+++ b/notebooks/testnet/topic_79_btc_vol/example_topic_79_btc_volatility_walkthrough.py
@@ -0,0 +1,547 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 79 BTC/USD 15-Minute Volatility Prediction
+================================================================================
+
+This walkthrough demonstrates 15-minute realised volatility prediction for
+BTC/USD using the Allora ML Workflow Kit with base features and LightGBM.
+
+Target definition:
+    The standard deviation of consecutive 1-minute log returns over the next
+    15 minutes.  Formally, for each timestamp t:
+
+        r_i = log(close[t+i] / close[t+i-1])   for i in 1..15
+        target[t] = std(r_1, r_2, ..., r_15)
+
+    This matches the ground-truth definition used by the Allora volatility
+    reputer (allora-reputer-volatility-prediction).
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+from scipy.stats import pearsonr, spearmanr
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 60  # Backfill window in days, counted back from "now" (UTC)
+INTERVAL = "1m"  # 1-minute base interval for volatility
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 15  # 15 minutes of 1-minute bars for input features
+TARGET_BARS = 15           # 15-minute volatility horizon
+
+# Target type: volatility (std of 1-min log returns over the horizon)
+TARGET_TYPE = "volatility"
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Per-fold training cap; far above 60 days of 1-min bars, so effectively unlimited
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]  # Tree counts scored via predict(num_iteration=...)
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]  # NOTE(review): max_depth=d allows at most 2**d leaves, so at depth 3 (8 leaves) all three values are equivalent
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("=" * 80)
+print("Allora Forge Builder Kit v3.0 - Topic 79 Walkthrough")
+print("BTC/USD 15-Minute Volatility Prediction")
+print("=" * 80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas/datetime values (in dicts, lists, tuples) to JSON-serializable types."""
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()
+    if isinstance(obj, (np.bool_,)):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):  # tuples become lists, exactly as json.dump would emit them
+        return [_to_serializable(v) for v in obj]
+    return obj
+
+
+# =============================================================================
+# VOLATILITY-SPECIFIC METRICS
+# =============================================================================
+def vol_metrics(y_true, y_pred):
+    """
+    Compute volatility-specific evaluation metrics.
+
+    These replace the standard log-return metrics (DA, CZAR) which are not
+    meaningful for volatility prediction.  R2 / rel_mae are NaN if undefined.
+    """
+    y_true = np.asarray(y_true, dtype=float)
+    y_pred = np.asarray(y_pred, dtype=float)
+    r, _ = pearsonr(y_true, y_pred)
+    rho, _ = spearmanr(y_true, y_pred)
+    resid = y_true - y_pred
+    rmse = np.sqrt(np.mean(resid ** 2))
+    mae = np.mean(np.abs(resid))
+    ss_res = np.sum(resid ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float("nan")  # constant target
+    rel_mae = mae / np.mean(y_true) if np.mean(y_true) != 0 else float("nan")
+    # QLIKE needs both values > 0: one zero target gives log(0) = -inf and an infinite mean
+    mask = (y_pred > 0) & (y_true > 0)
+    if mask.any():
+        ratio = y_true[mask] / y_pred[mask]
+        qlike = np.mean(ratio - np.log(ratio) - 1)
+    else:
+        qlike = float("inf")
+    return {
+        "pearson_r": r,
+        "spearman_rho": rho,
+        "r2": r2,
+        "rmse": rmse,
+        "mae": mae,
+        "rel_mae": rel_mae,
+        "qlike": qlike,
+    }
+
+
+def print_vol_metrics(metrics, label=""):
+    """Print a volatility metrics dict as a ruled text block, with an optional heading."""
+    rule = f"  {'─' * 50}"
+    print(f"\n{rule}")
+    if label:
+        print(f"  {label}\n{rule}")
+    print(f"  Pearson r:   {metrics['pearson_r']:.4f}")
+    print(f"  Spearman ρ:  {metrics['spearman_rho']:.4f}")
+    print(f"  R²:          {metrics['r2']:.4f}")
+    print(f"  RMSE:        {metrics['rmse']:.6f}")
+    print(f"  MAE:         {metrics['mae']:.6f}")
+    print(f"  Rel MAE:     {metrics['rel_mae']*100:.2f}%")
+    print(f"  QLIKE:       {metrics['qlike']:.6f}")
+    print(rule)
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Write config.json, metrics.json, predictions.csv, a scatter PNG and report.txt into run_dir; return key paths."""
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config: module-level experiment constants plus the selected hyper-parameters
+    config = {
+        "topic_id": 79,
+        "target_type": TARGET_TYPE,
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics: everything in best_result except the bulky per-row predictions
+    metrics_payload = {k: v for k, v in best_result.items() if k != "predictions"}
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table: "pred" is assigned positionally, so lengths must match df_eval
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x reference line
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (realised volatility)")
+    plt.ylabel("Prediction (realised volatility)")
+    plt.title("Predictions vs Target — 15-min BTC Volatility")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report: best params plus whichever volatility metrics are present
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 79 Run Report\n")
+        f.write("BTC/USD 15-Minute Volatility Prediction\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Volatility Metrics:\n")
+        for key in ["pearson_r", "spearman_rho", "r2", "rmse", "mae", "rel_mae", "qlike"]:
+            if key in best_result:
+                f.write(f"  {key}: {best_result[key]:.6f}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+
+print("✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS * 5}")
+print(f"   Target: {TARGET_TYPE} over {TARGET_BARS}-minute horizon")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(
+        start_date=start_date
+    ).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+
+# Feature Engineering: Add volatility-relevant features from the lookback window
+def engineer_vol_features(row):
+    """Engineer volatility features from one row's lookback bars (same row only, so no leakage)."""
+    closes = np.array(
+        [row[f"feature_close_{i}"] for i in range(NUMBER_OF_INPUT_BARS)]
+    )
+    highs = np.array(
+        [row[f"feature_high_{i}"] for i in range(NUMBER_OF_INPUT_BARS)]
+    )
+    lows = np.array(
+        [row[f"feature_low_{i}"] for i in range(NUMBER_OF_INPUT_BARS)]
+    )
+
+    features = {}
+
+    # Realised volatility of the lookback window (std of 1-min log returns)
+    log_returns = np.diff(np.log(closes + 1e-12))
+    features["hist_vol_full"] = np.std(log_returns, ddof=1) if len(log_returns) > 1 else 0.0
+
+    # Short-term vs long-term vol ratio (regime detection)
+    if len(log_returns) >= 5:
+        features["hist_vol_5m"] = np.std(log_returns[-5:], ddof=1)
+        features["vol_ratio_5_full"] = (
+            features["hist_vol_5m"] / (features["hist_vol_full"] + 1e-12)
+        )
+    else:
+        features["hist_vol_5m"] = features["hist_vol_full"]
+        features["vol_ratio_5_full"] = 1.0
+
+    # High-low range (Parkinson-style proxy)
+    hl_range = highs - lows
+    features["hl_range_mean"] = np.mean(hl_range)
+    features["hl_range_recent"] = np.mean(hl_range[-3:]) if len(hl_range) >= 3 else hl_range[-1]
+    features["hl_range_ratio"] = (
+        features["hl_range_recent"] / (features["hl_range_mean"] + 1e-12)
+    )
+
+    # Absolute return; 5m sums the last five 1-min returns (same window as hist_vol_5m)
+    features["abs_return_1m"] = abs(log_returns[-1]) if len(log_returns) > 0 else 0.0
+    features["abs_return_5m"] = abs(np.sum(log_returns[-5:])) if len(log_returns) >= 5 else 0.0
+
+    return pd.Series(features)
+
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith("feature_")]
+
+# Apply feature engineering
+print("   Engineering volatility-predictive features...")
+engineered_features = df_all.apply(engineer_vol_features, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered volatility features
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+
+print(
+    f"✅ Dataset: {len(df_all):,} samples "
+    f"({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})"
+)
+print(
+    f"   Features: {len(base_feature_cols)} base + "
+    f"{len(engineered_features.columns)} vol = {len(feature_cols)} total"
+)
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS,
+    gap=TARGET_BARS,
+    max_train_size=MAX_TRAIN_SIZE,
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx + 1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+
+            # Train once with max trees, evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]["target"]
+
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1,
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))
+
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all["pred"] = np.nan
+
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                # Evaluate with volatility-specific metrics
+                valid_mask = ~df_all["pred"].isna()
+                y_true_cv = df_all.loc[valid_mask, "target"].values
+                y_pred_cv = np.maximum(df_all.loc[valid_mask, "pred"].values, 0)
+                metrics = vol_metrics(y_true_cv, y_pred_cv)
+
+                # Store results
+                results.append(
+                    {
+                        "config_num": config_num,
+                        "n_estimators": n_est,
+                        "learning_rate": lr,
+                        "max_depth": depth,
+                        "num_leaves": leaves,
+                        "predictions": df_all["pred"].copy(),
+                        **metrics,
+                    }
+                )
+
+                print(
+                    f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, "
+                    f"d={depth}, l={leaves:2d} -> "
+                    f"r={metrics['pearson_r']:.4f} R²={metrics['r2']:.4f} "
+                    f"QLIKE={metrics['qlike']:.4f}"
+                )
+
+# Analyze results — rank by R² (primary), then QLIKE (secondary, lower=better)
+results_df = pd.DataFrame(
+    [{k: v for k, v in r.items() if k != "predictions"} for r in results]
+)
+results_df = results_df.sort_values(["r2", "qlike"], ascending=[False, True])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print("\n   Top 5 models:")
+top5_cols = [
+    "config_num",
+    "n_estimators",
+    "learning_rate",
+    "max_depth",
+    "num_leaves",
+    "pearson_r",
+    "r2",
+    "qlike",
+]
+print(results_df[top5_cols].head().to_string(index=False, float_format="%.4f"))
+
+# Select best model
+best_result = results[int(results_df.iloc[0]["config_num"]) - 1]
+best_params = {
+    k: best_result[k]
+    for k in ["n_estimators", "learning_rate", "max_depth", "num_leaves"]
+}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(
+    f"   r={best_result['pearson_r']:.4f} R²={best_result['r2']:.4f} "
+    f"QLIKE={best_result['qlike']:.4f} | "
+    f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, "
+    f"d={best_params['max_depth']}, l={best_params['num_leaves']}"
+)
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print_vol_metrics(best_result, "BEST MODEL — Volatility Metrics")
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model & Create Predict Function
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params["n_estimators"],
+    learning_rate=best_params["learning_rate"],
+    max_depth=best_params["max_depth"],
+    num_leaves=best_params["num_leaves"],
+    random_state=42,
+    verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all["target"])
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict BTC/USD 15-minute realised volatility.
+
+    Submitted to the Allora network for Topic 79; returns the predicted std of
+    1-minute log returns over the next 15 minutes, floored at zero.
+
+    Args:
+        nonce: Block nonce from Allora SDK (unused).
+    Returns:
+        float: Predicted 15-minute realised volatility (>= 0).
+    Raises:
+        ValueError: If the workflow returns no live feature row.
+    """
+    # Get live features from workflow (1-minute bars)
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+
+    # Engineer volatility features from live data (same as training)
+    live_vol_features = engineer_vol_features(live_row.iloc[0])
+
+    # Combine base features + engineered vol features (re-ordered to feature_cols below)
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_vol_features])
+
+    # Predict volatility directly (no price conversion needed)
+    predicted_volatility = final_model.predict(
+        live_features[feature_cols].values.reshape(1, -1)
+    )[0]
+
+    # Volatility must be non-negative
+    predicted_volatility = max(0.0, float(predicted_volatility))
+
+    print(f"\nLive Prediction: {predicted_volatility:.6f} (15-min realised vol)")
+
+    return predicted_volatility
+
+
+# Smoke-test predict() once against live data, then pickle it
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+# NOTE: "predict.pkl" is relative to the current working directory, not this script's folder
+with open("predict.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n" + "=" * 80)
+print("COMPLETE!")
+print("=" * 80)
+print(
+    f"{len(feature_cols)} features | "
+    f"r={best_result['pearson_r']:.4f} | R²={best_result['r2']:.4f} | "
+    f"QLIKE={best_result['qlike']:.4f}"
+)
+print("\nTo deploy this worker:")
+print("  TOPIC_ID=79 python notebooks/deploy_worker_raw.py")
diff --git a/notebooks/testnet/topic_79_btc_vol/topic_79_model_a_deep_lookback.py b/notebooks/testnet/topic_79_btc_vol/topic_79_model_a_deep_lookback.py
new file mode 100644
index 0000000..6cd0519
--- /dev/null
+++ b/notebooks/testnet/topic_79_btc_vol/topic_79_model_a_deep_lookback.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+Topic 79 — Model A: Deep Lookback + Rich Volatility Features
+=============================================================
+
+Strategy: Use a 60-bar (1-hour) lookback window with extensive volatility-
+predictive features including multi-horizon realised vol, return autocorrelation,
+Parkinson/Garman-Klass estimators, and volume-volatility interaction.
+
+Trained on 2+ years of 1-minute BTC/USD data.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 800  # ~2.2 years
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 60  # 1 hour of 1-min bars
+TARGET_BARS = 15
+TARGET_TYPE = "volatility"
+
+N_SPLITS = 5
+N_ESTIMATORS_MAX = 1000
+N_ESTIMATORS_CHECKPOINTS = [200, 500, 800, 1000]
+LEARNING_RATES = [0.01, 0.03]
+MAX_DEPTHS = [5, 7]
+NUM_LEAVES = [31, 63]
+
+print("=" * 80)
+print("Topic 79 — Model A: Deep Lookback (60-bar, 2+ years)")
+print("=" * 80)
+
+# =============================================================================
+# STEP 1: Initialize & Backfill
+# =============================================================================
+print("\n[1/5] Initializing workflow...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+print(f"✅ {NUMBER_OF_INPUT_BARS} bars lookback, {TARGET_BARS}-min vol target")
+
+print(f"\n[2/5] Backfilling {DAYS_OF_HISTORY} days...")
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+workflow.backfill(start=start_date)
+print("✅ Backfill complete")
+
+# =============================================================================
+# STEP 2: Features
+# =============================================================================
+print("\n[3/5] Extracting features...")
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+
+base_feature_cols = [col for col in df_all.columns if col.startswith("feature_")]
+
+
+def engineer_deep_vol_features(row):
+    """Rich volatility features from the NUMBER_OF_INPUT_BARS-bar (60 here) lookback.
+
+    Reads feature_{close,high,low,volume}_i from ``row`` and returns a pd.Series.
+    NOTE(review): assumes bar index i increases with time, so [-k:] is the latest k bars.
+    """
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))
+    abs_rets = np.abs(log_rets)
+    features = {}
+
+    # --- Multi-horizon realised volatility ---
+    features["vol_5m"] = np.std(log_rets[-5:], ddof=1) if len(log_rets) >= 5 else 0.0
+    features["vol_10m"] = np.std(log_rets[-10:], ddof=1) if len(log_rets) >= 10 else 0.0
+    features["vol_15m"] = np.std(log_rets[-15:], ddof=1) if len(log_rets) >= 15 else 0.0
+    features["vol_30m"] = np.std(log_rets[-30:], ddof=1) if len(log_rets) >= 30 else 0.0
+    features["vol_60m"] = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 0.0
+
+    # --- Vol ratios (regime detection) ---
+    features["vol_ratio_5_60"] = features["vol_5m"] / (features["vol_60m"] + 1e-12)
+    features["vol_ratio_15_60"] = features["vol_15m"] / (features["vol_60m"] + 1e-12)
+    features["vol_ratio_5_30"] = features["vol_5m"] / (features["vol_30m"] + 1e-12)
+
+    # --- Return autocorrelation (vol clustering signal) ---
+    if len(log_rets) >= 10:
+        features["ret_autocorr_1"] = np.corrcoef(log_rets[1:], log_rets[:-1])[0, 1]
+        features["absret_autocorr_1"] = np.corrcoef(abs_rets[1:], abs_rets[:-1])[0, 1]
+    else:
+        features["ret_autocorr_1"] = 0.0
+        features["absret_autocorr_1"] = 0.0
+
+    # corrcoef yields NaN when either series is constant; treat that as no correlation
+    for k in ["ret_autocorr_1", "absret_autocorr_1"]:
+        if not np.isfinite(features[k]):
+            features[k] = 0.0
+
+    # --- Parkinson volatility estimator (uses high-low) ---
+    hl_log = np.log(highs + 1e-12) - np.log(lows + 1e-12)
+    features["parkinson_vol_15m"] = np.sqrt(np.mean(hl_log[-15:] ** 2) / (4 * np.log(2)))
+    features["parkinson_vol_60m"] = np.sqrt(np.mean(hl_log ** 2) / (4 * np.log(2)))
+    features["parkinson_ratio"] = features["parkinson_vol_15m"] / (features["parkinson_vol_60m"] + 1e-12)
+
+    # --- High-low range features ---
+    hl_range = highs - lows
+    features["hl_range_mean"] = np.mean(hl_range)
+    features["hl_range_5m"] = np.mean(hl_range[-5:])
+    features["hl_range_ratio"] = features["hl_range_5m"] / (features["hl_range_mean"] + 1e-12)
+    features["hl_range_max"] = np.max(hl_range[-15:])
+
+    # --- Absolute returns (magnitude) ---
+    features["abs_ret_mean_5m"] = np.mean(abs_rets[-5:])
+    features["abs_ret_mean_15m"] = np.mean(abs_rets[-15:])
+    features["abs_ret_max_15m"] = np.max(abs_rets[-15:])
+    features["abs_ret_mean_60m"] = np.mean(abs_rets)
+
+    # --- Volume-volatility interaction ---
+    features["volume_mean_ratio"] = np.mean(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+    features["volume_spike"] = np.max(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+
+    # Volume-weighted volatility (volumes[1:] lines up with the n-1 returns)
+    vol_weights = volumes[1:] / (np.sum(volumes[1:]) + 1e-12)
+    features["vol_weighted_absret"] = np.sum(abs_rets * vol_weights)
+
+    # --- Trend strength (directional move vs vol) ---
+    net_return = log_rets[-15:].sum() if len(log_rets) >= 15 else 0.0
+    features["trend_vs_vol"] = abs(net_return) / (features["vol_15m"] + 1e-12)
+
+    # --- Kurtosis (tail risk): needs the full 30-return window, else falls back to 3.0 ---
+    tail = log_rets[-30:]
+    std_r = np.std(tail, ddof=1) if len(log_rets) >= 30 else 0.0
+    if std_r > 1e-12:
+        features["kurtosis_30m"] = np.mean(((tail - np.mean(tail)) / std_r) ** 4)
+    else:
+        features["kurtosis_30m"] = 3.0
+
+    return pd.Series(features)
+
+
+print("   Engineering deep volatility features...")
+engineered = df_all.apply(engineer_deep_vol_features, axis=1)
+df_all = pd.concat([df_all, engineered], axis=1)
+
+feature_cols = base_feature_cols + list(engineered.columns)
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+
+print(f"✅ Dataset: {len(df_all):,} samples")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered.columns)} engineered = {len(feature_cols)} total")
+
+# =============================================================================
+# STEP 3: Grid Search
+# =============================================================================
+print("\n[4/5] Grid search...")
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=TARGET_BARS)
+evaluator = PerformanceEvaluator()
+results = []
+config_num = 0
+# NOTE(review): confirm PerformanceEvaluator's score/grade/num_passed suit a volatility target; predictions are scored unclipped here while predict() floors them at 0
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            fold_models = []
+            for train_idx, test_idx in tscv.split(df_all):
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    subsample=0.8,  # NOTE(review): LightGBM ignores this unless subsample_freq > 0 (bagging is off by default)
+                    colsample_bytree=0.8,
+                    reg_alpha=0.1,
+                    reg_lambda=1.0,
+                    random_state=42,
+                    verbose=-1,
+                )
+                lgb.fit(df_all.iloc[train_idx][feature_cols], df_all.iloc[train_idx]["target"])
+                fold_models.append((lgb, test_idx))
+            # Score each checkpoint by predicting with only the first n_est trees of every fold model
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all["pred"] = np.nan
+                for lgb, test_idx in fold_models:
+                    preds = lgb.predict(df_all.iloc[test_idx][feature_cols], num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+                # Rows never used as a test fold keep NaN and are excluded from scoring
+                valid_mask = ~df_all["pred"].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, "target"],
+                    y_pred=df_all.loc[valid_mask, "pred"],
+                )
+                results.append({"config_num": config_num, "n_est": n_est, "lr": lr, "depth": depth, "leaves": leaves, **metrics})
+                print(f"   [{config_num:2d}] n={n_est:4d} lr={lr:.2f} d={depth} l={leaves:2d} → {metrics['score']:.1%} ({metrics['grade']})")
+
+results_df = pd.DataFrame(results).sort_values(["num_passed", "score"], ascending=[False, False])
+best = results_df.iloc[0]
+print(f"\n✅ Best: {best['num_passed']}/7 ({best['score']:.1%}) — n={int(best['n_est'])}, lr={best['lr']}, d={int(best['depth'])}, l={int(best['leaves'])}")
+
+# =============================================================================
+# STEP 4: Train Final & Deploy
+# =============================================================================
+print("\n[5/5] Training final model...")
+final_model = LGBMRegressor(
+    n_estimators=int(best["n_est"]),  # best is a results_df row, so integer params are cast back to int
+    learning_rate=best["lr"],
+    max_depth=int(best["depth"]),
+    num_leaves=int(best["leaves"]),
+    subsample=0.8,  # NOTE(review): LightGBM ignores this unless subsample_freq > 0 (bagging is off by default)
+    colsample_bytree=0.8,
+    reg_alpha=0.1,
+    reg_lambda=1.0,
+    random_state=42,
+    verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all["target"])  # refit on every row, no hold-out
+print(f"✅ Trained on {len(df_all):,} samples")
+
+
+def predict(nonce=None):
+    """Forecast 15-min volatility from the workflow's live features, floored at 0 (nonce is unused)."""
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    live_eng = engineer_deep_vol_features(live_row.iloc[0])
+    x_live = pd.concat([live_row[base_feature_cols].iloc[0], live_eng])[feature_cols].values.reshape(1, -1)
+    vol = max(0.0, float(final_model.predict(x_live)[0]))
+    print(f"\nModel A prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+
+print("\n🧪 Testing...")
+test_pred = predict()
+# Written relative to the current working directory, not this script's folder
+with open("predict_79_model_a.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n✅ Saved predict_79_model_a.pkl")
+print(f"   Score: {best['score']:.1%} | Features: {len(feature_cols)}")
diff --git a/notebooks/testnet/topic_79_btc_vol/topic_79_model_b_multiscale.py b/notebooks/testnet/topic_79_btc_vol/topic_79_model_b_multiscale.py
new file mode 100644
index 0000000..a1fa4a2
--- /dev/null
+++ b/notebooks/testnet/topic_79_btc_vol/topic_79_model_b_multiscale.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+"""
+Topic 79 — Model B: Multi-Scale Regime Detection
+=================================================
+
+Strategy: Use a 30-bar (30-min) lookback with features designed to capture
+volatility clustering (GARCH-like persistence), intraday seasonality proxies,
+and multi-scale decomposition of price action. Emphasizes regime transitions
+and mean-reversion in volatility.
+
+Trained on 2+ years of 1-minute BTC/USD data.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 800  # ~2.2 years
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 30  # 30 minutes of 1-min bars
+TARGET_BARS = 15  # target horizon in bars (reported below as a 15-min vol target)
+TARGET_TYPE = "volatility"
+
+N_SPLITS = 5  # number of TimeSeriesSplit folds used in the grid search
+N_ESTIMATORS_MAX = 1500  # trees fitted per fold model
+N_ESTIMATORS_CHECKPOINTS = [300, 600, 1000, 1500]  # tree counts scored via predict(num_iteration=...), no refit
+LEARNING_RATES = [0.005, 0.02]
+MAX_DEPTHS = [4, 6]
+NUM_LEAVES = [15, 31]
+
+print("=" * 80)
+print("Topic 79 — Model B: Multi-Scale Regime (30-bar, 2+ years)")
+print("=" * 80)
+
+# =============================================================================
+# STEP 1: Initialize & Backfill
+# =============================================================================
+print("\n[1/5] Initializing workflow...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")  # key file one directory above this script
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+print(f"✅ {NUMBER_OF_INPUT_BARS} bars lookback, {TARGET_BARS}-min vol target")
+
+print(f"\n[2/5] Backfilling {DAYS_OF_HISTORY} days...")
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)  # timezone-aware UTC start
+workflow.backfill(start=start_date)
+print("✅ Backfill complete")
+
+# =============================================================================
+# STEP 2: Features
+# =============================================================================
+print("\n[3/5] Extracting features...")
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+
+base_feature_cols = [col for col in df_all.columns if col.startswith("feature_")]  # workflow-provided columns, selected by prefix
+
+
+def engineer_multiscale_features(row):
+    """Compute multi-scale volatility/regime features (a pd.Series) from one row of lookback bars."""
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])  # assumes higher i = more recent bar — TODO confirm
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))  # n bars -> n-1 log returns; 1e-12 guards log(0)
+    abs_rets = np.abs(log_rets)
+    features = {}
+
+    # --- Realised vol at multiple scales ---
+    features["vol_5m"] = np.std(log_rets[-5:], ddof=1) if len(log_rets) >= 5 else 0.0
+    features["vol_10m"] = np.std(log_rets[-10:], ddof=1) if len(log_rets) >= 10 else 0.0
+    features["vol_15m"] = np.std(log_rets[-15:], ddof=1) if len(log_rets) >= 15 else 0.0
+    features["vol_30m"] = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 0.0
+
+    # --- Vol persistence (GARCH-like) ---
+    # Exponentially weighted vol (lambda=0.94, like RiskMetrics)
+    lam = 0.94
+    sq_rets = log_rets ** 2
+    ewma_var = sq_rets[0]  # seed the recursion with the first squared return
+    for r2 in sq_rets[1:]:
+        ewma_var = lam * ewma_var + (1 - lam) * r2
+    features["ewma_vol"] = np.sqrt(ewma_var)
+    features["ewma_vs_realized"] = features["ewma_vol"] / (features["vol_30m"] + 1e-12)
+
+    # --- Vol of vol (second-order clustering) ---
+    if len(abs_rets) >= 10:
+        rolling_vols = [np.std(abs_rets[i : i + 5], ddof=1) for i in range(len(abs_rets) - 5)]
+        if len(rolling_vols) >= 2:
+            features["vol_of_vol"] = np.std(rolling_vols, ddof=1)
+        else:
+            features["vol_of_vol"] = 0.0
+    else:
+        features["vol_of_vol"] = 0.0
+
+    # --- Regime indicators ---
+    # Vol ratio (short/long) — high = vol expanding, low = vol contracting
+    features["vol_ratio_5_30"] = features["vol_5m"] / (features["vol_30m"] + 1e-12)
+    features["vol_ratio_10_30"] = features["vol_10m"] / (features["vol_30m"] + 1e-12)
+
+    # Vol percentile: fraction of the earlier rolling 5-bar vols that are <= the latest 5-bar vol
+    if len(abs_rets) >= 15:
+        recent_vol = features["vol_5m"]
+        rolling_5m_vols = [np.std(log_rets[i : i + 5], ddof=1) for i in range(len(log_rets) - 5)]
+        if len(rolling_5m_vols) > 0:
+            features["vol_percentile"] = np.mean([v <= recent_vol for v in rolling_5m_vols])  # mean of booleans = fraction
+        else:
+            features["vol_percentile"] = 0.5
+    else:
+        features["vol_percentile"] = 0.5
+
+    # --- Mean reversion signal ---
+    # Distance from "normal" vol (z-score of current vol)
+    if len(abs_rets) >= 20:
+        rolling_vols = [np.std(log_rets[i : i + 5], ddof=1) for i in range(len(log_rets) - 5)]
+        if len(rolling_vols) >= 5:
+            vol_mean = np.mean(rolling_vols)
+            vol_std = np.std(rolling_vols, ddof=1)
+            features["vol_zscore"] = (features["vol_5m"] - vol_mean) / (vol_std + 1e-12)
+        else:
+            features["vol_zscore"] = 0.0
+    else:
+        features["vol_zscore"] = 0.0
+
+    # --- Directional features ---
+    features["signed_ret_5m"] = np.sum(log_rets[-5:])
+    features["signed_ret_15m"] = np.sum(log_rets[-15:]) if len(log_rets) >= 15 else np.sum(log_rets)
+    features["abs_ret_5m"] = np.sum(abs_rets[-5:])
+
+    # Efficiency ratio: |net move| / sum(|moves|) — 1 = trending, 0 = choppy
+    net_move = abs(features["signed_ret_15m"])
+    total_path = np.sum(abs_rets[-15:]) if len(abs_rets) >= 15 else np.sum(abs_rets)
+    features["efficiency_ratio"] = net_move / (total_path + 1e-12)
+
+    # --- High-low based estimators ---
+    hl_log = np.log(highs + 1e-12) - np.log(lows + 1e-12)
+    features["parkinson_5m"] = np.sqrt(np.mean(hl_log[-5:] ** 2) / (4 * np.log(2)))
+    features["parkinson_15m"] = np.sqrt(np.mean(hl_log[-15:] ** 2) / (4 * np.log(2)))
+    features["parkinson_ratio"] = features["parkinson_5m"] / (features["parkinson_15m"] + 1e-12)
+
+    # --- Volume dynamics ---
+    features["volume_trend"] = np.mean(volumes[-5:]) / (np.mean(volumes[-15:]) + 1e-12)
+    features["volume_spike_ratio"] = np.max(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+
+    # --- Autocorrelation of absolute returns (persistence) ---
+    if len(abs_rets) >= 6:
+        features["absret_autocorr"] = np.corrcoef(abs_rets[1:], abs_rets[:-1])[0, 1]  # lag-1 correlation
+        if not np.isfinite(features["absret_autocorr"]):  # non-finite result (e.g. constant series) -> neutral 0.0
+            features["absret_autocorr"] = 0.0
+    else:
+        features["absret_autocorr"] = 0.0
+
+    # --- Recent extreme moves ---
+    features["max_abs_ret_5m"] = np.max(abs_rets[-5:])
+    features["max_abs_ret_15m"] = np.max(abs_rets[-15:]) if len(abs_rets) >= 15 else np.max(abs_rets)
+
+    return pd.Series(features)
+
+
+print("   Engineering multi-scale regime features...")
+engineered = df_all.apply(engineer_multiscale_features, axis=1)  # one Python call per row
+df_all = pd.concat([df_all, engineered], axis=1)
+
+feature_cols = base_feature_cols + list(engineered.columns)  # this column order is reused by predict()
+df_all = df_all.dropna(subset=feature_cols + ["target"])  # drop rows missing any feature or the target
+
+print(f"✅ Dataset: {len(df_all):,} samples")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered.columns)} engineered = {len(feature_cols)} total")
+
+# =============================================================================
+# STEP 3: Grid Search (walk-forward CV over lr x depth x leaves x tree-count checkpoints)
+# =============================================================================
+print("\n[4/5] Grid search...")
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=TARGET_BARS)  # gap skips TARGET_BARS rows between train and test
+evaluator = PerformanceEvaluator()
+results = []
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            fold_models = []  # (model, test_idx) per fold; reused for every checkpoint below
+            for train_idx, test_idx in tscv.split(df_all):
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,  # fit the maximum once; smaller counts are evaluated by truncation
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    subsample=0.7,
+                    colsample_bytree=0.7,
+                    min_child_samples=50,
+                    reg_alpha=0.5,
+                    reg_lambda=2.0,
+                    random_state=123,
+                    verbose=-1,
+                )
+                lgb.fit(df_all.iloc[train_idx][feature_cols], df_all.iloc[train_idx]["target"])
+                fold_models.append((lgb, test_idx))
+
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all["pred"] = np.nan  # reset out-of-fold predictions for this checkpoint
+                for lgb, test_idx in fold_models:
+                    preds = lgb.predict(df_all.iloc[test_idx][feature_cols], num_iteration=n_est)  # use only the first n_est trees
+                    df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                valid_mask = ~df_all["pred"].isna()  # rows never placed in a test fold stay NaN and are excluded
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, "target"],
+                    y_pred=df_all.loc[valid_mask, "pred"],
+                )
+                results.append({"config_num": config_num, "n_est": n_est, "lr": lr, "depth": depth, "leaves": leaves, **metrics})
+                print(f"   [{config_num:2d}] n={n_est:4d} lr={lr:.3f} d={depth} l={leaves:2d} → {metrics['score']:.1%} ({metrics['grade']})")
+
+results_df = pd.DataFrame(results).sort_values(["num_passed", "score"], ascending=[False, False])  # rank: num_passed, then score
+best = results_df.iloc[0]  # top-ranked configuration after sorting
+print(f"\n✅ Best: {best['num_passed']}/7 ({best['score']:.1%}) — n={int(best['n_est'])}, lr={best['lr']}, d={int(best['depth'])}, l={int(best['leaves'])}")
+
+# =============================================================================
+# STEP 4: Train Final & Deploy (refit the best configuration on all rows)
+# =============================================================================
+print("\n[5/5] Training final model...")
+final_model = LGBMRegressor(
+    n_estimators=int(best["n_est"]),  # int(): value is read from a DataFrame row and may not be a plain Python int
+    learning_rate=best["lr"],
+    max_depth=int(best["depth"]),
+    num_leaves=int(best["leaves"]),
+    subsample=0.7,
+    colsample_bytree=0.7,
+    min_child_samples=50,
+    reg_alpha=0.5,
+    reg_lambda=2.0,
+    random_state=123,
+    verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all["target"])  # feature_cols excludes the scratch "pred" column
+print(f"✅ Trained on {len(df_all):,} samples")
+
+
+def predict(nonce=None):  # nonce is accepted but never read in the body
+    latest = workflow.get_live_features(ticker=TICKERS[0])
+    if latest is None or len(latest) == 0:
+        raise ValueError("Could not get live features")
+    extra = engineer_multiscale_features(latest.iloc[0])  # engineered features for the first live row
+    combined = pd.concat([latest[base_feature_cols].iloc[0], extra])
+    x_live = combined[feature_cols].values.reshape(1, -1)  # single sample, columns in feature_cols order
+    vol = max(0.0, float(final_model.predict(x_live)[0]))  # floor the prediction at zero
+    print(f"\nModel B prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+
+print("\n🧪 Testing...")
+test_pred = predict()  # smoke test: run one live prediction before pickling
+
+with open("predict_79_model_b.pkl", "wb") as f:  # relative path: written to the current working directory
+    cloudpickle.dump(predict, f)  # pickles the predict function object itself
+
+print(f"\n✅ Saved predict_79_model_b.pkl")
+print(f"   Score: {best['score']:.1%} | Features: {len(feature_cols)}")
diff --git a/notebooks/testnet/topic_79_btc_vol/topic_79_model_c_colab.ipynb b/notebooks/testnet/topic_79_btc_vol/topic_79_model_c_colab.ipynb
new file mode 100644
index 0000000..5a3ec6c
--- /dev/null
+++ b/notebooks/testnet/topic_79_btc_vol/topic_79_model_c_colab.ipynb
@@ -0,0 +1,525 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "e1377303",
+   "metadata": {},
+   "source": [
+    "# Topic 79 — Model C: TCN Neural Network (GPU)\n",
+    "\n",
+    "**BTC/USD 15-Minute Volatility Prediction using a Temporal Convolutional Network**\n",
+    "\n",
+    "Run this on Google Colab with a T4 GPU runtime for fast training.\n",
+    "\n",
+    "After training, download `predict_79_model_c.pkl` and deploy with:\n",
+    "```bash\n",
+    "TOPIC_ID=79 PREDICT_PKL=predict_79_model_c.pkl python deploy_worker_raw.py\n",
+    "```\n",
+    "\n",
+    "---\n",
+    "**Runtime setup:** Runtime → Change runtime type → T4 GPU"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93e4e5ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install dependencies\n",
+    "!pip install -q git+https://github.com/allora-network/allora-forge-builder-kit.git@feature/topic-79-volatility-target\n",
+    "!pip install -q cloudpickle torch numpy pandas scikit-learn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fdf7d457",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch  # runtime sanity check: report PyTorch version, CUDA availability and GPU details\n",
+    "print(f\"PyTorch {torch.__version__}\")\n",
+    "print(f\"CUDA available: {torch.cuda.is_available()}\")\n",
+    "if torch.cuda.is_available():\n",
+    "    print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n",
+    "    print(f\"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e501ee02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# === CONFIGURATION ===\n",
+    "# Paste your Allora API key here (get free at https://developer.allora.network)\n",
+    "ALLORA_API_KEY = \"UP-xxx\"  # <-- PASTE YOUR KEY\n",
+    "\n",
+    "TICKERS = [\"btcusd\"]\n",
+    "DAYS_OF_HISTORY = 800  # 2+ years — GPU makes this feasible\n",
+    "INTERVAL = \"1m\"\n",
+    "NUMBER_OF_INPUT_BARS = 30\n",
+    "TARGET_BARS = 15\n",
+    "TARGET_TYPE = \"volatility\"\n",
+    "\n",
+    "# Training config (GPU-optimized)\n",
+    "EPOCHS = 80\n",
+    "BATCH_SIZE = 8192\n",
+    "LEARNING_RATE = 1e-3\n",
+    "WEIGHT_DECAY = 1e-5\n",
+    "PATIENCE = 12"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6642f0db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"ALLORA_API_KEY\"] = ALLORA_API_KEY\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from datetime import datetime, timedelta, timezone\n",
+    "from sklearn.model_selection import TimeSeriesSplit\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import cloudpickle\n",
+    "from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator\n",
+    "\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "print(f\"Using device: {device}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e7c184f8",
+   "metadata": {},
+   "source": [
+    "## Model Architecture\n",
+    "\n",
+    "Temporal Convolutional Network with dilated causal convolutions.\n",
+    "Receptive field covers the full 30-bar input with dilations [1, 2, 4, 8, 16]."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82a7645e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CausalConv1d(nn.Module):\n",
+    "    \"\"\"Causal convolution: output at time t only depends on inputs at t and before.\"\"\"\n",
+    "    def __init__(self, in_channels, out_channels, kernel_size, dilation=1):\n",
+    "        super().__init__()\n",
+    "        self.padding = (kernel_size - 1) * dilation\n",
+    "        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size,\n",
+    "                              padding=self.padding, dilation=dilation)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        out = self.conv(x)\n",
+    "        if self.padding > 0:\n",
+    "            out = out[:, :, :-self.padding]\n",
+    "        return out\n",
+    "\n",
+    "\n",
+    "class TCNBlock(nn.Module):\n",
+    "    \"\"\"Residual TCN block with dilated causal convolution.\"\"\"\n",
+    "    def __init__(self, channels, kernel_size, dilation, dropout=0.05):\n",
+    "        super().__init__()\n",
+    "        self.net = nn.Sequential(\n",
+    "            CausalConv1d(channels, channels, kernel_size, dilation),\n",
+    "            nn.BatchNorm1d(channels),\n",
+    "            nn.GELU(),\n",
+    "            nn.Dropout(dropout),\n",
+    "            CausalConv1d(channels, channels, kernel_size, dilation),\n",
+    "            nn.BatchNorm1d(channels),\n",
+    "            nn.GELU(),\n",
+    "            nn.Dropout(dropout),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return x + self.net(x)\n",
+    "\n",
+    "\n",
+    "class VolatilityTCN(nn.Module):\n",
+    "    \"\"\"\n",
+    "    TCN for volatility prediction.\n",
+    "    Input: (batch, seq_len, 5) — normalized OHLCV bars\n",
+    "    Output: (batch, 1) — predicted volatility\n",
+    "    \"\"\"\n",
+    "    def __init__(self, input_features=5, seq_len=30, hidden_channels=128,\n",
+    "                 kernel_size=3, dilations=(1, 2, 4, 8, 16), dropout=0.05):\n",
+    "        super().__init__()\n",
+    "        self.input_proj = nn.Conv1d(input_features, hidden_channels, 1)\n",
+    "        self.tcn_blocks = nn.ModuleList([\n",
+    "            TCNBlock(hidden_channels, kernel_size, d, dropout) for d in dilations\n",
+    "        ])\n",
+    "        self.head = nn.Sequential(\n",
+    "            nn.AdaptiveAvgPool1d(1),\n",
+    "            nn.Flatten(),\n",
+    "            nn.Linear(hidden_channels, 64),\n",
+    "            nn.GELU(),\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Linear(64, 1),\n",
+    "            nn.Softplus(),  # non-negative output\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = x.transpose(1, 2)  # (B, seq, feat) -> (B, feat, seq)\n",
+    "        x = self.input_proj(x)\n",
+    "        for block in self.tcn_blocks:\n",
+    "            x = block(x)\n",
+    "        return self.head(x)\n",
+    "\n",
+    "\n",
+    "# Count parameters\n",
+    "model_test = VolatilityTCN()\n",
+    "n_params = sum(p.numel() for p in model_test.parameters())\n",
+    "print(f\"Model parameters: {n_params:,}\")\n",
+    "del model_test"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a0790988",
+   "metadata": {},
+   "source": [
+    "## Data Loading & Backfill"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "978525b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"[1/4] Initializing workflow...\")\n",
+    "workflow = AlloraMLWorkflow(\n",
+    "    tickers=TICKERS,\n",
+    "    number_of_input_bars=NUMBER_OF_INPUT_BARS,\n",
+    "    target_bars=TARGET_BARS,\n",
+    "    interval=INTERVAL,\n",
+    "    target_type=TARGET_TYPE,\n",
+    "    data_source=\"allora\",\n",
+    "    api_key=ALLORA_API_KEY,\n",
+    ")\n",
+    "\n",
+    "print(f\"[2/4] Backfilling {DAYS_OF_HISTORY} days...\")\n",
+    "start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)\n",
+    "workflow.backfill(start=start_date)\n",
+    "print(\"✅ Backfill complete\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86bc8266",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"[3/4] Preparing tensors...\")\n",
+    "df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()\n",
+    "base_feature_cols = [col for col in df_all.columns if col.startswith(\"feature_\")]\n",
+    "df_all = df_all.dropna(subset=base_feature_cols + [\"target\"])\n",
+    "\n",
+    "n_samples = len(df_all)\n",
+    "seq_len = NUMBER_OF_INPUT_BARS\n",
+    "n_features = 5\n",
+    "\n",
+    "# Build (samples, seq_len, 5) array\n",
+    "X_seq = np.zeros((n_samples, seq_len, n_features), dtype=np.float32)\n",
+    "for i in range(seq_len):\n",
+    "    X_seq[:, i, 0] = df_all[f\"feature_open_{i}\"].values\n",
+    "    X_seq[:, i, 1] = df_all[f\"feature_high_{i}\"].values\n",
+    "    X_seq[:, i, 2] = df_all[f\"feature_low_{i}\"].values\n",
+    "    X_seq[:, i, 3] = df_all[f\"feature_close_{i}\"].values\n",
+    "    X_seq[:, i, 4] = df_all[f\"feature_volume_{i}\"].values\n",
+    "\n",
+    "y_all = df_all[\"target\"].values.astype(np.float32)\n",
+    "\n",
+    "# Pre-allocate GPU tensors\n",
+    "X_tensor = torch.from_numpy(X_seq).to(device)\n",
+    "y_tensor = torch.from_numpy(y_all).unsqueeze(1).to(device)\n",
+    "\n",
+    "print(f\"✅ Dataset: {n_samples:,} samples on {device}\")\n",
+    "print(f\"   Tensor shape: {X_tensor.shape}\")\n",
+    "print(f\"   GPU memory: {torch.cuda.memory_allocated()/1e9:.2f} GB\" if torch.cuda.is_available() else \"\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "124534f9",
+   "metadata": {},
+   "source": [
+    "## Training with Walk-Forward CV"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9e42c0cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"[4/4] Training...\")\n",
+    "\n",
+    "VAL_BATCH = 16384  # chunk size for validation (avoids OOM)\n",
+    "\n",
+    "tscv = TimeSeriesSplit(n_splits=3, gap=TARGET_BARS)\n",
+    "fold_predictions = np.full(n_samples, np.nan)\n",
+    "\n",
+    "for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(X_seq)):\n",
+    "    print(f\"\\n{'='*60}\")\n",
+    "    print(f\"Fold {fold_idx+1}/3: Train={len(train_idx):,}, Test={len(test_idx):,}\")\n",
+    "    print(f\"{'='*60}\")\n",
+    "\n",
+    "    # Slice tensors (already on GPU)\n",
+    "    X_train = X_tensor[train_idx]\n",
+    "    y_train = y_tensor[train_idx]\n",
+    "    X_test = X_tensor[test_idx]\n",
+    "    y_test = y_tensor[test_idx]\n",
+    "\n",
+    "    n_train = len(train_idx)\n",
+    "    n_batches = (n_train + BATCH_SIZE - 1) // BATCH_SIZE\n",
+    "\n",
+    "    model = VolatilityTCN(\n",
+    "        input_features=n_features,\n",
+    "        seq_len=seq_len,\n",
+    "        hidden_channels=128,\n",
+    "        kernel_size=3,\n",
+    "        dilations=(1, 2, 4, 8, 16),\n",
+    "        dropout=0.05,\n",
+    "    ).to(device)\n",
+    "\n",
+    "    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)\n",
+    "    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=20, T_mult=2)\n",
+    "\n",
+    "    best_val_loss = float(\"inf\")\n",
+    "    patience_counter = 0\n",
+    "    best_state = None\n",
+    "\n",
+    "    for epoch in range(EPOCHS):\n",
+    "        model.train()\n",
+    "        perm = torch.randperm(n_train, device=device)\n",
+    "        train_loss = 0.0\n",
+    "        for bi in range(n_batches):\n",
+    "            idx = perm[bi * BATCH_SIZE : (bi + 1) * BATCH_SIZE]\n",
+    "            xb = X_train[idx]\n",
+    "            yb = y_train[idx]\n",
+    "            pred = model(xb)\n",
+    "            loss = nn.MSELoss()(pred, yb)\n",
+    "            optimizer.zero_grad()\n",
+    "            loss.backward()\n",
+    "            nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
+    "            optimizer.step()\n",
+    "            train_loss += loss.item() * len(idx)\n",
+    "        train_loss /= n_train\n",
+    "        scheduler.step()\n",
+    "\n",
+    "        # Validate (chunked to avoid OOM)\n",
+    "        model.eval()\n",
+    "        with torch.no_grad():\n",
+    "            val_preds = []\n",
+    "            for vi in range(0, len(test_idx), VAL_BATCH):\n",
+    "                val_preds.append(model(X_test[vi:vi+VAL_BATCH]))\n",
+    "            val_pred = torch.cat(val_preds)\n",
+    "            val_loss = nn.MSELoss()(val_pred, y_test).item()\n",
+    "\n",
+    "        if val_loss < best_val_loss:\n",
+    "            best_val_loss = val_loss\n",
+    "            patience_counter = 0\n",
+    "            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}\n",
+    "        else:\n",
+    "            patience_counter += 1\n",
+    "\n",
+    "        if (epoch + 1) % 5 == 0:\n",
+    "            lr_now = optimizer.param_groups[0]['lr']\n",
+    "            print(f\"  Epoch {epoch+1:3d}: train={train_loss:.8f} val={val_loss:.8f} best={best_val_loss:.8f} lr={lr_now:.6f}\")\n",
+    "\n",
+    "        if patience_counter >= PATIENCE:\n",
+    "            print(f\"  Early stopping at epoch {epoch+1} (best val_loss={best_val_loss:.8f})\")\n",
+    "            break\n",
+    "\n",
+    "    # Predict on test set (chunked)\n",
+    "    model.load_state_dict(best_state)\n",
+    "    model.eval()\n",
+    "    with torch.no_grad():\n",
+    "        chunks = []\n",
+    "        for vi in range(0, len(test_idx), VAL_BATCH):\n",
+    "            chunks.append(model(X_test[vi:vi+VAL_BATCH]).cpu())\n",
+    "        test_preds = torch.cat(chunks).numpy().flatten()\n",
+    "    fold_predictions[test_idx] = test_preds\n",
+    "    print(f\"  ✅ Fold {fold_idx+1} done. Best val_loss: {best_val_loss:.8f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64b3277d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Evaluate CV results\n",
+    "valid_mask = ~np.isnan(fold_predictions)\n",
+    "evaluator = PerformanceEvaluator()\n",
+    "metrics = evaluator.evaluate(y_true=y_all[valid_mask], y_pred=fold_predictions[valid_mask])\n",
+    "\n",
+    "print(f\"\\n{'='*60}\")\n",
+    "print(f\"CV RESULTS: {metrics['num_passed']}/7 ({metrics['score']:.1%} — {metrics['grade']})\")\n",
+    "print(f\"{'='*60}\")\n",
+    "evaluator.print_report(metrics, detailed=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "46fe8154",
+   "metadata": {},
+   "source": [
+    "## Train Final Model on All Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35f20448",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Training final model on all data...\")\n",
+    "\n",
+    "final_model = VolatilityTCN(\n",
+    "    input_features=n_features,\n",
+    "    seq_len=seq_len,\n",
+    "    hidden_channels=128,\n",
+    "    kernel_size=3,\n",
+    "    dilations=(1, 2, 4, 8, 16),\n",
+    "    dropout=0.05,\n",
+    ").to(device)\n",
+    "\n",
+    "n_batches_all = (n_samples + BATCH_SIZE - 1) // BATCH_SIZE\n",
+    "optimizer = torch.optim.AdamW(final_model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)\n",
+    "scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=20, T_mult=2)\n",
+    "\n",
+    "for epoch in range(EPOCHS):\n",
+    "    final_model.train()\n",
+    "    perm = torch.randperm(n_samples, device=device)\n",
+    "    epoch_loss = 0.0\n",
+    "    for bi in range(n_batches_all):\n",
+    "        idx = perm[bi * BATCH_SIZE : (bi + 1) * BATCH_SIZE]\n",
+    "        xb = X_tensor[idx]\n",
+    "        yb = y_tensor[idx]\n",
+    "        pred = final_model(xb)\n",
+    "        loss = nn.MSELoss()(pred, yb)\n",
+    "        optimizer.zero_grad()\n",
+    "        loss.backward()\n",
+    "        nn.utils.clip_grad_norm_(final_model.parameters(), 1.0)\n",
+    "        optimizer.step()\n",
+    "        epoch_loss += loss.item() * len(idx)\n",
+    "    epoch_loss /= n_samples\n",
+    "    scheduler.step()\n",
+    "    if (epoch + 1) % 10 == 0:\n",
+    "        print(f\"  Epoch {epoch+1:3d}: loss={epoch_loss:.8f}\")\n",
+    "\n",
+    "final_model.eval()\n",
+    "final_model_cpu = final_model.cpu()\n",
+    "print(f\"\\n✅ Final model trained on {n_samples:,} samples\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "572c07e7",
+   "metadata": {},
+   "source": [
+    "## Create Predict Function & Save Pickle"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "576287b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The predict function runs on CPU at inference time (worker deployment)\n",
+    "def predict(nonce=None):\n",
+    "    \"\"\"Predict 15-min BTC volatility using the TCN model.\"\"\"\n",
+    "    live_row = workflow.get_live_features(ticker=TICKERS[0])\n",
+    "    if live_row is None or len(live_row) == 0:\n",
+    "        raise ValueError(\"Could not get live features\")\n",
+    "\n",
+    "    x = np.zeros((1, NUMBER_OF_INPUT_BARS, 5), dtype=np.float32)\n",
+    "    row = live_row.iloc[0]\n",
+    "    for i in range(NUMBER_OF_INPUT_BARS):\n",
+    "        x[0, i, 0] = row[f\"feature_open_{i}\"]\n",
+    "        x[0, i, 1] = row[f\"feature_high_{i}\"]\n",
+    "        x[0, i, 2] = row[f\"feature_low_{i}\"]\n",
+    "        x[0, i, 3] = row[f\"feature_close_{i}\"]\n",
+    "        x[0, i, 4] = row[f\"feature_volume_{i}\"]\n",
+    "\n",
+    "    x_tensor = torch.tensor(x, dtype=torch.float32)\n",
+    "    with torch.no_grad():\n",
+    "        vol = final_model_cpu(x_tensor).item()\n",
+    "\n",
+    "    vol = max(0.0, vol)\n",
+    "    print(f\"Model C (TCN) prediction: {vol:.6f} (15-min vol)\")\n",
+    "    return vol\n",
+    "\n",
+    "\n",
+    "# Test it\n",
+    "print(\"Testing prediction...\")\n",
+    "test_pred = predict()\n",
+    "print(f\"\\n✅ Test prediction: {test_pred:.6f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91a43b60",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save the pickle\n",
+    "with open(\"predict_79_model_c.pkl\", \"wb\") as f:\n",
+    "    cloudpickle.dump(predict, f)\n",
+    "\n",
+    "import os\n",
+    "pkl_size = os.path.getsize(\"predict_79_model_c.pkl\") / 1024\n",
+    "print(f\"✅ Saved predict_79_model_c.pkl ({pkl_size:.0f} KB)\")\n",
+    "print(f\"   CV Score: {metrics['num_passed']}/7 ({metrics['score']:.1%} — {metrics['grade']})\")\n",
+    "print(f\"   Architecture: TCN (128ch, dilations 1/2/4/8/16)\")\n",
+    "print(f\"   Training samples: {n_samples:,}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "05e481ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download the pickle file\n",
+    "try:\n",
+    "    from google.colab import files\n",
+    "    files.download(\"predict_79_model_c.pkl\")\n",
+    "    print(\"📥 Download started!\")\n",
+    "except ImportError:\n",
+    "    print(\"Not running on Colab — file saved locally as predict_79_model_c.pkl\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/testnet/topic_79_btc_vol/topic_79_model_c_neural.py b/notebooks/testnet/topic_79_btc_vol/topic_79_model_c_neural.py
new file mode 100644
index 0000000..24def70
--- /dev/null
+++ b/notebooks/testnet/topic_79_btc_vol/topic_79_model_c_neural.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python3
+"""
+Topic 79 — Model C: Neural Network (TCN-style)
+===============================================
+
+Strategy: A Temporal Convolutional Network (TCN) that processes the raw
+OHLCV sequence directly, learning multi-scale temporal patterns for
+volatility prediction. Uses dilated causal convolutions to capture both
+short-term microstructure and longer-term regime dynamics.
+
+Architecture:
+    Input (30 bars × 5 OHLCV) → Conv1D blocks with dilations [1,2,4,8,16]
+    → Global pooling → Dense layers → Volatility prediction
+
+Trained on DAYS_OF_HISTORY days (120 as configured below) of 1-minute BTC/USD data.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader, TensorDataset
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 120  # ~4 months (fast verification; increase for production)
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 30  # 30 minutes of 1-min bars
+TARGET_BARS = 15
+TARGET_TYPE = "volatility"
+
+# Training config
+EPOCHS = 60  # upper bound per fit; the CV loop may stop earlier via PATIENCE
+BATCH_SIZE = 4096
+LEARNING_RATE = 1e-3
+WEIGHT_DECAY = 1e-5
+PATIENCE = 10  # epochs without validation improvement before early stopping
+
+print("=" * 80)
+print(f"Topic 79 — Model C: Neural Network (TCN-style, {DAYS_OF_HISTORY} days of history)")
+print("=" * 80)
+
+
+# =============================================================================
+# MODEL DEFINITION
+# =============================================================================
+class CausalConv1d(nn.Module):
+    """1-D convolution whose output at step t depends only on inputs at steps <= t."""
+
+    def __init__(self, in_channels, out_channels, kernel_size, dilation=1):
+        super().__init__()
+        # Conv1d pads both ends by this amount; forward() trims the right-hand surplus.
+        self.padding = dilation * (kernel_size - 1)
+        conv_kwargs = {"padding": self.padding, "dilation": dilation}
+        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, **conv_kwargs)
+
+    def forward(self, x):
+        y = self.conv(x)
+        if self.padding <= 0:
+            # Nothing was padded (e.g. kernel_size == 1), so there is nothing to trim.
+            return y
+        # Drop the trailing `padding` steps so no output position sees future inputs.
+        return y[:, :, : -self.padding]
+
+
+class TCNBlock(nn.Module):
+    """Residual block: two (causal conv → batch norm → GELU → dropout) stages plus a skip."""
+
+    def __init__(self, channels, kernel_size, dilation, dropout=0.1):
+        super().__init__()
+        stages = []
+        # Layers are appended in execution order; `net.<index>` state-dict keys follow it.
+        for _ in range(2):
+            stages.append(CausalConv1d(channels, channels, kernel_size, dilation))
+            stages.append(nn.BatchNorm1d(channels))
+            stages.append(nn.GELU())
+            stages.append(nn.Dropout(dropout))
+        self.net = nn.Sequential(*stages)
+
+    def forward(self, x):
+        # Every stage keeps the channel count, so the skip connection adds directly.
+        residual = self.net(x)
+        return x + residual
+
+
+class VolatilityTCN(nn.Module):
+    """
+    Temporal convolutional network that maps a bar sequence to one volatility value.
+
+    Input:  (batch, seq_len, input_features) — OHLCV bars when input_features=5
+    Output: (batch, 1) — non-negative prediction (the final layer is Softplus)
+    `seq_len` is accepted but unused: the global average pool handles any length.
+    """
+
+    def __init__(
+        self,
+        input_features=5,
+        seq_len=30,
+        hidden_channels=64,
+        kernel_size=3,
+        dilations=(1, 2, 4, 8),
+        dropout=0.1,
+    ):
+        super().__init__()
+        # 1x1 convolution lifts the raw features to the hidden width.
+        self.input_proj = nn.Conv1d(input_features, hidden_channels, kernel_size=1)
+        # One residual block per dilation, applied in the order given.
+        blocks = [TCNBlock(hidden_channels, kernel_size, rate, dropout) for rate in dilations]
+        self.tcn_blocks = nn.ModuleList(blocks)
+        # Pool over time, then a small MLP; Softplus keeps the output non-negative.
+        head_layers = [
+            nn.AdaptiveAvgPool1d(1),
+            nn.Flatten(),
+            nn.Linear(hidden_channels, 32),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Linear(32, 1),
+            nn.Softplus(),
+        ]
+        self.head = nn.Sequential(*head_layers)
+
+    def forward(self, x):
+        # Conv1d wants channels first: (batch, seq_len, features) → (batch, features, seq_len)
+        hidden = self.input_proj(x.transpose(1, 2))
+        for block in self.tcn_blocks:
+            hidden = block(hidden)
+        return self.head(hidden)
+
+
+# =============================================================================
+# STEP 1: Initialize & Backfill
+# =============================================================================
+print("\n[1/5] Initializing workflow...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")  # one directory above this script
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+print(f"✅ {NUMBER_OF_INPUT_BARS} bars lookback, {TARGET_BARS}-min vol target")
+
+print(f"\n[2/5] Backfilling {DAYS_OF_HISTORY} days...")
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+workflow.backfill(start=start_date)
+print("✅ Backfill complete")
+
+# =============================================================================
+# STEP 2: Prepare Data
+# =============================================================================
+print("\n[3/5] Preparing data for neural network...")
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+
+base_feature_cols = [col for col in df_all.columns if col.startswith("feature_")]
+df_all = df_all.dropna(subset=base_feature_cols + ["target"])
+
+# Reshape the flat feature columns into a (samples, seq_len, 5) array.
+# Columns are feature_{open,high,low,close,volume}_{0..seq_len-1}; step i takes column suffix i.
+n_samples = len(df_all)
+seq_len = NUMBER_OF_INPUT_BARS
+n_features = 5  # OHLCV
+
+X_seq = np.zeros((n_samples, seq_len, n_features), dtype=np.float32)
+for i in range(seq_len):
+    X_seq[:, i, 0] = df_all[f"feature_open_{i}"].values
+    X_seq[:, i, 1] = df_all[f"feature_high_{i}"].values
+    X_seq[:, i, 2] = df_all[f"feature_low_{i}"].values
+    X_seq[:, i, 3] = df_all[f"feature_close_{i}"].values
+    X_seq[:, i, 4] = df_all[f"feature_volume_{i}"].values
+
+y_all = df_all["target"].values.astype(np.float32)
+
+print(f"✅ Dataset: {n_samples:,} samples, shape: ({seq_len}, {n_features})")
+
+# =============================================================================
+# STEP 3: Train with Walk-Forward CV
+# =============================================================================
+print("\n[4/5] Training neural network...")
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"   Device: {device}")
+
+tscv = TimeSeriesSplit(n_splits=2, gap=TARGET_BARS)  # gap drops TARGET_BARS samples from the end of each train fold
+fold_predictions = np.full(n_samples, np.nan)  # stays NaN for rows that never land in a test fold
+
+# Pre-allocate full tensors ONCE (zero-copy from numpy)
+X_all_tensor = torch.from_numpy(X_seq)  # zero-copy, shares memory
+y_all_tensor = torch.from_numpy(y_all).unsqueeze(1)
+
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(X_seq)):
+    print(f"\n   Fold {fold_idx + 1}/2: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+    # Select the fold's rows by index array (advanced indexing: each result is a copy, not a view)
+    X_train = X_all_tensor[train_idx]
+    y_train = y_all_tensor[train_idx]
+    X_test = X_all_tensor[test_idx]
+    y_test = y_all_tensor[test_idx]
+
+    # Simple index-based batching (faster than DataLoader for in-memory data)
+    n_train = len(train_idx)
+    n_batches = (n_train + BATCH_SIZE - 1) // BATCH_SIZE
+
+    model = VolatilityTCN(
+        input_features=n_features,
+        seq_len=seq_len,
+        hidden_channels=96,  # increased capacity
+        kernel_size=3,
+        dilations=(1, 2, 4, 8, 16),  # added dilation=16 for longer range
+        dropout=0.05,
+    ).to(device)
+
+    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
+    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, T_mult=2)
+
+    best_val_loss = float("inf")
+    patience_counter = 0
+    best_state = None
+
+    for epoch in range(EPOCHS):
+        # Train with manual batching (avoids DataLoader overhead)
+        model.train()
+        perm = torch.randperm(n_train)
+        train_loss = 0.0
+        for bi in range(n_batches):
+            idx = perm[bi * BATCH_SIZE : (bi + 1) * BATCH_SIZE]
+            xb = X_train[idx].to(device)
+            yb = y_train[idx].to(device)
+            pred = model(xb)
+            loss = nn.MSELoss()(pred, yb)
+            optimizer.zero_grad()
+            loss.backward()
+            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
+            optimizer.step()
+            train_loss += loss.item() * len(idx)  # weight by batch size so the epoch mean is per-sample
+        train_loss /= n_train
+        scheduler.step()
+
+        # Validate in chunks to avoid OOM. NOTE(review): this is the fold's test set, so early stopping selects on the data scored below.
+        model.eval()
+        with torch.no_grad():
+            val_preds = []
+            for vi in range(0, len(test_idx), BATCH_SIZE):
+                vx = X_test[vi : vi + BATCH_SIZE].to(device)
+                val_preds.append(model(vx))
+            val_pred = torch.cat(val_preds)
+            val_loss = nn.MSELoss()(val_pred, y_test.to(device)).item()
+
+        if val_loss < best_val_loss:
+            best_val_loss = val_loss
+            patience_counter = 0
+            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}  # CPU snapshot of the best weights
+        else:
+            patience_counter += 1
+
+        if (epoch + 1) % 10 == 0 or patience_counter >= PATIENCE:
+            print(f"      Epoch {epoch+1:2d}: train_loss={train_loss:.8f}, val_loss={val_loss:.8f}")
+
+        if patience_counter >= PATIENCE:
+            print(f"      Early stopping at epoch {epoch+1}")
+            break
+
+    # Restore the best checkpoint and predict the test fold (best_state is still None if no epoch had a comparable val_loss, e.g. all NaN)
+    model.load_state_dict(best_state)
+    model.eval()
+    with torch.no_grad():
+        test_preds = []
+        for vi in range(0, len(test_idx), BATCH_SIZE):
+            vx = X_test[vi : vi + BATCH_SIZE].to(device)
+            test_preds.append(model(vx))
+        test_preds = torch.cat(test_preds).cpu().numpy().flatten()
+    fold_predictions[test_idx] = test_preds
+
+# Evaluate only the rows that received an out-of-fold prediction
+valid_mask = ~np.isnan(fold_predictions)
+evaluator = PerformanceEvaluator()
+metrics = evaluator.evaluate(
+    y_true=y_all[valid_mask],
+    y_pred=fold_predictions[valid_mask],
+)
+print(f"\n✅ Neural network CV results: {metrics['num_passed']}/7 ({metrics['score']:.1%} — {metrics['grade']})")
+
+# =============================================================================
+# STEP 4: Train Final Model on All Data
+# =============================================================================
+print("\n[5/5] Training final model on all data...")
+
+final_model = VolatilityTCN(
+    input_features=n_features,
+    seq_len=seq_len,
+    hidden_channels=96,
+    kernel_size=3,
+    dilations=(1, 2, 4, 8, 16),
+    dropout=0.05,
+).to(device)  # same hyperparameters as the per-fold CV models
+
+# Reuse the full tensors built before the CV loop
+n_all = len(X_all_tensor)
+n_batches_all = (n_all + BATCH_SIZE - 1) // BATCH_SIZE
+
+optimizer = torch.optim.AdamW(final_model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
+scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=15, T_mult=2)
+
+for epoch in range(EPOCHS):  # fixed epoch budget: no validation set or early stopping here
+    final_model.train()
+    perm = torch.randperm(n_all)
+    epoch_loss = 0.0
+    for bi in range(n_batches_all):
+        idx = perm[bi * BATCH_SIZE : (bi + 1) * BATCH_SIZE]
+        xb = X_all_tensor[idx].to(device)
+        yb = y_all_tensor[idx].to(device)
+        pred = final_model(xb)
+        loss = nn.MSELoss()(pred, yb)
+        optimizer.zero_grad()
+        loss.backward()
+        nn.utils.clip_grad_norm_(final_model.parameters(), 1.0)
+        optimizer.step()
+        epoch_loss += loss.item() * len(idx)  # weight by batch size so the epoch mean is per-sample
+    epoch_loss /= n_all
+    scheduler.step()
+    if (epoch + 1) % 10 == 0:
+        print(f"   Epoch {epoch+1:2d}: loss={epoch_loss:.8f}")
+
+final_model.eval()  # inference mode for the dropout and batch-norm layers
+print(f"✅ Final model trained on {n_samples:,} samples")
+
+# Move model to CPU for inference (deployment)
+final_model = final_model.cpu()
+
+
+def predict(nonce=None):
+    """Predict 15-min BTC volatility with the TCN (`nonce` unused); ValueError on missing features or non-finite output."""
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+
+    # Reshape to (1, seq_len, 5), using the same channel order as training (O, H, L, C, V)
+    x = np.zeros((1, NUMBER_OF_INPUT_BARS, 5), dtype=np.float32)
+    row = live_row.iloc[0]
+    for i in range(NUMBER_OF_INPUT_BARS):
+        x[0, i, 0] = row[f"feature_open_{i}"]
+        x[0, i, 1] = row[f"feature_high_{i}"]
+        x[0, i, 2] = row[f"feature_low_{i}"]
+        x[0, i, 3] = row[f"feature_close_{i}"]
+        x[0, i, 4] = row[f"feature_volume_{i}"]
+
+    with torch.no_grad():
+        vol = final_model(torch.tensor(x, dtype=torch.float32)).item()
+    if not np.isfinite(vol):  # max(0.0, nan) returns 0.0, which would silently report zero volatility
+        raise ValueError(f"Model produced a non-finite volatility: {vol}")
+    vol = max(0.0, vol)
+    print(f"\nModel C (TCN) prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+
+print("\n🧪 Testing...")
+test_pred = predict()  # smoke test: a failure here aborts before anything is written
+
+with open("predict_79_model_c.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)  # serialises the predict function itself
+
+print(f"\n✅ Saved predict_79_model_c.pkl")
+print(f"   Score: {metrics['score']:.1%} | Architecture: TCN (dilations 1,2,4,8,16)")
diff --git a/notebooks/testnet/topic_79_btc_vol/topic_79_model_d_iterative.py b/notebooks/testnet/topic_79_btc_vol/topic_79_model_d_iterative.py
new file mode 100644
index 0000000..5b5ac5a
--- /dev/null
+++ b/notebooks/testnet/topic_79_btc_vol/topic_79_model_d_iterative.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python3
+"""
+Topic 79 — Model D: Iterative Improvement
+==========================================
+
+Starting from Model A's baseline (best performer), iteratively adding
+features and tuning to push volatility metrics higher.
+
+Baseline (Model A, 60-bar raw features only):
+    Pearson r:   0.695
+    Spearman ρ:  0.714
+    R²:          0.457
+    RMSE:        0.000290
+    MAE:         0.000194
+    Rel MAE:     34.2%
+    QLIKE:       0.100
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from scipy.stats import pearsonr, spearmanr
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 800  # backfill window in days (start = now - DAYS_OF_HISTORY)
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 60  # lookback bars per sample; feature columns are read for suffixes 0..59
+TARGET_BARS = 15  # passed to the workflow as target_bars
+TARGET_TYPE = "volatility"
+
+print("=" * 80)
+print("Topic 79 — Model D: Iterative Improvement")
+print("=" * 80)
+
+
+# =============================================================================
+# METRICS
+# =============================================================================
+def vol_metrics(y_true, y_pred):
+    """Return Pearson r, Spearman ρ, R², RMSE, MAE, relative MAE and QLIKE as a dict.
+
+    QLIKE is averaged over pairs where both values are > 0; inf if none qualify.
+    """
+    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
+    r, _ = pearsonr(y_true, y_pred)
+    rho, _ = spearmanr(y_true, y_pred)
+    err = y_true - y_pred
+    mae = np.mean(np.abs(err))
+    ss_res = np.sum(err ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    # QLIKE (quasi-likelihood vol loss): a zero target gives log(0) = -inf, so mask those as well.
+    mask = (y_pred > 0) & (y_true > 0)
+    qlike = float("inf")
+    if mask.any():
+        ratio = y_true[mask] / y_pred[mask]
+        qlike = np.mean(ratio - np.log(ratio) - 1)
+    return {
+        "pearson_r": r,
+        "spearman_rho": rho,
+        "r2": 1 - ss_res / ss_tot,
+        "rmse": np.sqrt(np.mean(err ** 2)),
+        "mae": mae,
+        "rel_mae": mae / np.mean(y_true),
+        "qlike": qlike,
+    }
+
+
+def print_metrics(metrics, label=""):
+    """Print the seven volatility metrics between rules, under `label` when one is given."""
+    rule = "  " + "─" * 50
+    header = [f"  {label}", rule] if label else []
+    body = [
+        f"  Pearson r:   {metrics['pearson_r']:.4f}",
+        f"  Spearman ρ:  {metrics['spearman_rho']:.4f}",
+        f"  R²:          {metrics['r2']:.4f}",
+        f"  RMSE:        {metrics['rmse']:.6f}",
+        f"  MAE:         {metrics['mae']:.6f}",
+        f"  Rel MAE:     {metrics['rel_mae']*100:.2f}%",
+        f"  QLIKE:       {metrics['qlike']:.6f}",
+    ]
+    print("\n".join(["\n" + rule, *header, *body, rule]))
+
+
+def compare_metrics(baseline, current):
+    """Print baseline vs current per metric with the % change (0.0% if the baseline is 0)."""
+    print(f"\n  {'Metric':<14} {'Baseline':<10} {'Current':<10} {'Δ':<10}")
+    print(f"  {'─'*44}")
+    lower_is_better = {"rmse", "mae", "rel_mae", "qlike"}
+    for key in ["pearson_r", "spearman_rho", "r2", "rmse", "mae", "rel_mae", "qlike"]:
+        b, c = baseline[key], current[key]
+        # Relative change against |baseline|; guarded so a zero baseline cannot divide by zero.
+        delta = abs(c - b) / abs(b) * 100 if b != 0 else 0
+        if key in lower_is_better:
+            arrow = "↓" if c < b else "↑"
+        else:
+            arrow = "↑" if c > b else "↓"
+        fmt = ".6f" if key in ["rmse", "mae"] else ".4f"
+        if key == "rel_mae":
+            # rel_mae is stored as a fraction; show it as a percentage
+            print(f"  {key:<14} {b*100:<10.2f} {c*100:<10.2f} {arrow}{delta:.1f}%")
+        else:
+            print(f"  {key:<14} {b:<10{fmt}} {c:<10{fmt}} {arrow}{delta:.1f}%")
+
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/4] Loading data...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")  # one directory above this script
+)
+
+wf = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+wf.backfill(start=start_date)
+df = wf.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df.columns if c.startswith("feature_")]
+df = df.dropna(subset=base_feature_cols + ["target"])  # keep only rows with every raw feature and a target
+
+# 80/20 split by row position (first 80% train, last 20% test; no shuffling)
+split = int(len(df) * 0.8)
+df_train = df.iloc[:split].copy()
+df_test = df.iloc[split:].copy()
+y_test = df_test["target"].values
+
+print(f"✅ {len(df):,} samples | Train: {len(df_train):,} | Test: {len(df_test):,}")
+print(f"   Mean vol: {y_test.mean():.6f} | Std vol: {y_test.std():.6f}")
+
+# =============================================================================
+# BASELINE: Model A (raw 60-bar features only)
+# =============================================================================
+print("\n[2/4] Baseline (Model A: 60 raw bars, no engineering)...")
+model_baseline = LGBMRegressor(
+    n_estimators=200, learning_rate=0.01, max_depth=5, num_leaves=31,
+    subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=1.0,
+    random_state=42, verbose=-1,
+)
+model_baseline.fit(df_train[base_feature_cols], df_train["target"])
+preds_baseline = np.maximum(model_baseline.predict(df_test[base_feature_cols]), 0)  # clamp negative forecasts to 0
+baseline_metrics = vol_metrics(y_test, preds_baseline)  # reference point for compare_metrics() later
+print_metrics(baseline_metrics, "BASELINE (Model A)")
+
+
+# =============================================================================
+# MODEL D: Feature Engineering
+# =============================================================================
+print("\n[3/4] Engineering Model D features...")
+
+
+def engineer_model_d_features(row):
+    """
+    Build Model D's engineered features from one row of raw bar features.
+
+    `row` must expose feature_{open,high,low,close,volume}_{i} for every i in
+    range(NUMBER_OF_INPUT_BARS); the result is a pd.Series keyed by feature name.
+    Families: multi-horizon realised vol and ratios, EWMA vol at two decay
+    rates, Parkinson and Garman-Klass range estimators, vol-of-vol and
+    percentile, return autocorrelation, skewness/kurtosis, absolute-return
+    magnitudes, volume interaction, efficiency ratio, recent extremes.
+    Garman-Klass means are floored at 0 before the square root, so bars with
+    inconsistent OHLC cannot produce NaN features.
+    """
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))  # n - 1 one-bar log returns
+    abs_rets = np.abs(log_rets)
+    sq_rets = log_rets ** 2
+    features = {}
+
+    # === Multi-horizon realised vol ===
+    features["vol_5m"] = np.std(log_rets[-5:], ddof=1)
+    features["vol_10m"] = np.std(log_rets[-10:], ddof=1)
+    features["vol_15m"] = np.std(log_rets[-15:], ddof=1)
+    features["vol_30m"] = np.std(log_rets[-30:], ddof=1)
+    features["vol_60m"] = np.std(log_rets, ddof=1)
+
+    # === Vol ratios (regime) ===
+    features["vol_ratio_5_15"] = features["vol_5m"] / (features["vol_15m"] + 1e-12)
+    features["vol_ratio_5_60"] = features["vol_5m"] / (features["vol_60m"] + 1e-12)
+    features["vol_ratio_15_60"] = features["vol_15m"] / (features["vol_60m"] + 1e-12)
+
+    # === GARCH(1,1)-inspired features ===
+    # Exponentially weighted variance (RiskMetrics lambda=0.94)
+    lam = 0.94
+    ewma_var = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_var = lam * ewma_var + (1 - lam) * r2
+    features["ewma_vol"] = np.sqrt(ewma_var)
+
+    # GARCH persistence: how much does yesterday's vol predict today's?
+    # Approximate with ratio of EWMA to realised
+    features["garch_persistence"] = features["ewma_vol"] / (features["vol_60m"] + 1e-12)
+
+    # Conditional vol: EWMA computed at different lambdas
+    lam_fast = 0.85  # faster decay — more reactive
+    ewma_fast = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_fast = lam_fast * ewma_fast + (1 - lam_fast) * r2
+    features["ewma_vol_fast"] = np.sqrt(ewma_fast)
+    features["ewma_fast_slow_ratio"] = features["ewma_vol_fast"] / (features["ewma_vol"] + 1e-12)
+
+    # === Parkinson volatility (high-low based) ===
+    hl_log = np.log(highs + 1e-12) - np.log(lows + 1e-12)
+    features["parkinson_15m"] = np.sqrt(np.mean(hl_log[-15:] ** 2) / (4 * np.log(2)))
+    features["parkinson_60m"] = np.sqrt(np.mean(hl_log ** 2) / (4 * np.log(2)))
+    features["parkinson_ratio"] = features["parkinson_15m"] / (features["parkinson_60m"] + 1e-12)
+
+    # === Garman-Klass volatility (uses OHLC); the mean is floored at 0 so sqrt never sees a negative ===
+    opens = np.array([row[f"feature_open_{i}"] for i in range(n)])
+    gk_terms = 0.5 * hl_log ** 2 - (2 * np.log(2) - 1) * (np.log(closes + 1e-12) - np.log(opens + 1e-12)) ** 2
+    features["garman_klass_15m"] = np.sqrt(np.maximum(np.mean(gk_terms[-15:]), 0.0))
+    features["garman_klass_60m"] = np.sqrt(np.maximum(np.mean(gk_terms), 0.0))
+
+    # === Vol of vol (second-order) ===
+    rolling_5m_vols = np.array([
+        np.std(log_rets[i:i+5], ddof=1) for i in range(len(log_rets) - 5)  # NOTE(review): stops one window short of log_rets[-5:] — confirm intended
+    ])
+    if len(rolling_5m_vols) >= 2:
+        features["vol_of_vol"] = np.std(rolling_5m_vols, ddof=1)
+        features["vol_mean_reversion"] = (features["vol_5m"] - np.mean(rolling_5m_vols)) / (np.std(rolling_5m_vols, ddof=1) + 1e-12)
+    else:
+        features["vol_of_vol"] = 0.0
+        features["vol_mean_reversion"] = 0.0
+
+    # === Vol quantile (where are we in the local distribution?) ===
+    if len(rolling_5m_vols) > 0:
+        features["vol_percentile"] = np.mean(rolling_5m_vols <= features["vol_5m"])
+    else:
+        features["vol_percentile"] = 0.5
+
+    # === Return autocorrelation (clustering signal) ===
+    features["ret_autocorr_1"] = np.corrcoef(log_rets[1:], log_rets[:-1])[0, 1] if len(log_rets) > 2 else 0.0
+    features["absret_autocorr_1"] = np.corrcoef(abs_rets[1:], abs_rets[:-1])[0, 1] if len(abs_rets) > 2 else 0.0
+    # corrcoef yields NaN for constant series; treat that as no autocorrelation
+    for k in ["ret_autocorr_1", "absret_autocorr_1"]:
+        if not np.isfinite(features[k]):
+            features[k] = 0.0
+
+    # === Return distribution shape ===
+    if len(log_rets) >= 15:
+        recent = log_rets[-15:]
+        mean_r = np.mean(recent)
+        std_r = np.std(recent, ddof=1)
+        if std_r > 1e-12:
+            features["skewness_15m"] = np.mean(((recent - mean_r) / std_r) ** 3)
+            features["kurtosis_15m"] = np.mean(((recent - mean_r) / std_r) ** 4)
+        else:
+            features["skewness_15m"] = 0.0
+            features["kurtosis_15m"] = 3.0
+    else:
+        features["skewness_15m"] = 0.0
+        features["kurtosis_15m"] = 3.0
+
+    # === Absolute returns (magnitude features) ===
+    features["abs_ret_mean_5m"] = np.mean(abs_rets[-5:])
+    features["abs_ret_mean_15m"] = np.mean(abs_rets[-15:])
+    features["abs_ret_max_15m"] = np.max(abs_rets[-15:])
+    features["abs_ret_max_60m"] = np.max(abs_rets)
+
+    # === Volume-volatility interaction ===
+    features["volume_ratio_5_60"] = np.mean(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+    features["volume_spike"] = np.max(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+    # Volume-weighted absolute return (volumes[1:] aligns with the n - 1 returns)
+    vol_weights = volumes[1:] / (np.sum(volumes[1:]) + 1e-12)
+    features["vwap_absret"] = np.sum(abs_rets * vol_weights)
+
+    # === Trend vs chop (efficiency ratio) ===
+    net_move = abs(np.sum(log_rets[-15:]))
+    total_path = np.sum(abs_rets[-15:])
+    features["efficiency_15m"] = net_move / (total_path + 1e-12)
+
+    # === Recent extreme moves ===
+    features["max_abs_ret_5m"] = np.max(abs_rets[-5:])
+    features["max_abs_ret_ratio"] = features["max_abs_ret_5m"] / (features["vol_5m"] + 1e-12)
+
+    return pd.Series(features)
+
+
+print("   Engineering features...")
+eng_train = df_train.apply(engineer_model_d_features, axis=1)  # row-wise: one Python call per sample
+eng_test = df_test.apply(engineer_model_d_features, axis=1)
+
+df_train = pd.concat([df_train, eng_train], axis=1)
+df_test = pd.concat([df_test, eng_test], axis=1)
+
+eng_cols = list(eng_train.columns)
+all_feature_cols = base_feature_cols + eng_cols  # column order the model is fitted with; predict() reuses it
+
+# Drop any rows with NaN in engineered features (this filter exists only here, not at inference)
+df_train = df_train.dropna(subset=all_feature_cols)
+df_test = df_test.dropna(subset=all_feature_cols)
+y_test = df_test["target"].values  # recomputed because rows may have been dropped
+
+print(f"   Features: {len(base_feature_cols)} base + {len(eng_cols)} engineered = {len(all_feature_cols)} total")
+
+# =============================================================================
+# MODEL D: Train with Huber loss
+# =============================================================================
+print("\n[4/4] Training Model D (Huber loss, heavy regularization)...")
+
+model_d = LGBMRegressor(
+    objective="huber",          # Robust to vol spikes
+    alpha=0.5,                  # Huber delta. NOTE(review): residuals here are ~1e-4–1e-3 (see baseline RMSE in the module docstring), far below 0.5, so this likely acts as plain L2 — confirm
+    n_estimators=500,
+    learning_rate=0.01,
+    max_depth=6,
+    num_leaves=31,
+    subsample=0.8,
+    colsample_bytree=0.7,
+    min_child_samples=100,      # Conservative splits
+    reg_alpha=0.5,              # L1
+    reg_lambda=2.0,             # L2
+    random_state=42,
+    verbose=-1,
+)
+model_d.fit(df_train[all_feature_cols], df_train["target"])
+preds_d = np.maximum(model_d.predict(df_test[all_feature_cols]), 0)  # clamp negative forecasts to 0
+
+model_d_metrics = vol_metrics(y_test, preds_d)
+print_metrics(model_d_metrics, "MODEL D (Huber + GARCH features)")
+
+print("\n📊 Improvement over baseline:")
+compare_metrics(baseline_metrics, model_d_metrics)  # NOTE(review): y_test was re-filtered after the baseline was scored, so row sets may differ
+
+# =============================================================================
+# SAVE
+# =============================================================================
+print("\n\nSaving Model D...")
+
+# predict() below looks up the module-level name `workflow`, so alias wf to it
+workflow = wf
+
+def predict(nonce=None):
+    """Return Model D's 15-min volatility forecast, clamped at 0 (`nonce` is unused)."""
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    base_part = live_row[base_feature_cols].iloc[0]
+    live_features = pd.concat([base_part, engineer_model_d_features(live_row.iloc[0])])
+    vol = max(0.0, float(model_d.predict(live_features[all_feature_cols].values.reshape(1, -1))[0]))
+    print(f"Model D prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+print("🧪 Testing prediction...")
+test_pred = predict()  # smoke test: a failure here aborts before anything is written
+
+with open("predict_79_model_d.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)  # serialises the predict function itself
+
+print(f"\n✅ Saved predict_79_model_d.pkl")
+print(f"   Pearson r: {model_d_metrics['pearson_r']:.4f} | R²: {model_d_metrics['r2']:.4f} | QLIKE: {model_d_metrics['qlike']:.6f}")
diff --git a/notebooks/testnet/topic_79_btc_vol/topic_79_model_e_calibrated.py b/notebooks/testnet/topic_79_btc_vol/topic_79_model_e_calibrated.py
new file mode 100644
index 0000000..25d1a4c
--- /dev/null
+++ b/notebooks/testnet/topic_79_btc_vol/topic_79_model_e_calibrated.py
@@ -0,0 +1,433 @@
+#!/usr/bin/env python3
+"""
+Topic 79 — Model E: Calibrated Volatility (Distribution-Aware)
+==============================================================
+
+Problem: Previous models compress predictions into a narrow band because
+tree models with MSE/Huber loss regress toward the mean. The scatter plot
+shows predictions trapped in [0.0004, 0.0011] while targets range to 0.006+.
+
+Solution: Three techniques to match the target distribution:
+1. Log-space prediction: predict log(vol) to equalize error across magnitudes
+2. Quantile ensemble: blend the log-space prediction with a median (q50) model
+3. Bias correction: rescale exp(log-pred) by exp(0.5 * var(log residuals))
+
+The goal is to match both the SHAPE and MAGNITUDE of the target distribution,
+not just minimize average error.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from scipy.stats import pearsonr, spearmanr
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["btcusd"]  # predict() only ever queries TICKERS[0]
+DAYS_OF_HISTORY = 800  # backfill window, counted back from "now" (UTC)
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 60  # lookback bars per sample; engineer_features() reads feature_*_0 .. feature_*_59
+TARGET_BARS = 15  # forecast horizon in bars (15 x 1m)
+TARGET_TYPE = "volatility"
+
+print("=" * 80)
+print("Topic 79 — Model E: Calibrated Volatility (Distribution-Aware)")
+print("=" * 80)
+
+
+# =============================================================================
+# METRICS
+# =============================================================================
+def vol_metrics(y_true, y_pred):
+    """Score volatility forecasts: correlation, error, QLIKE, spread and tail metrics.
+
+    Args: y_true / y_pred are equal-length array-likes of realised / predicted vol.
+    Returns: dict of floats keyed pearson_r, spearman_rho, r2, rmse, mae, rel_mae,
+    qlike, cal_ratio (pred std / target std) and tail_capture.
+    """
+    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
+    r, _ = pearsonr(y_true, y_pred)
+    rho, _ = spearmanr(y_true, y_pred)
+    err = y_true - y_pred
+    mae = np.mean(np.abs(err))
+    r2 = 1 - np.sum(err ** 2) / np.sum((y_true - np.mean(y_true)) ** 2)
+    # QLIKE is scored on strictly positive pairs only: a zero target gives
+    # log(0) = -inf, which would turn the mean (and any ranking built on it) into inf.
+    mask = (y_pred > 0) & (y_true > 0)
+    ratio = y_true[mask] / y_pred[mask]
+    qlike = np.mean(ratio - np.log(ratio) - 1) if mask.any() else float("inf")
+    # Tail capture: of the targets above their own p90, the share whose prediction
+    # is also above the predictions' p90 (1.0 = every spike is ranked as a spike).
+    tail_mask = y_true > np.percentile(y_true, 90)
+    top_pred = np.percentile(y_pred, 90)
+    tail_capture = np.mean(y_pred[tail_mask] > top_pred) if tail_mask.any() else 0.0
+    return {
+        "pearson_r": r, "spearman_rho": rho, "r2": r2,
+        "rmse": np.sqrt(np.mean(err ** 2)), "mae": mae, "rel_mae": mae / np.mean(y_true),
+        "qlike": qlike, "cal_ratio": np.std(y_pred) / np.std(y_true), "tail_capture": tail_capture,
+    }
+
+
+def print_metrics(metrics, label=""):
+    """Print the Model E metrics dict as a ruled block, with an optional heading."""
+    rule = f"  {'─' * 55}"
+    # Opening rule, plus the heading and a second rule when a label is given.
+    banner = f"\n  {label}\n{rule}" if label else ""
+    print(f"\n{rule}{banner}")
+    print(f"  Pearson r:      {metrics['pearson_r']:.4f}")
+    print(f"  Spearman ρ:     {metrics['spearman_rho']:.4f}")
+    print(f"  R²:             {metrics['r2']:.4f}")
+    print(f"  RMSE:           {metrics['rmse']:.6f}")
+    print(f"  MAE:            {metrics['mae']:.6f}")
+    print(f"  Rel MAE:        {metrics['rel_mae']*100:.2f}%")
+    print(f"  QLIKE:          {metrics['qlike']:.6f}")
+    print(f"  Cal ratio:      {metrics['cal_ratio']:.4f}  (1.0 = perfect spread)")
+    print(f"  Tail capture:   {metrics['tail_capture']:.4f}  (1.0 = perfect tail)")
+    print(rule)
+
+
+def compare_metrics(baseline, current):
+    """Print a baseline-vs-current table with the relative change of each metric.
+
+    Error metrics (rmse, mae, rel_mae, qlike) show "↓" when current is lower, the score
+    metrics show "↑" when current is higher, and cal_ratio shows "✓" when closer to 1.0.
+    """
+    lower_is_better = ("rmse", "mae", "rel_mae", "qlike")
+    print(f"\n  {'Metric':<16} {'Baseline':<10} {'Current':<10} {'Δ':<10}")
+    print(f"  {'─'*46}")
+    for key in ["pearson_r", "spearman_rho", "r2", "rmse", "mae", "rel_mae", "qlike", "cal_ratio"]:
+        b, c = baseline[key], current[key]
+        if key == "cal_ratio":
+            arrow = "✓" if abs(1 - c) < abs(1 - b) else "✗"  # verdict only, no percentage
+            print(f"  {key:<16} {b:<10.4f} {c:<10.4f} {arrow}")
+            continue
+        pct = abs(c - b) / abs(b) * 100 if b != 0 else 0
+        if key in lower_is_better:
+            arrow = "↓" if c < b else "↑"
+        else:
+            arrow = "↑" if c > b else "↓"
+        scale, fmt = (100, ".2f") if key == "rel_mae" else (1, ".6f" if key in ("rmse", "mae") else ".4f")
+        print(f"  {key:<16} {b * scale:<10{fmt}} {c * scale:<10{fmt}} {arrow}{pct:.1f}%")
+
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/5] Loading data...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")  # NOTE(review): one directory above this script — confirm the key file lives there
+)
+
+wf = AlloraMLWorkflow(
+    tickers=TICKERS, number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS, interval=INTERVAL, target_type=TARGET_TYPE,
+    data_source="allora", api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+wf.backfill(start=start_date)
+df = wf.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df.columns if c.startswith("feature_")]  # every raw lookback column
+df = df.dropna(subset=base_feature_cols + ["target"])
+
+split = int(len(df) * 0.8)  # positional 80/20 split; NOTE(review): chronological only if rows are time-ordered — confirm
+df_train = df.iloc[:split].copy()
+df_test = df.iloc[split:].copy()  # NOTE(review): no embargo gap is left between the last train row and the first test row
+y_test = df_test["target"].values
+
+print(f"✅ {len(df):,} samples | Train: {len(df_train):,} | Test: {len(df_test):,}")
+print(f"   Target stats: mean={y_test.mean():.6f} std={y_test.std():.6f} "
+      f"p90={np.percentile(y_test, 90):.6f} max={y_test.max():.6f}")
+
+
+# =============================================================================
+# FEATURE ENGINEERING (same as Model D)
+# =============================================================================
+print("\n[2/5] Engineering features...")
+
+
+def engineer_features(row):  # row: one sample holding feature_{open,high,low,close,volume}_i columns -> pd.Series of engineered features
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+    opens = np.array([row[f"feature_open_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))  # n-1 one-bar log returns; 1e-12 guards against log(0)
+    abs_rets = np.abs(log_rets)
+    sq_rets = log_rets ** 2
+    features = {}
+
+    # Multi-horizon vol: sample std (ddof=1) of the last k log returns (assumes index n-1 is the newest bar — confirm)
+    features["vol_5m"] = np.std(log_rets[-5:], ddof=1)
+    features["vol_10m"] = np.std(log_rets[-10:], ddof=1)
+    features["vol_15m"] = np.std(log_rets[-15:], ddof=1)
+    features["vol_30m"] = np.std(log_rets[-30:], ddof=1)
+    features["vol_60m"] = np.std(log_rets, ddof=1)  # whole lookback window (n-1 returns)
+
+    # Vol ratios (short horizon over long horizon; 1e-12 avoids division by zero)
+    features["vol_ratio_5_15"] = features["vol_5m"] / (features["vol_15m"] + 1e-12)
+    features["vol_ratio_5_60"] = features["vol_5m"] / (features["vol_60m"] + 1e-12)
+    features["vol_ratio_15_60"] = features["vol_15m"] / (features["vol_60m"] + 1e-12)
+
+    # EWMA variance of squared returns, seeded with the first squared return (slow: lam=0.94, fast: lam=0.85)
+    lam = 0.94
+    ewma_var = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_var = lam * ewma_var + (1 - lam) * r2
+    features["ewma_vol"] = np.sqrt(ewma_var)
+
+    lam_fast = 0.85
+    ewma_fast = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_fast = lam_fast * ewma_fast + (1 - lam_fast) * r2
+    features["ewma_vol_fast"] = np.sqrt(ewma_fast)
+    features["ewma_fast_slow_ratio"] = features["ewma_vol_fast"] / (features["ewma_vol"] + 1e-12)
+    features["garch_persistence"] = features["ewma_vol"] / (features["vol_60m"] + 1e-12)  # slow EWMA vol relative to whole-window sample vol
+
+    # Parkinson & Garman-Klass (range-based estimators built from per-bar log(high/low) and log(close/open))
+    hl_log = np.log(highs + 1e-12) - np.log(lows + 1e-12)
+    features["parkinson_15m"] = np.sqrt(np.mean(hl_log[-15:] ** 2) / (4 * np.log(2)))
+    features["parkinson_60m"] = np.sqrt(np.mean(hl_log ** 2) / (4 * np.log(2)))
+    features["parkinson_ratio"] = features["parkinson_15m"] / (features["parkinson_60m"] + 1e-12)
+    gk_terms = 0.5 * hl_log ** 2 - (2 * np.log(2) - 1) * (np.log(closes + 1e-12) - np.log(opens + 1e-12)) ** 2
+    features["garman_klass_15m"] = np.sqrt(np.abs(np.mean(gk_terms[-15:])))  # np.abs: the mean of the GK terms can be negative
+    features["garman_klass_60m"] = np.sqrt(np.abs(np.mean(gk_terms)))
+
+    # Vol of vol & mean reversion
+    rolling_5m_vols = np.array([np.std(log_rets[i:i+5], ddof=1) for i in range(len(log_rets) - 5)])  # starts 0..len-6, so the final 5-return window (vol_5m) is not in this set
+    if len(rolling_5m_vols) >= 2:
+        features["vol_of_vol"] = np.std(rolling_5m_vols, ddof=1)
+        features["vol_mean_reversion"] = (features["vol_5m"] - np.mean(rolling_5m_vols)) / (np.std(rolling_5m_vols, ddof=1) + 1e-12)  # z-score of the latest 5m vol
+        features["vol_percentile"] = np.mean(rolling_5m_vols <= features["vol_5m"])  # share of rolling windows at or below the latest 5m vol
+    else:
+        features["vol_of_vol"] = 0.0
+        features["vol_mean_reversion"] = 0.0
+        features["vol_percentile"] = 0.5
+
+    # Autocorrelation (lag 1) of absolute returns; a non-finite result is replaced by 0.0 below
+    features["absret_autocorr_1"] = np.corrcoef(abs_rets[1:], abs_rets[:-1])[0, 1] if len(abs_rets) > 2 else 0.0
+    if not np.isfinite(features["absret_autocorr_1"]):
+        features["absret_autocorr_1"] = 0.0
+
+    # Distribution shape: mean fourth power of standardized returns over the last 15; 3.0 is the fallback
+    if len(log_rets) >= 15:
+        recent = log_rets[-15:]
+        std_r = np.std(recent, ddof=1)
+        if std_r > 1e-12:
+            features["kurtosis_15m"] = np.mean(((recent - np.mean(recent)) / std_r) ** 4)
+        else:
+            features["kurtosis_15m"] = 3.0
+    else:
+        features["kurtosis_15m"] = 3.0
+
+    # Magnitude features
+    features["abs_ret_mean_5m"] = np.mean(abs_rets[-5:])
+    features["abs_ret_max_15m"] = np.max(abs_rets[-15:])
+    features["abs_ret_max_60m"] = np.max(abs_rets)
+
+    # Volume interaction: last-5-bar mean and max relative to the whole-window mean volume
+    features["volume_ratio_5_60"] = np.mean(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+    features["volume_spike"] = np.max(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+
+    # Efficiency ratio: |sum of returns| over sum of |returns| for the last 15 returns
+    net_move = abs(np.sum(log_rets[-15:]))
+    total_path = np.sum(abs_rets[-15:])
+    features["efficiency_15m"] = net_move / (total_path + 1e-12)
+
+    return pd.Series(features)
+
+
+print("   Engineering features (this takes ~40 min on 1.15M rows)...")
+eng_train = df_train.apply(engineer_features, axis=1)  # axis=1: one Python call per row, which is the slow step
+eng_test = df_test.apply(engineer_features, axis=1)
+
+df_train = pd.concat([df_train.reset_index(drop=True), eng_train.reset_index(drop=True)], axis=1)  # both sides re-indexed 0..n-1 so the column-wise concat lines up by position
+df_test = pd.concat([df_test.reset_index(drop=True), eng_test.reset_index(drop=True)], axis=1)
+
+eng_cols = list(eng_train.columns)
+all_feature_cols = base_feature_cols + eng_cols  # column order used for training and again inside predict()
+df_train = df_train.dropna(subset=all_feature_cols)
+df_test = df_test.dropna(subset=all_feature_cols)
+y_test = df_test["target"].values  # recomputed because dropna may have removed test rows
+
+print(f"   ✅ {len(all_feature_cols)} features ready")
+
+
+# =============================================================================
+# BASELINE: Model D approach (Huber, single model)
+# =============================================================================
+print("\n[3/5] Baseline (Model D: Huber loss)...")
+model_baseline = LGBMRegressor(
+    objective="huber", alpha=0.5, n_estimators=500, learning_rate=0.01,  # NOTE(review): LightGBM's huber `alpha` is the residual threshold; with targets around 1e-3 a value of 0.5 presumably leaves the loss purely quadratic — confirm
+    max_depth=6, num_leaves=31, subsample=0.8, colsample_bytree=0.7,  # NOTE(review): LightGBM is documented to apply `subsample` only when subsample_freq > 0 — confirm bagging is intended
+    min_child_samples=100, reg_alpha=0.5, reg_lambda=2.0,
+    random_state=42, verbose=-1,
+)
+model_baseline.fit(df_train[all_feature_cols], df_train["target"])
+preds_baseline = np.maximum(model_baseline.predict(df_test[all_feature_cols]), 0)  # clip negative volatility forecasts to 0
+baseline_metrics = vol_metrics(y_test, preds_baseline)
+print_metrics(baseline_metrics, "BASELINE (Model D: Huber)")
+
+
+# =============================================================================
+# MODEL E: Log-space + Quantile Ensemble
+# =============================================================================
+print("\n[4/5] Training Model E (log-space + quantile ensemble)...")
+
+# --- Strategy 1: Predict in log-space ---
+# Transform: log(vol) is more Gaussian, equalizes error across magnitudes
+y_train_log = np.log(df_train["target"].values + 1e-10)  # 1e-10 keeps the log finite when a target is exactly 0
+y_test_log = np.log(y_test + 1e-10)  # NOTE(review): not referenced again in this file
+
+model_log = LGBMRegressor(
+    objective="regression",  # MSE in log-space = multiplicative error in real space
+    n_estimators=800,
+    learning_rate=0.01,
+    max_depth=7,
+    num_leaves=63,
+    subsample=0.8,
+    colsample_bytree=0.7,
+    min_child_samples=50,
+    reg_alpha=0.1,
+    reg_lambda=1.0,
+    random_state=42,
+    verbose=-1,
+)
+model_log.fit(df_train[all_feature_cols], y_train_log)
+preds_log_space = model_log.predict(df_test[all_feature_cols])
+preds_from_log = np.exp(preds_log_space)  # back to real space
+
+log_metrics = vol_metrics(y_test, preds_from_log)
+print_metrics(log_metrics, "Log-space model (exp transform back)")
+
+# --- Strategy 2: Quantile models for calibration ---
+# Train at 50th percentile (median) and 75th percentile
+model_q50 = LGBMRegressor(
+    objective="quantile", alpha=0.5,  # median
+    n_estimators=500, learning_rate=0.01, max_depth=6, num_leaves=31,
+    subsample=0.8, colsample_bytree=0.7, min_child_samples=100,
+    reg_alpha=0.3, reg_lambda=1.5, random_state=42, verbose=-1,
+)
+model_q50.fit(df_train[all_feature_cols], df_train["target"])
+preds_q50 = np.maximum(model_q50.predict(df_test[all_feature_cols]), 0)
+
+model_q75 = LGBMRegressor(
+    objective="quantile", alpha=0.75,  # upper quartile
+    n_estimators=500, learning_rate=0.01, max_depth=6, num_leaves=31,
+    subsample=0.8, colsample_bytree=0.7, min_child_samples=100,
+    reg_alpha=0.3, reg_lambda=1.5, random_state=42, verbose=-1,
+)
+model_q75.fit(df_train[all_feature_cols], df_train["target"])
+preds_q75 = np.maximum(model_q75.predict(df_test[all_feature_cols]), 0)  # NOTE(review): model_q75 / preds_q75 feed no candidate below — confirm whether a q75 blend was intended
+
+# --- Strategy 3: Ensemble blend ---
+# Blend log-space model (good at shape) with quantile shift (good at tails)
+# The log model captures the full range; we blend with q50 for stability
+alpha = 0.6  # weight on log-space model (the label printed below hard-codes 0.6 / 0.4)
+preds_ensemble = alpha * preds_from_log + (1 - alpha) * preds_q50
+
+ensemble_metrics = vol_metrics(y_test, preds_ensemble)
+print_metrics(ensemble_metrics, "Ensemble (0.6*log + 0.4*q50)")
+
+# --- Strategy 4: Log-space with bias correction ---
+# exp(E[log(x)]) underestimates E[x] for log-normal. Apply correction.
+# Correction factor: exp(0.5 * residual_variance_in_log_space)
+log_residuals = y_train_log - model_log.predict(df_train[all_feature_cols])  # residuals are measured on the training rows
+bias_correction = np.exp(0.5 * np.var(log_residuals))
+preds_corrected = preds_from_log * bias_correction
+
+corrected_metrics = vol_metrics(y_test, preds_corrected)
+print_metrics(corrected_metrics, f"Log-space + bias correction (factor={bias_correction:.4f})")
+
+# --- Pick the best ---
+candidates = [  # (name, metrics dict, test-set predictions); the names are matched again when packaging predict()
+    ("baseline_huber", baseline_metrics, preds_baseline),
+    ("log_space", log_metrics, preds_from_log),
+    ("ensemble_log_q50", ensemble_metrics, preds_ensemble),
+    ("log_corrected", corrected_metrics, preds_corrected),
+]
+
+# Rank by a composite: prioritize QLIKE (vol-specific) and cal_ratio (distribution match)
+def composite_score(m):
+    """Composite rank key for a metrics dict, higher is better: R² - 0.5*QLIKE - 0.3*|1 - cal_ratio|."""
+    return m["r2"] - 0.5 * m["qlike"] - 0.3 * abs(1 - m["cal_ratio"])
+
+print("\n\n  Candidate ranking (composite score):")
+print(f"  {'Name':<20} {'R²':<8} {'QLIKE':<8} {'Cal':<8} {'Score':<8}")
+print(f"  {'─'*52}")
+ranked = sorted(candidates, key=lambda x: composite_score(x[1]), reverse=True)  # best (highest score) first
+for name, m, _ in ranked:
+    score = composite_score(m)
+    print(f"  {name:<20} {m['r2']:.4f}  {m['qlike']:.4f}  {m['cal_ratio']:.4f}  {score:.4f}")
+
+best_name, best_metrics, best_preds = ranked[0]  # NOTE(review): the winner is picked on the same test split whose metrics are reported
+print(f"\n  → Winner: {best_name}")
+
+
+# =============================================================================
+# STEP 5: Save best model for deployment
+# =============================================================================
+print(f"\n[5/5] Saving Model E ({best_name})...")
+print_metrics(best_metrics, f"MODEL E FINAL ({best_name})")
+print("\n📊 Improvement over Model D baseline:")
+compare_metrics(baseline_metrics, best_metrics)
+
+# For deployment, we need to package the right predict function
+workflow = wf  # predict() reads `workflow` as a module global
+
+if best_name == "log_space":
+    _deploy_model = model_log
+    _bias = 1.0  # no retransformation correction
+elif best_name == "log_corrected":
+    _deploy_model = model_log
+    _bias = bias_correction
+elif best_name == "ensemble_log_q50":  # this branch defines no _deploy_model/_bias; predict()'s ensemble path does not read them
+    _deploy_model_log = model_log
+    _deploy_model_q50 = model_q50
+    _alpha = alpha
+else:
+    _deploy_model = model_baseline
+    _bias = None  # unused on the baseline path of predict()
+
+
+def predict(nonce=None):
+    """Return the winning strategy's 15-min volatility forecast for TICKERS[0], floored at 0.0."""
+    latest = workflow.get_live_features(ticker=TICKERS[0])
+    if latest is None or not len(latest):
+        raise ValueError("Could not get live features")
+    engineered = engineer_features(latest.iloc[0])
+    merged = pd.concat([latest[base_feature_cols].iloc[0], engineered])
+    x = merged[all_feature_cols].values.reshape(1, -1)
+
+    # Dispatch on the strategy name that won the ranking at training time.
+    if best_name == "ensemble_log_q50":
+        from_log = np.exp(_deploy_model_log.predict(x)[0])
+        median = max(0, _deploy_model_q50.predict(x)[0])
+        raw = _alpha * from_log + (1 - _alpha) * median
+    elif best_name in ("log_space", "log_corrected"):
+        raw = np.exp(_deploy_model.predict(x)[0]) * _bias  # _bias is 1.0 for plain log_space
+    else:
+        raw = _deploy_model.predict(x)[0]  # the baseline model predicts volatility directly
+    vol = max(0.0, float(raw))
+    print(f"Model E prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+
+print("\n🧪 Testing prediction...")
+test_pred = predict()  # smoke test against live data before the function is serialized
+
+with open("predict_79_model_e.pkl", "wb") as f:  # relative path: written to the current working directory
+    cloudpickle.dump(predict, f)
+
+print(f"\n✅ Saved predict_79_model_e.pkl")
+print(f"   Strategy: {best_name}")
+print(f"   Pearson r: {best_metrics['pearson_r']:.4f} | R²: {best_metrics['r2']:.4f}")
+print(f"   Cal ratio: {best_metrics['cal_ratio']:.4f} | QLIKE: {best_metrics['qlike']:.6f}")
diff --git a/notebooks/testnet/topic_80_eth_vol/example_topic_80_eth_volatility_walkthrough.py b/notebooks/testnet/topic_80_eth_vol/example_topic_80_eth_volatility_walkthrough.py
new file mode 100644
index 0000000..3c8c3a4
--- /dev/null
+++ b/notebooks/testnet/topic_80_eth_vol/example_topic_80_eth_volatility_walkthrough.py
@@ -0,0 +1,547 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 80 ETH/USD 15-Minute Volatility Prediction
+================================================================================
+
+This walkthrough demonstrates 15-minute realised volatility prediction for
+ETH/USD using the Allora ML Workflow Kit with base features and LightGBM.
+
+Target definition:
+    The standard deviation of consecutive 1-minute log returns over the next
+    15 minutes.  Formally, for each timestamp t:
+
+        r_i = log(close[t+i] / close[t+i-1])   for i in 1..15
+        target[t] = std(r_1, r_2, ..., r_15)
+
+    This matches the ground-truth definition used by the Allora volatility
+    reputer (allora-reputer-volatility-prediction).
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+from scipy.stats import pearsonr, spearmanr
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["ethusd"]
+DAYS_OF_HISTORY = 60
+INTERVAL = "1m"  # 1-minute base interval for volatility
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 15  # 15 minutes of 1-minute bars for input features
+TARGET_BARS = 15           # 15-minute volatility horizon (also used as the CV embargo gap)
+
+# Target type: volatility (std of 1-min log returns over the horizon)
+TARGET_TYPE = "volatility"
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold (60 days of 1m bars is at most 86,400 rows, so effectively uncapped)
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]  # tree counts at which each fitted model is scored (passed as num_iteration)
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]  # NOTE(review): a depth-3 tree has at most 8 leaves, so these values presumably coincide for max_depth=3 — confirm
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("=" * 80)
+print("Allora Forge Builder Kit v3.0 - Topic 80 Walkthrough")
+print("ETH/USD 15-Minute Volatility Prediction")
+print("=" * 80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas objects into JSON-serializable Python types."""
+    # dicts, lists and tuples are walked (tuples come back as lists); anything else passes through.
+    if isinstance(obj, (np.floating, np.integer, np.bool_)):
+        return obj.item()
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}
+    # Tuples are walked too: left as-is, numpy scalars inside them make json.dump raise.
+    if isinstance(obj, (list, tuple)):
+        return [_to_serializable(v) for v in obj]
+    return obj
+
+
+# =============================================================================
+# VOLATILITY-SPECIFIC METRICS
+# =============================================================================
+def vol_metrics(y_true, y_pred):
+    """
+    Compute volatility-specific evaluation metrics.
+
+    These replace the standard log-return metrics (DA, CZAR) which are not
+    meaningful for volatility prediction.
+
+    Args:
+        y_true: array-like of realised volatilities.
+        y_pred: array-like of predicted volatilities, same length as y_true.
+
+    Returns:
+        dict keyed pearson_r, spearman_rho, r2, rmse, mae, rel_mae, qlike.
+    """
+    y_true = np.asarray(y_true)
+    y_pred = np.asarray(y_pred)
+    r, _ = pearsonr(y_true, y_pred)
+    rho, _ = spearmanr(y_true, y_pred)
+    err = y_true - y_pred
+    mae = np.mean(np.abs(err))
+    r2 = 1 - np.sum(err ** 2) / np.sum((y_true - np.mean(y_true)) ** 2)
+    # QLIKE: quasi-likelihood loss (standard for volatility forecasting).
+    # Scored on strictly positive pairs only: a zero target gives log(0) = -inf,
+    # which would turn the whole mean into inf.
+    mask = (y_pred > 0) & (y_true > 0)
+    ratio = y_true[mask] / y_pred[mask]
+    qlike = np.mean(ratio - np.log(ratio) - 1) if mask.any() else float("inf")
+    return {
+        "pearson_r": r, "spearman_rho": rho, "r2": r2,
+        "rmse": np.sqrt(np.mean(err ** 2)),
+        "mae": mae,
+        "rel_mae": mae / np.mean(y_true),
+        "qlike": qlike,
+    }
+
+
+def print_vol_metrics(metrics, label=""):
+    """Print the volatility metrics dict as a ruled block, with an optional heading."""
+    rule = f"  {'─' * 50}"
+    # Opening rule, plus the heading and a second rule when a label is given.
+    banner = f"\n  {label}\n{rule}" if label else ""
+    print(f"\n{rule}{banner}")
+    print(f"  Pearson r:   {metrics['pearson_r']:.4f}")
+    print(f"  Spearman ρ:  {metrics['spearman_rho']:.4f}")
+    print(f"  R²:          {metrics['r2']:.4f}")
+    print(f"  RMSE:        {metrics['rmse']:.6f}")
+    print(f"  MAE:         {metrics['mae']:.6f}")
+    print(f"  Rel MAE:     {metrics['rel_mae']*100:.2f}%")
+    print(f"  QLIKE:       {metrics['qlike']:.6f}")
+    print(rule)
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Persist config/metrics/predictions and basic diagnostic plots under run_dir.
+
+    df_eval supplies the open_time/target columns; best_result maps metric names to
+    values and may carry a "predictions" entry whose .values become the "pred" column.
+    Returns a dict with the run_dir, predictions_csv and scatter_png paths.
+    """
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config
+    config = {
+        "topic_id": 80,  # this script targets Topic 80 (ETH/USD), matching the report title below
+        "target_type": TARGET_TYPE,
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics (everything in best_result except the per-row predictions)
+    metrics_payload = {k: v for k, v in best_result.items() if k != "predictions"}
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values
+
+    export_cols = [c for c in ("open_time", "target", "pred") if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x reference line
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (realised volatility)")
+    plt.ylabel("Prediction (realised volatility)")
+    plt.title("Predictions vs Target — 15-min ETH Volatility")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 80 Run Report\n")
+        f.write("ETH/USD 15-Minute Volatility Prediction\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Volatility Metrics:\n")
+        for key in ["pearson_r", "spearman_rho", "r2", "rmse", "mae", "rel_mae", "qlike"]:
+            if key in best_result:
+                f.write(f"  {key}: {best_result[key]:.6f}\n")
+
+    return {"run_dir": run_dir, "predictions_csv": preds_csv_path, "scatter_png": scatter_path}
+
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")  # NOTE(review): one directory above this script — confirm the key file lives there
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+
+print("✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS * 5}")  # 5 per bar: presumably open/high/low/close/volume — confirm
+print(f"   Target: {TARGET_TYPE} over {TARGET_BARS}-minute horizon")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:  # deliberately broad: a failed backfill is reported and the run continues to the extraction step
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(
+        start_date=start_date
+    ).reset_index()
+except Exception as e:  # any extraction failure is re-raised with setup guidance, chained to the original error
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+
+# Feature Engineering: Add volatility-relevant features from the lookback window
+def engineer_vol_features(row):
+    """Derive volatility features from one row's lookback window (same row only, so no leakage).
+
+    Reads feature_close_i / feature_high_i / feature_low_i for i < NUMBER_OF_INPUT_BARS
+    and returns a pd.Series of the engineered values.
+    """
+    bars = range(NUMBER_OF_INPUT_BARS)
+    closes = np.array([row[f"feature_close_{i}"] for i in bars])
+    highs = np.array([row[f"feature_high_{i}"] for i in bars])
+    lows = np.array([row[f"feature_low_{i}"] for i in bars])
+
+    # One-bar log returns; the 1e-12 offset guards against log(0).
+    log_returns = np.diff(np.log(closes + 1e-12))
+    n_ret = len(log_returns)
+    # Whole-window realised vol, then the last-5 vol and their ratio (regime signal).
+    vol_full = np.std(log_returns, ddof=1) if n_ret > 1 else 0.0
+    if n_ret >= 5:
+        vol_5m = np.std(log_returns[-5:], ddof=1)
+        vol_ratio = vol_5m / (vol_full + 1e-12)
+    else:
+        vol_5m, vol_ratio = vol_full, 1.0
+
+    # Per-bar high-low range: window mean versus the mean of the last three bars.
+    hl_range = highs - lows
+    range_mean = np.mean(hl_range)
+    range_recent = np.mean(hl_range[-3:]) if len(hl_range) >= 3 else hl_range[-1]
+
+    # Size of the latest move: over one bar, and from closes[-5] to closes[-1].
+    abs_1m = abs(log_returns[-1]) if n_ret > 0 else 0.0
+    abs_5m = abs(np.log(closes[-1] + 1e-12) - np.log(closes[-5] + 1e-12)) if len(closes) >= 5 else 0.0
+
+    return pd.Series({
+        "hist_vol_full": vol_full,
+        "hist_vol_5m": vol_5m,
+        "vol_ratio_5_full": vol_ratio,
+        "hl_range_mean": range_mean,
+        "hl_range_recent": range_recent,
+        "hl_range_ratio": range_recent / (range_mean + 1e-12),
+        "abs_return_1m": abs_1m,
+        "abs_return_5m": abs_5m,
+    })
+
+
+# Get base features (every raw lookback column produced by the workflow)
+base_feature_cols = [col for col in df_all.columns if col.startswith("feature_")]
+
+# Apply feature engineering (row-wise: one Python call per sample)
+print("   Engineering volatility-predictive features...")
+engineered_features = df_all.apply(engineer_vol_features, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered volatility features
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ["target"])
+
+print(
+    f"✅ Dataset: {len(df_all):,} samples "
+    f"({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})"
+)
+print(
+    f"   Features: {len(base_feature_cols)} base + "
+    f"{len(engineered_features.columns)} vol = {len(feature_cols)} total"
+)
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS,
+    gap=TARGET_BARS,  # embargo: rows skipped between each train and test fold, sized to the target horizon
+    max_train_size=MAX_TRAIN_SIZE,
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):  # preview of the fold sizes only; nothing is trained here
+    print(f"   Fold {fold_idx + 1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []  # one dict per (hyperparameter combo, tree-count checkpoint)
+config_num = 0  # 1-based id; results[config_num - 1] is that config's entry
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+
+            # Fit one N_ESTIMATORS_MAX-tree model per fold; checkpoints below reuse it via num_iteration
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]["target"]
+
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1,
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))  # keep the fold's test positions with its model
+
+            # Score each tree-count checkpoint as its own config
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all["pred"] = np.nan  # reset out-of-fold predictions for this config
+
+                # Fill each fold's test rows with predictions from the first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                # Score only rows that got a prediction; negatives are floored at 0 for the metrics
+                valid_mask = ~df_all["pred"].isna()
+                y_true_cv = df_all.loc[valid_mask, "target"].values
+                y_pred_cv = np.maximum(df_all.loc[valid_mask, "pred"].values, 0)
+                metrics = vol_metrics(y_true_cv, y_pred_cv)
+
+                # Store results
+                results.append(
+                    {
+                        "config_num": config_num,
+                        "n_estimators": n_est,
+                        "learning_rate": lr,
+                        "max_depth": depth,
+                        "num_leaves": leaves,
+                        "predictions": df_all["pred"].copy(),  # raw (unclipped) out-of-fold predictions
+                        **metrics,
+                    }
+                )
+
+                print(
+                    f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, "
+                    f"d={depth}, l={leaves:2d} -> "
+                    f"r={metrics['pearson_r']:.4f} R²={metrics['r2']:.4f} "
+                    f"QLIKE={metrics['qlike']:.4f}"
+                )
+
+# Analyze results — rank by R² (primary), then QLIKE (secondary, lower=better)
+results_df = pd.DataFrame(
+    [{k: v for k, v in r.items() if k != "predictions"} for r in results]  # keep per-row predictions out of the table
+)
+results_df = results_df.sort_values(["r2", "qlike"], ascending=[False, True])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print("\n   Top 5 models:")
+top5_cols = [
+    "config_num",
+    "n_estimators",
+    "learning_rate",
+    "max_depth",
+    "num_leaves",
+    "pearson_r",
+    "r2",
+    "qlike",
+]
+print(results_df[top5_cols].head().to_string(index=False, float_format="%.4f"))
+
+# Select best model: config_num is 1-based, so subtract 1 to index back into results
+best_result = results[int(results_df.iloc[0]["config_num"]) - 1]
+best_params = {
+    k: best_result[k]
+    for k in ["n_estimators", "learning_rate", "max_depth", "num_leaves"]
+}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(
+    f"   r={best_result['pearson_r']:.4f} R²={best_result['r2']:.4f} "
+    f"QLIKE={best_result['qlike']:.4f} | "
+    f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, "
+    f"d={best_params['max_depth']}, l={best_params['num_leaves']}"
+)
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print_vol_metrics(best_result, "BEST MODEL — Volatility Metrics")
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)  # runs/<UTC timestamp>/ next to this script
+artifacts = save_run_artifacts(
+    df_eval=df_all,  # NOTE(review): df_all["pred"] holds the LAST config's predictions here, not the best one's
+    best_result=best_result,  # carries the best config's own "predictions" Series
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model & Create Predict Function
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params["n_estimators"],
+    learning_rate=best_params["learning_rate"],
+    max_depth=best_params["max_depth"],
+    num_leaves=best_params["num_leaves"],
+    random_state=42,
+    verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all["target"])  # refit on every remaining row, no hold-out
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+
+def predict(nonce: int = None) -> float:
+    """
+    Forecast the next 15 minutes of ETH/USD realised volatility (Topic 80).
+
+    Pulls the latest feature row from the module-level ``workflow``, derives
+    the engineered volatility features with ``engineer_vol_features``, and
+    scores the combined vector with ``final_model``.
+
+    Args:
+        nonce: Block nonce passed by the caller; it is not read here.
+
+    Returns:
+        float: Model output, floored at 0.0.
+
+    Raises:
+        ValueError: If no live feature row is available.
+    """
+    latest = workflow.get_live_features(ticker=TICKERS[0])
+    if latest is None or len(latest) == 0:
+        raise ValueError("Could not get live features")
+
+    # Build the model input: engineered values appended to the raw base columns,
+    # then re-selected by feature_cols so the order matches the training frame.
+    newest_bar = latest.iloc[0]
+    engineered_part = engineer_vol_features(newest_bar)
+    base_part = latest[base_feature_cols].iloc[0]
+    model_input = pd.concat([base_part, engineered_part])[feature_cols]
+
+    # The model is scored on a single-row 2-D array.
+    raw_forecast = final_model.predict(model_input.values.reshape(1, -1))[0]
+
+    # A standard deviation cannot be negative, so floor the regressor output.
+    predicted_volatility = max(0.0, float(raw_forecast))
+
+    print(f"\nLive Prediction: {predicted_volatility:.6f} (15-min realised vol)")
+
+    return predicted_volatility
+
+
+# Smoke-test the live prediction path, then pickle predict() for the deploy script
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+
+with open("predict_80.pkl", "wb") as f:  # path is relative to the current working directory
+    cloudpickle.dump(predict, f)
+
+print("\n" + "=" * 80)
+print("COMPLETE!")
+print("=" * 80)
+print(
+    f"{len(feature_cols)} features | "
+    f"r={best_result['pearson_r']:.4f} | R²={best_result['r2']:.4f} | "
+    f"QLIKE={best_result['qlike']:.4f}"
+)
+print("\nTo deploy this worker:")  # plain strings: nothing to interpolate
+print("  TOPIC_ID=80 python notebooks/deploy_worker_raw.py")
diff --git a/notebooks/testnet/topic_80_eth_vol/topic_80_model_a_deep_lookback.py b/notebooks/testnet/topic_80_eth_vol/topic_80_model_a_deep_lookback.py
new file mode 100644
index 0000000..7c90d22
--- /dev/null
+++ b/notebooks/testnet/topic_80_eth_vol/topic_80_model_a_deep_lookback.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+Topic 80 — Model A: Deep Lookback + Rich Volatility Features
+=============================================================
+
+Strategy: Use a 60-bar (1-hour) lookback window with extensive volatility-
+predictive features including multi-horizon realised vol, return autocorrelation,
+Parkinson range-based estimators, and volume-volatility interaction.
+
+Trained on 2+ years of 1-minute ETH/USD data.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["ethusd"]
+DAYS_OF_HISTORY = 800  # ~2.2 years
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 60  # 1 hour of 1-min bars
+TARGET_BARS = 15  # target horizon in bars; also used as the CV gap
+TARGET_TYPE = "volatility"
+
+N_SPLITS = 5
+N_ESTIMATORS_MAX = 1000  # trees fitted per fold model
+N_ESTIMATORS_CHECKPOINTS = [200, 500, 800, 1000]  # tree counts scored via num_iteration
+LEARNING_RATES = [0.01, 0.03]
+MAX_DEPTHS = [5, 7]
+NUM_LEAVES = [31, 63]
+
+print("=" * 80)
+print("Topic 80 — Model A: Deep Lookback (60-bar, 2+ years)")
+print("=" * 80)
+
+# =============================================================================
+# STEP 1: Initialize & Backfill (console labels: [1/5] and [2/5])
+# =============================================================================
+print("\n[1/5] Initializing workflow...")
+from allora_forge_builder_kit.utils import get_api_key  # NOTE(review): mid-file import; could join the top import block
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")  # one directory above this script's folder
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+print(f"✅ {NUMBER_OF_INPUT_BARS} bars lookback, {TARGET_BARS}-min vol target")
+
+print(f"\n[2/5] Backfilling {DAYS_OF_HISTORY} days...")
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)  # timezone-aware UTC start
+workflow.backfill(start=start_date)
+print("✅ Backfill complete")
+
+# =============================================================================
+# STEP 2: Features (console label: [3/5])
+# =============================================================================
+print("\n[3/5] Extracting features...")
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+
+base_feature_cols = [col for col in df_all.columns if col.startswith("feature_")]  # raw inputs named feature_*
+
+
+def engineer_deep_vol_features(row):
+    """Compute the engineered volatility feature set for one row of raw bars.
+
+    Reads ``feature_{close,high,low,volume}_{i}`` for ``i`` in
+    ``range(NUMBER_OF_INPUT_BARS)`` and returns a ``pd.Series``; keys are
+    inserted in a fixed order, which sets the engineered column order.
+    """
+    idx = range(NUMBER_OF_INPUT_BARS)
+    closes, highs, lows, volumes = (
+        np.array([row[f"feature_{field}_{i}"] for i in idx])
+        for field in ("close", "high", "low", "volume")
+    )
+    eps = 1e-12  # keeps logs and divisions away from zero
+    log_rets = np.diff(np.log(closes + eps))
+    abs_rets = np.abs(log_rets)
+    n_rets = len(log_rets)
+
+    def tail_std(k):
+        # Sample std of the last k returns, or 0.0 when fewer than k exist.
+        return np.std(log_rets[-k:], ddof=1) if n_rets >= k else 0.0
+
+    def lag1_corr(x):
+        # Lag-1 autocorrelation; 0.0 for short input or a non-finite result.
+        if len(x) < 10:
+            return 0.0
+        rho = np.corrcoef(x[1:], x[:-1])[0, 1]
+        return rho if np.isfinite(rho) else 0.0
+
+    def parkinson(hl):
+        # Parkinson estimator: sqrt(mean(ln(H/L)^2) / (4 ln 2)).
+        return np.sqrt(np.mean(hl ** 2) / (4 * np.log(2)))
+
+    # Realised vol over the last 5/10/15/30 returns, then the whole window.
+    features = {f"vol_{k}m": tail_std(k) for k in (5, 10, 15, 30)}
+    features["vol_60m"] = np.std(log_rets, ddof=1) if n_rets >= 2 else 0.0
+
+    # Short-over-long vol ratios.
+    features["vol_ratio_5_60"] = features["vol_5m"] / (features["vol_60m"] + eps)
+    features["vol_ratio_15_60"] = features["vol_15m"] / (features["vol_60m"] + eps)
+    features["vol_ratio_5_30"] = features["vol_5m"] / (features["vol_30m"] + eps)
+
+    # Lag-1 autocorrelation of signed and of absolute returns.
+    features["ret_autocorr_1"] = lag1_corr(log_rets)
+    features["absret_autocorr_1"] = lag1_corr(abs_rets)
+
+    # Range-based (Parkinson) vol from log high/low, last 15 bars vs all bars.
+    hl_log = np.log(highs + eps) - np.log(lows + eps)
+    features["parkinson_vol_15m"] = parkinson(hl_log[-15:])
+    features["parkinson_vol_60m"] = parkinson(hl_log)
+    features["parkinson_ratio"] = features["parkinson_vol_15m"] / (features["parkinson_vol_60m"] + eps)
+
+    # High-low spread in price units.
+    hl_range = highs - lows
+    features["hl_range_mean"] = np.mean(hl_range)
+    features["hl_range_5m"] = np.mean(hl_range[-5:])
+    features["hl_range_ratio"] = features["hl_range_5m"] / (features["hl_range_mean"] + eps)
+    features["hl_range_max"] = np.max(hl_range[-15:])
+
+    # Size of recent moves.
+    features["abs_ret_mean_5m"] = np.mean(abs_rets[-5:])
+    features["abs_ret_mean_15m"] = np.mean(abs_rets[-15:])
+    features["abs_ret_max_15m"] = np.max(abs_rets[-15:])
+    features["abs_ret_mean_60m"] = np.mean(abs_rets)
+
+    # Volume vs. its window mean; volumes[1:] lines up with the diffed returns.
+    mean_volume = np.mean(volumes) + eps
+    features["volume_mean_ratio"] = np.mean(volumes[-5:]) / mean_volume
+    features["volume_spike"] = np.max(volumes[-5:]) / mean_volume
+    later_volumes = volumes[1:]
+    features["vol_weighted_absret"] = np.sum(abs_rets * (later_volumes / (np.sum(later_volumes) + eps)))
+
+    # |net move over the last 15 returns| per unit of 15m vol.
+    net_return = log_rets[-15:].sum() if n_rets >= 15 else 0.0
+    features["trend_vs_vol"] = abs(net_return) / (features["vol_15m"] + eps)
+
+    # Fourth standardized moment of the last 30 returns; 3.0 if under 20 returns or ~zero std.
+    window = log_rets[-30:]
+    sd = np.std(window, ddof=1) if n_rets >= 20 else 0.0
+    features["kurtosis_30m"] = np.mean(((window - np.mean(window)) / sd) ** 4) if sd > eps else 3.0
+
+    return pd.Series(features)
+
+
+print("   Engineering deep volatility features...")
+engineered = df_all.apply(engineer_deep_vol_features, axis=1)  # row-wise: one Series of engineered features per row
+df_all = pd.concat([df_all, engineered], axis=1)
+
+feature_cols = base_feature_cols + list(engineered.columns)  # model input order: base columns, then engineered
+df_all = df_all.dropna(subset=feature_cols + ["target"])  # drop rows with NaN in any input or the target
+
+print(f"✅ Dataset: {len(df_all):,} samples")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered.columns)} engineered = {len(feature_cols)} total")
+
+# =============================================================================
+# STEP 3: Grid Search (console label: [4/5])
+# =============================================================================
+print("\n[4/5] Grid search...")
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=TARGET_BARS)  # gap rows are skipped between each train and test fold
+evaluator = PerformanceEvaluator()
+results = []  # one dict per (hyperparameter combo, tree-count checkpoint)
+config_num = 0  # 1-based id, incremented once per checkpoint
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            fold_models = []  # (fitted model, test row positions) per fold
+            for train_idx, test_idx in tscv.split(df_all):
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    subsample=0.8,
+                    colsample_bytree=0.8,
+                    reg_alpha=0.1,
+                    reg_lambda=1.0,
+                    random_state=42,
+                    verbose=-1,
+                )
+                lgb.fit(df_all.iloc[train_idx][feature_cols], df_all.iloc[train_idx]["target"])
+                fold_models.append((lgb, test_idx))
+
+            for n_est in N_ESTIMATORS_CHECKPOINTS:  # score truncated versions of the same fold models
+                config_num += 1
+                df_all["pred"] = np.nan  # reset out-of-fold predictions for this config
+                for lgb, test_idx in fold_models:
+                    preds = lgb.predict(df_all.iloc[test_idx][feature_cols], num_iteration=n_est)  # first n_est trees only
+                    df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                valid_mask = ~df_all["pred"].isna()  # rows that fell in some test fold
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, "target"],
+                    y_pred=df_all.loc[valid_mask, "pred"],  # NOTE(review): unclipped here; predict() floors its output at 0.0
+                )
+                results.append({"config_num": config_num, "n_est": n_est, "lr": lr, "depth": depth, "leaves": leaves, **metrics})
+                print(f"   [{config_num:2d}] n={n_est:4d} lr={lr:.2f} d={depth} l={leaves:2d} → {metrics['score']:.1%} ({metrics['grade']})")
+
+results_df = pd.DataFrame(results).sort_values(["num_passed", "score"], ascending=[False, False])  # assumes evaluate() returns these keys — confirm
+best = results_df.iloc[0]  # top-ranked row after the sort
+print(f"\n✅ Best: {best['num_passed']}/7 ({best['score']:.1%}) — n={int(best['n_est'])}, lr={best['lr']}, d={int(best['depth'])}, l={int(best['leaves'])}")
+
+# =============================================================================
+# STEP 4: Train Final & Deploy (console label: [5/5])
+# =============================================================================
+print("\n[5/5] Training final model...")
+final_model = LGBMRegressor(
+    n_estimators=int(best["n_est"]),  # int(): the value read back from the results row may not be a plain int
+    learning_rate=best["lr"],
+    max_depth=int(best["depth"]),
+    num_leaves=int(best["leaves"]),
+    subsample=0.8,  # fixed settings below mirror the grid-search models
+    colsample_bytree=0.8,
+    reg_alpha=0.1,
+    reg_lambda=1.0,
+    random_state=42,
+    verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all["target"])  # refit on every remaining row, no hold-out
+print(f"✅ Trained on {len(df_all):,} samples")
+
+
+def predict(nonce=None):
+    """Return the Model A 15-min volatility forecast, floored at 0.0; ``nonce`` is unused."""
+    latest = workflow.get_live_features(ticker=TICKERS[0])
+    if latest is None or len(latest) == 0:
+        raise ValueError("Could not get live features")
+    eng_part = engineer_deep_vol_features(latest.iloc[0])
+    model_input = pd.concat([latest[base_feature_cols].iloc[0], eng_part])[feature_cols]  # training column order
+    vol = max(0.0, float(final_model.predict(model_input.values.reshape(1, -1))[0]))
+    print(f"\nModel A prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+
+print("\n🧪 Testing...")  # smoke-test the live prediction path before pickling
+test_pred = predict()
+
+with open("predict_80_model_a.pkl", "wb") as f:  # path is relative to the current working directory
+    cloudpickle.dump(predict, f)
+
+print("\n✅ Saved predict_80_model_a.pkl")  # plain string: nothing to interpolate
+print(f"   Score: {best['score']:.1%} | Features: {len(feature_cols)}")
diff --git a/notebooks/testnet/topic_80_eth_vol/topic_80_model_b_multiscale.py b/notebooks/testnet/topic_80_eth_vol/topic_80_model_b_multiscale.py
new file mode 100644
index 0000000..7ba25d7
--- /dev/null
+++ b/notebooks/testnet/topic_80_eth_vol/topic_80_model_b_multiscale.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+"""
+Topic 80 — Model B: Multi-Scale Regime Detection
+=================================================
+
+Strategy: Use a 30-bar (30-min) lookback with features designed to capture
+volatility clustering (EWMA/GARCH-like persistence, vol-of-vol) and a
+multi-scale view of price action (5/10/15/30-bar windows). Emphasizes regime
+transitions and mean-reversion in volatility.
+
+Trained on 2+ years of 1-minute ETH/USD data.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["ethusd"]
+DAYS_OF_HISTORY = 800  # ~2.2 years
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 30  # 30 minutes of 1-min bars
+TARGET_BARS = 15  # target horizon in bars; also used as the CV gap
+TARGET_TYPE = "volatility"
+
+N_SPLITS = 5
+N_ESTIMATORS_MAX = 1500  # trees fitted per fold model
+N_ESTIMATORS_CHECKPOINTS = [300, 600, 1000, 1500]  # tree counts scored via num_iteration
+LEARNING_RATES = [0.005, 0.02]
+MAX_DEPTHS = [4, 6]
+NUM_LEAVES = [15, 31]
+
+print("=" * 80)
+print("Topic 80 — Model B: Multi-Scale Regime (30-bar, 2+ years)")
+print("=" * 80)
+
+# =============================================================================
+# STEP 1: Initialize & Backfill (console labels: [1/5] and [2/5])
+# =============================================================================
+print("\n[1/5] Initializing workflow...")
+from allora_forge_builder_kit.utils import get_api_key  # NOTE(review): mid-file import; could join the top import block
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")  # one directory above this script's folder
+)
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+print(f"✅ {NUMBER_OF_INPUT_BARS} bars lookback, {TARGET_BARS}-min vol target")
+
+print(f"\n[2/5] Backfilling {DAYS_OF_HISTORY} days...")
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)  # timezone-aware UTC start
+workflow.backfill(start=start_date)
+print("✅ Backfill complete")
+
+# =============================================================================
+# STEP 2: Features (console label: [3/5])
+# =============================================================================
+print("\n[3/5] Extracting features...")
+df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+
+base_feature_cols = [col for col in df_all.columns if col.startswith("feature_")]  # raw inputs named feature_*
+
+
+def engineer_multiscale_features(row):
+    """Build multi-scale volatility/regime features from one row's raw bars.
+
+    Reads ``feature_{close,high,low,volume}_{i}`` for ``i`` in
+    ``range(NUMBER_OF_INPUT_BARS)``; returns a ``pd.Series`` of scalars.
+    """
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))
+    abs_rets = np.abs(log_rets)
+    features = {}
+
+    # --- Realised vol at multiple scales ---
+    features["vol_5m"] = np.std(log_rets[-5:], ddof=1) if len(log_rets) >= 5 else 0.0
+    features["vol_10m"] = np.std(log_rets[-10:], ddof=1) if len(log_rets) >= 10 else 0.0
+    features["vol_15m"] = np.std(log_rets[-15:], ddof=1) if len(log_rets) >= 15 else 0.0
+    features["vol_30m"] = np.std(log_rets, ddof=1) if len(log_rets) >= 2 else 0.0
+
+    # --- Vol persistence (GARCH-like) ---
+    # Exponentially weighted vol (lambda=0.94, like RiskMetrics)
+    lam = 0.94
+    sq_rets = log_rets ** 2
+    ewma_var = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_var = lam * ewma_var + (1 - lam) * r2
+    features["ewma_vol"] = np.sqrt(ewma_var)
+    features["ewma_vs_realized"] = features["ewma_vol"] / (features["vol_30m"] + 1e-12)
+
+    # --- Vol of vol (second-order clustering) ---
+    if len(abs_rets) >= 10:
+        rolling_vols = [np.std(abs_rets[i : i + 5], ddof=1) for i in range(len(abs_rets) - 5)]
+        if len(rolling_vols) >= 2:
+            features["vol_of_vol"] = np.std(rolling_vols, ddof=1)
+        else:
+            features["vol_of_vol"] = 0.0
+    else:
+        features["vol_of_vol"] = 0.0
+
+    # --- Regime indicators ---
+    # Vol ratio (short/long) — high = vol expanding, low = vol contracting
+    features["vol_ratio_5_30"] = features["vol_5m"] / (features["vol_30m"] + 1e-12)
+    features["vol_ratio_10_30"] = features["vol_10m"] / (features["vol_30m"] + 1e-12)
+
+    # Std of every 5-return window that ends before the latest return; computed
+    # once here and shared by the percentile and z-score features below.
+    rolling_5m_vols = [np.std(log_rets[i : i + 5], ddof=1) for i in range(len(log_rets) - 5)]
+
+    # Vol percentile: share of those earlier windows whose vol is <= the current
+    # 5m vol. Averages a boolean mask so the result actually varies in [0, 1].
+    if len(abs_rets) >= 15 and len(rolling_5m_vols) > 0:
+        features["vol_percentile"] = np.mean([v <= features["vol_5m"] for v in rolling_5m_vols])
+    else:
+        features["vol_percentile"] = 0.5
+
+    # --- Mean reversion signal ---
+    # Distance from "normal" vol (z-score of current vol)
+    if len(abs_rets) >= 20 and len(rolling_5m_vols) >= 5:
+        vol_mean = np.mean(rolling_5m_vols)
+        vol_std = np.std(rolling_5m_vols, ddof=1)
+        features["vol_zscore"] = (features["vol_5m"] - vol_mean) / (vol_std + 1e-12)
+    else:
+        features["vol_zscore"] = 0.0
+
+    # --- Directional features ---
+    features["signed_ret_5m"] = np.sum(log_rets[-5:])
+    features["signed_ret_15m"] = np.sum(log_rets[-15:]) if len(log_rets) >= 15 else np.sum(log_rets)
+    features["abs_ret_5m"] = np.sum(abs_rets[-5:])
+
+    # Efficiency ratio: |net move| / sum(|moves|) — 1 = trending, 0 = choppy
+    net_move = abs(features["signed_ret_15m"])
+    total_path = np.sum(abs_rets[-15:]) if len(abs_rets) >= 15 else np.sum(abs_rets)
+    features["efficiency_ratio"] = net_move / (total_path + 1e-12)
+
+    # --- High-low based estimators ---
+    hl_log = np.log(highs + 1e-12) - np.log(lows + 1e-12)
+    features["parkinson_5m"] = np.sqrt(np.mean(hl_log[-5:] ** 2) / (4 * np.log(2)))
+    features["parkinson_15m"] = np.sqrt(np.mean(hl_log[-15:] ** 2) / (4 * np.log(2)))
+    features["parkinson_ratio"] = features["parkinson_5m"] / (features["parkinson_15m"] + 1e-12)
+
+    # --- Volume dynamics ---
+    features["volume_trend"] = np.mean(volumes[-5:]) / (np.mean(volumes[-15:]) + 1e-12)
+    features["volume_spike_ratio"] = np.max(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+
+    # --- Autocorrelation of absolute returns (persistence) ---
+    if len(abs_rets) >= 6:
+        features["absret_autocorr"] = np.corrcoef(abs_rets[1:], abs_rets[:-1])[0, 1]
+        if not np.isfinite(features["absret_autocorr"]):
+            features["absret_autocorr"] = 0.0
+    else:
+        features["absret_autocorr"] = 0.0
+
+    # --- Recent extreme moves ---
+    features["max_abs_ret_5m"] = np.max(abs_rets[-5:])
+    features["max_abs_ret_15m"] = np.max(abs_rets[-15:]) if len(abs_rets) >= 15 else np.max(abs_rets)
+
+    return pd.Series(features)
+
+
+print("   Engineering multi-scale regime features...")
+engineered = df_all.apply(engineer_multiscale_features, axis=1)  # row-wise: one Series of engineered features per row
+df_all = pd.concat([df_all, engineered], axis=1)
+
+feature_cols = base_feature_cols + list(engineered.columns)  # model input order: base columns, then engineered
+df_all = df_all.dropna(subset=feature_cols + ["target"])  # drop rows with NaN in any input or the target
+
+print(f"✅ Dataset: {len(df_all):,} samples")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered.columns)} engineered = {len(feature_cols)} total")
+
+# =============================================================================
+# STEP 3: Grid Search (console label: [4/5])
+# =============================================================================
+print("\n[4/5] Grid search...")
+tscv = TimeSeriesSplit(n_splits=N_SPLITS, gap=TARGET_BARS)  # gap rows are skipped between each train and test fold
+evaluator = PerformanceEvaluator()
+results = []  # one dict per (hyperparameter combo, tree-count checkpoint)
+config_num = 0  # 1-based id, incremented once per checkpoint
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            fold_models = []  # (fitted model, test row positions) per fold
+            for train_idx, test_idx in tscv.split(df_all):
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    subsample=0.7,
+                    colsample_bytree=0.7,
+                    min_child_samples=50,
+                    reg_alpha=0.5,
+                    reg_lambda=2.0,
+                    random_state=123,
+                    verbose=-1,
+                )
+                lgb.fit(df_all.iloc[train_idx][feature_cols], df_all.iloc[train_idx]["target"])
+                fold_models.append((lgb, test_idx))
+
+            for n_est in N_ESTIMATORS_CHECKPOINTS:  # score truncated versions of the same fold models
+                config_num += 1
+                df_all["pred"] = np.nan  # reset out-of-fold predictions for this config
+                for lgb, test_idx in fold_models:
+                    preds = lgb.predict(df_all.iloc[test_idx][feature_cols], num_iteration=n_est)  # first n_est trees only
+                    df_all.iloc[test_idx, df_all.columns.get_loc("pred")] = preds
+
+                valid_mask = ~df_all["pred"].isna()  # rows that fell in some test fold
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, "target"],
+                    y_pred=df_all.loc[valid_mask, "pred"],  # NOTE(review): unclipped here; predict() floors its output at 0.0
+                )
+                results.append({"config_num": config_num, "n_est": n_est, "lr": lr, "depth": depth, "leaves": leaves, **metrics})
+                print(f"   [{config_num:2d}] n={n_est:4d} lr={lr:.3f} d={depth} l={leaves:2d} → {metrics['score']:.1%} ({metrics['grade']})")
+
+results_df = pd.DataFrame(results).sort_values(["num_passed", "score"], ascending=[False, False])  # assumes evaluate() returns these keys — confirm
+best = results_df.iloc[0]  # top-ranked row after the sort
+print(f"\n✅ Best: {best['num_passed']}/7 ({best['score']:.1%}) — n={int(best['n_est'])}, lr={best['lr']}, d={int(best['depth'])}, l={int(best['leaves'])}")
+
+# =============================================================================
+# STEP 4: Train Final & Deploy (console label: [5/5])
+# =============================================================================
+print("\n[5/5] Training final model...")
+final_model = LGBMRegressor(
+    n_estimators=int(best["n_est"]),  # int(): the value read back from the results row may not be a plain int
+    learning_rate=best["lr"],
+    max_depth=int(best["depth"]),
+    num_leaves=int(best["leaves"]),
+    subsample=0.7,  # fixed settings below mirror the grid-search models
+    colsample_bytree=0.7,
+    min_child_samples=50,
+    reg_alpha=0.5,
+    reg_lambda=2.0,
+    random_state=123,
+    verbose=-1,
+)
+final_model.fit(df_all[feature_cols], df_all["target"])  # refit on every remaining row, no hold-out
+print(f"✅ Trained on {len(df_all):,} samples")
+
+
+def predict(nonce=None):
+    """Return the Model B 15-min volatility forecast, floored at 0.0; ``nonce`` is unused."""
+    latest = workflow.get_live_features(ticker=TICKERS[0])
+    if latest is None or len(latest) == 0:
+        raise ValueError("Could not get live features")
+    eng_part = engineer_multiscale_features(latest.iloc[0])
+    model_input = pd.concat([latest[base_feature_cols].iloc[0], eng_part])[feature_cols]  # training column order
+    vol = max(0.0, float(final_model.predict(model_input.values.reshape(1, -1))[0]))
+    print(f"\nModel B prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+
+print("\n🧪 Testing...")  # smoke-test the live prediction path before pickling
+test_pred = predict()
+
+with open("predict_80_model_b.pkl", "wb") as f:  # path is relative to the current working directory
+    cloudpickle.dump(predict, f)
+
+print("\n✅ Saved predict_80_model_b.pkl")  # plain string: nothing to interpolate
+print(f"   Score: {best['score']:.1%} | Features: {len(feature_cols)}")
diff --git a/notebooks/testnet/topic_80_eth_vol/topic_80_model_d_iterative.py b/notebooks/testnet/topic_80_eth_vol/topic_80_model_d_iterative.py
new file mode 100644
index 0000000..2268693
--- /dev/null
+++ b/notebooks/testnet/topic_80_eth_vol/topic_80_model_d_iterative.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python3
+"""
+Topic 80 — Model D: Iterative Improvement
+==========================================
+
+Starting from Model A's baseline (best performer), iteratively adding
+features and tuning to push volatility metrics higher.
+
+Baseline (Model A, 60-bar raw features only):
+    Pearson r:   0.695
+    Spearman ρ:  0.714
+    R²:          0.457
+    RMSE:        0.000290
+    MAE:         0.000194
+    Rel MAE:     34.2%
+    QLIKE:       0.100
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from scipy.stats import pearsonr, spearmanr
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["ethusd"]
+DAYS_OF_HISTORY = 800
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 60
+TARGET_BARS = 15
+TARGET_TYPE = "volatility"
+
+print("=" * 80)
+print("Topic 80 — Model D: Iterative Improvement")
+print("=" * 80)
+
+
+# =============================================================================
+# METRICS
+# =============================================================================
+def vol_metrics(y_true, y_pred):
+    """Return a dict of Pearson r, Spearman rho, R², RMSE, MAE, relative MAE and QLIKE for vol forecasts."""
+    y_true, y_pred = np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)  # accept lists/Series too
+    r, _ = pearsonr(y_true, y_pred)
+    rho, _ = spearmanr(y_true, y_pred)
+    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
+    mae = np.mean(np.abs(y_true - y_pred))
+    ss_res = np.sum((y_true - y_pred) ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    r2 = 1 - ss_res / ss_tot
+    rel_mae = mae / np.mean(y_true)
+    # QLIKE (quasi-likelihood loss): skip zero targets/forecasts, where log(ratio) would be infinite
+    mask = (y_pred > 0) & (y_true > 0)
+    if mask.sum() > 0:
+        ratio = y_true[mask] / y_pred[mask]
+        qlike = np.mean(ratio - np.log(ratio) - 1)
+    else:
+        qlike = float("inf")
+    return {
+        "pearson_r": r,
+        "spearman_rho": rho,
+        "r2": r2,
+        "rmse": rmse,
+        "mae": mae,
+        "rel_mae": rel_mae,
+        "qlike": qlike,
+    }
+
+
+def print_metrics(metrics, label=""):
+    """Pretty-print volatility metrics."""
+    print(f"\n  {'─'*50}")
+    if label:
+        print(f"  {label}")
+        print(f"  {'─'*50}")
+    print(f"  Pearson r:   {metrics['pearson_r']:.4f}")
+    print(f"  Spearman ρ:  {metrics['spearman_rho']:.4f}")
+    print(f"  R²:          {metrics['r2']:.4f}")
+    print(f"  RMSE:        {metrics['rmse']:.6f}")
+    print(f"  MAE:         {metrics['mae']:.6f}")
+    print(f"  Rel MAE:     {metrics['rel_mae']*100:.2f}%")
+    print(f"  QLIKE:       {metrics['qlike']:.6f}")
+    print(f"  {'─'*50}")
+
+
+def compare_metrics(baseline, current):
+    """Print baseline vs current for each metric with a direction arrow and the absolute percent change."""
+    print(f"\n  {'Metric':<14} {'Baseline':<10} {'Current':<10} {'Δ':<10}")
+    print(f"  {'─'*44}")
+    for key in ["pearson_r", "spearman_rho", "r2", "rmse", "mae", "rel_mae", "qlike"]:
+        b, c = baseline[key], current[key]
+        if key in ["rmse", "mae", "rel_mae", "qlike"]:
+            # Lower is better; guard a zero baseline the same way the higher-is-better branch does
+            delta = (b - c) / abs(b) * 100 if b != 0 else 0
+            arrow = "↓" if c < b else "↑"
+        else:
+            # Higher is better
+            delta = (c - b) / abs(b) * 100 if b != 0 else 0
+            arrow = "↑" if c > b else "↓"
+        fmt = ".6f" if key in ["rmse", "mae"] else ".4f"
+        if key == "rel_mae":
+            print(f"  {key:<14} {b*100:<10.2f} {c*100:<10.2f} {arrow}{abs(delta):.1f}%")
+        else:
+            print(f"  {key:<14} {b:<10{fmt}} {c:<10{fmt}} {arrow}{abs(delta):.1f}%")
+
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/4] Loading data...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")
+)
+
+wf = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    target_type=TARGET_TYPE,
+    data_source="allora",
+    api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+wf.backfill(start=start_date)
+df = wf.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df.columns if c.startswith("feature_")]
+df = df.dropna(subset=base_feature_cols + ["target"])
+
+# 80/20 temporal split
+split = int(len(df) * 0.8)
+df_train = df.iloc[:split].copy()
+df_test = df.iloc[split:].copy()
+y_test = df_test["target"].values
+
+print(f"✅ {len(df):,} samples | Train: {len(df_train):,} | Test: {len(df_test):,}")
+print(f"   Mean vol: {y_test.mean():.6f} | Std vol: {y_test.std():.6f}")
+
+# =============================================================================
+# BASELINE: Model A (raw 60-bar features only)
+# =============================================================================
+print("\n[2/4] Baseline (Model A: 60 raw bars, no engineering)...")
+model_baseline = LGBMRegressor(
+    n_estimators=200, learning_rate=0.01, max_depth=5, num_leaves=31,
+    subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, reg_lambda=1.0,
+    random_state=42, verbose=-1,
+)
+model_baseline.fit(df_train[base_feature_cols], df_train["target"])
+preds_baseline = np.maximum(model_baseline.predict(df_test[base_feature_cols]), 0)
+baseline_metrics = vol_metrics(y_test, preds_baseline)
+print_metrics(baseline_metrics, "BASELINE (Model A)")
+
+
+# =============================================================================
+# MODEL D: Feature Engineering
+# =============================================================================
+print("\n[3/4] Engineering Model D features...")
+
+
+def engineer_model_d_features(row):
+    """
+    Build Model D's engineered features from one row of raw bar columns; returns a pd.Series.
+
+    Strategy:
+    - Multi-horizon realised vol (from Model A)
+    - EWMA vol / vol persistence (from Model B)
+    - Parkinson & Garman-Klass estimators (from Model A)
+    - NEW: GARCH-inspired features (conditional vol)
+    - NEW: Vol regime quantiles
+    - NEW: Microstructure features (bid-ask proxy from HL spread)
+    - NEW: Return distribution shape (skewness, kurtosis)
+    """
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))
+    abs_rets = np.abs(log_rets)
+    sq_rets = log_rets ** 2
+    features = {}
+
+    # === Multi-horizon realised vol ===
+    features["vol_5m"] = np.std(log_rets[-5:], ddof=1)
+    features["vol_10m"] = np.std(log_rets[-10:], ddof=1)
+    features["vol_15m"] = np.std(log_rets[-15:], ddof=1)
+    features["vol_30m"] = np.std(log_rets[-30:], ddof=1)
+    features["vol_60m"] = np.std(log_rets, ddof=1)
+
+    # === Vol ratios (regime) ===
+    features["vol_ratio_5_15"] = features["vol_5m"] / (features["vol_15m"] + 1e-12)
+    features["vol_ratio_5_60"] = features["vol_5m"] / (features["vol_60m"] + 1e-12)
+    features["vol_ratio_15_60"] = features["vol_15m"] / (features["vol_60m"] + 1e-12)
+
+    # === GARCH(1,1)-inspired features ===
+    # Exponentially weighted variance (RiskMetrics lambda=0.94)
+    lam = 0.94
+    ewma_var = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_var = lam * ewma_var + (1 - lam) * r2
+    features["ewma_vol"] = np.sqrt(ewma_var)
+
+    # GARCH persistence: how much does yesterday's vol predict today's?
+    # Approximate with ratio of EWMA to realised
+    features["garch_persistence"] = features["ewma_vol"] / (features["vol_60m"] + 1e-12)
+
+    # Conditional vol: EWMA computed at different lambdas
+    lam_fast = 0.85  # faster decay — more reactive
+    ewma_fast = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_fast = lam_fast * ewma_fast + (1 - lam_fast) * r2
+    features["ewma_vol_fast"] = np.sqrt(ewma_fast)
+    features["ewma_fast_slow_ratio"] = features["ewma_vol_fast"] / (features["ewma_vol"] + 1e-12)
+
+    # === Parkinson volatility (high-low based) ===
+    hl_log = np.log(highs + 1e-12) - np.log(lows + 1e-12)
+    features["parkinson_15m"] = np.sqrt(np.mean(hl_log[-15:] ** 2) / (4 * np.log(2)))
+    features["parkinson_60m"] = np.sqrt(np.mean(hl_log ** 2) / (4 * np.log(2)))
+    features["parkinson_ratio"] = features["parkinson_15m"] / (features["parkinson_60m"] + 1e-12)
+
+    # === Garman-Klass volatility (uses OHLC); mean is clamped at 0 so inconsistent bars cannot produce NaN ===
+    opens = np.array([row[f"feature_open_{i}"] for i in range(n)])
+    gk_terms = 0.5 * hl_log ** 2 - (2 * np.log(2) - 1) * (np.log(closes + 1e-12) - np.log(opens + 1e-12)) ** 2
+    features["garman_klass_15m"] = np.sqrt(np.maximum(np.mean(gk_terms[-15:]), 0.0))
+    features["garman_klass_60m"] = np.sqrt(np.maximum(np.mean(gk_terms), 0.0))
+
+    # === Vol of vol (second-order) ===
+    rolling_5m_vols = np.array([
+        np.std(log_rets[i:i+5], ddof=1) for i in range(len(log_rets) - 5)
+    ])
+    if len(rolling_5m_vols) >= 2:
+        features["vol_of_vol"] = np.std(rolling_5m_vols, ddof=1)
+        features["vol_mean_reversion"] = (features["vol_5m"] - np.mean(rolling_5m_vols)) / (np.std(rolling_5m_vols, ddof=1) + 1e-12)
+    else:
+        features["vol_of_vol"] = 0.0
+        features["vol_mean_reversion"] = 0.0
+
+    # === Vol quantile (where are we in the local distribution?) ===
+    if len(rolling_5m_vols) > 0:
+        features["vol_percentile"] = np.mean(rolling_5m_vols <= features["vol_5m"])
+    else:
+        features["vol_percentile"] = 0.5
+
+    # === Return autocorrelation (clustering signal) ===
+    features["ret_autocorr_1"] = np.corrcoef(log_rets[1:], log_rets[:-1])[0, 1] if len(log_rets) > 2 else 0.0
+    features["absret_autocorr_1"] = np.corrcoef(abs_rets[1:], abs_rets[:-1])[0, 1] if len(abs_rets) > 2 else 0.0
+    # Fix NaN
+    for k in ["ret_autocorr_1", "absret_autocorr_1"]:
+        if not np.isfinite(features[k]):
+            features[k] = 0.0
+
+    # === Return distribution shape ===
+    if len(log_rets) >= 15:
+        recent = log_rets[-15:]
+        mean_r = np.mean(recent)
+        std_r = np.std(recent, ddof=1)
+        if std_r > 1e-12:
+            features["skewness_15m"] = np.mean(((recent - mean_r) / std_r) ** 3)
+            features["kurtosis_15m"] = np.mean(((recent - mean_r) / std_r) ** 4)
+        else:
+            features["skewness_15m"] = 0.0
+            features["kurtosis_15m"] = 3.0
+    else:
+        features["skewness_15m"] = 0.0
+        features["kurtosis_15m"] = 3.0
+
+    # === Absolute returns (magnitude features) ===
+    features["abs_ret_mean_5m"] = np.mean(abs_rets[-5:])
+    features["abs_ret_mean_15m"] = np.mean(abs_rets[-15:])
+    features["abs_ret_max_15m"] = np.max(abs_rets[-15:])
+    features["abs_ret_max_60m"] = np.max(abs_rets)
+
+    # === Volume-volatility interaction ===
+    features["volume_ratio_5_60"] = np.mean(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+    features["volume_spike"] = np.max(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+    # Volume-weighted absolute return
+    vol_weights = volumes[1:] / (np.sum(volumes[1:]) + 1e-12)
+    features["vwap_absret"] = np.sum(abs_rets * vol_weights)
+
+    # === Trend vs chop (efficiency ratio) ===
+    net_move = abs(np.sum(log_rets[-15:]))
+    total_path = np.sum(abs_rets[-15:])
+    features["efficiency_15m"] = net_move / (total_path + 1e-12)
+
+    # === Recent extreme moves ===
+    features["max_abs_ret_5m"] = np.max(abs_rets[-5:])
+    features["max_abs_ret_ratio"] = features["max_abs_ret_5m"] / (features["vol_5m"] + 1e-12)
+
+    return pd.Series(features)
+
+
+print("   Engineering features...")
+eng_train = df_train.apply(engineer_model_d_features, axis=1)
+eng_test = df_test.apply(engineer_model_d_features, axis=1)
+
+df_train = pd.concat([df_train, eng_train], axis=1)
+df_test = pd.concat([df_test, eng_test], axis=1)
+
+eng_cols = list(eng_train.columns)
+all_feature_cols = base_feature_cols + eng_cols
+
+# Drop any rows with NaN in engineered features
+df_train = df_train.dropna(subset=all_feature_cols)
+df_test = df_test.dropna(subset=all_feature_cols)
+y_test = df_test["target"].values
+
+print(f"   Features: {len(base_feature_cols)} base + {len(eng_cols)} engineered = {len(all_feature_cols)} total")
+
+# =============================================================================
+# MODEL D: Train with Huber loss
+# =============================================================================
+print("\n[4/4] Training Model D (Huber loss, heavy regularization)...")
+
+model_d = LGBMRegressor(
+    objective="huber",          # Intended to be robust to vol spikes (but see the note on alpha)
+    alpha=0.5,                  # Huber delta in target units. NOTE(review): baseline RMSE is ~3e-4 (file docstring), so residuals never approach 0.5 and this likely behaves as plain L2 — confirm/retune
+    n_estimators=500,
+    learning_rate=0.01,
+    max_depth=6,
+    num_leaves=31,
+    subsample=0.8,
+    colsample_bytree=0.7,
+    min_child_samples=100,      # Conservative splits
+    reg_alpha=0.5,              # L1
+    reg_lambda=2.0,             # L2
+    random_state=42,
+    verbose=-1,
+)
+model_d.fit(df_train[all_feature_cols], df_train["target"])
+preds_d = np.maximum(model_d.predict(df_test[all_feature_cols]), 0)
+
+model_d_metrics = vol_metrics(y_test, preds_d)
+print_metrics(model_d_metrics, "MODEL D (Huber + GARCH features)")
+
+print("\n📊 Improvement over baseline:")
+compare_metrics(baseline_metrics, model_d_metrics)
+
+# =============================================================================
+# SAVE
+# =============================================================================
+print("\n\nSaving Model D...")
+
+# For deployment, we need the workflow and feature engineering in the predict fn
+workflow = wf
+
+def predict(nonce=None):  # nonce is accepted but never read in the body
+    live_row = workflow.get_live_features(ticker=TICKERS[0])  # only the first configured ticker is queried
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    live_eng = engineer_model_d_features(live_row.iloc[0])  # same feature function applied to the train/test rows
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_eng])  # base + engineered in one Series
+    vol = model_d.predict(live_features[all_feature_cols].values.reshape(1, -1))[0]  # single-row 2-D input
+    vol = max(0.0, float(vol))  # clamp: a negative volatility forecast is floored at zero
+    print(f"Model D prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+print("🧪 Testing prediction...")
+test_pred = predict()
+
+with open("predict_80_model_d.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print(f"\n✅ Saved predict_80_model_d.pkl")
+print(f"   Pearson r: {model_d_metrics['pearson_r']:.4f} | R²: {model_d_metrics['r2']:.4f} | QLIKE: {model_d_metrics['qlike']:.6f}")
diff --git a/notebooks/testnet/topic_81_xrp_vol/topic_81_model_e_calibrated.py b/notebooks/testnet/topic_81_xrp_vol/topic_81_model_e_calibrated.py
new file mode 100644
index 0000000..9cbef57
--- /dev/null
+++ b/notebooks/testnet/topic_81_xrp_vol/topic_81_model_e_calibrated.py
@@ -0,0 +1,433 @@
+#!/usr/bin/env python3
+"""
+Topic 81 — Model E: Calibrated Volatility (Distribution-Aware)
+==============================================================
+
+Problem: Previous models compress predictions into a narrow band because
+tree models with MSE/Huber loss regress toward the mean. The scatter plot
+shows predictions trapped in [0.0004, 0.0011] while targets range to 0.006+.
+
+Solution: Three techniques to match the target distribution:
+1. Log-space prediction: predict log(vol) to equalize error across magnitudes (optionally with a log-normal bias correction)
+2. Quantile ensemble: blend the log-space prediction with a median (q50) quantile model; a q75 model is also fit but not blended
+3. Regime-aware: separate models for calm vs volatile periods (not implemented in this script)
+
+The goal is to match both the SHAPE and MAGNITUDE of the target distribution,
+not just minimize average error.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from scipy.stats import pearsonr, spearmanr
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["xrpusd"]
+DAYS_OF_HISTORY = 800
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 60
+TARGET_BARS = 15
+TARGET_TYPE = "volatility"
+
+print("=" * 80)
+print("Topic 81 — Model E: Calibrated Volatility (Distribution-Aware)")
+print("=" * 80)
+
+
+# =============================================================================
+# METRICS
+# =============================================================================
+def vol_metrics(y_true, y_pred):
+    """Return a dict of correlation, error, QLIKE, calibration-ratio and tail-capture metrics for vol forecasts."""
+    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
+    r, _ = pearsonr(y_true, y_pred)
+    rho, _ = spearmanr(y_true, y_pred)
+    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
+    mae = np.mean(np.abs(y_true - y_pred))
+    ss_res = np.sum((y_true - y_pred) ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    r2 = 1 - ss_res / ss_tot
+    rel_mae = mae / np.mean(y_true)
+    mask = (y_pred > 0) & (y_true > 0)  # QLIKE needs both positive: a zero target would give log(0) = -inf
+    ratio = y_true[mask] / y_pred[mask]
+    qlike = np.mean(ratio - np.log(ratio) - 1) if mask.sum() > 0 else float("inf")
+    # Distribution match: ratio of pred std to target std (1.0 = perfect calibration)
+    cal_ratio = np.std(y_pred) / np.std(y_true)
+    # Tail coverage: what fraction of target > p90 does the model predict > p90?
+    p90 = np.percentile(y_true, 90)
+    tail_mask = y_true > p90
+    if tail_mask.sum() > 0:
+        tail_capture = np.mean(y_pred[tail_mask] > np.percentile(y_pred, 90))
+    else:
+        tail_capture = 0.0
+    return {
+        "pearson_r": r, "spearman_rho": rho, "r2": r2,
+        "rmse": rmse, "mae": mae, "rel_mae": rel_mae, "qlike": qlike,
+        "cal_ratio": cal_ratio, "tail_capture": tail_capture,
+    }
+
+
+def print_metrics(metrics, label=""):
+    print(f"\n  {'─' * 55}")
+    if label:
+        print(f"  {label}")
+        print(f"  {'─' * 55}")
+    print(f"  Pearson r:      {metrics['pearson_r']:.4f}")
+    print(f"  Spearman ρ:     {metrics['spearman_rho']:.4f}")
+    print(f"  R²:             {metrics['r2']:.4f}")
+    print(f"  RMSE:           {metrics['rmse']:.6f}")
+    print(f"  MAE:            {metrics['mae']:.6f}")
+    print(f"  Rel MAE:        {metrics['rel_mae']*100:.2f}%")
+    print(f"  QLIKE:          {metrics['qlike']:.6f}")
+    print(f"  Cal ratio:      {metrics['cal_ratio']:.4f}  (1.0 = perfect spread)")
+    print(f"  Tail capture:   {metrics['tail_capture']:.4f}  (1.0 = perfect tail)")
+    print(f"  {'─' * 55}")
+
+
+def compare_metrics(baseline, current):
+    """Print baseline vs current for each metric with a direction arrow and the absolute percent change."""
+    print(f"\n  {'Metric':<16} {'Baseline':<10} {'Current':<10} {'Δ':<10}")
+    print(f"  {'─'*46}")
+    for key in ["pearson_r", "spearman_rho", "r2", "rmse", "mae", "rel_mae", "qlike", "cal_ratio"]:
+        b, c = baseline[key], current[key]
+        if key in ["rmse", "mae", "rel_mae", "qlike"]:
+            delta = (b - c) / abs(b) * 100 if b != 0 else 0
+            arrow = "↓" if c < b else "↑"
+        elif key == "cal_ratio":
+            # Closer to 1.0 is better; only a ✓/✗ verdict is printed, so no percent delta is computed
+            arrow = "✓" if abs(1 - c) < abs(1 - b) else "✗"
+            print(f"  {key:<16} {b:<10.4f} {c:<10.4f} {arrow}")
+            continue
+        else:
+            delta = (c - b) / abs(b) * 100 if b != 0 else 0
+            arrow = "↑" if c > b else "↓"
+        fmt = ".6f" if key in ["rmse", "mae"] else ".4f"
+        if key == "rel_mae":
+            print(f"  {key:<16} {b*100:<10.2f} {c*100:<10.2f} {arrow}{abs(delta):.1f}%")
+        else:
+            print(f"  {key:<16} {b:<10{fmt}} {c:<10{fmt}} {arrow}{abs(delta):.1f}%")
+
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/5] Loading data...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", ".allora_api_key")
+)
+
+wf = AlloraMLWorkflow(
+    tickers=TICKERS, number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS, interval=INTERVAL, target_type=TARGET_TYPE,
+    data_source="allora", api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+wf.backfill(start=start_date)
+df = wf.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df.columns if c.startswith("feature_")]
+df = df.dropna(subset=base_feature_cols + ["target"])
+
+split = int(len(df) * 0.8)
+df_train = df.iloc[:split].copy()
+df_test = df.iloc[split:].copy()
+y_test = df_test["target"].values
+
+print(f"✅ {len(df):,} samples | Train: {len(df_train):,} | Test: {len(df_test):,}")
+print(f"   Target stats: mean={y_test.mean():.6f} std={y_test.std():.6f} "
+      f"p90={np.percentile(y_test, 90):.6f} max={y_test.max():.6f}")
+
+
+# =============================================================================
+# FEATURE ENGINEERING (same as Model D)
+# =============================================================================
+print("\n[2/5] Engineering features...")
+
+
+def engineer_features(row):
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+    opens = np.array([row[f"feature_open_{i}"] for i in range(n)])
+
+    log_rets = np.diff(np.log(closes + 1e-12))
+    abs_rets = np.abs(log_rets)
+    sq_rets = log_rets ** 2
+    features = {}
+
+    # Multi-horizon vol
+    features["vol_5m"] = np.std(log_rets[-5:], ddof=1)
+    features["vol_10m"] = np.std(log_rets[-10:], ddof=1)
+    features["vol_15m"] = np.std(log_rets[-15:], ddof=1)
+    features["vol_30m"] = np.std(log_rets[-30:], ddof=1)
+    features["vol_60m"] = np.std(log_rets, ddof=1)
+
+    # Vol ratios
+    features["vol_ratio_5_15"] = features["vol_5m"] / (features["vol_15m"] + 1e-12)
+    features["vol_ratio_5_60"] = features["vol_5m"] / (features["vol_60m"] + 1e-12)
+    features["vol_ratio_15_60"] = features["vol_15m"] / (features["vol_60m"] + 1e-12)
+
+    # EWMA (fast and slow)
+    lam = 0.94
+    ewma_var = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_var = lam * ewma_var + (1 - lam) * r2
+    features["ewma_vol"] = np.sqrt(ewma_var)
+
+    lam_fast = 0.85
+    ewma_fast = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_fast = lam_fast * ewma_fast + (1 - lam_fast) * r2
+    features["ewma_vol_fast"] = np.sqrt(ewma_fast)
+    features["ewma_fast_slow_ratio"] = features["ewma_vol_fast"] / (features["ewma_vol"] + 1e-12)
+    features["garch_persistence"] = features["ewma_vol"] / (features["vol_60m"] + 1e-12)
+
+    # Parkinson & Garman-Klass
+    hl_log = np.log(highs + 1e-12) - np.log(lows + 1e-12)
+    features["parkinson_15m"] = np.sqrt(np.mean(hl_log[-15:] ** 2) / (4 * np.log(2)))
+    features["parkinson_60m"] = np.sqrt(np.mean(hl_log ** 2) / (4 * np.log(2)))
+    features["parkinson_ratio"] = features["parkinson_15m"] / (features["parkinson_60m"] + 1e-12)
+    gk_terms = 0.5 * hl_log ** 2 - (2 * np.log(2) - 1) * (np.log(closes + 1e-12) - np.log(opens + 1e-12)) ** 2
+    features["garman_klass_15m"] = np.sqrt(np.abs(np.mean(gk_terms[-15:])))
+    features["garman_klass_60m"] = np.sqrt(np.abs(np.mean(gk_terms)))
+
+    # Vol of vol & mean reversion
+    rolling_5m_vols = np.array([np.std(log_rets[i:i+5], ddof=1) for i in range(len(log_rets) - 5)])
+    if len(rolling_5m_vols) >= 2:
+        features["vol_of_vol"] = np.std(rolling_5m_vols, ddof=1)
+        features["vol_mean_reversion"] = (features["vol_5m"] - np.mean(rolling_5m_vols)) / (np.std(rolling_5m_vols, ddof=1) + 1e-12)
+        features["vol_percentile"] = np.mean(rolling_5m_vols <= features["vol_5m"])
+    else:
+        features["vol_of_vol"] = 0.0
+        features["vol_mean_reversion"] = 0.0
+        features["vol_percentile"] = 0.5
+
+    # Autocorrelation
+    features["absret_autocorr_1"] = np.corrcoef(abs_rets[1:], abs_rets[:-1])[0, 1] if len(abs_rets) > 2 else 0.0
+    if not np.isfinite(features["absret_autocorr_1"]):
+        features["absret_autocorr_1"] = 0.0
+
+    # Distribution shape
+    if len(log_rets) >= 15:
+        recent = log_rets[-15:]
+        std_r = np.std(recent, ddof=1)
+        if std_r > 1e-12:
+            features["kurtosis_15m"] = np.mean(((recent - np.mean(recent)) / std_r) ** 4)
+        else:
+            features["kurtosis_15m"] = 3.0
+    else:
+        features["kurtosis_15m"] = 3.0
+
+    # Magnitude features
+    features["abs_ret_mean_5m"] = np.mean(abs_rets[-5:])
+    features["abs_ret_max_15m"] = np.max(abs_rets[-15:])
+    features["abs_ret_max_60m"] = np.max(abs_rets)
+
+    # Volume interaction
+    features["volume_ratio_5_60"] = np.mean(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+    features["volume_spike"] = np.max(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+
+    # Efficiency ratio
+    net_move = abs(np.sum(log_rets[-15:]))
+    total_path = np.sum(abs_rets[-15:])
+    features["efficiency_15m"] = net_move / (total_path + 1e-12)
+
+    return pd.Series(features)
+
+
+print("   Engineering features (this takes ~40 min on 1.15M rows)...")
+eng_train = df_train.apply(engineer_features, axis=1)
+eng_test = df_test.apply(engineer_features, axis=1)
+
+df_train = pd.concat([df_train.reset_index(drop=True), eng_train.reset_index(drop=True)], axis=1)
+df_test = pd.concat([df_test.reset_index(drop=True), eng_test.reset_index(drop=True)], axis=1)
+
+eng_cols = list(eng_train.columns)
+all_feature_cols = base_feature_cols + eng_cols
+df_train = df_train.dropna(subset=all_feature_cols)
+df_test = df_test.dropna(subset=all_feature_cols)
+y_test = df_test["target"].values
+
+print(f"   ✅ {len(all_feature_cols)} features ready")
+
+
+# =============================================================================
+# BASELINE: Model D approach (Huber, single model)
+# =============================================================================
+print("\n[3/5] Baseline (Model D: Huber loss)...")
+model_baseline = LGBMRegressor(
+    objective="huber", alpha=0.5, n_estimators=500, learning_rate=0.01,  # NOTE(review): targets top out near 0.006 (module docstring), so alpha=0.5 is never exceeded and this is likely plain L2 — confirm
+    max_depth=6, num_leaves=31, subsample=0.8, colsample_bytree=0.7,
+    min_child_samples=100, reg_alpha=0.5, reg_lambda=2.0,
+    random_state=42, verbose=-1,
+)
+model_baseline.fit(df_train[all_feature_cols], df_train["target"])
+preds_baseline = np.maximum(model_baseline.predict(df_test[all_feature_cols]), 0)
+baseline_metrics = vol_metrics(y_test, preds_baseline)
+print_metrics(baseline_metrics, "BASELINE (Model D: Huber)")
+
+
+# =============================================================================
+# MODEL E: Log-space + Quantile Ensemble
+# =============================================================================
+print("\n[4/5] Training Model E (log-space + quantile ensemble)...")
+
+# --- Strategy 1: Predict in log-space ---
+# Transform: log(vol) is more Gaussian, equalizes error across magnitudes
+y_train_log = np.log(df_train["target"].values + 1e-10)
+y_test_log = np.log(y_test + 1e-10)
+
+model_log = LGBMRegressor(
+    objective="regression",  # MSE in log-space = multiplicative error in real space
+    n_estimators=800,
+    learning_rate=0.01,
+    max_depth=7,
+    num_leaves=63,
+    subsample=0.8,
+    colsample_bytree=0.7,
+    min_child_samples=50,
+    reg_alpha=0.1,
+    reg_lambda=1.0,
+    random_state=42,
+    verbose=-1,
+)
+model_log.fit(df_train[all_feature_cols], y_train_log)
+preds_log_space = model_log.predict(df_test[all_feature_cols])
+preds_from_log = np.exp(preds_log_space)  # back to real space
+
+log_metrics = vol_metrics(y_test, preds_from_log)
+print_metrics(log_metrics, "Log-space model (exp transform back)")
+
+# --- Strategy 2: Quantile models for calibration ---
+# Train at 50th percentile (median) and 75th percentile; only q50 is blended/deployed below, preds_q75 is never read
+model_q50 = LGBMRegressor(
+    objective="quantile", alpha=0.5,  # median
+    n_estimators=500, learning_rate=0.01, max_depth=6, num_leaves=31,
+    subsample=0.8, colsample_bytree=0.7, min_child_samples=100,
+    reg_alpha=0.3, reg_lambda=1.5, random_state=42, verbose=-1,
+)
+model_q50.fit(df_train[all_feature_cols], df_train["target"])
+preds_q50 = np.maximum(model_q50.predict(df_test[all_feature_cols]), 0)  # floor negative forecasts at zero
+
+model_q75 = LGBMRegressor(
+    objective="quantile", alpha=0.75,  # upper quartile
+    n_estimators=500, learning_rate=0.01, max_depth=6, num_leaves=31,
+    subsample=0.8, colsample_bytree=0.7, min_child_samples=100,
+    reg_alpha=0.3, reg_lambda=1.5, random_state=42, verbose=-1,
+)
+model_q75.fit(df_train[all_feature_cols], df_train["target"])
+preds_q75 = np.maximum(model_q75.predict(df_test[all_feature_cols]), 0)  # NOTE(review): unused in this script — drop or blend
+
+# --- Strategy 3: Ensemble blend ---
+# Blend log-space model (good at shape) with quantile shift (good at tails)
+# The log model captures the full range; we blend with q50 for stability
+alpha = 0.6  # weight on log-space model
+preds_ensemble = alpha * preds_from_log + (1 - alpha) * preds_q50
+
+ensemble_metrics = vol_metrics(y_test, preds_ensemble)
+print_metrics(ensemble_metrics, "Ensemble (0.6*log + 0.4*q50)")
+
+# --- Strategy 4: Log-space with bias correction ---
+# exp(E[log(x)]) underestimates E[x] for log-normal. Apply correction.
+# Correction factor: exp(0.5 * residual_variance_in_log_space)
+log_residuals = y_train_log - model_log.predict(df_train[all_feature_cols])
+bias_correction = np.exp(0.5 * np.var(log_residuals))
+preds_corrected = preds_from_log * bias_correction
+
+corrected_metrics = vol_metrics(y_test, preds_corrected)
+print_metrics(corrected_metrics, f"Log-space + bias correction (factor={bias_correction:.4f})")
+
+# --- Pick the best ---
+candidates = [
+    ("baseline_huber", baseline_metrics, preds_baseline),
+    ("log_space", log_metrics, preds_from_log),
+    ("ensemble_log_q50", ensemble_metrics, preds_ensemble),
+    ("log_corrected", corrected_metrics, preds_corrected),
+]
+
+# Rank by a composite: prioritize QLIKE (vol-specific) and cal_ratio (distribution match)
+def composite_score(m):
+    # Lower QLIKE is better, cal_ratio closer to 1.0 is better, higher r2 is better
+    return m["r2"] - 0.5 * m["qlike"] - 0.3 * abs(1 - m["cal_ratio"])
+
+print("\n\n  Candidate ranking (composite score):")
+print(f"  {'Name':<20} {'R²':<8} {'QLIKE':<8} {'Cal':<8} {'Score':<8}")
+print(f"  {'─'*52}")
+ranked = sorted(candidates, key=lambda x: composite_score(x[1]), reverse=True)
+for name, m, _ in ranked:
+    score = composite_score(m)
+    print(f"  {name:<20} {m['r2']:.4f}  {m['qlike']:.4f}  {m['cal_ratio']:.4f}  {score:.4f}")
+
+best_name, best_metrics, best_preds = ranked[0]
+print(f"\n  → Winner: {best_name}")
+
+
+# =============================================================================
+# STEP 5: Save best model for deployment
+# =============================================================================
+print(f"\n[5/5] Saving Model E ({best_name})...")
+print_metrics(best_metrics, f"MODEL E FINAL ({best_name})")
+print("\n📊 Improvement over Model D baseline:")
+compare_metrics(baseline_metrics, best_metrics)
+
+# For deployment, we need to package the right predict function
+workflow = wf
+
+if best_name == "log_space":
+    _deploy_model = model_log
+    _bias = 1.0
+elif best_name == "log_corrected":
+    _deploy_model = model_log
+    _bias = bias_correction
+elif best_name == "ensemble_log_q50":
+    _deploy_model_log = model_log
+    _deploy_model_q50 = model_q50
+    _alpha = alpha
+else:
+    _deploy_model = model_baseline
+    _bias = None
+
+
+def predict(nonce=None):  # nonce is accepted but never read in the body
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    live_eng = engineer_features(live_row.iloc[0])  # same feature function applied to the train/test rows
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_eng])
+    x = live_features[all_feature_cols].values.reshape(1, -1)  # one row, columns in all_feature_cols order
+
+    if best_name in ("log_space", "log_corrected"):
+        log_pred = _deploy_model.predict(x)[0]
+        vol = np.exp(log_pred) * _bias  # _bias is 1.0 for log_space and bias_correction for log_corrected
+    elif best_name == "ensemble_log_q50":
+        log_pred = np.exp(_deploy_model_log.predict(x)[0])  # already exponentiated back to real space
+        q50_pred = max(0, _deploy_model_q50.predict(x)[0])
+        vol = _alpha * log_pred + (1 - _alpha) * q50_pred
+    else:
+        vol = _deploy_model.predict(x)[0]  # baseline Huber model predicts vol directly
+
+    vol = max(0.0, float(vol))  # clamp: a negative volatility forecast is floored at zero
+    print(f"Model E prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+
+print("\n🧪 Testing prediction...")
+test_pred = predict()
+
+with open("predict_81_model_e.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print(f"\n✅ Saved predict_81_model_e.pkl")
+print(f"   Strategy: {best_name}")
+print(f"   Pearson r: {best_metrics['pearson_r']:.4f} | R²: {best_metrics['r2']:.4f}")
+print(f"   Cal ratio: {best_metrics['cal_ratio']:.4f} | QLIKE: {best_metrics['qlike']:.6f}")
diff --git a/notebooks/testnet/topic_82_sol_vol/topic_82_model_e_calibrated.py b/notebooks/testnet/topic_82_sol_vol/topic_82_model_e_calibrated.py
new file mode 100644
index 0000000..4fd436b
--- /dev/null
+++ b/notebooks/testnet/topic_82_sol_vol/topic_82_model_e_calibrated.py
@@ -0,0 +1,433 @@
+#!/usr/bin/env python3
+"""
+Topic 82 — Model E: Calibrated Volatility (Distribution-Aware)
+==============================================================
+
+Problem: Previous models compress predictions into a narrow band because
+tree models with MSE/Huber loss regress toward the mean. The scatter plot
+shows predictions trapped in [0.0004, 0.0011] while targets range to 0.006+.
+
+Solution: Candidate strategies compared against the Huber baseline:
+1. Log-space prediction: predict log(vol) to equalize error across magnitudes
+2. Log/median ensemble: blend the log-space model with a median (q50) quantile model
+3. Bias-corrected log-space: rescale exp(prediction) by exp(0.5 * log-residual variance)
+
+The goal is to match both the SHAPE and MAGNITUDE of the target distribution,
+not just minimize average error.
+"""
+
+import numpy as np
+import pandas as pd
+import os
+from datetime import datetime, timedelta, timezone
+from scipy.stats import pearsonr, spearmanr
+from lightgbm import LGBMRegressor
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow
+
+# =============================================================================
+# CONFIGURATION
+# =============================================================================
+TICKERS = ["solusd"]
+DAYS_OF_HISTORY = 800
+INTERVAL = "1m"
+NUMBER_OF_INPUT_BARS = 60
+TARGET_BARS = 15
+TARGET_TYPE = "volatility"
+
+print("=" * 80)
+print("Topic 82 — Model E: Calibrated Volatility (Distribution-Aware)")
+print("=" * 80)
+
+
+# =============================================================================
+# METRICS
+# =============================================================================
+def vol_metrics(y_true, y_pred):
+    """Return fit (r, rho, R², RMSE, MAE), QLIKE, spread (cal_ratio) and tail-capture metrics as a dict."""
+    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
+    r, _ = pearsonr(y_true, y_pred)
+    rho, _ = spearmanr(y_true, y_pred)
+    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
+    mae = np.mean(np.abs(y_true - y_pred))
+    ss_res = np.sum((y_true - y_pred) ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    r2 = 1 - ss_res / ss_tot
+    rel_mae = mae / np.mean(y_true)
+    mask = (y_pred > 0) & (y_true > 0)  # QLIKE takes log(ratio): one zero target would turn the mean into +inf
+    ratio = y_true[mask] / y_pred[mask]
+    qlike = np.mean(ratio - np.log(ratio) - 1) if mask.sum() > 0 else float("inf")
+    # Distribution match: ratio of pred std to target std (1.0 = perfect calibration)
+    cal_ratio = np.std(y_pred) / np.std(y_true)
+    # Tail coverage: what fraction of target > p90 does the model predict > p90?
+    p90 = np.percentile(y_true, 90)
+    tail_mask = y_true > p90
+    if tail_mask.sum() > 0:
+        tail_capture = np.mean(y_pred[tail_mask] > np.percentile(y_pred, 90))
+    else:
+        tail_capture = 0.0
+    return {
+        "pearson_r": r, "spearman_rho": rho, "r2": r2,
+        "rmse": rmse, "mae": mae, "rel_mae": rel_mae, "qlike": qlike,
+        "cal_ratio": cal_ratio, "tail_capture": tail_capture,
+    }
+
+
+def print_metrics(metrics, label=""):
+    """Pretty-print a vol_metrics() result; `label`, when given, is shown as a heading."""
+    rule = f"  {'─' * 55}"
+    # Opening rule, then (only when labelled) the heading followed by a second rule.
+    print(f"\n{rule}" + (f"\n  {label}\n{rule}" if label else ""))
+    print(f"  Pearson r:      {metrics['pearson_r']:.4f}")
+    print(f"  Spearman ρ:     {metrics['spearman_rho']:.4f}")
+    print(f"  R²:             {metrics['r2']:.4f}")
+    print(f"  RMSE:           {metrics['rmse']:.6f}")
+    print(f"  MAE:            {metrics['mae']:.6f}")
+    print(f"  Rel MAE:        {metrics['rel_mae']*100:.2f}%")
+    print(f"  QLIKE:          {metrics['qlike']:.6f}")
+    print(f"  Cal ratio:      {metrics['cal_ratio']:.4f}  (1.0 = perfect spread)")
+    print(f"  Tail capture:   {metrics['tail_capture']:.4f}  (1.0 = perfect tail)")
+    print(rule)
+
+
+def compare_metrics(baseline, current):
+    """Print baseline vs current per metric: ↑/↓ plus % change, or ✓/✗ for cal_ratio (closer to 1.0 wins)."""
+    print(f"\n  {'Metric':<16} {'Baseline':<10} {'Current':<10} {'Δ':<10}")
+    print(f"  {'─'*46}")
+    for key in ["pearson_r", "spearman_rho", "r2", "rmse", "mae", "rel_mae", "qlike", "cal_ratio"]:
+        b, c = baseline[key], current[key]
+        if key in ["rmse", "mae", "rel_mae", "qlike"]:
+            delta = (b - c) / abs(b) * 100 if b != 0 else 0
+            arrow = "↓" if c < b else "↑"
+        elif key == "cal_ratio":
+            # Closer to 1.0 is better; this row shows only a verdict, so no delta is computed.
+            arrow = "✓" if abs(1 - c) < abs(1 - b) else "✗"
+            print(f"  {key:<16} {b:<10.4f} {c:<10.4f} {arrow}")
+            continue
+        else:
+            delta = (c - b) / abs(b) * 100 if b != 0 else 0
+            arrow = "↑" if c > b else "↓"
+        fmt = ".6f" if key in ["rmse", "mae"] else ".4f"
+        if key == "rel_mae":
+            print(f"  {key:<16} {b*100:<10.2f} {c*100:<10.2f} {arrow}{abs(delta):.1f}%")
+        else:
+            print(f"  {key:<16} {b:<10{fmt}} {c:<10{fmt}} {arrow}{abs(delta):.1f}%")
+
+
+# =============================================================================
+# LOAD DATA
+# =============================================================================
+print("\n[1/5] Loading data...")
+from allora_forge_builder_kit.utils import get_api_key
+
+api_key = get_api_key(  # two levels up = notebooks/, the same key location the topic_83 example resolves
+    api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key")
+)
+
+wf = AlloraMLWorkflow(
+    tickers=TICKERS, number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS, interval=INTERVAL, target_type=TARGET_TYPE,
+    data_source="allora", api_key=api_key,
+)
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+wf.backfill(start=start_date)
+df = wf.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+base_feature_cols = [c for c in df.columns if c.startswith("feature_")]
+df = df.dropna(subset=base_feature_cols + ["target"])
+
+split = int(len(df) * 0.8)
+df_train = df.iloc[:split].copy()
+df_test = df.iloc[split:].copy()
+y_test = df_test["target"].values
+
+print(f"✅ {len(df):,} samples | Train: {len(df_train):,} | Test: {len(df_test):,}")
+print(f"   Target stats: mean={y_test.mean():.6f} std={y_test.std():.6f} "
+      f"p90={np.percentile(y_test, 90):.6f} max={y_test.max():.6f}")
+
+
+# =============================================================================
+# FEATURE ENGINEERING (same as Model D)
+# =============================================================================
+print("\n[2/5] Engineering features...")
+
+
+def engineer_features(row):  # Build one pd.Series of engineered volatility features from a single row of base features
+    n = NUMBER_OF_INPUT_BARS
+    closes = np.array([row[f"feature_close_{i}"] for i in range(n)])
+    highs = np.array([row[f"feature_high_{i}"] for i in range(n)])
+    lows = np.array([row[f"feature_low_{i}"] for i in range(n)])
+    volumes = np.array([row[f"feature_volume_{i}"] for i in range(n)])
+    opens = np.array([row[f"feature_open_{i}"] for i in range(n)])
+    # n closes give n - 1 bar-to-bar log returns; the 1e-12 offsets keep log() and ratio denominators off exact zero.
+    log_rets = np.diff(np.log(closes + 1e-12))
+    abs_rets = np.abs(log_rets)
+    sq_rets = log_rets ** 2
+    features = {}
+
+    # Multi-horizon vol: sample std (ddof=1) of the most recent 5/10/15/30 returns and of the whole window
+    features["vol_5m"] = np.std(log_rets[-5:], ddof=1)
+    features["vol_10m"] = np.std(log_rets[-10:], ddof=1)
+    features["vol_15m"] = np.std(log_rets[-15:], ddof=1)
+    features["vol_30m"] = np.std(log_rets[-30:], ddof=1)
+    features["vol_60m"] = np.std(log_rets, ddof=1)
+
+    # Vol ratios: short-horizon vol relative to longer-horizon vol
+    features["vol_ratio_5_15"] = features["vol_5m"] / (features["vol_15m"] + 1e-12)
+    features["vol_ratio_5_60"] = features["vol_5m"] / (features["vol_60m"] + 1e-12)
+    features["vol_ratio_15_60"] = features["vol_15m"] / (features["vol_60m"] + 1e-12)
+
+    # EWMA variance of squared returns, seeded with the first one: slow decay 0.94, fast decay 0.85
+    lam = 0.94
+    ewma_var = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_var = lam * ewma_var + (1 - lam) * r2
+    features["ewma_vol"] = np.sqrt(ewma_var)
+
+    lam_fast = 0.85
+    ewma_fast = sq_rets[0]
+    for r2 in sq_rets[1:]:
+        ewma_fast = lam_fast * ewma_fast + (1 - lam_fast) * r2
+    features["ewma_vol_fast"] = np.sqrt(ewma_fast)
+    features["ewma_fast_slow_ratio"] = features["ewma_vol_fast"] / (features["ewma_vol"] + 1e-12)
+    features["garch_persistence"] = features["ewma_vol"] / (features["vol_60m"] + 1e-12)  # slow EWMA vol / window vol
+
+    # Parkinson & Garman-Klass range-based estimators built from log(high/low) and log(close/open)
+    hl_log = np.log(highs + 1e-12) - np.log(lows + 1e-12)
+    features["parkinson_15m"] = np.sqrt(np.mean(hl_log[-15:] ** 2) / (4 * np.log(2)))
+    features["parkinson_60m"] = np.sqrt(np.mean(hl_log ** 2) / (4 * np.log(2)))
+    features["parkinson_ratio"] = features["parkinson_15m"] / (features["parkinson_60m"] + 1e-12)
+    gk_terms = 0.5 * hl_log ** 2 - (2 * np.log(2) - 1) * (np.log(closes + 1e-12) - np.log(opens + 1e-12)) ** 2
+    features["garman_klass_15m"] = np.sqrt(np.abs(np.mean(gk_terms[-15:])))  # abs() keeps sqrt real if the mean is < 0
+    features["garman_klass_60m"] = np.sqrt(np.abs(np.mean(gk_terms)))
+
+    # Vol of vol & mean reversion. NOTE(review): range(len - 5) omits the newest 5-return window — confirm intended
+    rolling_5m_vols = np.array([np.std(log_rets[i:i+5], ddof=1) for i in range(len(log_rets) - 5)])
+    if len(rolling_5m_vols) >= 2:
+        features["vol_of_vol"] = np.std(rolling_5m_vols, ddof=1)
+        features["vol_mean_reversion"] = (features["vol_5m"] - np.mean(rolling_5m_vols)) / (np.std(rolling_5m_vols, ddof=1) + 1e-12)
+        features["vol_percentile"] = np.mean(rolling_5m_vols <= features["vol_5m"])
+    else:
+        features["vol_of_vol"] = 0.0
+        features["vol_mean_reversion"] = 0.0
+        features["vol_percentile"] = 0.5
+
+    # Autocorrelation: lag-1 correlation of absolute returns; non-finite results are replaced by 0.0
+    features["absret_autocorr_1"] = np.corrcoef(abs_rets[1:], abs_rets[:-1])[0, 1] if len(abs_rets) > 2 else 0.0
+    if not np.isfinite(features["absret_autocorr_1"]):
+        features["absret_autocorr_1"] = 0.0
+
+    # Distribution shape: 4th standardized moment of the last 15 returns (non-excess; 3.0 is the fallback)
+    if len(log_rets) >= 15:
+        recent = log_rets[-15:]
+        std_r = np.std(recent, ddof=1)
+        if std_r > 1e-12:
+            features["kurtosis_15m"] = np.mean(((recent - np.mean(recent)) / std_r) ** 4)
+        else:
+            features["kurtosis_15m"] = 3.0
+    else:
+        features["kurtosis_15m"] = 3.0
+
+    # Magnitude features: mean / max absolute return over recent windows
+    features["abs_ret_mean_5m"] = np.mean(abs_rets[-5:])
+    features["abs_ret_max_15m"] = np.max(abs_rets[-15:])
+    features["abs_ret_max_60m"] = np.max(abs_rets)
+
+    # Volume interaction: last-5-bar mean and max volume relative to the whole-window mean
+    features["volume_ratio_5_60"] = np.mean(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+    features["volume_spike"] = np.max(volumes[-5:]) / (np.mean(volumes) + 1e-12)
+
+    # Efficiency ratio: |net log move| / summed |returns| over the last 15 returns
+    net_move = abs(np.sum(log_rets[-15:]))
+    total_path = np.sum(abs_rets[-15:])
+    features["efficiency_15m"] = net_move / (total_path + 1e-12)
+
+    return pd.Series(features)
+
+
+print("   Engineering features (this takes ~40 min on 1.15M rows)...")
+eng_train = df_train.apply(engineer_features, axis=1)
+eng_test = df_test.apply(engineer_features, axis=1)
+
+df_train = pd.concat([df_train.reset_index(drop=True), eng_train.reset_index(drop=True)], axis=1)
+df_test = pd.concat([df_test.reset_index(drop=True), eng_test.reset_index(drop=True)], axis=1)
+
+eng_cols = list(eng_train.columns)
+all_feature_cols = base_feature_cols + eng_cols
+df_train = df_train.dropna(subset=all_feature_cols)
+df_test = df_test.dropna(subset=all_feature_cols)
+y_test = df_test["target"].values
+
+print(f"   ✅ {len(all_feature_cols)} features ready")
+
+
+# =============================================================================
+# BASELINE: Model D approach (Huber, single model)
+# =============================================================================
+print("\n[3/5] Baseline (Model D: Huber loss)...")
+model_baseline = LGBMRegressor(
+    objective="huber", alpha=0.5, n_estimators=500, learning_rate=0.01,
+    max_depth=6, num_leaves=31, subsample=0.8, colsample_bytree=0.7,
+    min_child_samples=100, reg_alpha=0.5, reg_lambda=2.0,
+    random_state=42, verbose=-1,
+)
+model_baseline.fit(df_train[all_feature_cols], df_train["target"])
+preds_baseline = np.maximum(model_baseline.predict(df_test[all_feature_cols]), 0)
+baseline_metrics = vol_metrics(y_test, preds_baseline)
+print_metrics(baseline_metrics, "BASELINE (Model D: Huber)")
+
+
+# =============================================================================
+# MODEL E: Log-space + Quantile Ensemble
+# =============================================================================
+print("\n[4/5] Training Model E (log-space + quantile ensemble)...")
+
+# --- Strategy 1: Predict in log-space ---
+# Transform: log(vol) is more Gaussian, equalizes error across magnitudes
+y_train_log = np.log(df_train["target"].values + 1e-10)
+y_test_log = np.log(y_test + 1e-10)
+
+model_log = LGBMRegressor(
+    objective="regression",  # MSE in log-space = multiplicative error in real space
+    n_estimators=800,
+    learning_rate=0.01,
+    max_depth=7,
+    num_leaves=63,
+    subsample=0.8,
+    colsample_bytree=0.7,
+    min_child_samples=50,
+    reg_alpha=0.1,
+    reg_lambda=1.0,
+    random_state=42,
+    verbose=-1,
+)
+model_log.fit(df_train[all_feature_cols], y_train_log)
+preds_log_space = model_log.predict(df_test[all_feature_cols])
+preds_from_log = np.exp(preds_log_space)  # back to real space
+
+log_metrics = vol_metrics(y_test, preds_from_log)
+print_metrics(log_metrics, "Log-space model (exp transform back)")
+
+# --- Strategy 2: Quantile models for calibration ---
+# Train at 50th percentile (median) and 75th percentile (only q50 feeds the ensemble below; q75 is not used)
+model_q50 = LGBMRegressor(
+    objective="quantile", alpha=0.5,  # median
+    n_estimators=500, learning_rate=0.01, max_depth=6, num_leaves=31,
+    subsample=0.8, colsample_bytree=0.7, min_child_samples=100,
+    reg_alpha=0.3, reg_lambda=1.5, random_state=42, verbose=-1,
+)
+model_q50.fit(df_train[all_feature_cols], df_train["target"])
+preds_q50 = np.maximum(model_q50.predict(df_test[all_feature_cols]), 0)
+
+model_q75 = LGBMRegressor(
+    objective="quantile", alpha=0.75,  # upper quartile
+    n_estimators=500, learning_rate=0.01, max_depth=6, num_leaves=31,
+    subsample=0.8, colsample_bytree=0.7, min_child_samples=100,
+    reg_alpha=0.3, reg_lambda=1.5, random_state=42, verbose=-1,
+)
+model_q75.fit(df_train[all_feature_cols], df_train["target"])
+preds_q75 = np.maximum(model_q75.predict(df_test[all_feature_cols]), 0)
+
+# --- Strategy 3: Ensemble blend ---
+# Blend log-space model (good at shape) with quantile shift (good at tails)
+# The log model captures the full range; we blend with q50 for stability
+alpha = 0.6  # weight on log-space model
+preds_ensemble = alpha * preds_from_log + (1 - alpha) * preds_q50
+
+ensemble_metrics = vol_metrics(y_test, preds_ensemble)
+print_metrics(ensemble_metrics, "Ensemble (0.6*log + 0.4*q50)")
+
+# --- Strategy 4: Log-space with bias correction ---
+# exp(E[log(x)]) underestimates E[x] for log-normal. Apply correction.
+# Correction factor: exp(0.5 * residual_variance_in_log_space)
+log_residuals = y_train_log - model_log.predict(df_train[all_feature_cols])
+bias_correction = np.exp(0.5 * np.var(log_residuals))
+preds_corrected = preds_from_log * bias_correction
+
+corrected_metrics = vol_metrics(y_test, preds_corrected)
+print_metrics(corrected_metrics, f"Log-space + bias correction (factor={bias_correction:.4f})")
+
+# --- Pick the best ---
+candidates = [
+    ("baseline_huber", baseline_metrics, preds_baseline),
+    ("log_space", log_metrics, preds_from_log),
+    ("ensemble_log_q50", ensemble_metrics, preds_ensemble),
+    ("log_corrected", corrected_metrics, preds_corrected),
+]
+
+# Rank by a composite: prioritize QLIKE (vol-specific) and cal_ratio (distribution match)
+def composite_score(m):
+    # Higher is better: R² minus 0.5 x QLIKE minus 0.3 x |1 - cal_ratio| (distance from a perfect spread of 1.0)
+    return m["r2"] - 0.5 * m["qlike"] - 0.3 * abs(1 - m["cal_ratio"])
+
+print("\n\n  Candidate ranking (composite score):")
+print(f"  {'Name':<20} {'R²':<8} {'QLIKE':<8} {'Cal':<8} {'Score':<8}")
+print(f"  {'─'*52}")
+ranked = sorted(candidates, key=lambda x: composite_score(x[1]), reverse=True)
+for name, m, _ in ranked:
+    score = composite_score(m)
+    print(f"  {name:<20} {m['r2']:.4f}  {m['qlike']:.4f}  {m['cal_ratio']:.4f}  {score:.4f}")
+
+best_name, best_metrics, best_preds = ranked[0]
+print(f"\n  → Winner: {best_name}")
+
+
+# =============================================================================
+# STEP 5: Save best model for deployment
+# =============================================================================
+print(f"\n[5/5] Saving Model E ({best_name})...")
+print_metrics(best_metrics, f"MODEL E FINAL ({best_name})")
+print("\n📊 Improvement over Model D baseline:")
+compare_metrics(baseline_metrics, best_metrics)
+
+# For deployment, we need to package the right predict function
+workflow = wf
+
+if best_name == "log_space":
+    _deploy_model = model_log
+    _bias = 1.0
+elif best_name == "log_corrected":
+    _deploy_model = model_log
+    _bias = bias_correction
+elif best_name == "ensemble_log_q50":
+    _deploy_model_log = model_log
+    _deploy_model_q50 = model_q50
+    _alpha = alpha
+else:
+    _deploy_model = model_baseline
+    _bias = None
+
+
+def predict(nonce=None):  # Live 15-min volatility forecast as a non-negative float; `nonce` is accepted but not used
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    live_eng = engineer_features(live_row.iloc[0])
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_eng])
+    x = live_features[all_feature_cols].values.reshape(1, -1)  # one sample, columns ordered by all_feature_cols
+    # Undo whatever target transform the winning strategy (best_name) was trained with.
+    if best_name in ("log_space", "log_corrected"):
+        log_pred = _deploy_model.predict(x)[0]
+        vol = np.exp(log_pred) * _bias  # _bias is 1.0 for plain log_space, bias_correction for log_corrected
+    elif best_name == "ensemble_log_q50":
+        log_pred = np.exp(_deploy_model_log.predict(x)[0])  # already back in real space despite the name
+        q50_pred = max(0, _deploy_model_q50.predict(x)[0])
+        vol = _alpha * log_pred + (1 - _alpha) * q50_pred
+    else:
+        vol = _deploy_model.predict(x)[0]
+    # Clamp at zero before reporting: the returned volatility is never negative.
+    vol = max(0.0, float(vol))
+    print(f"Model E prediction: {vol:.6f} (15-min vol)")
+    return vol
+
+
+print("\n🧪 Testing prediction...")
+test_pred = predict()
+
+with open("predict_82_model_e.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print(f"\n✅ Saved predict_82_model_e.pkl")
+print(f"   Strategy: {best_name}")
+print(f"   Pearson r: {best_metrics['pearson_r']:.4f} | R²: {best_metrics['r2']:.4f}")
+print(f"   Cal ratio: {best_metrics['cal_ratio']:.4f} | QLIKE: {best_metrics['qlike']:.6f}")
diff --git a/notebooks/testnet/topic_83_btc_8h_logreturn/example.py b/notebooks/testnet/topic_83_btc_8h_logreturn/example.py
new file mode 100644
index 0000000..20f3ecd
--- /dev/null
+++ b/notebooks/testnet/topic_83_btc_8h_logreturn/example.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 83 BTC/USD Log-Return Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 8-hour BTC/USD log-return prediction using the
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["btcusd"]
+DAYS_OF_HISTORY = 500
+INTERVAL = "5m"
+
+# Feature Configuration
+NUMBER_OF_INPUT_BARS = 288  # Number of 5-minute bars for input features (288 x 5m = 24 hours)
+TARGET_BARS = 96           # Predict 96 bars ahead (96 x 5m = 8 hours)
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 83 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas scalars, arrays, timestamps, dicts, lists and tuples to JSON-ready types."""
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()
+    if isinstance(obj, (np.bool_,)):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):  # tuples come back as lists, which is how json would emit them anyway
+        return [_to_serializable(v) for v in obj]
+    return obj
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Write config.json, metrics.json, predictions.csv, a scatter PNG and report.txt to run_dir; return key paths."""
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config: the module-level experiment constants plus the winning hyper-parameters
+    config = {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics: score/grade/num_passed are required keys; the .get() entries are optional
+    metrics_payload = {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table (works on a copy, so the caller's df_eval is not modified)
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values  # positional; assumes df_eval row order — TODO confirm
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x reference line spanning both ranges
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report. NOTE(review): "/7" is hard-coded although num_primary_metrics is read above
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 83 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Return close-to-close log returns at lags of 1, 6, 12 and 24 bars, read from this row only (no leakage)."""
+    # NOTE(review): the 1h/6h/12h/24h suffixes count bars; with INTERVAL = "5m" a bar is 5 minutes, not an hour.
+    # NOTE(review): base features are said to be pre-normalized by the workflow; log() needs them positive - confirm.
+    close_series = np.array([row[f'feature_close_{i}'] for i in range(NUMBER_OF_INPUT_BARS)])
+
+    def _lagged_log_return(lag):
+        # Falls back to 0 when the input window is too short to look `lag` bars back.
+        if NUMBER_OF_INPUT_BARS < lag + 1:
+            return 0
+        return np.log(close_series[-1] + 1e-8) - np.log(close_series[-(lag + 1)] + 1e-8)
+
+    return pd.Series({f'log_return_{lag}h': _lagged_log_return(lag) for lag in (1, 6, 12, 24)})
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            
+            # Train once with max trees, evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))
+            
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all['pred'] = np.nan
+                
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+                
+                # Evaluate
+                valid_mask = ~df_all['pred'].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, 'target'],
+                    y_pred=df_all.loc[valid_mask, 'pred']
+                )
+                
+                # Store results
+                results.append({
+                    'config_num': config_num,
+                    'n_estimators': n_est,
+                    'learning_rate': lr,
+                    'max_depth': depth,
+                    'num_leaves': leaves,
+                    'predictions': df_all['pred'].copy(),
+                    **metrics
+                })
+                
+                print(f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, d={depth}, l={leaves:2d} -> "
+                      f"{metrics['num_passed']}/7 ({metrics['score']:.1%} - {metrics['grade']})")
+
+# Analyze results
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+results_df = results_df.sort_values(['num_passed', 'score'], ascending=[False, False])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print(f"\n   Top 5 models:")
+top5_cols = ['config_num', 'n_estimators', 'learning_rate', 'max_depth', 'num_leaves', 'num_passed', 'score']
+print(results_df[top5_cols].head().to_string(index=False))
+
+# Select best model (read config_num column-wise so it stays an integer list index)
+best_result = results[int(results_df['config_num'].iloc[0]) - 1]
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(f"   {best_result['num_passed']}/7 points ({best_result['score']:.1%}) | "
+      f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, d={best_params['max_depth']}, l={best_params['num_leaves']}")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params['n_estimators'],
+    learning_rate=best_params['learning_rate'],
+    max_depth=best_params['max_depth'],
+    num_leaves=best_params['num_leaves'],
+    random_state=42,
+    verbose=-1
+)
+final_model.fit(df_all[feature_cols], df_all['target'])
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict the log return of TICKERS[0] from the latest live features.
+    
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+    
+    Returns:
+        float: Predicted log return (no price conversion is applied)
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+    
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+    
+    # Current price from live feature attrs; it is only validated below and never used in the returned value
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        # Fallback to live snapshot (still remote API; no local parquet)
+        snap = workflow._dm.get_live_snapshot(TICKERS)
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+    
+    # Predict log return
+    predicted_log_return = final_model.predict(live_features[feature_cols].values.reshape(1, -1))[0]
+    
+    # No conversion to price is performed:
+    # Log-return topic: return the log return directly
+    
+    print(f"\nLive Prediction: {predicted_log_return:+.6f} ({predicted_log_return:+.4f} log return)")
+    
+    return float(predicted_log_return)
+
+# Test and save
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+
+with open("predict_83.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {best_result['num_passed']}/7 points ({best_result['score']:.1%})")
+print("Saved to predict_83.pkl")
+print(f"Run artifacts: {artifacts['run_dir']}")
+print(f"- Predictions: {artifacts['predictions_csv']}")
+print(f"- Scatter plot: {artifacts['scatter_png']}")
+print("="*80)
+print("\nDeploy: python deploy_worker.py")
+
diff --git a/notebooks/testnet/topic_84_eth_8h_logreturn/example.py b/notebooks/testnet/topic_84_eth_8h_logreturn/example.py
new file mode 100644
index 0000000..f6bedd7
--- /dev/null
+++ b/notebooks/testnet/topic_84_eth_8h_logreturn/example.py
@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+Allora Forge Builder Kit v3.0 - Topic 84 ETH/USD Log-Return Prediction Walkthrough
+================================================================================
+
+This walkthrough demonstrates 8-hour ETH/USD log-return prediction using the
+Allora ML Workflow Kit with base features and LightGBM.
+
+Data is sourced from the Atlas data service (Tiingo 1-min candles).
+
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import os
+import json
+from datetime import datetime, timedelta, timezone
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
+import matplotlib.pyplot as plt
+import cloudpickle
+from allora_forge_builder_kit import AlloraMLWorkflow, PerformanceEvaluator
+
+# =============================================================================
+# EXPERIMENT CONFIGURATION
+# =============================================================================
+
+# Data Configuration
+TICKERS = ["ethusd"]
+DAYS_OF_HISTORY = 500
+INTERVAL = "5m"
+
+# Feature Configuration (bar counts are in units of INTERVAL, i.e. 5-minute bars)
+NUMBER_OF_INPUT_BARS = 288  # Number of 5-minute input bars per sample (288 x 5m = 24 hours)
+TARGET_BARS = 96           # Predict 96 bars ahead (96 x 5m = 8 hours)
+
+# Cross-Validation Configuration
+N_SPLITS = 3               # Number of CV folds
+MAX_TRAIN_SIZE = 100_000_000  # Maximum training samples per fold
+
+# Model Configuration
+N_ESTIMATORS_MAX = 500    # Train with max trees, evaluate at checkpoints
+N_ESTIMATORS_CHECKPOINTS = [100, 300, 500]
+LEARNING_RATES = [0.01, 0.05, 0.1]
+MAX_DEPTHS = [3, 5, 7]
+NUM_LEAVES = [15, 31, 63]
+
+# =============================================================================
+# SCRIPT START
+# =============================================================================
+
+print("="*80)
+print("Allora Forge Builder Kit v3.0 - Topic 84 Walkthrough")
+print("="*80)
+
+
+def _to_serializable(obj):
+    """Recursively convert numpy/pandas values (also inside dicts, lists, tuples) to JSON-safe Python types."""
+    if isinstance(obj, (np.floating, np.integer)):
+        return obj.item()
+    if isinstance(obj, (np.bool_,)):
+        return bool(obj)
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, (pd.Timestamp, datetime)):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: _to_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):  # json writes tuples as arrays, so both become lists
+        return [_to_serializable(v) for v in obj]
+    return obj
+
+
+def save_run_artifacts(df_eval, best_result, best_params, run_dir, feature_cols):
+    """Write config.json, metrics.json, predictions.csv, a scatter PNG and report.txt into run_dir; return key paths."""
+    os.makedirs(run_dir, exist_ok=True)
+
+    # 1) Run config: module-level experiment constants plus the selected hyperparameters
+    config = {
+        "tickers": TICKERS,
+        "days_of_history": DAYS_OF_HISTORY,
+        "interval": INTERVAL,
+        "number_of_input_bars": NUMBER_OF_INPUT_BARS,
+        "target_bars": TARGET_BARS,
+        "n_splits": N_SPLITS,
+        "max_train_size": MAX_TRAIN_SIZE,
+        "n_estimators_checkpoints": N_ESTIMATORS_CHECKPOINTS,
+        "learning_rates": LEARNING_RATES,
+        "max_depths": MAX_DEPTHS,
+        "num_leaves": NUM_LEAVES,
+        "best_params": best_params,
+        "feature_count": len(feature_cols),
+    }
+    with open(os.path.join(run_dir, "config.json"), "w") as f:
+        json.dump(_to_serializable(config), f, indent=2)
+
+    # 2) Metrics: score/grade/num_passed are required keys; the remaining entries fall back to None or {}
+    metrics_payload = {
+        "score": best_result["score"],
+        "grade": best_result["grade"],
+        "num_passed": best_result["num_passed"],
+        "num_primary_metrics": best_result.get("num_primary_metrics"),
+        "thresholds": best_result.get("thresholds", {}),
+        "passed": best_result.get("passed", {}),
+        "metrics": best_result.get("metrics", {}),
+    }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(_to_serializable(metrics_payload), f, indent=2)
+
+    # 3) Predictions table: the best config's stored predictions replace df_eval's 'pred' (positionally, via .values)
+    export_df = df_eval.copy()
+    if "predictions" in best_result:
+        export_df["pred"] = best_result["predictions"].values
+
+    export_cols = ["open_time", "target", "pred"]
+    export_cols = [c for c in export_cols if c in export_df.columns]
+    preds_df = export_df[export_cols].dropna(subset=["pred"]).copy()
+    preds_csv_path = os.path.join(run_dir, "predictions.csv")
+    preds_df.to_csv(preds_csv_path, index=False)
+
+    # 4) Scatter plot: pred vs target, with a dashed y = x reference line spanning both ranges
+    plt.figure(figsize=(8, 8))
+    plt.scatter(preds_df["target"], preds_df["pred"], s=8, alpha=0.35)
+    lim_min = float(min(preds_df["target"].min(), preds_df["pred"].min()))
+    lim_max = float(max(preds_df["target"].max(), preds_df["pred"].max()))
+    plt.plot([lim_min, lim_max], [lim_min, lim_max], linestyle="--", linewidth=1)
+    plt.xlabel("Target (log return)")
+    plt.ylabel("Prediction (log return)")
+    plt.title("Predictions vs Target")
+    plt.tight_layout()
+    scatter_path = os.path.join(run_dir, "scatter_pred_vs_target.png")
+    plt.savefig(scatter_path, dpi=150)
+    plt.close()
+
+    # 5) Human-readable report: score, grade, chosen params and per-metric PASS/FAIL lines
+    with open(os.path.join(run_dir, "report.txt"), "w") as f:
+        f.write("Allora Topic 84 Run Report\n")
+        f.write("=" * 40 + "\n")
+        f.write(f"Score: {best_result['score']:.1%} ({best_result['num_passed']}/7)\n")
+        f.write(f"Grade: {best_result['grade']}\n")
+        f.write(f"Best params: {best_params}\n\n")
+        f.write("Primary metric pass/fail:\n")
+        for metric_name, did_pass in best_result.get("passed", {}).items():
+            f.write(f"- {metric_name}: {'PASS' if did_pass else 'FAIL'}\n")
+
+    return {
+        "run_dir": run_dir,
+        "predictions_csv": preds_csv_path,
+        "scatter_png": scatter_path,
+    }
+
+# =============================================================================
+# STEP 1: Initialize Workflow
+# =============================================================================
+print("\n[1/6] Initializing workflow...")
+
+# Resolve Allora API key (env var → file → prompt).
+# Get a free key at https://developer.allora.network
+# Alternatively, set data_source="binance" below to skip the API key entirely.
+from allora_forge_builder_kit.utils import get_api_key
+api_key = get_api_key(api_key_file=os.path.join(os.path.dirname(__file__), "..", "..", ".allora_api_key"))
+
+workflow = AlloraMLWorkflow(
+    tickers=TICKERS,
+    number_of_input_bars=NUMBER_OF_INPUT_BARS,
+    target_bars=TARGET_BARS,
+    interval=INTERVAL,
+    data_source="allora",
+    api_key=api_key
+)
+
+print(f"✅ Workflow initialized")
+print(f"   Assets: {TICKERS} | Interval: {INTERVAL}")
+print(f"   Input: {NUMBER_OF_INPUT_BARS} bars → Features: {NUMBER_OF_INPUT_BARS*5}")
+print(f"   Target: {TARGET_BARS} bars ahead")
+
+# =============================================================================
+# STEP 2: Backfill Historical Data
+# =============================================================================
+print(f"\n[2/6] Backfilling {DAYS_OF_HISTORY} days of historical data...")
+
+start_date = datetime.now(timezone.utc) - timedelta(days=DAYS_OF_HISTORY)
+try:
+    workflow.backfill(start=start_date)
+    print("✅ Backfill complete")
+except Exception as e:
+    print(f"⚠️ Backfill failed: {e}")
+    print("   Will attempt to use locally cached parquet data...")
+
+# =============================================================================
+# STEP 3: Extract Features & Engineer New Features
+# =============================================================================
+print("\n[3/6] Extracting and engineering features...")
+
+try:
+    df_all = workflow.get_full_feature_target_dataframe(start_date=start_date).reset_index()
+except Exception as e:
+    raise RuntimeError(
+        f"No data available: {e}\n\n"
+        "This usually means the backfill failed (bad/missing API key) and there is "
+        "no locally cached parquet data.\n\n"
+        "Fix options:\n"
+        "  1. Set a valid ALLORA_API_KEY (free at https://developer.allora.network)\n"
+        "  2. Use data_source='binance' in AlloraMLWorkflow() to skip the API key\n"
+    ) from e
+
+# Feature Engineering: Add log returns to base features
+# For detailed TA indicators and visualizations, see: feature_engineering_example.py
+
+def engineer_returns(row):
+    """Return a Series of log-return features at 1/6/12/24-bar lags, computed from this row's own close window only."""
+    # NOTE: Base features are already normalized by the workflow; TODO confirm they stay positive, since log() is taken below
+    closes = np.array([row[f'feature_close_{i}'] for i in range(NUMBER_OF_INPUT_BARS)])
+    
+    # Lags are counted in bars: the "h" key suffixes equal hours only on a 1h interval (INTERVAL here is "5m")
+    returns = {}
+    returns['log_return_1h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-2] + 1e-8) if NUMBER_OF_INPUT_BARS >= 2 else 0
+    returns['log_return_6h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-7] + 1e-8) if NUMBER_OF_INPUT_BARS >= 7 else 0
+    returns['log_return_12h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-13] + 1e-8) if NUMBER_OF_INPUT_BARS >= 13 else 0
+    returns['log_return_24h'] = np.log(closes[-1] + 1e-8) - np.log(closes[-25] + 1e-8) if NUMBER_OF_INPUT_BARS >= 25 else 0
+    
+    return pd.Series(returns)
+
+# Get base features
+base_feature_cols = [col for col in df_all.columns if col.startswith('feature_')]
+
+# Apply feature engineering
+print("   Engineering log return features...")
+engineered_features = df_all.apply(engineer_returns, axis=1)
+df_all = pd.concat([df_all, engineered_features], axis=1)
+
+# Use base features + engineered returns
+feature_cols = base_feature_cols + list(engineered_features.columns)
+df_all = df_all.dropna(subset=feature_cols + ['target'])
+
+print(f"✅ Dataset: {len(df_all):,} samples ({df_all['open_time'].min().date()} to {df_all['open_time'].max().date()})")
+print(f"   Features: {len(base_feature_cols)} base + {len(engineered_features.columns)} returns = {len(feature_cols)} total")
+print(f"   📚 See feature_engineering_example.py for more TA indicators")
+
+# Setup time series cross-validation
+tscv = TimeSeriesSplit(
+    n_splits=N_SPLITS, 
+    gap=TARGET_BARS, 
+    max_train_size=MAX_TRAIN_SIZE
+)
+
+print(f"✅ Walk-forward CV: {N_SPLITS} splits, {TARGET_BARS}-bar embargo")
+for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+    print(f"   Fold {fold_idx+1}: Train={len(train_idx):,}, Test={len(test_idx):,}")
+
+# =============================================================================
+# STEP 4: Grid Search with Walk-Forward Cross-Validation
+# =============================================================================
+print("\n[4/6] Running grid search...")
+
+results = []
+evaluator = PerformanceEvaluator()
+config_num = 0
+
+for lr in LEARNING_RATES:
+    for depth in MAX_DEPTHS:
+        for leaves in NUM_LEAVES:
+            
+            # Train once with max trees, evaluate at checkpoints
+            fold_models = []
+            for fold_idx, (train_idx, test_idx) in enumerate(tscv.split(df_all)):
+                X_train = df_all.iloc[train_idx][feature_cols]
+                y_train = df_all.iloc[train_idx]['target']
+                
+                lgb = LGBMRegressor(
+                    n_estimators=N_ESTIMATORS_MAX,
+                    learning_rate=lr,
+                    max_depth=depth,
+                    num_leaves=leaves,
+                    random_state=42,
+                    verbose=-1
+                )
+                lgb.fit(X_train, y_train)
+                fold_models.append((lgb, test_idx))
+            
+            # Evaluate at tree count checkpoints
+            for n_est in N_ESTIMATORS_CHECKPOINTS:
+                config_num += 1
+                df_all['pred'] = np.nan
+                
+                # Generate predictions using first n_est trees
+                for lgb, test_idx in fold_models:
+                    X_test = df_all.iloc[test_idx][feature_cols]
+                    preds = lgb.predict(X_test, num_iteration=n_est)
+                    df_all.iloc[test_idx, df_all.columns.get_loc('pred')] = preds
+                
+                # Evaluate
+                valid_mask = ~df_all['pred'].isna()
+                metrics = evaluator.evaluate(
+                    y_true=df_all.loc[valid_mask, 'target'],
+                    y_pred=df_all.loc[valid_mask, 'pred']
+                )
+                
+                # Store results
+                results.append({
+                    'config_num': config_num,
+                    'n_estimators': n_est,
+                    'learning_rate': lr,
+                    'max_depth': depth,
+                    'num_leaves': leaves,
+                    'predictions': df_all['pred'].copy(),
+                    **metrics
+                })
+                
+                print(f"   [{config_num:2d}] n={n_est:4d}, lr={lr:.2f}, d={depth}, l={leaves:2d} -> "
+                      f"{metrics['num_passed']}/7 ({metrics['score']:.1%} - {metrics['grade']})")
+
+# Analyze results
+results_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'predictions'} for r in results])
+results_df = results_df.sort_values(['num_passed', 'score'], ascending=[False, False])
+
+print(f"\n✅ Tested {len(results)} configurations")
+print(f"\n   Top 5 models:")
+top5_cols = ['config_num', 'n_estimators', 'learning_rate', 'max_depth', 'num_leaves', 'num_passed', 'score']
+print(results_df[top5_cols].head().to_string(index=False))
+
+# Select best model (config_num is 1-based; read it column-wise so it stays an integer list index)
+best_result = results[int(results_df['config_num'].iloc[0]) - 1]
+best_params = {k: best_result[k] for k in ['n_estimators', 'learning_rate', 'max_depth', 'num_leaves']}
+
+print(f"\nBest: Config #{best_result['config_num']}")
+print(f"   {best_result['num_passed']}/7 points ({best_result['score']:.1%}) | "
+      f"n={best_params['n_estimators']}, lr={best_params['learning_rate']}, d={best_params['max_depth']}, l={best_params['num_leaves']}")
+
+# =============================================================================
+# STEP 5: Evaluate Best Model
+# =============================================================================
+print("\n[5/6] Detailed evaluation...")
+print("="*80)
+evaluator.print_report(best_result, detailed=False)
+print("="*80)
+
+# Save reproducibility artifacts + diagnostic plot
+run_timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+run_dir = os.path.join(os.path.dirname(__file__), "runs", run_timestamp)
+artifacts = save_run_artifacts(
+    df_eval=df_all,
+    best_result=best_result,
+    best_params=best_params,
+    run_dir=run_dir,
+    feature_cols=feature_cols,
+)
+
+# =============================================================================
+# STEP 6: Train Production Model
+# =============================================================================
+print("\n[6/6] Training production model...")
+
+final_model = LGBMRegressor(
+    n_estimators=best_params['n_estimators'],
+    learning_rate=best_params['learning_rate'],
+    max_depth=best_params['max_depth'],
+    num_leaves=best_params['num_leaves'],
+    random_state=42,
+    verbose=-1
+)
+final_model.fit(df_all[feature_cols], df_all['target'])
+print(f"✅ Final model trained on {len(df_all):,} samples")
+
+def predict(nonce: int = None) -> float:
+    """
+    Predict the ETH/USD log return over the next TARGET_BARS bars (96 x 5m = 8 hours).
+    
+    Args:
+        nonce: Block nonce from Allora SDK (unused)
+    
+    Returns:
+        float: Predicted log return (no price conversion is applied)
+    """
+    # Get live features from workflow
+    live_row = workflow.get_live_features(ticker=TICKERS[0])
+    
+    if live_row is None or len(live_row) == 0:
+        raise ValueError("Could not get live features")
+    
+    # Engineer return features from live data (same as training)
+    live_returns = engineer_returns(live_row.iloc[0])
+    
+    # Combine base features + engineered returns
+    live_features = pd.concat([live_row[base_feature_cols].iloc[0], live_returns])
+    
+    # Current price from live feature attrs; it is only validated below and never used in the returned value
+    current_price = float(live_row.attrs.get("current_price", np.nan))
+    if not np.isfinite(current_price) or current_price <= 0:
+        # Fallback to live snapshot (still remote API; no local parquet)
+        snap = workflow._dm.get_live_snapshot(TICKERS)
+        if snap is not None and len(snap) > 0 and "close" in snap.columns:
+            current_price = float(snap["close"].iloc[-1])
+
+    if not np.isfinite(current_price) or current_price <= 0:
+        raise ValueError(f"Invalid current price for inference: {current_price}")
+    
+    # Predict log return
+    predicted_log_return = final_model.predict(live_features[feature_cols].values.reshape(1, -1))[0]
+    
+    # No conversion to price is performed:
+    # Log-return topic: return the log return directly
+    
+    print(f"\nLive Prediction: {predicted_log_return:+.6f} ({predicted_log_return:+.4f} log return)")
+    
+    return float(predicted_log_return)
+
+# Test and save
+print("\n🧪 Testing prediction...")
+test_prediction = predict()
+
+with open("predict_84.pkl", "wb") as f:
+    cloudpickle.dump(predict, f)
+
+print("\n" + "="*80)
+print("COMPLETE!")
+print("="*80)
+print(f"{len(feature_cols)} features | {best_result['num_passed']}/7 points ({best_result['score']:.1%})")
+print("Saved to predict_84.pkl")
+print(f"Run artifacts: {artifacts['run_dir']}")
+print(f"- Predictions: {artifacts['predictions_csv']}")
+print(f"- Scatter plot: {artifacts['scatter_png']}")
+print("="*80)
+print("\nDeploy: python deploy_worker.py")
+
diff --git a/skills/allora-model-builder/SKILL.md b/skills/allora-model-builder/SKILL.md
index a5fb9f2..04477da 100644
--- a/skills/allora-model-builder/SKILL.md
+++ b/skills/allora-model-builder/SKILL.md
@@ -120,6 +120,64 @@ worker = AlloraWorker(
   Pearson p-value, WRMSE improvement, CZAR improvement) scored out of 7.
 - For **price topics**, return an absolute price.
   For **log-return topics**, return the log return.
+- For **volatility topics**, return the predicted std of 1-minute log returns
+  over the horizon (a non-negative float). Use `target_type="volatility"`.
+
+## Volatility target workflow
+
+For topics that predict realised volatility (e.g. Topic 79):
+
+```python
+workflow = AlloraMLWorkflow(
+    tickers=["btcusd"],
+    number_of_input_bars=15,   # 15 minutes of 1-min bars
+    target_bars=15,            # 15-minute volatility horizon
+    interval="1m",             # base data interval
+    target_type="volatility",  # std of log returns over horizon
+    data_source="allora",
+    api_key="UP-...",
+)
+```
+
+The target is defined as:
+```
+r_i = log(close[t+i] / close[t+i-1])  for i in 1..target_bars
+target[t] = std(r_1, ..., r_{target_bars})   (sample std, ddof=1)
+```
+
+The predict function returns the volatility directly (no price conversion):
+```python
+def predict(nonce=None):
+    features = workflow.get_live_features("btcusd")
+    vol = model.predict(features[feature_cols].values.reshape(1, -1))[0]
+    return float(max(0.0, vol))  # volatility is non-negative
+```
+
+### Best-performing approach: log-space prediction
+
+Predicting `log(vol)` and transforming back with bias correction produces
+better calibrated predictions that match the target distribution:
+
+```python
+import numpy as np
+
+# Train in log-space
+y_train_log = np.log(y_train + 1e-10)
+model.fit(X_train, y_train_log)
+
+# Bias correction: exp(E[log(x)]) underestimates E[x]
+residuals = y_train_log - model.predict(X_train)
+bias_correction = np.exp(0.5 * np.var(residuals))
+
+def predict(nonce=None):
+    features = workflow.get_live_features("btcusd")
+    log_pred = model.predict(features[feature_cols].values.reshape(1, -1))[0]
+    vol = np.exp(log_pred) * bias_correction
+    return float(max(0.0, vol))
+```
+
+Volatility topics: 79 (BTC), 80 (ETH), 81 (XRP), 82 (SOL).
+See `notebooks/topic_79_btc_vol/topic_79_model_e_calibrated.py` for the full implementation.
 
 ## Base feature normalization
 
diff --git a/tests/README.md b/tests/README.md
index d37d2c9..b1c111d 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -12,7 +12,7 @@ Comprehensive test suite for the data manager architecture and ML workflow.
 - ✅ AtlasDataManager initialization
 - ✅ Data format parsing
 - ✅ Storage separation
-- ✅ Workflow integration
+- ✅ Workflow integration (log-return and volatility target types)
 
 ### Integration Tests (Requires network + API keys)
 - ✅ Binance backfill and load
diff --git a/tests/test_volatility_target.py b/tests/test_volatility_target.py
new file mode 100644
index 0000000..50bc419
--- /dev/null
+++ b/tests/test_volatility_target.py
@@ -0,0 +1,145 @@
+"""
+Tests for the volatility target computation.
+
+Verifies that target_type="volatility" correctly computes the standard
+deviation of consecutive 1-minute log returns over the target horizon.
+"""
+
+import numpy as np
+import polars as pl
+import pytest
+from datetime import datetime, timezone, timedelta
+
+from allora_forge_builder_kit.workflow import AlloraMLWorkflow
+
+
+@pytest.fixture
+def synthetic_ohlcv():
+    """Return (DataFrame, close prices): 50 one-minute OHLCV bars from a seeded geometric random walk."""
+    np.random.seed(42)
+    n = 50
+    times = [
+        datetime(2025, 1, 1, tzinfo=timezone.utc) + timedelta(minutes=i)
+        for i in range(n)
+    ]
+    prices = 100.0 * np.exp(np.cumsum(np.random.normal(0, 0.001, n)))
+    return pl.DataFrame(
+        {
+            "open_time": times,
+            "open": prices,
+            "high": prices * 1.001,
+            "low": prices * 0.999,
+            "close": prices,
+            "volume": np.ones(n) * 1000.0,
+        }
+    ), prices
+
+
+def _make_workflow(target_bars=15):
+    """Create a bare workflow instance for calling target methods."""
+    wf = AlloraMLWorkflow.__new__(AlloraMLWorkflow)
+    wf.target_bars = target_bars
+    return wf
+
+
+class TestVolatilityTargetComputation:
+    """Tests for compute_volatility_target_polars."""
+
+    def test_matches_manual_calculation(self, synthetic_ohlcv):
+        """Verify each row matches manually computed std of forward log returns."""
+        df, prices = synthetic_ohlcv
+        target_bars = 15
+        wf = _make_workflow(target_bars)
+
+        result = wf.compute_volatility_target_polars(df, target_bars=target_bars)
+
+        for row_idx in range(len(prices) - target_bars):
+            window_prices = prices[row_idx : row_idx + target_bars + 1]
+            log_rets = np.diff(np.log(window_prices))
+            expected = np.std(log_rets, ddof=1)
+            computed = result["target"][row_idx]
+            assert np.isclose(expected, computed, rtol=1e-6), (
+                f"Row {row_idx}: expected={expected:.10f}, got={computed:.10f}"
+            )
+
+    def test_trailing_rows_are_null(self, synthetic_ohlcv):
+        """Rows without a full forward window should have null targets."""
+        df, _ = synthetic_ohlcv
+        target_bars = 15
+        wf = _make_workflow(target_bars)
+
+        result = wf.compute_volatility_target_polars(df, target_bars=target_bars)
+        null_count = result["target"].null_count()
+        assert null_count == target_bars
+
+    def test_all_valid_targets_are_non_negative(self, synthetic_ohlcv):
+        """Volatility (std) must be non-negative."""
+        df, _ = synthetic_ohlcv
+        wf = _make_workflow(15)
+
+        result = wf.compute_volatility_target_polars(df, target_bars=15)
+        valid = result["target"].drop_nulls()
+        assert (valid >= 0).all()
+
+    def test_different_horizon_sizes(self, synthetic_ohlcv):
+        """Verify correctness with different target_bars values."""
+        df, prices = synthetic_ohlcv
+
+        for target_bars in [3, 5, 10, 20]:
+            wf = _make_workflow(target_bars)
+            result = wf.compute_volatility_target_polars(df, target_bars=target_bars)
+
+            # Check first valid row
+            if len(prices) > target_bars:
+                window_prices = prices[0 : target_bars + 1]
+                log_rets = np.diff(np.log(window_prices))
+                expected = np.std(log_rets, ddof=1)
+                computed = result["target"][0]
+                assert np.isclose(expected, computed, rtol=1e-6), (
+                    f"target_bars={target_bars}: expected={expected}, got={computed}"
+                )
+
+
+class TestTargetTypeParameter:
+    """Tests for the target_type parameter on AlloraMLWorkflow."""
+
+    def test_default_is_log_return(self):
+        """Default target_type should be 'log_return'."""
+        wf = AlloraMLWorkflow(
+            tickers=["btcusd"],
+            number_of_input_bars=15,
+            target_bars=15,
+            data_source="binance",
+        )
+        assert wf.target_type == "log_return"
+
+    def test_volatility_accepted(self):
+        """target_type='volatility' should be accepted."""
+        wf = AlloraMLWorkflow(
+            tickers=["btcusd"],
+            number_of_input_bars=15,
+            target_bars=15,
+            target_type="volatility",
+            data_source="binance",
+        )
+        assert wf.target_type == "volatility"
+
+    def test_invalid_target_type_raises(self):
+        """Invalid target_type should raise ValueError."""
+        with pytest.raises(ValueError, match="target_type must be one of"):
+            AlloraMLWorkflow(
+                tickers=["btcusd"],
+                number_of_input_bars=15,
+                target_bars=15,
+                target_type="invalid",
+                data_source="binance",
+            )
+
+    def test_log_return_target_unchanged(self, synthetic_ohlcv):
+        """Existing log-return target should still work identically."""
+        df, prices = synthetic_ohlcv
+        wf = _make_workflow(15)
+
+        result = wf.compute_target_polars(df, target_bars=15)
+        expected = np.log(prices[15] / prices[0])
+        assert np.isclose(result["target"][0], expected, rtol=1e-6)