allora-network · jefferythewind · Jun 10, 2026
diff --git a/.gitignore b/.gitignore
@@ -75,3 +75,4 @@ managed_artifacts/
 artifacts/
 notebooks/artifacts/
 notebooks/runs/
+**/runs/
diff --git a/AGENTS.md b/AGENTS.md
@@ -39,8 +39,14 @@ In one working session, get a user from clone to live worker submissions with vi
 Run one of the whitelist-free examples to train + evaluate + save `predict.pkl`:
 
 ```bash
-python notebooks/example_topic_69_bitcoin_walkthrough.py
-python notebooks/example_topic_77_bitcoin_5min_walkthrough.py
+python notebooks/example_topic_69_bitcoin_walkthrough.py   # price prediction (1h bars)
+python notebooks/example_topic_77_bitcoin_5min_walkthrough.py  # price prediction (5m bars)
+
+# Volatility topics (1m bars, std of log returns over 15-min horizon)
+python notebooks/testnet/topic_79_btc_vol/topic_79_model_e_calibrated.py  # BTC best model
+python notebooks/testnet/topic_80_eth_vol/topic_80_model_e_calibrated.py  # ETH
+python notebooks/testnet/topic_81_xrp_vol/topic_81_model_e_calibrated.py  # XRP
+python notebooks/testnet/topic_82_sol_vol/topic_82_model_e_calibrated.py  # SOL
 ```
 
 Then deploy. The deploy scripts use `WorkerManager` internally — wallet creation,
@@ -108,6 +114,12 @@ All base features are therefore **ratios**, not raw prices. Any additional engin
 Before deployment, verify topic prediction format:
 - **Price topic** → absolute price prediction
 - **Log-return topic** → `log(future/current)` prediction
+- **Volatility topic** → std of 1-minute log returns over the horizon (non-negative float)
+
+## Volatility topics
+Topics 79–82 predict 15-minute realised volatility for BTC, ETH, XRP, and SOL, respectively.
+Best model architecture: Model E (log-space prediction + bias correction + GARCH features).
+Scripts are organized in `notebooks/testnet/topic_{id}_{asset}_vol/` subdirectories.
 
 ## Repo hygiene rules
 - Never commit secrets or keys.

diff --git a/README.md b/README.md
@@ -99,6 +99,9 @@ python example_topic_69_bitcoin_walkthrough.py
 
 # Topic 77 — 5-min BTC/USD price prediction (5m bars, ~2 min)
 python example_topic_77_bitcoin_5min_walkthrough.py
+
+# Topic 79 — 15-min BTC/USD volatility prediction (1m bars, ~5 min)
+python example_topic_79_btc_volatility_walkthrough.py
 ```
 
 Each script backfills historical data, engineers features, trains and evaluates a model, and saves a `predict.pkl` artifact.
@@ -182,18 +185,24 @@ for t in d.get_all_topics():
 
 Playground topics (testnet only) are the recommended starting point — no whitelist required.
 
-| Testnet ID | Name | Notes |
-|-----------|------|-------|
-| **69** | BTC/USD - 1 Day Price Prediction | Playground — example walkthroughs use this |
-| **77** | BTC/USD - 5 Min Price Prediction | Playground Fast |
+| Testnet ID | Name | Target type | Notes |
+|-----------|------|-------------|-------|
+| **69** | BTC/USD - 1 Day Price Prediction | Price | Playground — example walkthroughs use this |
+| **77** | BTC/USD - 5 Min Price Prediction | Price | Playground Fast |
+| **79** | BTC/USD - 15 Min Volatility Prediction | Volatility | Std of 1-min log returns over 15-min horizon |
+| **80** | ETH/USD - 15 Min Volatility Prediction | Volatility | Same definition as 79, ETH pair |
+| **81** | XRP/USD - 15 Min Volatility Prediction | Volatility | Same definition as 79, XRP pair |
+| **82** | SOL/USD - 15 Min Volatility Prediction | Volatility | Same definition as 79, SOL pair |
+| **83** | BTC/USD - 8h Log-Return Prediction | Log-return | 8h BTC/USD log return — testnet equivalent of mainnet topic 1 |
+| **84** | ETH/USD - 8h Log-Return Prediction | Log-return | 8h ETH/USD log return — testnet equivalent of mainnet topic 2 |
 
 Mainnet topics and their testnet equivalents:
 
 | Mainnet ID | Mainnet Name | Testnet ID | Testnet Name |
 |-----------|-------------|-----------|-------------|
-| 1  | BTC/USD - Log Returns - 8h  | 64 | 8h BTC/USD Log-Return (5min updates) |
-| 2  | ETH/USD - Log Returns - 8h  | — | Missing |
-| 3  | SOL/USD - Log Returns - 8h  | 57 | 8h SOL/USD Log-Return *(inactive)* |
+| 1  | BTC/USD - Log Returns - 8h  | 83 | BTC/USD - 8h Log-Return Prediction |
+| 2  | ETH/USD - Log Returns - 8h  | 84 | ETH/USD - 8h Log-Return Prediction |
+| 3  | SOL/USD - Log Returns - 8h  | 57 | 8h SOL/USD Log-Return Prediction |
 | 9  | ETH/USD - Price Prediction - 8h | 41 | ETH/USD - 8h Price Prediction |
 | 10 | SOL/USD - Price Prediction - 8h | 38 | SOL/USD - 8h Price Prediction |
 | 14 | BTC/USD - Price Prediction - 8h | 42 | BTC/USD - 8h Price Prediction |
@@ -210,21 +219,33 @@ Mainnet topics and their testnet equivalents:
 ```python
 from allora_forge_builder_kit import AlloraMLWorkflow
 
-# Build a training dataset
+# Build a training dataset (log-return target — default)
 workflow = AlloraMLWorkflow(
     tickers=["btcusd"],
-    topic_id=69,
+    number_of_input_bars=48,
+    target_bars=24,
     interval="1h",
-    n_input_bars=48,
-    n_target_bars=24,
+    data_source="allora",
+    api_key="UP-...",
 )
 workflow.backfill(days=500)
 df = workflow.get_full_feature_target_dataframe()
 
+# Volatility target (std of 1-min log returns over the horizon)
+vol_workflow = AlloraMLWorkflow(
+    tickers=["btcusd"],
+    number_of_input_bars=15,
+    target_bars=15,           # 15-minute volatility window
+    interval="1m",
+    target_type="volatility", # NEW: "log_return" (default) or "volatility"
+    data_source="allora",
+    api_key="UP-...",
+)
+
 # Evaluate a predict function
 from allora_forge_builder_kit import PerformanceEvaluator
-evaluator = PerformanceEvaluator(workflow)
-grade = evaluator.evaluate(predict_fn)
+evaluator = PerformanceEvaluator()
+report = evaluator.evaluate(y_true, y_pred)
 ```
 
 ---
@@ -233,7 +254,12 @@ grade = evaluator.evaluate(predict_fn)
 
 ### Framing forecasting as supervised learning
 
-At any point in time $t$, the model observes a window of $N$ past bars as input features $\mathbf{x} \in \mathbb{R}^d$ and predicts a future outcome $y$ — a price or log return over the next $H$ bars. By sliding this window across the full history, a single time series becomes thousands of labeled examples $(\mathbf{x}_i, y_i)$, turning forecasting into a standard supervised learning problem.
+At any point in time $t$, the model observes a window of $N$ past bars as input features $\mathbf{x} \in \mathbb{R}^d$ and predicts a future outcome $y$ over the next $H$ bars. The target $y$ depends on the topic type:
+
+- **Price / log-return topics** — the absolute price $y = p_{t+H}$, or the log return $y = \log(p_{t+H} / p_t)$
+- **Volatility topics** — $y = \text{std}(r_1, \ldots, r_H)$ where $r_i = \log(p_{t+i} / p_{t+i-1})$ are consecutive 1-minute log returns over the horizon
+
+By sliding this window across the full history, a single time series becomes thousands of labeled examples $(\mathbf{x}_i, y_i)$, turning forecasting into a standard supervised learning problem.
 
 The `AlloraMLWorkflow` handles this construction: `backfill()` fetches historical data, `get_full_feature_target_dataframe()` builds the feature matrix and target vector, ready for any scikit-learn compatible model.
 
@@ -321,9 +347,13 @@ All three produce a complete, runnable pipeline and satisfy the same nine method
 |------|---------|
 | `notebooks/example_topic_69_bitcoin_walkthrough.py` | End-to-end example for topic 69: data → features → model → artifact |
 | `notebooks/example_topic_77_bitcoin_5min_walkthrough.py` | End-to-end example for topic 77: 5-min BTC prediction |
-| `notebooks/deploy_worker.py` | Deploy any topic with WorkerManager (`TOPIC_ID=N python deploy_worker.py`) |
-| `notebooks/deploy_worker_raw.py` | Minimal SDK-only deployment reference (no WorkerManager) |
-| `notebooks/feature_engineering_example.py` | Standalone feature engineering reference |
+| `notebooks/topic_79_btc_vol/` | Topic 79 BTC/USD volatility: example + models A/B/D/E |
+| `notebooks/topic_80_eth_vol/` | Topic 80 ETH/USD volatility: models A/B/D/E |
+| `notebooks/topic_81_xrp_vol/` | Topic 81 XRP/USD volatility: model E |
+| `notebooks/topic_82_sol_vol/` | Topic 82 SOL/USD volatility: model E |
+| `notebooks/shared/deploy_worker.py` | Deploy any topic with WorkerManager (`TOPIC_ID=N python deploy_worker.py`) |
+| `notebooks/shared/deploy_worker_raw.py` | Minimal SDK-only deployment reference (no WorkerManager) |
+| `notebooks/shared/feature_engineering_example.py` | Standalone feature engineering reference |
 | `allora_forge_builder_kit/workflow.py` | Data + feature pipeline |
 | `allora_forge_builder_kit/evaluation.py` | Model scoring (7 primary metrics + grading) |
 | `allora_forge_builder_kit/topic_discovery.py` | Query live topics on testnet/mainnet |

diff --git a/allora_forge_builder_kit/__init__.py b/allora_forge_builder_kit/__init__.py
@@ -10,6 +10,7 @@
 from .topic_discovery import AlloraTopicDiscovery, TopicInfo
 from .worker_manager import WorkerManager, WorkerSpec, DeployResult, Identity, build_topic_desc_resolver
 from .worker_monitor import WorkerMonitor, MonitorTarget, AlloraSDKEventFetcher
+from .czar_loss import czar_loss, czar_gradient, czar_hessian, make_czar_objective
 
 __all__ = [
     "__version__",
@@ -31,6 +32,10 @@
     "WorkerMonitor",
     "MonitorTarget",
     "AlloraSDKEventFetcher",
+    "czar_loss",
+    "czar_gradient",
+    "czar_hessian",
+    "make_czar_objective",
 ]
 
 

diff --git a/allora_forge_builder_kit/czar_loss.py b/allora_forge_builder_kit/czar_loss.py
@@ -0,0 +1,167 @@
+"""
+CZAR Loss (Composite Zero-Agnostic Returns)
+============================================
+
+A piecewise loss built on the Cauchy kernel that:
+- Z-scores by local volatility
+- Applies steep wrong-sign penalties
+- Uses bounded arctan transitions for same-sign predictions
+- Smoothly reduces loss near zero returns
+
+Provides gradient and hessian for use as a custom LightGBM objective.
+"""
+
+import numpy as np
+
+
+def derivative(x):
+    """Return the Cauchy kernel ``1 / (1 + x**2)`` evaluated at *x*.
+
+    This is the derivative of ``arctan(x)``. Only arithmetic operators are
+    used, so NumPy arrays are handled element-wise.
+    """
+    denominator = 1.0 + x**2
+    return 1.0 / denominator
+
+
+def antiderivative(x):
+    """Return ``arctan(x)``, the antiderivative of ``1 / (1 + x**2)``."""
+    arctan_of_x = np.arctan(x)
+    return arctan_of_x
+
+
+def double_derivative(x):
+    """Return ``2 * |x| / (1 + x**2)**2`` evaluated at *x*.
+
+    This is the magnitude of the second derivative of ``arctan(x)``; the
+    result is non-negative for every real *x* because of the absolute value.
+    """
+    numerator = 2.0 * np.abs(x)
+    denominator = (1.0 + x**2)**2
+    return numerator / denominator
+
+
+def eps_effective(eps, delta):
+    """Return the non-negative root ``x`` of ``x + beta * x**2 = A``.
+
+    Here ``beta = delta / (1 + delta**2)`` and
+    ``A = (1 + delta**2) * (arctan(eps + delta) - arctan(delta))``.
+    For ``delta == 0`` the equation is linear and the result is
+    ``arctan(eps)``.
+
+    Args:
+        eps: Softening width (``czar_loss`` passes its ``epsilon`` here).
+        delta: Offset (``czar_loss`` passes ``alpha / sqrt(3)`` here).
+
+    Returns:
+        The root described above, as a NumPy float (or array if the inputs
+        are arrays).
+    """
+    # np.arctan is the antiderivative of the Cauchy kernel 1 / (1 + x**2).
+    A = (1 + delta**2) * (np.arctan(eps + delta) - np.arctan(delta))
+    beta = delta / (1 + delta**2)
+    # Algebraically equal to (-1 + sqrt(1 + 4*beta*A)) / (2*beta), but written
+    # without the "-1 + sqrt(...)" subtraction and the division by beta. The
+    # textbook form loses all precision when delta is tiny but non-zero
+    # (sqrt(1 + 4*beta*A) rounds to 1, giving 0 / (2*beta) == 0); this form
+    # stays accurate there and reduces to A == arctan(eps) at delta == 0, so
+    # no special case is needed.
+    return 2 * A / (1 + np.sqrt(1 + 4 * beta * A))
+
+
+def softplus(x):
+    """Return ``log(1 + exp(x))`` in an overflow-safe form.
+
+    Computed as ``max(x, 0) + log1p(exp(-|x|))`` so the exponential never
+    receives a positive argument.
+    """
+    positive_part = np.maximum(x, 0.0)
+    correction = np.log1p(np.exp(-np.abs(x)))
+    return positive_part + correction
+
+
+def norm_smooth(z_true, eps, delta, tau):
+    """Return the weight applied to the softening term of ``czar_loss``.
+
+    The raw weight is ``norm_min = 1 - num / denom``, which equals 1 when
+    ``z_true == 0`` (``num`` vanishes there). It is then
+    limited from below either with a hard ``max(., 0)`` (``tau <= 0``) or with
+    a softplus-smoothed version that is normalised to equal 1 when
+    ``norm_min == 1``.
+
+    Args:
+        z_true: Z-scored true value(s); only the absolute value is used.
+        eps: Effective epsilon (``czar_loss`` passes ``eps_effective(...)``).
+        delta: Offset (``czar_loss`` passes ``alpha / sqrt(3)``).
+        tau: Smoothing temperature; ``tau <= 0`` selects the hard clamp.
+
+    Returns:
+        The weight, with the same shape as ``z_true``.
+    """
+    a = np.abs(z_true)
+    d2p1 = delta**2 + 1
+    num = d2p1 * (antiderivative(a + delta) - antiderivative(delta))
+    denom = eps + delta / d2p1 * eps**2
+    norm_min = 1.0 - num / denom
+
+    if tau <= 0:
+        # Hard clamp: no smoothing requested.
+        return np.maximum(norm_min, 0.0)
+
+    # Value norm_min approaches as |z_true| -> infinity (arctan -> pi / 2);
+    # its magnitude sets the scale of the smoothing temperature.
+    num_inf = d2p1 * (0.5 * np.pi - antiderivative(delta))
+    norm_inf = 1.0 - num_inf / denom
+    tau_eff = np.abs(tau) * np.abs(norm_inf)
+    # NOTE(review): tau_eff is 0 if norm_inf is exactly 0, which would divide
+    # by zero below -- confirm this cannot occur for supported parameters.
+    return softplus(norm_min / tau_eff) / softplus(1 / tau_eff)
+
+
+def czar_loss(y_true, y_pred, std, mean=0, alpha=1, epsilon=1, tau=0.05):
+    """Evaluate the piecewise CZAR loss on z-scored targets and predictions.
+
+    Both inputs are z-scored as ``(y - mean) / std``. With ``s`` the sign of
+    the true z-score (``+1`` when it is exactly zero), ``u = s * z_pred`` and
+    ``a = |z_true|``, one of three pieces is selected per element:
+
+    - ``u <= 0`` (prediction is zero or has the opposite sign): ``L1``, a
+      quadratic in ``z_pred`` with a linear term.
+    - ``0 < u <= a`` (same sign, not beyond the target): ``L2``, a difference
+      of arctan terms.
+    - ``u > a`` (same sign, beyond the target): ``L3``, a quadratic in
+      ``z_pred - z_true`` with a linear term.
+
+    When ``epsilon > 0`` a softening term is added. It is built from
+    ``z_true`` and the parameters only, so it does not depend on ``y_pred``.
+
+    Args:
+        y_true: True value(s).
+        y_pred: Predicted value(s).
+        std: Scale used for z-scoring.
+        mean: Location used for z-scoring.
+        alpha: Must lie in ``[0, 1]``; sets ``delta = alpha / sqrt(3)``.
+        epsilon: Softening width; ``epsilon <= 0`` disables the softening term.
+        tau: Passed to ``norm_smooth``; ``tau <= 0`` selects its hard clamp.
+
+    Returns:
+        The loss value(s) as a NumPy array.
+
+    Raises:
+        ValueError: If ``alpha`` is outside ``[0, 1]``.
+    """
+    if alpha < 0 or alpha > 1:
+        raise ValueError(f"alpha must be between 0 and 1, got {alpha}")
+
+    z_true = (y_true - mean) / std
+    z_pred = (y_pred - mean) / std
+
+    # Signs with the convention sign(0) == +1, so s and s_pred are never 0.
+    s = np.where(z_true == 0, 1, np.sign(z_true))
+    s_pred = np.where(z_pred == 0, 1, np.sign(z_pred))
+    a = np.abs(z_true)
+    u = s * z_pred
+
+    delta = alpha / np.sqrt(3)
+    d2p1 = delta**2 + 1
+
+    # Z-scores pushed away from zero by delta in the direction of their sign.
+    d_true = z_true + s * delta
+    d_pred = z_pred + s_pred * delta
+
+    # Quadratic coefficients: h1 for the wrong-sign piece, and min(h3, h1)
+    # for the piece beyond the target.
+    h1 = d2p1 * double_derivative(delta)
+    h3 = d2p1 * double_derivative(d_true)
+
+    # C is the value L2 takes as z_pred -> 0 from the target's side, so L1
+    # and L2 meet at z_pred == 0.
+    C = s * d2p1 * (antiderivative(d_true) - antiderivative(s * delta))
+    L1 = 0.5 * h1 * z_pred**2 - s * z_pred + C
+    L2 = s * d2p1 * (antiderivative(d_true) - antiderivative(d_pred))
+    dz = z_pred - z_true
+    L3 = 0.5 * np.minimum(h3, h1) * dz**2 + s * d2p1 * derivative(d_true) * dz
+
+    if epsilon > 0:
+        eps_eff = eps_effective(epsilon, delta)
+        # Loss at y_true == 0, y_pred == eps_eff with unit std; epsilon=0
+        # disables softening in the nested call and so ends the recursion.
+        softening_0 = czar_loss(0, eps_eff, 1.0, epsilon=0, alpha=alpha)
+        norm = norm_smooth(z_true, eps_eff, delta, tau)
+        Lsoft = norm * softening_0
+    else:
+        Lsoft = 0
+
+    return np.where(u <= 0, L1, np.where(u <= a, L2, L3)) + Lsoft
+
+
+def czar_gradient(y_true, y_pred, std, mean=0, alpha=1):
+    """Return the piecewise gradient used with the CZAR loss.
+
+    The three pieces are selected with the same conditions as in
+    ``czar_loss`` (``u <= 0``, ``u <= a``, otherwise) and the result is
+    divided by ``std``. The epsilon softening term of ``czar_loss`` does not
+    depend on ``y_pred``, so it has no counterpart here.
+
+    Unlike ``czar_loss``, ``alpha`` is not range-checked in this function.
+
+    Args:
+        y_true: True value(s).
+        y_pred: Predicted value(s).
+        std: Scale used for z-scoring.
+        mean: Location used for z-scoring.
+        alpha: Sets ``delta = alpha / sqrt(3)``.
+
+    Returns:
+        The gradient value(s) as a NumPy array.
+    """
+    z_true = (y_true - mean) / std
+    z_pred = (y_pred - mean) / std
+
+    # Signs with the convention sign(0) == +1.
+    s = np.where(z_true == 0, 1, np.sign(z_true))
+    s_pred = np.where(z_pred == 0, 1, np.sign(z_pred))
+    a = np.abs(z_true)
+    u = s * z_pred
+
+    delta = alpha / np.sqrt(3)
+    d2p1 = delta**2 + 1
+
+    d_true = z_true + s * delta
+    d_pred = z_pred + s_pred * delta
+
+    h1 = d2p1 * double_derivative(delta)
+    h3 = d2p1 * double_derivative(d_true)
+
+    # G1 uses np.sign(z_true), which is 0 when z_true == 0, whereas the L1
+    # piece of czar_loss uses s (+1 there).
+    # NOTE(review): confirm the zero-target case is meant to drop the linear term.
+    G1 = h1 * z_pred - np.sign(z_true)
+    G2 = -s * d2p1 * derivative(d_pred)
+    # G3 contains only the quadratic part; the L3 piece of czar_loss also has
+    # a term linear in (z_pred - z_true).
+    # NOTE(review): confirm this omission is intentional.
+    G3 = np.minimum(h3, h1) * (z_pred - z_true)
+
+    return np.where(u <= 0, G1, np.where(u <= a, G2, G3)) / std
+
+
+def czar_hessian(y_true, y_pred, std, mean=0, alpha=1):
+    """Return the piecewise hessian used with the CZAR loss.
+
+    The three pieces are selected with the same conditions as in
+    ``czar_loss`` and ``czar_gradient`` and the result is divided by
+    ``std**2``. ``alpha`` is not range-checked in this function.
+
+    Args:
+        y_true: True value(s).
+        y_pred: Predicted value(s).
+        std: Scale used for z-scoring.
+        mean: Location used for z-scoring.
+        alpha: Sets ``delta = alpha / sqrt(3)``.
+
+    Returns:
+        The hessian value(s) as a NumPy array.
+    """
+    z_true = (y_true - mean) / std
+    z_pred = (y_pred - mean) / std
+
+    # Signs with the convention sign(0) == +1.
+    s = np.where(z_true == 0, 1.0, np.sign(z_true))
+    s_pred = np.where(z_pred == 0, 1.0, np.sign(z_pred))
+    a = np.abs(z_true)
+    u = s * z_pred
+
+    delta = alpha / np.sqrt(3)
+    d2p1 = delta**2 + 1
+
+    # Same quantities as z + sign * delta, written via the absolute value.
+    d_true = s * (np.abs(z_true) + delta)
+    d_pred = s_pred * (np.abs(z_pred) + delta)
+
+    h1 = d2p1 * double_derivative(delta)
+    # Constant pieces are broadcast to the shape of d_pred.
+    H1 = np.full_like(d_pred, h1)
+    # NOTE(review): H2 and h3 use a (1 + d**2) prefactor, whereas czar_loss
+    # and czar_gradient use d2p1 for the matching terms -- confirm intended.
+    H2 = (1.0 + d_pred**2) * double_derivative(d_pred)
+    h3 = (1.0 + d_true**2) * double_derivative(d_true)
+    H3 = np.full_like(d_pred, np.minimum(h1, h3))
+
+    return np.where(u <= 0, H1, np.where(u <= a, H2, H3)) / std**2
+
+
+def make_czar_objective(std, mean=0, alpha=1):
+    """
+    Create a LightGBM-compatible custom objective using CZAR loss.
+
+    The returned callable accepts both LightGBM calling conventions:
+
+    - scikit-learn API (``LGBMRegressor(objective=...)``): ``(y_true, y_pred)``
+      as plain arrays.
+    - native API (``lgb.train`` with ``fobj`` or a callable ``objective``):
+      ``(preds, train_data)`` where ``train_data`` is a ``Dataset``.
+
+    Args:
+        std: Rolling volatility for z-scoring (scalar or array matching training data)
+        mean: Mean for z-scoring (usually 0 for returns)
+        alpha: CZAR alpha parameter (0-1, controls MSE curvature)
+
+    Returns:
+        objective function returning ``(grad, hess)``, with the hessian
+        floored at 1e-6
+    """
+    def objective(y_true_or_dataset, y_pred):
+        # Whichever argument exposes get_label() is the LightGBM Dataset; the
+        # other one holds the raw predictions.
+        if hasattr(y_pred, 'get_label'):
+            # Native API order: (preds, train_data).
+            preds = np.asarray(y_true_or_dataset)
+            y_true = y_pred.get_label()
+        elif hasattr(y_true_or_dataset, 'get_label'):
+            # Dataset passed first (kept for backward compatibility).
+            preds = y_pred
+            y_true = y_true_or_dataset.get_label()
+        else:
+            # scikit-learn API order: (y_true, y_pred) as plain arrays.
+            preds = y_pred
+            y_true = np.asarray(y_true_or_dataset)
+        grad = czar_gradient(y_true, preds, std=std, mean=mean, alpha=alpha)
+        hess = czar_hessian(y_true, preds, std=std, mean=mean, alpha=alpha)
+        # Clip hessian to avoid numerical issues
+        hess = np.maximum(hess, 1e-6)
+        return grad, hess
+    return objective