From d5faaaef1b1e48c90471ce6112d1ff8ee1cab3f7 Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 08:38:47 +0800 Subject: [PATCH] docs(examples): add universe checklist workflow notebook --- .../2026-02-13-universe-examples-pr-plan.md | 197 ++++++++++++++++++ examples/05_universe_checklist_workflow.ipynb | 156 ++++++++++++++ examples/README.md | 2 + tests/examples/test_universe_notebook_docs.py | 49 +++++ 4 files changed, 404 insertions(+) create mode 100644 docs/plans/2026-02-13-universe-examples-pr-plan.md create mode 100644 examples/05_universe_checklist_workflow.ipynb create mode 100644 tests/examples/test_universe_notebook_docs.py diff --git a/docs/plans/2026-02-13-universe-examples-pr-plan.md b/docs/plans/2026-02-13-universe-examples-pr-plan.md new file mode 100644 index 0000000..7e4566d --- /dev/null +++ b/docs/plans/2026-02-13-universe-examples-pr-plan.md @@ -0,0 +1,197 @@ +# Universe Example Notebook Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. 
+ +**Goal:** 在 `examples/` 新增 universe/checklist 實戰 notebook,並更新 examples 導覽,讓使用者能照著範例重現遮罩化研究流程。 + +**Architecture:** 以新 notebook `examples/05_universe_checklist_workflow.ipynb` 作為單一教學入口,README 只保留高層導引。先建立結構驗收測試(檔案存在、README 收錄、必要章節標題),再逐步填入可執行 cell,最後用 nbconvert 執行驗證。 + +**Tech Stack:** Jupyter Notebook、Python、pytest、nbformat/nbconvert + +--- + +### Task 1: 建立範例驗收測試骨架 + +**Files:** +- Create: `tests/examples/test_universe_notebook_docs.py` +- Test: `tests/examples/test_universe_notebook_docs.py` + +**Step 1: Write the failing test** + +```python +from pathlib import Path + + +def test_universe_notebook_exists(): + assert Path("examples/05_universe_checklist_workflow.ipynb").exists() + + +def test_examples_readme_mentions_universe_notebook(): + text = Path("examples/README.md").read_text(encoding="utf-8") + assert "05_universe_checklist_workflow.ipynb" in text +``` + +**Step 2: Run test to verify it fails** + +Run: `uv run pytest tests/examples/test_universe_notebook_docs.py -v` +Expected: FAIL(新 notebook 尚未建立,README 尚未更新) + +**Step 3: Write minimal implementation** + +建立空白 notebook 檔與 README 最小條目。 + +**Step 4: Run test to verify it passes** + +Run: `uv run pytest tests/examples/test_universe_notebook_docs.py -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add tests/examples/test_universe_notebook_docs.py examples/05_universe_checklist_workflow.ipynb examples/README.md +git commit -m "test(examples): add acceptance checks for universe notebook" +``` + +### Task 2: 定義 notebook 結構與教學章節 + +**Files:** +- Modify: `examples/05_universe_checklist_workflow.ipynb` +- Modify: `tests/examples/test_universe_notebook_docs.py` + +**Step 1: Write the failing test** + +在測試中用 `nbformat` 檢查 notebook markdown 標題至少包含: +- `# Universe Checklist Workflow` +- `## Build Universe and Checklist` +- `## Apply Mask to AggBar` +- `## Evaluate Factor with Mask` +- `## Run Backtest with Mask` + +**Step 2: Run test to verify it fails** + +Run: `uv run pytest 
tests/examples/test_universe_notebook_docs.py -v` +Expected: FAIL + +**Step 3: Write minimal implementation** + +在 notebook 加入對應章節 markdown cell(先不放完整程式碼)。 + +**Step 4: Run test to verify it passes** + +Run: `uv run pytest tests/examples/test_universe_notebook_docs.py -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add examples/05_universe_checklist_workflow.ipynb tests/examples/test_universe_notebook_docs.py +git commit -m "docs(examples): scaffold universe workflow notebook sections" +``` + +### Task 3: 填入可執行程式流程(最小可重現) + +**Files:** +- Modify: `examples/05_universe_checklist_workflow.ipynb` +- Modify: `tests/examples/test_universe_notebook_docs.py` + +**Step 1: Write the failing test** + +測試新增關鍵 API 片段檢查(以 cell 內容字串比對): +- `with_mask(` +- `mask=` +- `universe` 或 `checklist` 建立呼叫 + +**Step 2: Run test to verify it fails** + +Run: `uv run pytest tests/examples/test_universe_notebook_docs.py -v` +Expected: FAIL + +**Step 3: Write minimal implementation** + +補齊 notebook code cell: +- 載入資料與必要欄位 +- 建立 universe/checklist +- 產生並套用 mask 到 `AggBar` +- 執行 `Factor.eval(..., mask=...)` +- 執行回測(含 mask 版本) +- 增加與未套 mask 的最小比較表格 + +**Step 4: Run test to verify it passes** + +Run: `uv run pytest tests/examples/test_universe_notebook_docs.py -v` +Expected: PASS + +**Step 5: Commit** + +```bash +git add examples/05_universe_checklist_workflow.ipynb tests/examples/test_universe_notebook_docs.py +git commit -m "feat(examples): add executable universe checklist workflow notebook" +``` + +### Task 4: 更新 examples 導覽與執行驗證 + +**Files:** +- Modify: `examples/README.md` +- Modify: `examples/05_universe_checklist_workflow.ipynb`(若執行驗證需修正) + +**Step 1: Write the failing test** + +在 `tests/examples/test_universe_notebook_docs.py` 增加 README 驗收: +- notebook 目的說明 +- 前置條件 +- 建議執行順序(01 -> ... 
-> 05) + +**Step 2: Run test to verify it fails** + +Run: `uv run pytest tests/examples/test_universe_notebook_docs.py -v` +Expected: FAIL + +**Step 3: Write minimal implementation** + +更新 `examples/README.md`,補齊第 5 本 notebook 的說明與順序。 + +**Step 4: Run test to verify it passes** + +Run: +- `uv run pytest tests/examples/test_universe_notebook_docs.py -v` +- `uv run python -m jupyter nbconvert --to notebook --execute examples/05_universe_checklist_workflow.ipynb --output /tmp/05_universe_checklist_workflow.executed.ipynb` + +Expected: PASS(測試通過,notebook 可從頭執行) + +**Step 5: Commit** + +```bash +git add examples/README.md examples/05_universe_checklist_workflow.ipynb tests/examples/test_universe_notebook_docs.py +git commit -m "docs(examples): document universe notebook and verify execution" +``` + +### Task 5: PR 自我檢查 + +**Files:** +- Modify: `docs/plans/2026-02-13-universe-examples-pr-plan.md`(若需補充執行備註) + +**Step 1: Write the failing test** + +建立人工清單: +- [ ] Notebook 每節都有文字解說與輸出解讀 +- [ ] 程式碼無 look-ahead 寫法 +- [ ] README 與 notebook API 名稱一致 + +**Step 2: Run test to verify it fails** + +人工審閱,任一項不滿足即 FAIL。 + +**Step 3: Write minimal implementation** + +修正文案、變數命名與示例片段。 + +**Step 4: Run test to verify it passes** + +再次人工審閱 + 重新執行測試與 nbconvert。 + +**Step 5: Commit** + +```bash +git add examples/ tests/examples/ +git commit -m "chore(examples): finalize universe notebook PR checklist" +``` diff --git a/examples/05_universe_checklist_workflow.ipynb b/examples/05_universe_checklist_workflow.ipynb new file mode 100644 index 0000000..62896fb --- /dev/null +++ b/examples/05_universe_checklist_workflow.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Universe Checklist Workflow\n", + "\n", + "This notebook shows how to apply universe/checklist masks consistently in factor analysis and backtesting." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build Universe and Checklist\n", + "\n", + "We first create a small synthetic multi-symbol dataset, then define metadata/tags and mask rules." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "from factorium import AggBar, Checklist, Universe\n", + "from factorium.backtest import Backtester\n", + "from factorium.universe import ExcludeStablecoins, MinVolume, TagFilter\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DAY_MS = 86_400_000\n", + "BASE_TS = 1_700_000_000_000\n", + "\n", + "rows = []\n", + "for i in range(30):\n", + " ts = BASE_TS + i * DAY_MS\n", + " rows.extend(\n", + " [\n", + " {\"start_time\": ts, \"end_time\": ts + 3_600_000, \"symbol\": \"BTCUSDT\", \"open\": 100 + i, \"high\": 101 + i, \"low\": 99 + i, \"close\": 100 + i, \"volume\": 20_000 + i, \"alpha\": float(i + 1)},\n", + " {\"start_time\": ts, \"end_time\": ts + 3_600_000, \"symbol\": \"USDCUSDT\", \"open\": 1.0, \"high\": 1.0, \"low\": 1.0, \"close\": 1.0, \"volume\": 50_000, \"alpha\": float(50 - i)},\n", + " {\"start_time\": ts, \"end_time\": ts + 3_600_000, \"symbol\": \"NEWUSDT\", \"open\": 10 + i, \"high\": 11 + i, \"low\": 9 + i, \"close\": 10 + i, \"volume\": 500 + i, \"alpha\": float(100 + i)},\n", + " ]\n", + " )\n", + "\n", + "agg = AggBar(pl.DataFrame(rows))\n", + "\n", + "metadata = {\n", + " \"BTCUSDT\": {\"symbol\": \"BTCUSDT\", \"base_asset\": \"BTC\", \"quote_asset\": \"USDT\", \"status\": \"TRADING\", \"listing_date\": BASE_TS - 365 * DAY_MS},\n", + " \"USDCUSDT\": {\"symbol\": \"USDCUSDT\", \"base_asset\": \"USDC\", \"quote_asset\": \"USDT\", \"status\": \"TRADING\", \"listing_date\": BASE_TS - 365 * DAY_MS, \"is_stablecoin_pair\": True},\n", + " \"NEWUSDT\": {\"symbol\": \"NEWUSDT\", \"base_asset\": \"NEW\", \"quote_asset\": 
\"USDT\", \"status\": \"TRADING\", \"listing_date\": BASE_TS - 30 * DAY_MS},\n", + "}\n", + "tags = {\"BTC\": [\"layer1\"], \"USDC\": [\"stablecoin\"], \"NEW\": [\"meme\"]}\n", + "\n", + "universe = Universe([ExcludeStablecoins()])\n", + "checklist = Checklist([TagFilter(include=[\"layer1\", \"meme\"]), MinVolume(window=5, threshold=1_000)])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Apply Mask to AggBar\n", + "\n", + "Use `AggBar.with_mask(...)` to add boolean columns that can be reused downstream." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agg = agg.with_mask(name=\"in_universe\", mask_source=universe, metadata=metadata, tags=tags)\n", + "agg = agg.with_mask(name=\"in_checklist\", mask_source=checklist, metadata=metadata, tags=tags)\n", + "\n", + "agg.data[[\"symbol\", \"in_universe\", \"in_checklist\"]].head(9)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate Factor with Mask\n", + "\n", + "Use the same mask in `Factor.eval(..., mask=...)` so ranking/evaluation happens only inside your tradable universe." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "factor = agg[\"alpha\"]\n",
+    "masked_result = factor.eval(prices=agg, periods=1, quantiles=2, mask=\"in_checklist\")\n",
+    "unmasked_result = factor.eval(prices=agg, periods=1, quantiles=2)\n",
+    "\n",
+    "print(\"factor:\", masked_result.factor_name)\n",
+    "masked_rows = sum(len(df) for df in masked_result.quantile_returns.values())\n",
+    "unmasked_rows = sum(len(df) for df in unmasked_result.quantile_returns.values())\n",
+    "print(\"masked quantile rows:\", masked_rows)\n",
+    "print(\"unmasked quantile rows:\", unmasked_rows)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Run Backtest with Mask\n",
+    "\n",
+    "Finally, pass `mask=` into `Backtester(...)` to keep positions inside the same universe constraints."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "try:\n",
+    " bt = Backtester(\n",
+    " prices=agg,\n",
+    " signal=factor,\n",
+    " holding_period=3,\n",
+    " neutralization=\"market\",\n",
+    " mask=\"in_universe\",\n",
+    " )\n",
+    " bt_result = bt.run()\n",
+    " print(bt_result.metrics)\n",
+    "except Exception as exc:\n",
+    " print(\"Backtest run needs enough cross-sectional signals in each bar:\", exc)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/README.md b/examples/README.md
index 5cd600b..768ab56 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -10,6 +10,7 @@ Interactive Jupyter notebooks demonstrating factor research workflows with Facto
 | [02 — Mean Reversion Factor](02_mean_reversion_factor.ipynb) | Mean reversion with cross-sectional processing | Z-score distance, volatility normalization, `cs_rank`, `cs_zscore`, `cs_winsorize`, market-neutral 
vs. long-only backtest, advanced operators (`ts_autocorr`, `ts_kurtosis`, `ts_skewness`) | | [03 — Data Loading & Exploration](03_data_loading_and_exploration.ipynb) | Deep dive into data handling | `BinanceDataLoader`, `AggBar` methods, time-bar intervals (1min/5min/1h), slicing, CSV/Parquet export, `ResearchSession` from files | | [04 — Multi-Factor Combination](04_multi_factor_combination.ipynb) | Combine and select factors | Factor correlations, `ts_corr`, `cs_neutralize`, `CompositeFactor` (equal/custom/z-score), single vs. composite backtest, factor selection workflow | +| [05 — Universe & Checklist Workflow](05_universe_checklist_workflow.ipynb) | Constrain research and backtest to a tradable asset universe | `Universe`, `Checklist`, `AggBar.with_mask`, `Factor.eval(..., mask=...)`, `Backtester(..., mask=...)` | ## Getting Started @@ -44,3 +45,4 @@ If you're new to Factorium, we recommend starting with: 2. **Notebook 01** — Walk through a full factor research workflow 3. **Notebook 02** — Learn about signal processing and cross-sectional transforms 4. **Notebook 04** — Combine multiple factors into a composite signal +5. 
**Notebook 05** — Apply universe/checklist masks consistently in analysis and backtests
diff --git a/tests/examples/test_universe_notebook_docs.py b/tests/examples/test_universe_notebook_docs.py
new file mode 100644
index 0000000..477b8c6
--- /dev/null
+++ b/tests/examples/test_universe_notebook_docs.py
@@ -0,0 +1,64 @@
+"""Acceptance checks for the universe/checklist example notebook and its README entry."""
+
+import json
+from pathlib import Path
+
+# Resolve paths from this file (tests/examples/<this file>) rather than the
+# current working directory, so the checks pass wherever pytest is launched.
+REPO_ROOT = Path(__file__).resolve().parents[2]
+NOTEBOOK_PATH = REPO_ROOT / "examples" / "05_universe_checklist_workflow.ipynb"
+README_PATH = REPO_ROOT / "examples" / "README.md"
+
+
+def test_universe_notebook_exists() -> None:
+    """The notebook file is present in the examples directory."""
+    assert NOTEBOOK_PATH.exists()
+
+
+def test_examples_readme_mentions_universe_notebook() -> None:
+    """The examples README references the notebook by file name."""
+    text = README_PATH.read_text(encoding="utf-8")
+    assert "05_universe_checklist_workflow.ipynb" in text
+
+
+def test_universe_notebook_has_required_sections() -> None:
+    """Every required heading appears in the notebook's markdown cells."""
+    notebook = json.loads(NOTEBOOK_PATH.read_text(encoding="utf-8"))
+    markdown_text = "\n".join(
+        "".join(cell.get("source", []))
+        for cell in notebook.get("cells", [])
+        if cell.get("cell_type") == "markdown"
+    )
+
+    required = [
+        "# Universe Checklist Workflow",
+        "## Build Universe and Checklist",
+        "## Apply Mask to AggBar",
+        "## Evaluate Factor with Mask",
+        "## Run Backtest with Mask",
+    ]
+    for section in required:
+        assert section in markdown_text, f"missing notebook section: {section}"
+
+
+def test_universe_notebook_has_core_api_snippets() -> None:
+    """The raw notebook text contains each core API snippet listed below."""
+    text = NOTEBOOK_PATH.read_text(encoding="utf-8")
+    required_snippets = [
+        "with_mask(",
+        "mask=",
+        "Universe(",
+        "Checklist(",
+        "Backtester(",
+    ]
+    for snippet in required_snippets:
+        assert snippet in text, f"missing API snippet: {snippet}"
+
+
+def test_examples_readme_has_universe_guidance() -> None:
+    """The README contains the notebook title, guidance headings text, and a Notebook 05 entry."""
+    text = README_PATH.read_text(encoding="utf-8")
+    assert "Universe & Checklist Workflow" in text
+    assert "Prerequisites" in text
+    assert "Recommended Reading Order" in text
+    assert "Notebook 05" in text