shiviancodes · shiviancodes · May 17, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/PROGRESS.md b/PROGRESS.md
@@ -40,3 +40,4 @@ Phase docs live in `docs/progress/` (untracked, local only).
 | 9 | EDGAR XBRL fundamentals Session 1 | COMPLETE — migration 0011 adds form_type/accession_number/UNIQUE; `form_xbrl.py` parser (filed≥end integrity invariant, 5 metrics, 4 revenue aliases); 74,662 rows / 137 of 140 tickers covered; 4 factors; **`fundamental_margin_compression` t=+4.834 at 126d — first NEXUS factor to PASS HLZ M=400 Bonferroni** (composer sign-flipped vs literature: compression = buy; CALM-regime t=+6.35; sub-window late-third t=+5.13 — strengthening, not decaying); registered status='approved' in signal_registry; rolling_registry refreshed (464 rows); ROA decayed (late-third t=-0.06) → NOT registered; rd_intensity / asset_growth null; **paper trader CAGR +8.72%, Sharpe 0.488, Max DD -32.68%** (vs Phase 8 baseline +5.97% / 0.374 / -36.68%) | `docs/progress/phase_9.md` |
 | 9 | HGT retraining Session 2 | COMPLETE — retrained HGT on 140-ticker graph (no code changes; metadata extracted dynamically); val AUC **0.9807** at epoch 280 (vs 0.9803 / e=240 on the prior 30-ticker run); 7m 03s wall-clock; `MODEL_VERSION` bumped `hgt_link_pred_v1` → **`hgt_link_pred_v2`**; node_embeddings re-backfilled at 58 monthly snapshots → **8,120 rows** (140 × 58, dim=64); embedding validation passed (cos(NVDA,AMD)=0.98 > cos(NVDA,ARW)=0.63; per-component std median 0.05); **`graph_gnn_embedding_drift` IC backtest NULL at all horizons** (t=+0.382 @ 21d / +0.524 @ 63d / +0.368 @ 126d on N=52..57; HLZ fail by 10×); registered `status='rejected'` in signal_registry with full evidence record; paper trader unchanged from Phase 9 Session 1 | `docs/progress/phase_9.md` |
 | 10 | Regime-aware aggregator Session 1 | COMPLETE — **NEGATIVE result, hypothesis refuted, flag rolled back**. Built `_apply_regime_gate` in aggregator + `fsi_value` param on `load_factor_records` + FSI wiring in paper_trader (5 new unit tests, 155/156 suite pass). Tested `non_calm_action: 'zero'` on `fundamental_margin_compression` (126d NON-CALM t=−2.31, N=8). Paper trader: **CAGR +8.72% → +7.68%, Sharpe 0.488 → 0.450, Max DD −32.68% → −35.39%** — all three metrics worsened. Monthly-horizon audit: factor made money in 4 of 6 NON-CALM forward months (gated rebalances sat at start of late-2022 recovery). 126d drag is a horizon artifact; doesn't translate to monthly rebalancing. Registry flag rolled back; regime-aware *infrastructure* retained as opt-in capability for future factors | `docs/progress/phase_10.md` |
+| 10 | Conviction-weighted institutional flow Session 2 | COMPLETE — migration 0012 (`fund_strategy`, 22 rows: 9 T1 + 4 T2 + 6 T3 + 3 excluded banks); `compose_conviction_flow` pure helper (Δpct_portfolio, point-in-time gated on `available_as_of`); 9 TDD tests; institutional panel wired opt-in into backtest. **Primary `institutional_conviction_flow` NULL** (best raw t=+0.94 at 63d, HLZ fail by 4×) → `status='rejected'`. Ultra-T1 sub-test (5 funds: Lone Pine/Viking/Tiger/Coatue/Point72) full-window 21d t=+1.74; late-third 21d t=+3.02, 63d t=+3.33 — material but in-sample, fails HLZ M=400 (|t|≥3.78). Registered **`institutional_conviction_flow_ultra_t1` as `status='research'`** with dated review gate (2026-08-15, promote if full-window 21d t > 2.0 on extended Q2-2026 sample). NOT wired into aggregator. Paper trader unchanged. | `docs/progress/phase_10.md` |
diff --git a/migrations/versions/0012_fund_strategy.py b/migrations/versions/0012_fund_strategy.py
@@ -0,0 +1,135 @@
+"""fund_strategy classification table for conviction-weighted institutional flow.
+
+Revision ID: 0012
+Revises: 0011
+Create Date: 2026-05-17
+
+Phase 10 Session 2: classifies the 22 funds in companies (node_type='fund')
+into three conviction tiers used by institutional_conviction_flow:
+
+    Tier 1 (weight 1.0): active high-conviction managers
+    Tier 2 (weight 0.4): active but diversified
+    Tier 3 (weight 0.0): passive / index
+
+exclude_from_flow flags funds whose 13F filings are dominated by custody or
+client-facilitation activity (the three bank holding companies) and cannot
+be interpreted as a conviction signal; the factor loader drops these rows
+before any weighting step.
+
+Tier counts: 9 T1 + 4 T2 + 6 T3 (passive) + 3 T3 (excluded) = 22 rows.
+"""
+from collections.abc import Sequence
+from typing import Union
+
+import sqlalchemy as sa
+from alembic import op
+
+revision: str = "0012"
+down_revision: Union[str, Sequence[str], None] = "0011"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+# Seed rows: (fund_cik, conviction_tier, exclude_from_flow, strategy_note).
+# CIK-keyed (not id-keyed) so the migration is portable across environments
+# where companies.id autoincrement may have been re-sequenced.
+# upgrade() resolves each CIK to companies.id at insert time (matching on
+# companies.cik with node_type='fund') and compares the resulting row count
+# against len(_SEED), so adding or removing an entry changes the expected count.
+_SEED: list[tuple[str, int, bool, str]] = [
+    # Tier 1 — active high-conviction (weight 1.0)
+    ("1061165", 1, False, "Lone Pine Capital — concentrated long/short, avg 5.5 holdings"),
+    ("1103804", 1, False, "Viking Global — concentrated long/short, avg 11.6 holdings"),
+    ("1167483", 1, False, "Tiger Global — concentrated growth equity, avg 13.2 holdings"),
+    ("1135730", 1, False, "Coatue — concentrated TMT specialist, avg 16.6 holdings"),
+    ("1603466", 1, False, "Point72 — active discretionary equity"),
+    ("1273087", 1, False, "Millennium — multi-manager active equity"),
+    ("1037389", 1, False, "Renaissance Technologies — systematic active long-bias"),
+    ("1179392", 1, False, "Two Sigma — quant active"),
+    ("1423053", 1, False, "Citadel — multi-strat active research-driven sleeves"),
+    # Tier 2 — active diversified (weight 0.4)
+    ("1697748", 2, False, "ARK — active thematic, theme-driven rotation"),
+    ("1088875", 2, False, "Baillie Gifford — active growth, house-style"),
+    ("1422848", 2, False, "Capital Research Global Investors — American Funds family"),
+    ("315066", 2, False, "FMR (Fidelity) — active mutual fund family"),
+    # Tier 3 — passive / index (weight 0.0)
+    ("102909", 3, False, "Vanguard — index-dominant"),
+    ("1364742", 3, False, "BlackRock — iShares-dominant"),
+    ("93751", 3, False, "State Street — SPDR-dominant"),
+    ("1214717", 3, False, "Geode — Fidelity index sub-advisor"),
+    ("914208", 3, False, "Invesco — diversified, index-heavy (128 names)"),
+    ("1374170", 3, False, "Norges Bank — sovereign wealth, near-index"),
+    # Tier 3 + exclude_from_flow=TRUE — bank 13F mixes prop + custody + facilitation
+    ("886982", 3, True, "Goldman Sachs — bank 13F, not conviction-classifiable"),
+    ("895421", 3, True, "Morgan Stanley — bank 13F, not conviction-classifiable"),
+    ("19617", 3, True, "JPMorgan Chase — bank 13F, not conviction-classifiable"),
+]
+
+
+def upgrade() -> None:
+    """Create ``fund_strategy``, seed it from ``_SEED``, and verify the seed.
+
+    Each seed CIK is resolved to ``companies.id`` at insert time, so a CIK
+    with no matching ``companies(node_type='fund')`` row inserts nothing.
+    After seeding, one of three outcomes applies:
+
+    * every seed row landed: the migration is complete;
+    * nothing landed and ``companies`` holds no fund rows at all: emit a
+      warning and defer seeding to ``scripts/bootstrap_fund_strategy.py``;
+    * anything else: raise, naming the seed CIKs that failed to match and
+      the CIKs that matched more than one fund row.
+
+    Raises:
+        RuntimeError: if the seed is partial or duplicated, or if nothing
+            was seeded even though fund rows exist in ``companies`` (for
+            example because the stored CIK format differs from ``_SEED``).
+    """
+    import warnings
+
+    op.create_table(
+        "fund_strategy",
+        sa.Column(
+            "fund_id",
+            sa.BigInteger,
+            sa.ForeignKey("companies.id"),
+            primary_key=True,
+        ),
+        sa.Column("conviction_tier", sa.SmallInteger, nullable=False),
+        sa.Column(
+            "exclude_from_flow",
+            sa.Boolean,
+            nullable=False,
+            server_default=sa.text("FALSE"),
+        ),
+        sa.Column("strategy_note", sa.Text, nullable=True),
+        sa.CheckConstraint(
+            "conviction_tier IN (1, 2, 3)",
+            name="ck_fund_strategy_tier_range",
+        ),
+    )
+    op.create_index(
+        "ix_fund_strategy_tier",
+        "fund_strategy",
+        ["conviction_tier", "exclude_from_flow"],
+    )
+
+    bind = op.get_bind()
+    # Built once and re-executed with different parameters for every seed row.
+    insert_stmt = sa.text(
+        """
+        INSERT INTO fund_strategy
+            (fund_id, conviction_tier, exclude_from_flow, strategy_note)
+        SELECT id, :tier, :excl, :note
+        FROM companies
+        WHERE cik = :cik AND node_type = 'fund'
+        """
+    )
+    for cik, tier, excl, note in _SEED:
+        bind.execute(
+            insert_stmt,
+            {"cik": cik, "tier": tier, "excl": excl, "note": note},
+        )
+
+    expected = len(_SEED)
+    seeded = bind.execute(sa.text("SELECT COUNT(*) FROM fund_strategy")).scalar()
+    if seeded == expected:
+        return
+
+    fund_rows = bind.execute(
+        sa.text("SELECT COUNT(*) FROM companies WHERE node_type = 'fund'")
+    ).scalar()
+    if seeded == 0 and fund_rows == 0:
+        # Fresh DB / pre-data-bootstrap environment (CI, new dev box). The
+        # 22 fund rows in `companies` are inserted as a side effect of
+        # scripts/run_ownership_extraction.py (EDGAR 13F ingestion), which
+        # cannot run during alembic upgrade. Defer the seed to the
+        # companion script `scripts/bootstrap_fund_strategy.py`, to be
+        # invoked AFTER 13F ingestion completes.
+        warnings.warn(
+            "fund_strategy table created but seed skipped: no rows in "
+            "companies(node_type='fund') yet. Run "
+            "scripts/bootstrap_fund_strategy.py after EDGAR 13F ingestion "
+            "to populate.",
+            stacklevel=2,
+        )
+        return
+
+    # The table was created by this migration, so every row in it came from
+    # the seed loop above; joining back to companies shows which CIKs matched
+    # and how many fund rows each one matched.
+    matches = {
+        str(cik): count
+        for cik, count in bind.execute(
+            sa.text(
+                """
+                SELECT c.cik, COUNT(*)
+                FROM fund_strategy fs
+                JOIN companies c ON c.id = fs.fund_id
+                GROUP BY c.cik
+                """
+            )
+        )
+    }
+    missing = sorted(cik for cik, *_ in _SEED if cik not in matches)
+    duplicated = sorted(cik for cik, count in matches.items() if count > 1)
+    raise RuntimeError(
+        f"fund_strategy seed integrity check failed: "
+        f"expected {expected} rows, inserted {seeded}. "
+        f"Seed CIKs missing from companies(node_type='fund'): "
+        f"{missing or 'none'}; CIKs matching more than one fund row: "
+        f"{duplicated or 'none'}."
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``ix_fund_strategy_tier`` index and then the ``fund_strategy`` table.
+
+    Any rows held in ``fund_strategy`` are discarded along with the table.
+    """
+    op.drop_index("ix_fund_strategy_tier", table_name="fund_strategy")
+    op.drop_table("fund_strategy")
diff --git a/nexus/signals/backtest.py b/nexus/signals/backtest.py
@@ -51,6 +51,7 @@
     prior_panel as _fund_prior_panel,
 )
 from nexus.signals.factors.graph_based import _compose_signal
+from nexus.signals.factors.institutional import compose_conviction_flow
 from nexus.signals.hlz import format_table, hlz_correct, update_registry_hlz
 
 FORWARD_TRADING_DAYS = 21
@@ -459,6 +460,75 @@ def _xs_fundamental_roa(
     return _df_to_dict(compose_roa(curr))
 
 
+# ---------------------------------------------------------------------------
+# Institutional conviction flow panel (Phase 10 Session 2)
+# ---------------------------------------------------------------------------
+
+@dataclass
+class _InstitutionalPanel:
+    """Inputs for the institutional conviction-flow cross-section.
+
+    Attributes:
+        ownership: ``ownership_edges`` rows with a non-NULL ``pct_portfolio``.
+            Columns: fund_id, ticker, quarter_end, available_as_of,
+            pct_portfolio (float). The cross-section helper filters by
+            ``available_as_of <= as_of`` BEFORE picking (curr, prev) per fund.
+        fund_weights: ``fund_id -> weight`` map built from ``fund_strategy``.
+            It contains only funds with
+            ``exclude_from_flow=FALSE AND conviction_tier IN (1, 2)``. T3 and
+            excluded banks are absent — they cannot contribute at all.
+    """
+    ownership: pd.DataFrame
+    fund_weights: dict[int, float]
+
+
+# Conviction tier -> flow weight for the tiers that contribute to the factor.
+# Tier 3 has no entry: the loader's fund_strategy query selects tiers 1 and 2 only.
+_TIER_WEIGHT_LOOKUP: dict[int, float] = {1: 1.0, 2: 0.4}
+
+
+def _load_institutional_panel(engine) -> _InstitutionalPanel:
+    """Read the ownership rows and tier weights used by the conviction-flow factor.
+
+    Args:
+        engine: connectable handed to ``pd.read_sql`` for both queries.
+
+    Returns:
+        An ``_InstitutionalPanel`` holding every ``ownership_edges`` row with a
+        non-NULL ``pct_portfolio`` (date columns converted to ``datetime.date``)
+        and a ``fund_id -> weight`` map for non-excluded tier 1 / tier 2 funds.
+    """
+    ownership = pd.read_sql(
+        sql=text(
+            """
+            SELECT oe.fund_id, c.ticker, oe.quarter_end, oe.available_as_of,
+                   CAST(oe.pct_portfolio AS FLOAT) AS pct_portfolio
+            FROM ownership_edges oe
+            JOIN companies c ON c.id = oe.company_id
+            WHERE oe.pct_portfolio IS NOT NULL
+            """
+        ),
+        con=engine,
+    )
+    if not ownership.empty:
+        # Both columns are normalised to plain date objects.
+        for column in ("quarter_end", "available_as_of"):
+            ownership[column] = pd.to_datetime(ownership[column]).dt.date
+
+    tiers = pd.read_sql(
+        sql=text(
+            """
+            SELECT fund_id, conviction_tier
+            FROM fund_strategy
+            WHERE exclude_from_flow = FALSE
+              AND conviction_tier IN (1, 2)
+            """
+        ),
+        con=engine,
+    )
+    # Translate each fund's tier into its numeric weight.
+    weight_by_fund = {
+        int(fund_id): _TIER_WEIGHT_LOOKUP[int(tier)]
+        for fund_id, tier in zip(tiers["fund_id"], tiers["conviction_tier"])
+    }
+    return _InstitutionalPanel(ownership=ownership, fund_weights=weight_by_fund)
+
+
+def _xs_institutional_conviction_flow(
+    as_of: date, panel: _InstitutionalPanel
+) -> dict[str, float]:
+    """Return the ``ticker -> signal`` cross-section for ``as_of``.
+
+    An empty dict is returned when the panel has no ownership rows, when no
+    fund carries a weight, or when ``compose_conviction_flow`` yields an
+    empty frame.
+    """
+    has_inputs = not panel.ownership.empty and bool(panel.fund_weights)
+    if has_inputs:
+        flow = compose_conviction_flow(panel.ownership, as_of, panel.fund_weights)
+        if not flow.empty:
+            return dict(zip(flow["ticker"], flow["signal"]))
+    return {}
+
+
 @dataclass(frozen=True)
 class _SupplyEdgeRow:
     filing_date: date
@@ -572,6 +642,7 @@ def _xs_gnn_embedding_drift(as_of: date, panel: _EmbeddingPanel) -> dict[str, fl
 def _factor_xs_dispatch(
     supply_panel: _SupplyEdgePanel | None = None,
     fundamentals_panel: _FundamentalsPanel | None = None,
+    institutional_panel: _InstitutionalPanel | None = None,
 ) -> dict[str, callable]:
     """Return name → cross-section closure for each Tier A factor.
 
@@ -611,6 +682,11 @@ def _factor_xs_dispatch(
         base["fundamental_roa"] = (
             lambda snap, pp, cp, ep: _xs_fundamental_roa(snap, fp)
         )
+    if institutional_panel is not None:
+        ip = institutional_panel
+        base["institutional_conviction_flow"] = (
+            lambda snap, pp, cp, ep: _xs_institutional_conviction_flow(snap, ip)
+        )
     return base
 
 
@@ -656,6 +732,7 @@ def compute_factor_ics(
     period_filter: set[date] | None = None,
     supply_panel: _SupplyEdgePanel | None = None,
     fundamentals_panel: _FundamentalsPanel | None = None,
+    institutional_panel: _InstitutionalPanel | None = None,
 ) -> tuple[list[float], list[int], list[date]]:
     """Compute per-period ICs for one factor across a set of snapshots.
 
@@ -668,7 +745,9 @@ def compute_factor_ics(
     Returns (ics, cross_section_sizes, periods).
     """
     dispatch = _factor_xs_dispatch(
-        supply_panel=supply_panel, fundamentals_panel=fundamentals_panel,
+        supply_panel=supply_panel,
+        fundamentals_panel=fundamentals_panel,
+        institutional_panel=institutional_panel,
     )
     xs_func = dispatch[name]
     ics: list[float] = []