From 22fe3d2130b83f2ac6108e9785719f4b8f85ab7e Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 15:19:43 +0800 Subject: [PATCH 01/10] fix(data): align date range to UTC midnight --- src/factorium/data/utils.py | 4 ++-- tests/data/test_timestamp_utils.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/factorium/data/utils.py b/src/factorium/data/utils.py index 2bd70a3..5feb482 100644 --- a/src/factorium/data/utils.py +++ b/src/factorium/data/utils.py @@ -3,7 +3,7 @@ """ import logging -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone logger = logging.getLogger(__name__) @@ -52,7 +52,7 @@ def calculate_date_range( return start, start + timedelta(days=days) # Snap to UTC midnight for consistent daily boundaries - today_midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + today_midnight = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0) # end = start of tomorrow (exclusive) to include today's full data end = today_midnight + timedelta(days=1) diff --git a/tests/data/test_timestamp_utils.py b/tests/data/test_timestamp_utils.py index 6187d76..362cdfc 100644 --- a/tests/data/test_timestamp_utils.py +++ b/tests/data/test_timestamp_utils.py @@ -1,12 +1,16 @@ # tests/data/test_timestamp_utils.py +from datetime import datetime, timedelta, timezone + import polars as pl import pytest +from factorium.data import utils as data_utils from factorium.data.loader import ( _convert_to_target_unit, _detect_timestamp_unit, _normalize_timestamps_to_ms, ) +from factorium.data.utils import calculate_date_range def test_detect_timestamp_unit_seconds(): @@ -39,6 +43,23 @@ def test_convert_to_target_unit_invalid_unit(): _convert_to_target_unit(1704067200000, "invalid") +def test_calculate_date_range_uses_utc_midnight(monkeypatch): + class FakeDateTime(datetime): + @classmethod + def now(cls, tz=None): + if tz is None: + return cls(2026, 2, 14, 23, 30, tzinfo=timezone(timedelta(hours=8))) + return cls(2026, 2, 14, 15, 30, tzinfo=timezone.utc).astimezone(tz) + + monkeypatch.setattr(data_utils, "datetime", FakeDateTime) + + start, end = calculate_date_range(days=1) + + expected_today_midnight = datetime(2026, 2, 14, 0, 0, tzinfo=timezone.utc) + assert start == expected_today_midnight + assert end == expected_today_midnight + timedelta(days=1) + + class TestNormalizeTimestampsToMs: """Tests for _normalize_timestamps_to_ms function.""" From a02281e5db30e292dedd1168c482913432405ce0 Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 15:19:43 +0800 Subject: [PATCH 02/10] fix(universe): use run_async wrappers and warn on full tag fetch --- src/factorium/universe/metadata.py | 5 +-- src/factorium/universe/tags.py | 16 ++++++++-- tests/universe/test_metadata.py | 33 ++++++++++++++++++++ tests/universe/test_tags.py | 50 ++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 4 deletions(-) diff --git a/src/factorium/universe/metadata.py b/src/factorium/universe/metadata.py index 09ca6cd..01e6b6a 100644 --- a/src/factorium/universe/metadata.py +++ b/src/factorium/universe/metadata.py @@ -1,6 +1,5 @@ from __future__ import annotations -import asyncio import json import time from pathlib import Path @@ -47,7 +46,9 @@ async def fetch_async(self) -> dict[str, SymbolMetadata]: return parsed def fetch(self) -> dict[str, SymbolMetadata]: - return asyncio.run(self.fetch_async()) + from ..data.loader import _run_async + + return _run_async(self.fetch_async()) def _parse_exchange_info(self, data: dict) -> dict[str, SymbolMetadata]: output: dict[str, SymbolMetadata] = {} diff --git a/src/factorium/universe/tags.py b/src/factorium/universe/tags.py index d55dfb1..67c6498 100644 --- a/src/factorium/universe/tags.py +++ b/src/factorium/universe/tags.py @@ -2,6 +2,7 @@ import asyncio import json +import logging import time from pathlib import Path @@ -9,10 +10,16 @@ COINGECKO_BASE_URL = "https://api.coingecko.com/api/v3" +logger = logging.getLogger(__name__) class TagProvider: - """Fetch and cache token categories from CoinGecko.""" + """Fetch and cache token categories from CoinGecko. + + Note: + Calling ``fetch``/``fetch_async`` with ``symbols=None`` performs + full-market category fetching and can be slow for large universes. + """ def __init__( self, @@ -46,6 +53,9 @@ async def fetch_async(self, symbols: list[str] | None = None) -> dict[str, list[ if all(sym in cached for sym in requested): return {sym: cached[sym] for sym in requested} + if requested is None: + logger.warning("Fetching tags for all symbols may take a long time; pass symbols to limit scope.") + headers: dict[str, str] | None = None if self.api_key: headers = {"x-cg-pro-api-key": self.api_key} @@ -81,7 +91,9 @@ async def fetch_async(self, symbols: list[str] | None = None) -> dict[str, list[ return {sym: result.get(sym, []) for sym in requested if sym in result} def fetch(self, symbols: list[str] | None = None) -> dict[str, list[str]]: - return asyncio.run(self.fetch_async(symbols=symbols)) + from ..data.loader import _run_async + + return _run_async(self.fetch_async(symbols=symbols)) def _load_cache(self) -> dict[str, list[str]] | None: if not self._cache_path.exists(): diff --git a/tests/universe/test_metadata.py b/tests/universe/test_metadata.py index 4b6e510..51f0e48 100644 --- a/tests/universe/test_metadata.py +++ b/tests/universe/test_metadata.py @@ -3,6 +3,7 @@ import pytest +import factorium.data.loader as data_loader from factorium.universe.metadata import MetadataProvider @@ -90,3 +91,35 @@ async def should_not_call(*args, **kwargs): out = provider.fetch() assert out == cached + + +def test_metadata_fetch_uses_run_async(monkeypatch: pytest.MonkeyPatch) -> None: + provider = MetadataProvider(market="um") + expected = { + "BTCUSDT": { + "symbol": "BTCUSDT", + "base_asset": "BTC", + "quote_asset": "USDT", + "status": "TRADING", + "listing_date": 1_700_000_000_000, + "is_leveraged": False, + "is_stablecoin_pair": False, + } + } + + async def fake_fetch_async() -> dict[str, dict[str, object]]: + return expected + + called = {"value": False} + + def fake_run_async(coro): + called["value"] = True + coro.close() + return expected + + monkeypatch.setattr(provider, "fetch_async", fake_fetch_async) + monkeypatch.setattr(data_loader, "_run_async", fake_run_async) + + out = provider.fetch() + assert out == expected + assert called["value"] is True diff --git a/tests/universe/test_tags.py b/tests/universe/test_tags.py index 1827602..309e477 100644 --- a/tests/universe/test_tags.py +++ b/tests/universe/test_tags.py @@ -3,6 +3,7 @@ import pytest +import factorium.data.loader as data_loader from factorium.universe.tags import TagProvider @@ -60,3 +61,52 @@ async def should_not_call(*args, **kwargs): out = provider.fetch(symbols=["BTC"]) assert out == {"BTC": ["Layer 1"]} + + +def test_tags_fetch_uses_run_async(monkeypatch: pytest.MonkeyPatch) -> None: + provider = TagProvider() + expected = {"BTC": ["Layer 1"]} + + async def fake_fetch_async(symbols=None): + del symbols + return expected + + called = {"value": False} + + def fake_run_async(coro): + called["value"] = True + coro.close() + return expected + + monkeypatch.setattr(provider, "fetch_async", fake_fetch_async) + monkeypatch.setattr(data_loader, "_run_async", fake_run_async) + + out = provider.fetch(symbols=["BTC"]) + assert out == expected + assert called["value"] is True + + +def test_fetch_warns_when_symbols_is_none( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path, caplog: pytest.LogCaptureFixture +) -> None: + provider = TagProvider(cache_dir=tmp_path, cache_ttl=0) + + async def fake_request_json(session, url, params=None, headers=None): + del session, params, headers + if url.endswith("/coins/list"): + return [{"id": "bitcoin", "symbol": "btc", "name": "Bitcoin"}] + if url.endswith("/coins/bitcoin"): + return {"categories": ["Layer 1"]} + raise AssertionError(f"unexpected url: {url}") + + async def no_sleep(seconds: float) -> None: + del seconds + + monkeypatch.setattr(provider, "_request_json", fake_request_json) + monkeypatch.setattr("factorium.universe.tags.asyncio.sleep", no_sleep) + + caplog.set_level("WARNING") + out = provider.fetch(symbols=None) + + assert "BTC" in out + assert "may take a long time" in caplog.text From a8b82d3093fe586138181399956f3a80a921a918 Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 15:19:43 +0800 Subject: [PATCH 03/10] fix(analyzer): re-prepare data when requested periods are missing --- src/factorium/factors/analyzer.py | 9 +++++++-- tests/factors/test_analyzer.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/factorium/factors/analyzer.py b/src/factorium/factors/analyzer.py index b90efc4..2488959 100644 --- a/src/factorium/factors/analyzer.py +++ b/src/factorium/factors/analyzer.py @@ -243,8 +243,13 @@ def __init__(self, factor: Factor, prices: AggBar | Factor, quantiles: int = 5, def _ensure_data_prepared(self, periods: list[int] | None = None, price_col: str | None = None) -> None: """Ensure data is prepared. Auto-calls prepare_data() if needed.""" - if not hasattr(self, "_clean_data"): - logger.info("Data not prepared. Auto-calling prepare_data()...") + has_missing_period = bool( + periods + and hasattr(self, "_clean_data") + and any(f"period_{p}" not in self._clean_data.columns for p in periods) + ) + if not hasattr(self, "_clean_data") or has_missing_period: + logger.info("Data not prepared or missing requested periods. Auto-calling prepare_data()...") self.prepare_data(periods=periods, price_col=price_col) def analyze(self, price_col: str = "close", periods: int | list[int] = 1) -> FactorAnalysisResult: diff --git a/tests/factors/test_analyzer.py b/tests/factors/test_analyzer.py index 3eab3c1..fba2582 100644 --- a/tests/factors/test_analyzer.py +++ b/tests/factors/test_analyzer.py @@ -475,3 +475,17 @@ def test_analyze_empty_periods_list_raises_error(sample_data): with pytest.raises(ValueError, match="Periods list cannot be empty"): analyzer.analyze(periods=[]) + + +def test_ensure_data_prepared_reprepare_when_period_missing(sample_data): + agg = AggBar(sample_data) + factor = agg["my_factor"] + prices = agg["close"] + analyzer = FactorAnalyzer(factor, prices) + + analyzer.prepare_data(periods=[1]) + assert "period_1" in analyzer._clean_data.columns + assert "period_5" not in analyzer._clean_data.columns + + analyzer._ensure_data_prepared(periods=[1, 5]) + assert "period_5" in analyzer._clean_data.columns From f14a06109d6bbcc8fca65a8923e4eb3ee128d17d Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 15:19:43 +0800 Subject: [PATCH 04/10] refactor(backtest): remove redundant mask reapplication --- src/factorium/backtest/vectorized.py | 4 +--- tests/backtest/test_vectorized.py | 35 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/factorium/backtest/vectorized.py b/src/factorium/backtest/vectorized.py index 743b12a..52a30d7 100644 --- a/src/factorium/backtest/vectorized.py +++ b/src/factorium/backtest/vectorized.py @@ -195,9 +195,7 @@ def _calculate_weights(self, df: pl.DataFrame) -> pl.DataFrame: ) if self._mask is not None: - df = df.with_columns( - pl.when(pl.col(self._mask).fill_null(False)).then(pl.col("weight")).otherwise(0.0).alias("weight") - ).drop("_masked_signal") + df = df.drop("_masked_signal") # Apply constraints for constraint in self.constraints: diff --git a/tests/backtest/test_vectorized.py b/tests/backtest/test_vectorized.py index 1d69066..c8ae6ab 100644 --- a/tests/backtest/test_vectorized.py +++ b/tests/backtest/test_vectorized.py @@ -170,6 +170,41 @@ def test_long_only_weights_sum_to_one(self): if ws > 0: assert abs(ws - 1.0) < 1e-10 + def test_calculate_weights_masked_assets_remain_zero_after_neutralize(self): + timestamps = [1704067200000, 1704070800000, 1704074400000] + rows = [] + for i, ts in enumerate(timestamps): + for symbol, base_price, in_universe in [ + ("A", 100.0, True), + ("B", 80.0, True), + ("C", 60.0, False), + ]: + price = base_price * (1 + 0.01 * i) + rows.append( + { + "start_time": ts, + "end_time": ts + 3600000, + "symbol": symbol, + "open": price, + "high": price, + "low": price, + "close": price, + "volume": 1000.0, + "in_universe": in_universe, + } + ) + + prices = AggBar(pl.DataFrame(rows)) + signal = prices["close"].cs_rank() + bt = VectorizedBacktester(prices=prices, signal=signal, neutralization="market", mask="in_universe") + + combined = bt._prepare_data() + weighted = bt._calculate_weights(combined) + + masked = weighted.filter(~pl.col("in_universe").fill_null(False)) + assert masked["weight"].abs().max() == 0.0 + assert "_masked_signal" not in weighted.columns + class TestMetricsCalculation: """Tests for metrics calculation.""" From 5b5b2d28846519de1633183f39e6586f0e5da259 Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 15:37:25 +0800 Subject: [PATCH 05/10] fix(data): make explicit end_date inclusive by day --- src/factorium/data/utils.py | 7 ++++--- tests/test_data_loader.py | 21 ++++++++++----------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/factorium/data/utils.py b/src/factorium/data/utils.py index 5feb482..147c2b9 100644 --- a/src/factorium/data/utils.py +++ b/src/factorium/data/utils.py @@ -23,7 +23,7 @@ def calculate_date_range( producing duplicate bars with partial OHLCV data. Priority: - 1. If both start_date and end_date are provided: [start, end] + 1. If both start_date and end_date are provided: [start, end + 1 day) 2. If start_date and days are provided: [start, start + days] 3. If neither: [today_midnight - default_days, today_midnight + 1] 4. If only days: [today_midnight - days, today_midnight + 1] @@ -40,9 +40,10 @@ def calculate_date_range( try: if start_date and end_date: start = datetime.strptime(start_date, "%Y-%m-%d") - end = datetime.strptime(end_date, "%Y-%m-%d") - if start > end: + end_inclusive = datetime.strptime(end_date, "%Y-%m-%d") + if start > end_inclusive: raise ValueError("Start date must be earlier than or equal to end date") + end = end_inclusive + timedelta(days=1) return start, end if start_date and days: diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py index 5dc0f8d..e7a28a8 100644 --- a/tests/test_data_loader.py +++ b/tests/test_data_loader.py @@ -8,7 +8,7 @@ import pyarrow as pa import pyarrow.parquet as pq from pathlib import Path -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from unittest.mock import patch, MagicMock, AsyncMock from freezegun import freeze_time @@ -136,7 +136,7 @@ def test_with_start_and_end_date(self): start_dt, end_dt = calculate_date_range(start_date="2024-01-01", end_date="2024-01-07", days=None) assert start_dt == datetime(2024, 1, 1) - assert end_dt == datetime(2024, 1, 7) + assert end_dt == datetime(2024, 1, 8) def test_with_start_date_and_days(self): """Test with start_date and days specified.""" @@ -151,8 +151,8 @@ def test_with_only_days(self): start_dt, end_dt = calculate_date_range(start_date=None, end_date=None, days=7) # end = start of tomorrow (exclusive), start = end - days - assert end_dt == datetime(2024, 6, 16, 0, 0, 0) - assert start_dt == datetime(2024, 6, 9, 0, 0, 0) + assert end_dt == datetime(2024, 6, 16, 0, 0, 0, tzinfo=timezone.utc) + assert start_dt == datetime(2024, 6, 9, 0, 0, 0, tzinfo=timezone.utc) # Both must be midnight-aligned assert start_dt.hour == 0 and start_dt.minute == 0 and start_dt.second == 0 assert end_dt.hour == 0 and end_dt.minute == 0 and end_dt.second == 0 @@ -162,8 +162,8 @@ def test_default_7_days(self): """Test default behavior (no params = 7 days ending tomorrow midnight).""" start_dt, end_dt = calculate_date_range(start_date=None, end_date=None, days=None) - assert end_dt == datetime(2024, 6, 16, 0, 0, 0) - assert start_dt == datetime(2024, 6, 9, 0, 0, 0) + assert end_dt == datetime(2024, 6, 16, 0, 0, 0, tzinfo=timezone.utc) + assert start_dt == datetime(2024, 6, 9, 0, 0, 0, tzinfo=timezone.utc) # Both must be midnight-aligned assert start_dt.hour == 0 and start_dt.minute == 0 and start_dt.second == 0 assert end_dt.hour == 0 and end_dt.minute == 0 and end_dt.second == 0 @@ -174,8 +174,8 @@ def test_midnight_alignment_regardless_of_time(self): start_dt, end_dt = calculate_date_range(start_date=None, end_date=None, days=3) # Should snap to midnight boundaries - assert start_dt == datetime(2024, 6, 13, 0, 0, 0) - assert end_dt == datetime(2024, 6, 16, 0, 0, 0) + assert start_dt == datetime(2024, 6, 13, 0, 0, 0, tzinfo=timezone.utc) + assert end_dt == datetime(2024, 6, 16, 0, 0, 0, tzinfo=timezone.utc) assert start_dt.microsecond == 0 assert end_dt.microsecond == 0 @@ -191,14 +191,14 @@ def test_cross_year_range(self): start_dt, end_dt = calculate_date_range(start_date="2023-12-28", end_date="2024-01-05", days=None) assert start_dt == datetime(2023, 12, 28) - assert end_dt == datetime(2024, 1, 5) + assert end_dt == datetime(2024, 1, 6) def test_single_day_range(self): """Test single day range (start == end).""" start_dt, end_dt = calculate_date_range(start_date="2024-01-01", end_date="2024-01-01", days=None) assert start_dt == datetime(2024, 1, 1) - assert end_dt == datetime(2024, 1, 1) + assert end_dt == datetime(2024, 1, 2) def test_start_date_with_one_day(self): """Test start_date with days=1.""" @@ -208,7 +208,6 @@ def test_start_date_with_one_day(self): assert end_dt == datetime(2024, 1, 2) - # ============================================================================= # TestBuildDateFilter - 日期過濾條件測試 # ============================================================================= From df90c680a1c5e455600742927f2ff84a7ca8384f Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 15:37:25 +0800 Subject: [PATCH 06/10] fix(universe): exclude missing listing dates in MinListingAge --- src/factorium/universe/rules.py | 4 ++-- tests/universe/test_universe_rules.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/factorium/universe/rules.py b/src/factorium/universe/rules.py index 440a172..cfcdac2 100644 --- a/src/factorium/universe/rules.py +++ b/src/factorium/universe/rules.py @@ -116,7 +116,7 @@ def apply( listing_map[sym] = int(listing_date) if not listing_map: - return pl.lit(True) + return pl.lit(False) listing_expr = pl.col("symbol").replace_strict(listing_map, default=None).cast(pl.Int64, strict=False) - return ((pl.col("start_time") - listing_expr) >= self._min_ms) | listing_expr.is_null() + return ((pl.col("start_time") - listing_expr) >= self._min_ms).fill_null(False) diff --git a/tests/universe/test_universe_rules.py b/tests/universe/test_universe_rules.py index 110f79f..bf60098 100644 --- a/tests/universe/test_universe_rules.py +++ b/tests/universe/test_universe_rules.py @@ -91,3 +91,17 @@ def test_universe_combines_rules_with_and_logic() -> None: out = _sample_df().lazy().with_columns(universe.apply(_sample_df().lazy(), metadata).alias("in_universe")).collect() kept_symbols = set(out.filter(pl.col("in_universe"))["symbol"].to_list()) assert kept_symbols == {"BTCUSDT", "NEWUSDT"} + + +def test_min_listing_age_excludes_symbol_when_listing_date_missing() -> None: + metadata = _sample_metadata() + metadata["NEWUSDT"].pop("listing_date") + + out = ( + _sample_df() + .lazy() + .with_columns(MinListingAge(days=90).apply(_sample_df().lazy(), metadata).alias("keep")) + .collect() + ) + new_rows = out.filter(pl.col("symbol") == "NEWUSDT").sort("start_time") + assert new_rows["keep"].to_list() == [False, False] From e37420b963c286f36f6423f07e456bdabe9e6a14 Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 15:37:25 +0800 Subject: [PATCH 07/10] fix(universe): require symbols and handle CoinGecko symbol collisions --- src/factorium/universe/tags.py | 26 ++++++++--------- tests/universe/test_tags.py | 52 +++++++++++++++++++--------------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/factorium/universe/tags.py b/src/factorium/universe/tags.py index 67c6498..b00d9f8 100644 --- a/src/factorium/universe/tags.py +++ b/src/factorium/universe/tags.py @@ -17,8 +17,8 @@ class TagProvider: """Fetch and cache token categories from CoinGecko. Note: - Calling ``fetch``/``fetch_async`` with ``symbols=None`` performs - full-market category fetching and can be slow for large universes. + ``symbols`` must be explicitly provided to avoid full-market + category fetching from CoinGecko, which can be very slow. """ def __init__( @@ -44,18 +44,16 @@ async def _request_json( return await response.json() async def fetch_async(self, symbols: list[str] | None = None) -> dict[str, list[str]]: - requested = [s.upper() for s in symbols] if symbols is not None else None + if symbols is None: + raise ValueError("symbols must be provided to avoid fetching the entire CoinGecko database") + + requested = [s.upper() for s in symbols] cached = self._load_cache() if cached is not None: - if requested is None: - return cached if all(sym in cached for sym in requested): return {sym: cached[sym] for sym in requested} - if requested is None: - logger.warning("Fetching tags for all symbols may take a long time; pass symbols to limit scope.") - headers: dict[str, str] | None = None if self.api_key: headers = {"x-cg-pro-api-key": self.api_key} @@ -66,10 +64,14 @@ async def fetch_async(self, symbols: list[str] | None = None) -> dict[str, list[ for item in raw_list if isinstance(raw_list, list) else []: symbol = str(item.get("symbol", "")).upper() coin_id = item.get("id") - if symbol and coin_id and symbol not in symbol_to_id: - symbol_to_id[symbol] = str(coin_id) + if not symbol or not coin_id: + continue + + coin_id_str = str(coin_id) + if symbol not in symbol_to_id or coin_id_str == symbol.lower(): + symbol_to_id[symbol] = coin_id_str - targets = requested or sorted(symbol_to_id.keys()) + targets = requested result: dict[str, list[str]] = {} if cached is None else dict(cached) for symbol in targets: @@ -86,8 +88,6 @@ async def fetch_async(self, symbols: list[str] | None = None) -> dict[str, list[ await asyncio.sleep(0.12) self._save_cache(result) - if requested is None: - return result return {sym: result.get(sym, []) for sym in requested if sym in result} def fetch(self, symbols: list[str] | None = None) -> dict[str, list[str]]: diff --git a/tests/universe/test_tags.py b/tests/universe/test_tags.py index 309e477..20fd684 100644 --- a/tests/universe/test_tags.py +++ b/tests/universe/test_tags.py @@ -34,6 +34,32 @@ async def no_sleep(seconds: float) -> None: assert out["ETH"] == ["Layer 1", "Smart Contract Platform"] +def test_fetch_prefers_canonical_id_on_symbol_collision(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + provider = TagProvider(cache_dir=tmp_path, cache_ttl=0) + + async def fake_request_json(session, url, params=None, headers=None): + del session, params, headers + if url.endswith("/coins/list"): + return [ + {"id": "btc-token", "symbol": "btc", "name": "Some BTC Token"}, + {"id": "btc", "symbol": "btc", "name": "Canonical BTC"}, + ] + if url.endswith("/coins/btc"): + return {"categories": ["Store Of Value"]} + if url.endswith("/coins/btc-token"): + raise AssertionError("should use canonical id when collision exists") + raise AssertionError(f"unexpected url: {url}") + + async def no_sleep(seconds: float) -> None: + del seconds + + monkeypatch.setattr(provider, "_request_json", fake_request_json) + monkeypatch.setattr("factorium.universe.tags.asyncio.sleep", no_sleep) + + out = provider.fetch(symbols=["BTC"]) + assert out["BTC"] == ["Store Of Value"] + + def test_cache_load_save_and_ttl(tmp_path: Path) -> None: provider = TagProvider(cache_dir=tmp_path, cache_ttl=10) sample = {"BTC": ["Layer 1"], "ETH": ["Layer 1", "Smart Contract Platform"]} @@ -86,27 +112,7 @@ def fake_run_async(coro): assert called["value"] is True -def test_fetch_warns_when_symbols_is_none( - monkeypatch: pytest.MonkeyPatch, tmp_path: Path, caplog: pytest.LogCaptureFixture -) -> None: +def test_fetch_raises_when_symbols_is_none(tmp_path: Path) -> None: provider = TagProvider(cache_dir=tmp_path, cache_ttl=0) - - async def fake_request_json(session, url, params=None, headers=None): - del session, params, headers - if url.endswith("/coins/list"): - return [{"id": "bitcoin", "symbol": "btc", "name": "Bitcoin"}] - if url.endswith("/coins/bitcoin"): - return {"categories": ["Layer 1"]} - raise AssertionError(f"unexpected url: {url}") - - async def no_sleep(seconds: float) -> None: - del seconds - - monkeypatch.setattr(provider, "_request_json", fake_request_json) - monkeypatch.setattr("factorium.universe.tags.asyncio.sleep", no_sleep) - - caplog.set_level("WARNING") - out = provider.fetch(symbols=None) - - assert "BTC" in out - assert "may take a long time" in caplog.text + with pytest.raises(ValueError, match="symbols must be provided"): + provider.fetch(symbols=None) From a2555416772ee89d645259626e2ee273c71b8dc1 Mon Sep 17 00:00:00 2001 From: novis10813 Date: Sat, 14 Feb 2026 15:37:25 +0800 Subject: [PATCH 08/10] test(factors): normalize minute literal style in safe ops helper --- tests/factors/test_safe_operations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/factors/test_safe_operations.py b/tests/factors/test_safe_operations.py index c7cc9c3..3b78ad7 100644 --- a/tests/factors/test_safe_operations.py +++ b/tests/factors/test_safe_operations.py @@ -46,8 +46,8 @@ def _make_factor( for s_idx, sym in enumerate(symbols): rows.append( { - "start_time": t * 60000, - "end_time": (t + 1) * 60000, + "start_time": t * 60_000, + "end_time": (t + 1) * 60_000, "symbol": sym, "factor": values[t * n_symbols + s_idx], } From d759e39e4de6e906992841bb6ef88e8fdfb6bfde Mon Sep 17 00:00:00 2001 From: novis10813 Date: Mon, 16 Feb 2026 18:42:16 +0800 Subject: [PATCH 09/10] fix: put coingecko url into constants.py --- src/factorium/constants.py | 4 ++++ src/factorium/universe/tags.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/factorium/constants.py b/src/factorium/constants.py index 42e9115..b7120ec 100644 --- a/src/factorium/constants.py +++ b/src/factorium/constants.py @@ -15,9 +15,13 @@ MIN_PERIODS_PER_YEAR = 1.0 MAX_PERIODS_PER_YEAR = 365.25 * 24 * 60 # Minutes in a year +# External API URLs +COINGECKO_BASE_URL = "https://api.coingecko.com/api/v3" + __all__ = [ "EPSILON", "SECONDS_PER_YEAR", "MIN_PERIODS_PER_YEAR", "MAX_PERIODS_PER_YEAR", + "COINGECKO_BASE_URL", ] diff --git a/src/factorium/universe/tags.py b/src/factorium/universe/tags.py index b00d9f8..2edba28 100644 --- a/src/factorium/universe/tags.py +++ b/src/factorium/universe/tags.py @@ -8,8 +8,8 @@ import aiohttp +from ..constants import COINGECKO_BASE_URL -COINGECKO_BASE_URL = "https://api.coingecko.com/api/v3" logger = logging.getLogger(__name__) From 28a2244816ae2fe252c2d7aae544f67d7799a791 Mon Sep 17 00:00:00 2001 From: novis10813 Date: Mon, 16 Feb 2026 20:46:59 +0800 Subject: [PATCH 10/10] fix(test): correct mock target and missing imports in universe tests - Move _run_async import to module level in metadata.py and tags.py - Patch local module reference in tests to correctly mock _run_async - Fix NameError in test_metadata.py due to missing import --- src/factorium/universe/metadata.py | 3 +-- src/factorium/universe/tags.py | 4 ++-- tests/universe/test_metadata.py | 4 ++-- tests/universe/test_tags.py | 3 ++- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/factorium/universe/metadata.py b/src/factorium/universe/metadata.py index 01e6b6a..df37ca2 100644 --- a/src/factorium/universe/metadata.py +++ b/src/factorium/universe/metadata.py @@ -6,6 +6,7 @@ import aiohttp +from ..data.loader import _run_async from .rules import KNOWN_STABLECOINS, LEVERAGED_PATTERNS, SymbolMetadata @@ -46,8 +47,6 @@ async def fetch_async(self) -> dict[str, SymbolMetadata]: return parsed def fetch(self) -> dict[str, SymbolMetadata]: - from ..data.loader import _run_async - return _run_async(self.fetch_async()) def _parse_exchange_info(self, data: dict) -> dict[str, SymbolMetadata]: diff --git a/src/factorium/universe/tags.py b/src/factorium/universe/tags.py index 2edba28..2992677 100644 --- a/src/factorium/universe/tags.py +++ b/src/factorium/universe/tags.py @@ -9,6 +9,8 @@ import aiohttp from ..constants import COINGECKO_BASE_URL +from ..data.loader import _run_async + logger = logging.getLogger(__name__) @@ -91,8 +93,6 @@ async def fetch_async(self, symbols: list[str] | None = None) -> dict[str, list[ return {sym: result.get(sym, []) for sym in requested if sym in result} def fetch(self, symbols: list[str] | None = None) -> dict[str, list[str]]: - from ..data.loader import _run_async - return _run_async(self.fetch_async(symbols=symbols)) def _load_cache(self) -> dict[str, list[str]] | None: diff --git a/tests/universe/test_metadata.py b/tests/universe/test_metadata.py index 51f0e48..375b710 100644 --- a/tests/universe/test_metadata.py +++ b/tests/universe/test_metadata.py @@ -3,7 +3,7 @@ import pytest -import factorium.data.loader as data_loader +from factorium.universe import metadata as metadata_module from factorium.universe.metadata import MetadataProvider @@ -118,7 +118,7 @@ def fake_run_async(coro): return expected monkeypatch.setattr(provider, "fetch_async", fake_fetch_async) - monkeypatch.setattr(data_loader, "_run_async", fake_run_async) + monkeypatch.setattr(metadata_module, "_run_async", fake_run_async) out = provider.fetch() assert out == expected diff --git a/tests/universe/test_tags.py b/tests/universe/test_tags.py index 20fd684..8122f00 100644 --- a/tests/universe/test_tags.py +++ b/tests/universe/test_tags.py @@ -4,6 +4,7 @@ import pytest import factorium.data.loader as data_loader +from factorium.universe import tags as tags_module from factorium.universe.tags import TagProvider @@ -105,7 +106,7 @@ def fake_run_async(coro): return expected monkeypatch.setattr(provider, "fetch_async", fake_fetch_async) - monkeypatch.setattr(data_loader, "_run_async", fake_run_async) + monkeypatch.setattr(tags_module, "_run_async", fake_run_async) out = provider.fetch(symbols=["BTC"]) assert out == expected