diff --git a/.commitlintrc.js b/.commitlintrc.js new file mode 100755 index 00000000..0a4c1ac2 --- /dev/null +++ b/.commitlintrc.js @@ -0,0 +1,21 @@ +module.exports = { + extends: ["@commitlint/config-conventional"], + rules: { + // Configuration Format: [level, applicability, value] + // level: Error level, usually expressed as a number: + // 0 - disable rule + // 1 - Warning (does not prevent commits) + // 2 - Error (will block the commit) + // applicability: the conditions under which the rule applies, commonly used values: + // “always” - always apply the rule + // “never” - never apply the rule + // value: the specific value of the rule, e.g. a maximum length of 100. + // Refs: https://commitlint.js.org/reference/rules-configuration.html + "header-max-length": [2, "always", 100], + "type-enum": [ + 2, + "always", + ["build", "chore", "ci", "docs", "feat", "fix", "perf", "refactor", "revert", "style", "test", "Release-As"] + ] + } + }; diff --git a/.env.example b/.env.example old mode 100644 new mode 100755 index 3bfc722a..08c44d75 --- a/.env.example +++ b/.env.example @@ -1,9 +1,27 @@ -# 复制为 .env 并填写(勿提交 .env) +""" +This file is a template for the .env file. -# 拉 Panel 必填 -TUSHARE_TOKEN= +Please copy this file to .env and fill in the values. -# 因子挖掘(uv sync --extra mining) -OPENAI_API_KEY= -OPENAI_API_BASE= -MODEL= +For more information about configuration options, please refer to the documentation + +""" + +# Global configs: +USE_AZURE=False +CHAT_USE_AZURE_TOKEN_PROVIDER=False +EMBEDDING_USE_AZURE_TOKEN_PROVIDER=False +MAX_RETRY=5 +RETRY_WAIT_SECONDS=5 +FACTOR_MINING_TIMEOUT=10800 # 最大运行时间 +USE_LOCAL=True + +# LLM API Setting: +OPENAI_BASE_URL= +OPENAI_API_KEY= +CHAT_MODEL= # e.g., "deepseek-v3" +REASONING_MODEL= # e.g. 
"deepseek-reasoner" +EMBEDDING_MODEL=text-embedding-3-small # RAG + +CHAT_MAX_TOKENS=4000 +CHAT_TEMPERATURE=0.7 \ No newline at end of file diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100755 index 00000000..f30443ba --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,2 @@ +github: + - MIIC-finance diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100755 index 00000000..bde7b40e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,51 @@ +--- +name: "\U0001F41B Bug Report" +about: Submit a bug report to help us improve RD-Agent +labels: bug + +--- + +## 🐛 Bug Description + + + +## To Reproduce + +Steps to reproduce the behavior: + +1. +2. +3. + + +## Expected Behavior + + + +## Screenshot + + + +## Environment + +**Note**: Users can run `rdagent collect_info` to get system information and paste it directly here. + + - Name of current operating system: + - Processor architecture: + - System, version, and hardware information: + - Version number of the system: + - Python version: + - Container ID: + - Container Name: + - Container Status: + - Image ID used by the container: + - Image tag used by the container: + - Container port mapping: + - Container Label: + - Startup Commands: + - RD-Agent version: + - Package version: + +## Additional Notes + + diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md new file mode 100755 index 00000000..96c70e5a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,9 @@ +--- +name: "\U0001F4D6 Documentation" +about: Report an issue related to documentation + +--- + +## 📖 Documentation + + diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100755 index 00000000..ef239738 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,25 @@ +--- +name: "\U0001F31FFeature Request" +about: Request for a new RD-Agent 
feature +labels: enhancement + +--- + +## 🌟 Feature Description + + +## Motivation + +1. Application scenario +2. Related works (Papers, Github repos etc.): +3. Any other relevant and important information: + + + +## Alternatives + + + +## Additional Notes + + diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md new file mode 100755 index 00000000..4b92da90 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,10 @@ +--- +name: "❓Questions & Help" +about: Have some questions? We can offer help. +labels: question + +--- + +## ❓ Questions and Help + +We sincerely suggest you to carefully read the [documentation](http://rdagent.readthedocs.io/). After that, if you still feel puzzled, please describe the question clearly under this issue. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100755 index 00000000..cae5ec0d --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,34 @@ + + + + + + + + + + + + + +## Description + + +## Motivation and Context + + + +## How Has This Been Tested? + +- [ ] If you are adding a new feature, test on your own test scripts. + + + +## Screenshots of Test Results (if appropriate): +1. 
Your own tests: + +## Types of changes + +- [ ] Fix bugs +- [ ] Add new feature +- [ ] Update documentation diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100755 index 00000000..cd494ea0 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,19 @@ +updates: + - commit-message: + prefix: build(actions) + directory: / + package-ecosystem: github-actions + schedule: + interval: weekly + - commit-message: + prefix: build(requirements) + directory: / + groups: + dev: + dependency-type: development + prod: + dependency-type: production + package-ecosystem: pip + schedule: + interval: weekly +version: 2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100755 index 00000000..21d2f98c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,70 @@ +# concurrency: +# cancel-in-progress: true +# group: ${{ github.workflow }}-${{ github.ref }} +# jobs: +# ci: +# if: ${{ !cancelled() && ! failure() }} +# needs: dependabot +# runs-on: ubuntu-latest +# steps: +# - name: checkout +# uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# submodules: recursive +# - name: Set up Python ${{ matrix.python-version }} +# uses: actions/setup-python@v5 +# with: +# cache: pip +# python-version: ${{ matrix.python-version }} +# - run: env | sort +# - run: make dev +# - name: lint test docs and build +# run: make lint docs-gen test-offline # test docs build +# strategy: +# matrix: +# python-version: +# - '3.10' +# - '3.11' +# dependabot: +# if: ${{ github.actor == 'dependabot[bot]' && startsWith(github.head_ref, 'dependabot/pip/') }} +# permissions: +# contents: write +# runs-on: ubuntu-latest +# steps: +# - uses: actions/checkout@v4 +# with: +# fetch-depth: 0 +# ref: ${{ github.head_ref }} +# - name: Set up Git +# run: | +# git config --global user.name github-actions +# git config --global user.email github-actions@github.com +# - name: Set up Python with multiple versions. 
+# uses: actions/setup-python@v5 +# with: +# cache: pip +# python-version: | +# 3.10 +# 3.11 +# - name: Install pipenv using pipx +# run: pipx install pipenv +# - name: Generate constraints for all supported Python versions +# run: | +# CI= PYTHON_VERSION=3.10 make constraints +# CI= PYTHON_VERSION=3.11 make constraints +# - name: Push changes if applicable +# run: | +# if [[ -n `git status --porcelain` ]]; then +# git commit -a -m "build: Update constraints for dependabot." +# git push +# fi +# name: CI +# on: +# pull_request: +# types: +# - opened +# - synchronize +# push: +# branches: +# - main diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml new file mode 100755 index 00000000..c759de42 --- /dev/null +++ b/.github/workflows/pr.yml @@ -0,0 +1,35 @@ +# name: Lint pull request title + +# on: +# pull_request: +# types: +# - opened +# - synchronize +# - reopened +# - edited + +# concurrency: +# cancel-in-progress: true +# group: ${{ github.workflow }}-${{ github.ref }} + +# jobs: +# lint-title: +# runs-on: ubuntu-latest +# steps: +# # This step is necessary because the lint title uses the .commitlintrc.js file in the project root directory. 
+# - name: Checkout Repository +# uses: actions/checkout@v4 + +# - name: Setup Node.js +# uses: actions/setup-node@v4 +# with: +# node-version: '16' + +# - name: Install commitlint +# run: npm install --save-dev @commitlint/{config-conventional,cli} + +# - name: Validate PR Title with commitlint +# env: +# BODY: ${{ github.event.pull_request.title }} +# run: | +# echo "$BODY" | npx commitlint --config .commitlintrc.js diff --git a/.github/workflows/readthedocs-preview.yml b/.github/workflows/readthedocs-preview.yml new file mode 100755 index 00000000..e3999a3c --- /dev/null +++ b/.github/workflows/readthedocs-preview.yml @@ -0,0 +1,17 @@ +concurrency: + cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} +jobs: + documentation-links: + runs-on: ubuntu-latest + steps: + - uses: readthedocs/actions/preview@v1 + with: + project-slug: RDAgent +name: Read the Docs Pull Request Preview +on: + pull_request_target: + types: + - opened +permissions: + pull-requests: write diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100755 index 00000000..5f738b74 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,50 @@ +# name: Release +# on: +# push: +# branches: +# - main +# permissions: +# contents: read +# jobs: +# release_and_publish: +# permissions: +# contents: write +# pull-requests: read +# runs-on: ubuntu-latest +# steps: +# - name: Release please +# id: release_please +# uses: googleapis/release-please-action@v4 +# with: +# # The current PAT (personal access token) was created on 2024-08-05, +# # since the maximum validity of PAT is 1 year, you need to change the PAT before 2025-08-05. 
+# token: ${{ secrets.PAT }} +# release-type: simple +# - uses: actions/checkout@v4 +# if: ${{ steps.release_please.outputs.release_created }} +# with: +# fetch-depth: 0 +# - name: Set up Python +# if: ${{ steps.release_please.outputs.release_created }} +# uses: actions/setup-python@v5 +# with: +# cache: pip +# python-version: '3.10' +# - name: Install dependencies +# if: ${{ steps.release_please.outputs.release_created }} +# run: | +# python -m pip install --upgrade pip +# pip install setuptools wheel twine # better-exceptions(optional for debug) +# - run: env | sort +# if: ${{ steps.release_please.outputs.release_created }} +# - run: make dev +# if: ${{ steps.release_please.outputs.release_created }} +# - run: make build +# if: ${{ steps.release_please.outputs.release_created }} +# - name: upload +# if: ${{ steps.release_please.outputs.release_created }} +# env: +# TWINE_USERNAME: __token__ +# TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} +# run: | +# make upload diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index ca169699..b5970aa9 --- a/.gitignore +++ b/.gitignore @@ -1,36 +1,192 @@ -# Python / uv -.venv/ +# Custom +*.swp +.DS_Store +Pipfile +public +release-notes.md + +# Byte-compiled / optimized / DLL files __pycache__/ +*/__pycache__/* *.py[cod] -.pytest_cache/ +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ *.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +# *.log +# /log/ +log/ +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version -# 环境 +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments .env +.venv +^env/ +venv/ +ENV/ +env.bak/ +venv.bak/ -# 参考项目 / Cursor skills(本地保留,不入库) -ref_projects/ -skills/ +# Spyder project settings +.spyderproject +.spyproject -# 本地大文件(可重建) -artifacts/market/ -artifacts/panel/ -artifacts/fundamental/ -artifacts/industry/ -artifacts/index/ -logs/ +# Rope project settings +.ropeproject -# 数据发布打包产物 -dist/ +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# all pkl files +*.pkl + +# all h5 files +*.h5 + +# all vs-code files +.vscode/ + +# reports +reports/ + +# git_ignore_folder +git_ignore_folder/ +factor_zoo/* +saved_mlruns/ +#cache +*cache*/ +*cache.json + +# DB files +*.db + +# Docker +factor_template/mlruns/ +env_tpl +mlruns/ + +# possible output from coder or runner +*.pth +*qlib_res.csv +*.ipynb + +# shell script +*.out +*.sh + +*_debug* +*.ipynb +注意事项.md +# alphaagent/scenarios/qlib/experiment/factor_template/ +# alphaagent/scenarios/qlib/experiment/factor_data_template/ +alphaagent/app/qlib_rd_loop/factor_rdagent.py -# factorzoo:同步 expressions/*.dsl + 挖掘元数据;memmap / parquet 不入库 -artifacts/factorzoo/**/* -!artifacts/factorzoo/stock_1d/ -artifacts/factorzoo/stock_1d/**/* -!artifacts/factorzoo/stock_1d/expressions/ -!artifacts/factorzoo/stock_1d/expressions/*.dsl -!artifacts/factorzoo/stock_1d/mining_delivered_registry.json -!artifacts/factorzoo/stock_1d/mls_fmb_percentiles.json - -# 旧路径(已迁到 factorzoo/expressions) -examples/factors/mined/ +alphaagent/scenarios/qlib/experiment/factor_template/* +alphaagent/app/data_mining/ +alphaagent/app/general_model/ +alphaagent/app/kaggle/ +alphaagent/scenarios/data_mining/ +alphaagent/scenarios/general_model/ +alphaagent/scenarios/kaggle/ +backend/ \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100755 index 
00000000..d98703ab --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,26 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.10" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Build all formats +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + install: + - requirements: requirements/docs.txt + - method: pip + path: . diff --git a/.streamlit/config.toml b/.streamlit/config.toml new file mode 100755 index 00000000..74812cd0 --- /dev/null +++ b/.streamlit/config.toml @@ -0,0 +1,2 @@ +[client] +showSidebarNavigation = false \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100755 index 00000000..9e841e7a --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/Makefile b/Makefile new file mode 100755 index 00000000..a1dec920 --- /dev/null +++ b/Makefile @@ -0,0 +1,215 @@ +.PHONY: clean deepclean install init-qlib-env dev constraints black isort mypy ruff toml-sort lint pre-commit test-run test build upload docs-autobuild changelog docs-gen docs-mypy docs-coverage docs +#You can modify it according to your terminal +SHELL := /bin/bash + +######################################################################################## +# Variables +######################################################################################## + +# Determine whether to invoke pipenv based on CI environment variable and the availability of pipenv. +PIPRUN := $(shell [ "$$CI" != "true" ] && command -v pipenv > /dev/null 2>&1 && echo "pipenv run") + +# Get the Python version in `major.minor` format, using the environment variable or the virtual environment if exists. +PYTHON_VERSION := $(shell echo $${PYTHON_VERSION:-$$(python -V 2>&1 | cut -d ' ' -f 2)} | cut -d '.' -f 1,2) + +# Determine the constraints file based on the Python version. +CONSTRAINTS_FILE := constraints/$(PYTHON_VERSION).txt + +# Documentation target directory, will be adapted to specific folder for readthedocs. +PUBLIC_DIR := $(shell [ "$$READTHEDOCS" = "True" ] && echo "$$READTHEDOCS_OUTPUT/html" || echo "public") + +# URL and Path of changelog source code. 
+CHANGELOG_URL := $(shell echo $${CI_PAGES_URL:-https://microsoft.github.io/rdagent}/_sources/changelog.md.txt) +CHANGELOG_PATH := docs/changelog.md + +######################################################################################## +# Development Environment Management +######################################################################################## + +# Remove common intermediate files. +clean: + -rm -rf \ + $(PUBLIC_DIR) \ + .coverage \ + .mypy_cache \ + .pytest_cache \ + .ruff_cache \ + Pipfile* \ + coverage.xml \ + dist \ + release-notes.md + find . -name '*.egg-info' -print0 | xargs -0 rm -rf + find . -name '*.pyc' -print0 | xargs -0 rm -f + find . -name '*.swp' -print0 | xargs -0 rm -f + find . -name '.DS_Store' -print0 | xargs -0 rm -f + find . -name '__pycache__' -print0 | xargs -0 rm -rf + +# Remove pre-commit hook, virtual environment alongside intermediate files. +deepclean: clean + if command -v pre-commit > /dev/null 2>&1; then pre-commit uninstall --hook-type pre-push; fi + if command -v pipenv >/dev/null 2>&1 && pipenv --venv >/dev/null 2>&1; then pipenv --rm; fi + +# Install the package in editable mode. +install: + $(PIPRUN) pip install -e . -c $(CONSTRAINTS_FILE) + +# Install the package in editable mode with specific optional dependencies. +dev-%: + $(PIPRUN) pip install -e .[$*] -c $(CONSTRAINTS_FILE) + +# Prepare the development environment. +# Build submodules. +# Install the package in editable mode with all optional dependencies and pre-commit hook. 
+init-qlib-env: + # note: You may need to install torch manually + # todo: downgrade ruamel.yaml in pyqlib + conda create -n qlibRDAgent python=3.8 -y + @source $$(conda info --base)/etc/profile.d/conda.sh && conda activate qlibRDAgent && which pip && pip install pyqlib && pip install ruamel-yaml==0.17.21 && pip install torch==2.1.1 && pip install catboost==0.24.3 && conda deactivate + +dev: + $(PIPRUN) pip install -e .[docs,lint,package,test] -c $(CONSTRAINTS_FILE) + if [ "$(CI)" != "true" ] && command -v pre-commit > /dev/null 2>&1; then pre-commit install --hook-type pre-push; fi + +# Generate constraints for current Python version. +constraints: deepclean + $(PIPRUN) --python $(PYTHON_VERSION) pip install --upgrade -e .[docs,lint,package,test] + $(PIPRUN) pip freeze --exclude-editable > $(CONSTRAINTS_FILE) + +######################################################################################## +# Lint and pre-commit +######################################################################################## + +# Check lint with black. +black: + $(PIPRUN) python -m black --check --diff . --extend-exclude test/scripts --extend-exclude git_ignore_folder -l 120 + +# Check lint with isort. +isort: + $(PIPRUN) python -m isort --check . -s git_ignore_folder -s test/scripts + +# Check lint with mypy. +# First deal with the core folder, and then gradually increase the scope of detection, +# and eventually realize the detection of the complete project. +mypy: + $(PIPRUN) python -m mypy rdagent/core # --exclude rdagent/scripts,git_ignore_folder + +# Check lint with ruff. +# First deal with the core folder, and then gradually increase the scope of detection, +# and eventually realize the detection of the complete project. +ruff: + $(PIPRUN) ruff check rdagent/core --ignore FBT001,FBT002,I001 # --exclude rdagent/scripts,git_ignore_folder + +# Check lint with toml-sort. +toml-sort: + $(PIPRUN) toml-sort --check pyproject.toml + +# Check lint with all linters. 
+# Prioritize fixing isort, then black, otherwise you'll get weird and unfixable black errors. +# lint: mypy ruff +lint: mypy ruff isort black toml-sort + +# Run pre-commit with autofix against all files. +pre-commit: + pre-commit run --all-files + +######################################################################################## +# Auto Lint +######################################################################################## + +# Auto lint with black. +auto-black: + $(PIPRUN) python -m black . --extend-exclude test/scripts --extend-exclude git_ignore_folder -l 120 + +# Auto lint with isort. +auto-isort: + $(PIPRUN) python -m isort . -s git_ignore_folder -s test/scripts + +# Auto lint with toml-sort. +auto-toml-sort: + $(PIPRUN) toml-sort pyproject.toml + +# Auto lint with all linters. +auto-lint: auto-isort auto-black auto-toml-sort + +######################################################################################## +# Test +######################################################################################## + +# Clean and run test with coverage. +test-run: + $(PIPRUN) python -m coverage erase + $(PIPRUN) python -m coverage run --concurrency=multiprocessing -m pytest --ignore test/scripts + $(PIPRUN) python -m coverage combine + +test-run-offline: + # some test that does not require api calling + $(PIPRUN) python -m coverage erase + $(PIPRUN) python -m coverage run --concurrency=multiprocessing -m pytest -m "offline" --ignore test/scripts + $(PIPRUN) python -m coverage combine + +# Generate coverage report for terminal and xml. 
+# TODO: we may have higher coverage rate if we have more test +test: test-run + $(PIPRUN) python -m coverage report --fail-under 20 # 80 + $(PIPRUN) python -m coverage xml --fail-under 20 # 80 + +test-offline: test-run-offline + $(PIPRUN) python -m coverage report --fail-under 20 # 80 + $(PIPRUN) python -m coverage xml --fail-under 20 # 80 + +######################################################################################## +# Package +######################################################################################## + +# Build the package. +build: + $(PIPRUN) python -m build + +# Upload the package. +upload: + $(PIPRUN) python -m twine upload dist/* + +######################################################################################## +# Documentation +######################################################################################## + +# Generate documentation with auto build when changes happen. +docs-autobuild: + $(PIPRUN) python -m sphinx_autobuild docs $(PUBLIC_DIR) \ + --watch README.md \ + --watch rdagent + +# Generate changelog from git commits. +# Usage: make changelog VERSION=0.1.0 +changelog: + @if wget -q --spider $(CHANGELOG_URL); then \ + echo "Existing Changelog found at '$(CHANGELOG_URL)', download for incremental generation."; \ + wget -q -O $(CHANGELOG_PATH) $(CHANGELOG_URL); \ + fi + $(PIPRUN) VERSION=$${VERSION:-$$(git tag --sort=-creatordate | head -n 1)}; \ + git-changelog --bump $$VERSION -Tio docs/changelog.md + +# Generate release notes from changelog. +release-notes: + @$(PIPRUN) git-changelog --input $(CHANGELOG_PATH) --release-notes + +# Build documentation only from alphaagent. +docs-gen: + $(PIPRUN) python -m sphinx.cmd.build -W docs $(PUBLIC_DIR) + +# Generate mypy reports. +docs-mypy: docs-gen + $(PIPRUN) python -m mypy rdagent test --exclude git_ignore_folder --exclude rdagent/scripts --html-report $(PUBLIC_DIR)/reports/mypy + +# Generate html coverage reports with badge. 
+docs-coverage: test-run docs-gen + $(PIPRUN) python -m coverage html -d $(PUBLIC_DIR)/reports/coverage --fail-under 80 + $(PIPRUN) bash scripts/generate-coverage-badge.sh $(PUBLIC_DIR)/_static/badges + +# Generate all documentation with reports. +docs: changelog docs-gen docs-mypy docs-coverage + + +######################################################################################## +# End +######################################################################################## diff --git a/README.md b/README.md old mode 100644 new mode 100755 index c641e906..7bab08ff --- a/README.md +++ b/README.md @@ -1,114 +1,149 @@ -# AlphaAgent +

+ RA-Agent logo + + +

-A multi-factor equity research framework for China A-shares: build a daily panel from Tushare (or pre-packaged parquet caches), express factors in DSL (domain-specific language), evaluate them in **FactorZoo**, and optionally mine new factors with LLMs. +Official source code of KDD 2025 paper: [AlphaAgent: LLM-Driven Alpha Mining with Regularized Exploration to Counteract Alpha Decay](https://arxiv.org/abs/2502.16789) -> **AlphaAgent** 是一套面向 A 股的多因子研究框架:从 Tushare(或开源数据包)构建日频 panel,用 DSL 表达因子,在 FactorZoo 中评估,并可选 LLM 辅助挖掘。 -## What it does -| Layer | Description | -|-------|-------------| -| **Data** | Two-stage pipeline — fetch raw caches (market / fundamentals / industry) online, then build or update the panel **offline** | -| **Factors** | DSL expressions → memmap factor library; IC / turnover / quantile reports via `eval_factor.py` | -| **Mining** | Optional AgentScope agents propose and iterate on factor expressions (`factor_mining_agentscope.py`) | +# 📖Introduction +
+ Our focused scenario +
-Universe in the reference dataset: **CSI 1000 (ZZ1000)** constituent union, 2015-01 ~ 2026-06, ~2,757 stocks, ~6.2M panel rows. -Panel and large binaries are **not in Git**. Clone the repo, then either pull data with a Tushare token or restore the [open data package](docs/data_release.md) and rebuild offline. -## Quick start + +**AlphaAgent** is an autonomous framework that effectively integrates LLM agents for mining interpretable and decay-resistant alpha factors through three specialized agents. -```powershell -uv sync -copy .env.example .env # set TUSHARE_TOKEN (only needed for Option B below) +- **Idea Agent**: Proposes market hypotheses to guide factor creation based on financial theories or emerging trends. +- **Factor Agent**: Constructs factors based on hypotheses while incorporating regularization mechanisms to avoid duplication and overfitting. +- **Eval Agent**: Validates practicality, performs backtesting, and iteratively refines factors via feedback loops. -# Get the data — see "Data preparation": download the open package, OR: -uv run python scripts/fetch_market.py --start 2015-01-01 --end 2026-06-30 --universe zz1000 -uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 +This repository follows the implementation of [RD-Agent](https://github.com/microsoft/RD-Agent). You can find its repository at: [https://github.com/microsoft/RD-Agent](https://github.com/microsoft/RD-Agent). We would like to extend our sincere gratitude to the RD-Agent team for their pioneering work and contributions to the community. 
-# Build the panel offline, then rebuild the factor library and evaluate a factor -uv run python scripts/build_panel.py --with-fundamentals --with-industry -uv run python scripts/init_factorlib.py -uv run python scripts/ingest_factors.py --expr-dir artifacts/factorzoo/stock_1d/expressions -uv run python scripts/eval_factor.py --expr-file artifacts/factorzoo/stock_1d/expressions/idio_qspread_win_20.dsl --report -``` -Incremental updates: `uv run python scripts/update_panel.py --universe zz1000 --with-fundamentals --with-industry` +# ⚡ Quick start -## Data preparation +### 🐍 Create a Conda Environment +- Create a new conda environment with Python (3.10 and 3.11 are well-tested in our CI): + ```sh + conda create -n alphaagent python=3.10 + ``` +- Activate the environment: + ```sh + conda activate alphaagent + ``` -The panel and raw caches are **not tracked in Git**. Choose one of the two options below. +### 🛠️ Install locally +- + ```sh + # Install AlphaAgent + pip install -e . + ``` -### Option A — download the open data package (no Tushare token) +### 📈 Data Preparation +- First, clone Qlib source code for running backtest locally. + ``` + # Clone Qlib source code + git clone https://github.com/microsoft/qlib.git + cd qlib + pip install . + cd .. + ``` -Pre-built raw parquet caches (CSI 1000 union, 2015-01 ~ 2026-06): +- Then, manually download Chinese stock data via baostock and dump into the Qlib format. + ```sh + # Download or update stock data from 2015-01-01 until NOW from baostock + python prepare_cn_data.py -- **Baidu Netdisk**: (code: `5qp5`) -- File: `alphaagent-data-20260703.zip` + cd qlib -```powershell -# 1. Extract the zip into the repo root, so that these folders are populated: -# artifacts/market, artifacts/fundamental, artifacts/industry, artifacts/index -# 2. Rebuild the panel offline (reads local caches only, no network): -uv run python scripts/build_panel.py --with-fundamentals --with-industry -# 3. 
Rebuild the factor library from Git-tracked DSL: -uv run python scripts/init_factorlib.py -uv run python scripts/ingest_factors.py --expr-dir artifacts/factorzoo/stock_1d/expressions -``` + # Convert csv to Qlib format. Check correct paths before running. + python scripts/dump_bin.py dump_all ... \ + --include_fields open,high,low,close,preclose,volume,amount,turn,factor \ + --csv_path ~/.qlib/qlib_data/cn_data/raw_data_now \ + --qlib_dir ~/.qlib/qlib_data/cn_data \ + --date_field_name date \ + --symbol_field_name code -The package ships a `MANIFEST.json` (sha256) for integrity checks. See -[docs/data_release.md](docs/data_release.md) for the full layout. + # Collect calendar data + python scripts/data_collector/future_calendar_collector.py --qlib_dir ~/.qlib/qlib_data/cn_data/ --region cn -### Option B — fetch from Tushare yourself (needs token) -Set `TUSHARE_TOKEN` in `.env`, then run the fetch + build commands shown in -[Quick start](#quick-start). + # Download the CSI500/CSI300/CSI100 stock universe + python scripts/data_collector/cn_index/collector.py --index_name CSI500 --qlib_dir ~/.qlib/qlib_data/cn_data/ --method parse_instruments + ``` -## Documentation -| Doc | Topic | -|-----|-------| -| [docs/operations_manual.md](docs/operations_manual.md) | Full workflow (中文) | -| [docs/data_release.md](docs/data_release.md) | Open data package layout & restore | -| [docs/panel_fundamental_fields.md](docs/panel_fundamental_fields.md) | Panel fundamental columns | -| [docs/factor_metrics.md](docs/factor_metrics.md) | Factor evaluation metrics | +- Alternatively, stock data (out-dated) will be automatically downloaded to `~/.qlib/qlib_data/cn_data`. 
-## Factor sync (team) -| Action | Command | -|--------|---------| -| Export DSL after ingest | `uv run python scripts/sync_factor_exprs.py` | -| Commit | `git add artifacts/factorzoo/stock_1d/expressions/*.dsl` | -| Rebuild memmap after pull | `uv run python scripts/ingest_factors.py --expr-dir artifacts/factorzoo/stock_1d/expressions --overwrite` | +- You can modify backtest configuration files which are located at: + - Baseline: `alphaagent/scenarios/qlib/experiment/factor_template/conf.yaml` + - For Newly proposed factors: `alphaagent/scenarios/qlib/experiment/factor_template/conf_cn_combined.yaml` + - For changing train/val/test periods, first remove all cache files in `./git_ignore_folder` and `./pickle_cache`. + - For changing the market, remove cache files in `./git_ignore_folder`, `./pickle_cache`. Then, delete `daily_pv_all.h5` and `daily_pv_debug.h5` in directory `alphaagent/scenarios/qlib/experiment/factor_data_template/`. -**Label column**: fundamentals → `label_10d_close_to_close`; price/volume → `label_1d_close_to_close`. -## Factor mining (optional) +### ⚙️ Configuration +- For OpenAI compatible API, ensure both `OPENAI_BASE_URL` and `OPENAI_API_KEY` are configured in the `.env` file. +- `REASONING_MODEL` is used in the idea agent and factor agent, while `CHAT_MODEL` is for debugging factors and generating feedbacks. +- Slow-thinking models, such as o3-mini are preferred for the `REASONING_MODEL`. +- To run the project in a local environment (instead of Docker), add `USE_LOCAL=True` to the `.env` file. -```powershell -uv sync --extra mining -# .env: OPENAI_API_KEY, MODEL -uv run python scripts/factor_mining_agentscope.py --panel artifacts/panel/panel_1d.parquet --label-col label_10d_close_to_close -``` +### 🚀 Run AlphaAgent +- Run **AlphaAgent** based on [Qlib Backtesting Framework](http://github.com/microsoft/qlib). 
+ ```sh + alphaagent mine --potential_direction "" + ``` -## Tests - -```powershell -uv run pytest tests/ -q -``` +- Alternatively, run the following command + ```sh + dotenv run -- python alphaagent/app/qlib_rd_loop/factor_alphaagent.py --direction "" + ``` + After running the command, log out and log back in for the changes to take effect. -## Layout +- Multi-factor backtesting + ```sh + alphaagent backtest --factor_path "" + ``` + Your factors need to be stored in a `.csv` file. Here is an example: + ```csv + factor_name,factor_expression + MACD_Factor,"MACD($close)" + RSI_Factor,"RSI($close)" + ``` + + +- If you need to rerun the baseline results or update backtest configs, remove the cache folders: + ```sh + rm -r ./pickle_cache/* + rm -r ./git_ignore_folder/* + ``` + +### 🖥️ Monitor the Application Results +- You can run the following command for our demo program to see the run logs. Note that this entry point is deprecated. + ```sh + alphaagent ui --port 19899 --log_dir log/ + ``` + + + +### 📚 Citation +If you find this work helpful, please cite our paper: +```bibtex +@misc{tang2025alphaagentllmdrivenalphamining, + title={AlphaAgent: LLM-Driven Alpha Mining with Regularized Exploration to Counteract Alpha Decay}, + author={Ziyi Tang and Zechuan Chen and Jiarui Yang and Jiayao Mai and Yongsen Zheng and Keze Wang and Jinrui Chen and Liang Lin}, + year={2025}, + eprint={2502.16789}, + archivePrefix={arXiv}, + primaryClass={cs.CE}, + url={https://arxiv.org/abs/2502.16789}, +} ``` -seekalpha/ # core package (data, factor, mining, …) -scripts/ # CLI entry points -artifacts/ # local data & factorzoo (only expressions/*.dsl tracked in Git) -docs/ # manuals -``` - -## License & data - -Code in this repository is open source. Market and fundamental data are derived from [Tushare Pro](https://tushare.pro); redistribution of the data package must comply with Tushare's terms. Research use only.
- ---- diff --git a/alphaagent/app/CI/README.md b/alphaagent/app/CI/README.md new file mode 100755 index 00000000..73f08801 --- /dev/null +++ b/alphaagent/app/CI/README.md @@ -0,0 +1,38 @@ +# CI 检查 + +`.github/workflows/ci.yml`配置了提交时自动运行`Makefile`: 91~103行的命令,可以在这调整执行的命令 + +在`.env`中设置`USE_CHAT_CACHE=True`可以让第二次修复快一些 + +# Rules + +`pyproject.toml`中配置全局屏蔽的规则 +- ruff: `[tool.ruff.lint].ignore` +- mypy: `[tool.mypy]` + +## ruff rules +ruff rules 比较好修改, 大多可以自动修复 + +对于一些规则可以在代码中添加注释来局部屏蔽, 例如添加 `# noqa E234,ANN001` +遇到的不好修改的规则: +- 捕获异常时应该处理每一种异常,不应该统一当作`Exception`处理 +- `subprogress()` 调用命令应该先判断命令是否安全 +- ... + +规则列表: [ruff rules](https://docs.astral.sh/ruff/rules/) + +## mypy rules + +Mypy检查Python中类型标注, 常遇到需要修改结构/同时修改其他文件的情况, 自动修复效果不好 + +局部屏蔽: `# type: ignore` + +规则列表: [mypy rules](https://mypy.readthedocs.io/en/stable/error_code_list.html) + +# Optimization (Maybe) + +- 添加指定文件夹检查的功能 +- 增加一个修改选项: 调用`vim`, 用户直接修改此部分代码 +- 显示时把`Original Code`部分去掉, 直接在输出的表示修改的diff部分用`^^^^^^`在代码行下标注出错误位置,这样能更直观地观察错误修复情况 +- 当前为线性执行完所有修复后交给用户检查, 可修改成 后台多线程 / 进程处理修复的任务, 终端实时展示处理完的修复让用户检查 +- ... diff --git a/alphaagent/app/CI/prompts.yaml b/alphaagent/app/CI/prompts.yaml new file mode 100755 index 00000000..4499d54a --- /dev/null +++ b/alphaagent/app/CI/prompts.yaml @@ -0,0 +1,117 @@ +generate_lint_command_template: | + Please generate a command to lint or format a {language} repository. + Here are some information about different linting tools ```{linting_tools}``` +linting_system_prompt_template: | + You are a software engineer. You can write code to a high standard and are adept at solving {language} linting problems. +session_manual_template: | + There are some problems with the code you provided, please modify the code again according to the instruction and return the errors list you modified. + + Instruction: + {operation} + + Your response format should be like this: + + ```python + + ``` + + ```json + {{ + "errors": [": ", ...] 
+ }} + ``` +session_normal_template: | + Please modify this code snippet based on the lint info. Here is the code snippet: + ```Python + {code} + ``` + + -----Lint info----- + {lint_info} + ------------------- + + The lint info contains one or more errors. Different errors are separated by blank lines. Each error follows this format: + -----Lint info format----- + : + + + -------------------------- + The error code is an abbreviation set by the checker for ease of describing the error. The error position includes the relevant code around the error, and the helpful information provides useful information or possible fix method. + + Please simply reply the code after you fix all linting errors. You should be aware of the following: + 1. The indentation of the code should be consistent with the original code. + 2. You should just replace the code I provided you, which starts from line {start_line} to line {end_line}. + 3. You'll need to add line numbers to the modified code which starts from {start_lineno}. + 4. You don't need to add comments to explain your changes. + Please wrap your code with following format: + + ```python + + ``` +session_start_template: | + Please modify the Python code based on the lint info. + Due to the length of the code, I will first tell you the entire code, and then each time I ask a question, I will extract a portion of the code and tell you the error information contained in this code segment. + You need to fix the corresponding error in the code segment and return the code that can replace the corresponding code segment. + + The Python code is from a complete Python project file. Each line of the code is annotated with a line number, separated from the original code by three characters ("|"). The vertical bars are aligned. + Here is the complete code, please be prepared to fix it: + ```Python + {code} + ``` +suffix2language_template: | + Here are the files suffix in one code repo: {suffix}. 
+ Please tell me the programming language used in this repo and which language has linting-tools. + Your response should follow this template: + {{ + "languages": , + "languages_with_linting_tools": + }} +user_get_files_contain_lint_commands_template: | + You get a file list of a repository. Some files may contain linting rules or linting commands defined by repo authors. + Here are the file list: + ``` + {file_list} + ``` + + Please find all files that may correspond to linting from it. + Please respond with the following JSON template: + {{ + "files": , + }} +user_get_makefile_lint_commands_template: | + You get a Makefile which contains some linting rules. Here are its content: + ``` + {file_text} + ``` + Please find executable commands about linting from it. + Please respond with the following JSON template: + {{ + "commands": ["python -m xxx --params"...], + }} +user_template_for_code_snippet: | + Please modify the Python code based on the lint info. + -----Python Code----- + {code} + --------------------- + + -----Lint info----- + {lint_info} + ------------------- + + The Python code is a snippet from a complete Python project file. Each line of the code is annotated with a line number, separated from the original code by three characters ("|"). The vertical bars are aligned. + + The lint info contains one or more errors. Different errors are separated by blank lines. Each error follows this format: + -----Lint info format----- + : + + + -------------------------- + The error code is an abbreviation set by the checker for ease of describing the error. The error context includes the relevant code around the error, and the helpful information suggests possible fixes. + + Please simply reply the code after you fix all linting errors. + The code you return does not require line numbers, and should just replace the code I provided you, and does not require comments. 
+ Please wrap your code with following format: + + ```python + + ``` \ No newline at end of file diff --git a/alphaagent/app/CI/run.py b/alphaagent/app/CI/run.py new file mode 100755 index 00000000..7e6d35c6 --- /dev/null +++ b/alphaagent/app/CI/run.py @@ -0,0 +1,817 @@ +from __future__ import annotations + +import datetime +import json +import re +import shlex +import subprocess +import time +from collections import defaultdict +from dataclasses import dataclass +from difflib import ndiff +from pathlib import Path +from typing import Any, Literal + +import tree_sitter_python +from rich import print +from rich.panel import Panel +from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn +from rich.prompt import Prompt +from rich.rule import Rule +from rich.syntax import Syntax +from rich.table import Table +from rich.text import Text +from tree_sitter import Language, Node, Parser + +from alphaagent.core.evaluation import Evaluator +from alphaagent.core.evolving_agent import EvoAgent +from alphaagent.core.evolving_framework import ( + EvolvableSubjects, + EvolvingStrategy, + EvoStep, + Feedback, + Knowledge, +) +from alphaagent.core.prompts import Prompts +from alphaagent.oai.llm_utils import APIBackend + +py_parser = Parser(Language(tree_sitter_python.language())) +CI_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + +@dataclass +class CIError: + raw_str: str + file_path: Path | str + line: int + column: int + code: str + msg: str + hint: str + checker: Literal["ruff", "mypy"] + + def to_dict(self) -> dict[str, object]: + return self.__dict__ + + def __str__(self) -> str: + return f"{self.file_path}:{self.line}:{self.column}: {self.code} {self.msg}\n{self.hint}".strip() + + +@dataclass +class CIFeedback(Feedback): + errors: dict[str, list[CIError]] + + def statistics(self) -> dict[Literal["ruff", "mypy"], dict[str, int]]: + error_counts = defaultdict(lambda: defaultdict(int)) + for file_errors in self.errors.values(): + for error 
in file_errors: + error_counts[error.checker][error.code] += 1 + return error_counts + + +@dataclass +class FixRecord: + skipped_errors: list[CIError] + directly_fixed_errors: list[CIError] + manually_fixed_errors: list[CIError] + manual_instructions: dict[str, list[CIError]] + + def to_dict(self) -> dict[str, Any]: + return { + "skipped_errors": [error.to_dict() for error in self.skipped_errors], + "directly_fixed_errors": [error.to_dict() for error in self.directly_fixed_errors], + "manually_fixed_errors": [error.to_dict() for error in self.manually_fixed_errors], + "manual_instructions": { + key: [error.to_dict() for error in errors] for key, errors in self.manual_instructions.items() + }, + } + + +class CodeFile: + def __init__(self, path: Path | str) -> None: + self.path = Path(path) + self.load() + + @classmethod + def add_line_number(cls: CodeFile, code: list[str] | str, start: int = 1) -> list[str] | str: + code_lines = code.split("\n") if isinstance(code, str) else code + + lineno_width = len(str(start - 1 + len(code_lines))) + code_with_lineno = [] + for i, code_line in enumerate(code_lines): + code_with_lineno.append(f"{i+start: >{lineno_width}} | {code_line}") + + return code_with_lineno if isinstance(code, list) else "\n".join(code_with_lineno) + + @classmethod + def remove_line_number(cls: CodeFile, code: list[str] | str) -> list[str] | str: + code_lines = code.split("\n") if isinstance(code, str) else code + + try: + code_without_lineno = [re.split(r"\| ", code_line, maxsplit=1)[1] for code_line in code_lines] + except IndexError: + code_without_lineno = ["something went wrong when remove line numbers", *code_lines] + + return code_without_lineno if isinstance(code, list) else "\n".join(code_without_lineno) + + def load(self) -> None: + code = self.path.read_text(encoding="utf-8") + self.code_lines = code.split("\n") + + # line numbers + self.lineno = len(self.code_lines) + self.lineno_width = len(str(self.lineno)) + self.code_lines_with_lineno = 
self.add_line_number(self.code_lines) + + def get( + self, + start: int = 1, + end: int | None = None, + *, + add_line_number: bool = False, + return_list: bool = False, + ) -> list[str] | str: + """ + Retrieves a portion of the code lines. + line number starts from 1, return codes in [start, end]. + + Args: + start (int): The starting line number (inclusive). Defaults to 1. + end (int | None): The ending line number (inclusive). Defaults to None, which means the last line. + add_line_number (bool): Whether to include line numbers in the result. Defaults to False. + return_list (bool): Whether to return the result as a list of lines + or as a single string. Defaults to False. + + Returns: + list[str] | str: The code lines as a list of strings or as a + single string, depending on the value of `return_list`. + """ + start -= 1 + if start < 0: + start = 0 + end = self.lineno if end is None else end + if end <= start: + res = [] + res = self.code_lines_with_lineno[start:end] if add_line_number else self.code_lines[start:end] + + return res if return_list else "\n".join(res) + + def apply_changes(self, changes: list[tuple[int, int, str]]) -> None: + """ + Applies the given changes to the code lines. + + Args: + changes (List[Tuple[int, int, str]]): A list of tuples representing the changes to be applied. + Each tuple contains the start line number, end line number, and the new code to be inserted. 
+ + Returns: + None + """ + offset = 0 + for start, end, code in changes: + # starts from 1 --> starts from 0 + adjusted_start = max(start - 1, 0) + + new_code = code.split("\n") + self.code_lines[adjusted_start + offset : end + offset] = new_code + offset += len(new_code) - (end - adjusted_start) + + self.path.write_text("\n".join(self.code_lines), encoding="utf-8") + self.load() + + def get_code_blocks(self, max_lines: int = 30) -> list[tuple[int, int]]: + tree = py_parser.parse(bytes("\n".join(self.code_lines), "utf8")) + + def get_blocks_in_node(node: Node, max_lines: int) -> list[tuple[int, int]]: + if node.type == "assignment": + return [(node.start_point.row, node.end_point.row + 1)] + + blocks: list[tuple[int, int]] = [] + block: tuple[int, int] | None = None # [start, end), line number starts from 0 + + for child in node.children: + if child.end_point.row + 1 - child.start_point.row > max_lines: + if block is not None: + blocks.append(block) + block = None + blocks.extend(get_blocks_in_node(child, max_lines)) + elif block is None: + block = (child.start_point.row, child.end_point.row + 1) + elif child.end_point.row + 1 - block[0] <= max_lines: + block = (block[0], child.end_point.row + 1) + else: + blocks.append(block) + block = (child.start_point.row, child.end_point.row + 1) + + if block is not None: + blocks.append(block) + + return blocks + + # change line number to start from 1 and [start, end) to [start, end] + return [(a + 1, b) for a, b in get_blocks_in_node(tree.root_node, max_lines)] + + def __str__(self) -> str: + return f"{self.path}" + + +class Repo(EvolvableSubjects): + def __init__(self, project_path: Path | str, excludes: list[Path] | None = None, **kwargs: Any) -> None: + if excludes is None: + excludes = [] + self.params = kwargs + self.project_path = Path(project_path) + + excludes = [self.project_path / path for path in excludes] + + git_ignored_output = subprocess.check_output( + ["/usr/bin/git", "status", "--ignored", "-s"], # noqa: 
S603 + cwd=str(self.project_path), + stderr=subprocess.STDOUT, + text=True, + ) + git_ignored_files = [ + (self.project_path / Path(line[3:])).resolve() + for line in git_ignored_output.split("\n") + if line.startswith("!!") + ] + + excludes.extend(git_ignored_files) + + files = [ + file + for file in self.project_path.glob("**/*") + if file.is_file() + and not any(str(file).startswith(str(path)) for path in excludes) + and ".git/" not in str(file) + and file.suffix == ".py" + ] + self.files = {file: CodeFile(file) for file in files} + + self.fix_records: dict[str, FixRecord] | None = None + + +@dataclass +class RuffRule: + """ + Example: + { + "name": "missing-trailing-comma", + "code": "COM812", + "linter": "flake8-commas", + "summary": "Trailing comma missing", + "message_formats": [ + "Trailing comma missing" + ], + "fix": "Fix is always available.", + "explanation": "...", + "preview": false + } + """ + + name: str + code: str + linter: str + summary: str + message_formats: list[str] + fix: str + explanation: str + preview: bool + + +class RuffEvaluator(Evaluator): + """ + The error message are generated by command + """ + + def __init__(self, command: str | None = None) -> None: + if command is None: + self.command = "ruff check . 
--output-format full" + else: + self.command = command + + @staticmethod + def explain_rule(error_code: str) -> RuffRule: + explain_command = f"ruff rule {error_code} --output-format json" + try: + out = subprocess.check_output( + shlex.split(explain_command), # noqa: S603 + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as e: + out = e.output + + return RuffRule(**json.loads(out)) + + def evaluate(self, evo: Repo, **kwargs: dict) -> CIFeedback: + """Simply run ruff to get the feedbacks.""" + try: + out = subprocess.check_output( + shlex.split(self.command), # noqa: S603 + cwd=evo.project_path, + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as e: + out = e.output + + """ruff output format: + rdagent/cli.py:9:5: ANN201 Missing return type annotation for public function `main` + | + 9 | def main(prompt=None): + | ^^^^ ANN201 + 10 | load_dotenv(verbose=True, override=True) + 11 | wm = WorkflowManager() + | + = help: Add return type annotation: `None` + """ + + # extract error info + pattern = r"(([^\n]*):(\d+):(\d+): (\w+) ([^\n]*)\n(.*?))\n\n" + matches = re.findall(pattern, out, re.DOTALL) + + errors = defaultdict(list) + + for match in matches: + raw_str, file_path, line_number, column_number, error_code, error_message, error_hint = match + + # TODO @bowen: filter these files when running the check command + if evo.project_path / Path(file_path) not in evo.files: + continue + error = CIError( + raw_str=raw_str, + file_path=file_path, + line=int(line_number), + column=int(column_number), + code=error_code, + msg=error_message, + hint=error_hint, + checker="ruff", + ) + + errors[file_path].append(error) + + return CIFeedback(errors=errors) + + +class MypyEvaluator(Evaluator): + def __init__(self, command: str | None = None) -> None: + if command is None: + self.command = "mypy . 
--pretty --no-error-summary --show-column-numbers" + else: + self.command = command + + def evaluate(self, evo: Repo, **kwargs: dict) -> CIFeedback: + try: + out = subprocess.check_output( + shlex.split(self.command), # noqa: S603 + cwd=evo.project_path, + stderr=subprocess.STDOUT, + text=True, + ) + except subprocess.CalledProcessError as e: + out = e.output + + errors = defaultdict(list) + + out = re.sub(r"([^\n]*?:\d+:\d+): error:", r"\n\1: error:", out) + out += "\n" + pattern = r"(([^\n]*?):(\d+):(\d+): error:(.*?)\s\[([\w-]*?)\]\s(.*?))\n\n" + for match in re.findall(pattern, out, re.DOTALL): + raw_str, file_path, line_number, column_number, error_message, error_code, error_hint = match + error_message = error_message.strip().replace("\n", " ") + if re.match(r".*[^\n]*?:\d+:\d+: note:.*", error_hint, flags=re.DOTALL) is not None: + error_hint_position = re.split( + pattern=r"[^\n]*?:\d+:\d+: note:", + string=error_hint, + maxsplit=1, + flags=re.DOTALL, + )[0] + error_hint_help = re.findall(r"^.*?:\d+:\d+: note: (.*)$", error_hint, flags=re.MULTILINE) + error_hint_help = "\n".join(error_hint_help) + error_hint = f"{error_hint_position}\nHelp:\n{error_hint_help}" + + if evo.project_path / Path(file_path) not in evo.files: + continue + error = CIError( + raw_str=raw_str, + file_path=file_path, + line=int(line_number), + column=int(column_number), + code=error_code, + msg=error_message, + hint=error_hint, + checker="mypy", + ) + + errors[file_path].append(error) + + return CIFeedback(errors=errors) + + +class MultiEvaluator(Evaluator): + def __init__(self, *evaluators: Evaluator) -> None: + self.evaluators = evaluators + + def evaluate(self, evo: Repo, **kwargs: dict) -> CIFeedback: + all_errors = defaultdict(list) + for evaluator in self.evaluators: + feedback: CIFeedback = evaluator.evaluate(evo, **kwargs) + for file_path, errors in feedback.errors.items(): + all_errors[file_path].extend(errors) + + # sort errors by position + for file_path in all_errors: + 
all_errors[file_path].sort(key=lambda x: (x.line, x.column)) + + return CIFeedback(errors=all_errors) + + +class CIEvoStr(EvolvingStrategy): + def evolve( # noqa: C901, PLR0912, PLR0915 + self, + evo: Repo, + evolving_trace: list[EvoStep] | None = None, + knowledge_l: list[Knowledge] | None = None, + **kwargs: dict, + ) -> Repo: + @dataclass + class CodeFixGroup: + start_line: int + end_line: int + errors: list[CIError] + session_id: str + responses: list[str] + + api = APIBackend() + system_prompt = CI_prompts["linting_system_prompt_template"].format(language="Python") + + if len(evolving_trace) > 0: + last_feedback: CIFeedback = evolving_trace[-1].feedback + + # print statistics + checker_error_counts = { + checker: sum(c_statistics.values()) for checker, c_statistics in last_feedback.statistics().items() + } + print( + f"Found [red]{sum(checker_error_counts.values())}[/red] errors, " + "including: " + + ", ".join( + f"[red]{count}[/red] [magenta]{checker}[/magenta] errors" + for checker, count in checker_error_counts.items() + ), + ) + + fix_records: dict[str, FixRecord] = defaultdict( + lambda: FixRecord([], [], [], defaultdict(list)), + ) + + # Group errors by code blocks + fix_groups: dict[str, list[CodeFixGroup]] = defaultdict(list) + changes: dict[str, list[tuple[int, int, str]]] = defaultdict(list) + for file_path, errors in last_feedback.errors.items(): + file = evo.files[evo.project_path / Path(file_path)] + + # check if the file needs to add `from __future__ import annotations` + # need to add rules here for different languages/tools + # TODO @bowen: current way of handling errors like 'Add import statement' may be not good + for error in errors: + if error.code in ("FA100", "FA102"): + changes[file_path].append((1, 1, "from __future__ import annotations\n")) + break + + # Group errors by code blocks + error_p = 0 + for start_line, end_line in file.get_code_blocks(max_lines=30): + group_errors: list[CIError] = [] + + # collect errors in the same code 
block + while error_p < len(errors) and start_line <= errors[error_p].line <= end_line: + if errors[error_p].code not in ("FA100", "FA102"): + group_errors.append(errors[error_p]) + error_p += 1 + + # process errors in the code block + if group_errors: + session = api.build_chat_session(session_system_prompt=system_prompt) + session_id = session.get_conversation_id() + session.build_chat_completion( + CI_prompts["session_start_template"].format(code=file.get(add_line_number=True)), + ) + + fix_groups[file_path].append( + CodeFixGroup(start_line, end_line, group_errors, session_id, []), + ) + + # Fix errors in each code block + with Progress(SpinnerColumn(), *Progress.get_default_columns(), TimeElapsedColumn()) as progress: + group_counts = sum([len(groups) for groups in fix_groups.values()]) + task_id = progress.add_task("Fixing repo...", total=group_counts) + + for file_path in fix_groups: + file = evo.files[evo.project_path / Path(file_path)] + for code_fix_g in fix_groups[file_path]: + start_line = code_fix_g.start_line + end_line = code_fix_g.end_line + group_errors = code_fix_g.errors + code_snippet_with_lineno = file.get( + start_line, + end_line, + add_line_number=True, + return_list=False, + ) + errors_str = "\n\n".join(str(e) for e in group_errors) + + # ask LLM to repair current code snippet + user_prompt = CI_prompts["session_normal_template"].format( + code=code_snippet_with_lineno, + lint_info=errors_str, + start_line=start_line, + end_line=end_line, + start_lineno=start_line, + ) + + session = api.build_chat_session(conversation_id=code_fix_g.session_id) + res = session.build_chat_completion(user_prompt) + code_fix_g.responses.append(res) + progress.update( + task_id, + description=f"[green]Fixing[/green] [cyan]{file_path}[/cyan]...", + advance=1, + ) + + # Manual inspection and repair + for file_path in last_feedback.errors: + print( + Rule( + f"[bright_blue]Checking[/bright_blue] [cyan]{file_path}[/cyan]", + style="bright_blue", + align="left", + 
characters=".", + ), + ) + + file = evo.files[evo.project_path / Path(file_path)] + + # generate changes + for group_id, code_fix_g in enumerate(fix_groups[file_path], start=1): + start_line, end_line, group_errors = code_fix_g.start_line, code_fix_g.end_line, code_fix_g.errors + session = api.build_chat_session(conversation_id=code_fix_g.session_id) + + print(f"[yellow]Checking part {group_id}...[/yellow]") + + front_context = file.get(start_line - 3, start_line - 1) + rear_context = file.get(end_line + 1, end_line + 3) + front_context_with_lineno = file.get(start_line - 3, start_line - 1, add_line_number=True) + rear_context_with_lineno = file.get(end_line + 1, end_line + 3, add_line_number=True) + + code_snippet_with_lineno = file.get(start_line, end_line, add_line_number=True, return_list=False) + + # print errors + printed_errors_str = "\n".join( + [ + f"[{error.checker}] {error.line: >{file.lineno_width}}:{error.column: <4}" + f" {error.code} {error.msg}" + for error in group_errors + ], + ) + print( + Panel.fit( + Syntax(printed_errors_str, lexer="python", background_color="default"), + title=f"{len(group_errors)} Errors", + ), + ) + + # print original code + table = Table(show_header=False, box=None) + table.add_column() + table.add_row(Syntax(front_context_with_lineno, lexer="python", background_color="default")) + table.add_row(Rule(style="dark_orange")) + table.add_row(Syntax(code_snippet_with_lineno, lexer="python", background_color="default")) + table.add_row(Rule(style="dark_orange")) + table.add_row(Syntax(rear_context_with_lineno, lexer="python", background_color="default")) + print(Panel.fit(table, title="Original Code")) + + res = code_fix_g.responses[0] + code_snippet_lines = file.get(start_line, end_line, add_line_number=False, return_list=True) + + while True: + try: + new_code = re.search(r".*```[Pp]ython\n(.*?)\n```.*", res, re.DOTALL).group(1) + except (re.error, AttributeError) as exc: + print(f"[red]Error when extract codes[/red]:\n 
{res}\nException: {exc}") + try: + fixed_errors_info = re.search(r".*```[Jj]son\n(.*?)\n```.*", res, re.DOTALL).group(1) + fixed_errors_info = json.loads(fixed_errors_info) + except AttributeError: + fixed_errors_info = None + except (json.JSONDecodeError, re.error) as exc: + fixed_errors_info = None + print(f"[red]Error when extracting fixed_errors[/red]: {exc}") + + new_code = CodeFile.remove_line_number(new_code) + + # print repair status (code diff) + diff = ndiff(code_snippet_lines, new_code.split("\n")) + + # add 2 spaces to align with diff format + front_context = re.sub(r"^", " ", front_context, flags=re.MULTILINE) + rear_context = re.sub(r"^", " ", rear_context, flags=re.MULTILINE) + + table = Table(show_header=False, box=None) + table.add_column() + table.add_column() + table.add_column() + table.add_row("", "", Syntax(front_context, lexer="python", background_color="default")) + table.add_row("", "", Rule(style="dark_orange")) + diff_original_lineno = start_line + diff_new_lineno = start_line + for i in diff: + if i.startswith("+"): + table.add_row( + "", + Text(str(diff_new_lineno), style="green bold"), + Text(i, style="green"), + ) + diff_new_lineno += 1 + elif i.startswith("-"): + table.add_row( + Text(str(diff_original_lineno), style="red bold"), + "", + Text(i, style="red"), + ) + diff_original_lineno += 1 + elif i.startswith("?"): + table.add_row("", "", Text(i, style="yellow")) + else: + table.add_row( + str(diff_original_lineno), + str(diff_new_lineno), + Syntax(i, lexer="python", background_color="default"), + ) + diff_original_lineno += 1 + diff_new_lineno += 1 + table.add_row("", "", Rule(style="dark_orange")) + table.add_row("", "", Syntax(rear_context, lexer="python", background_color="default")) + print(Panel.fit(table, title="Repair Status")) + + operation = Prompt.ask( + "Input your operation [ [red]([bold]s[/bold])kip[/red] / " + "[green]([bold]a[/bold])pply[/green] / " + "[yellow]manual instruction[/yellow] ]", + ) + print() + if 
operation in ("s", "skip"): + fix_records[file_path].skipped_errors.extend(group_errors) + break + if operation in ("a", "apply"): + if fixed_errors_info: + fixed_errors_str = "\n".join(fixed_errors_info["errors"]) + for error in group_errors: + if f"{error.line}:{error.column}" in fixed_errors_str: + fix_records[file_path].manually_fixed_errors.append(error) + else: + fix_records[file_path].skipped_errors.append(error) + else: + fix_records[file_path].directly_fixed_errors.extend(group_errors) + + changes[file_path].append((start_line, end_line, new_code)) + break + + fix_records[file_path].manual_instructions[operation].extend(group_errors) + res = session.build_chat_completion( + CI_prompts["session_manual_template"].format(operation=operation), + ) + code_fix_g.responses.append(res) + + # apply changes + file.apply_changes(changes[file_path]) + + evo.fix_records = fix_records + + return evo + + +class CIEvoAgent(EvoAgent): + def __init__(self, evolving_strategy: CIEvoStr) -> None: + super().__init__(max_loop=1, evolving_strategy=evolving_strategy) + self.evolving_trace = [] + + def multistep_evolve(self, evo: Repo, eva: Evaluator) -> Repo: + evo = self.evolving_strategy.evolve( + evo=evo, + evolving_trace=self.evolving_trace, + ) + + self.evolving_trace.append(EvoStep(evo, feedback=eva.evaluate(evo))) + + return evo + + +DIR = None +while DIR is None or not DIR.exists(): + DIR = Prompt.ask("Please input the [cyan]project directory[/cyan]") + DIR = Path(DIR) + +excludes = Prompt.ask( + "Input the [dark_orange]excluded directories[/dark_orange] (relative to " + "[cyan]project path[/cyan] and separated by whitespace)", +).split(" ") +excludes = [Path(exclude.strip()) for exclude in excludes if exclude.strip() != ""] + +start_time = time.time() +start_timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%m%d%H%M") + +repo = Repo(DIR, excludes=excludes) +# evaluator = MultiEvaluator(MypyEvaluator(), RuffEvaluator()) +evaluator = RuffEvaluator() +estr = 
CIEvoStr() +ea = CIEvoAgent(estr) +ea.multistep_evolve(repo, evaluator) +while True: + print(Rule(f"Round {len(ea.evolving_trace)} repair", style="blue")) + repo: Repo = ea.multistep_evolve(repo, evaluator) + + fix_records = repo.fix_records + filename = f"{DIR.name}_{start_timestamp}_round_{len(ea.evolving_trace)}_fix_records.json" + with Path(filename).open("w") as file: + json.dump({k: v.to_dict() for k, v in fix_records.items()}, file, indent=4) + + # Count the number of skipped errors + skipped_errors_count = 0 + directly_fixed_errors_count = 0 + manually_fixed_errors_count = 0 + skipped_errors_code_count = defaultdict(int) + directly_fixed_errors_code_count = defaultdict(int) + manually_fixed_errors_code_count = defaultdict(int) + code_message = defaultdict(str) + for record in fix_records.values(): + skipped_errors_count += len(record.skipped_errors) + directly_fixed_errors_count += len(record.directly_fixed_errors) + manually_fixed_errors_count += len(record.manually_fixed_errors) + for error in record.skipped_errors: + skipped_errors_code_count[error.code] += 1 + code_message[error.code] = error.msg + for error in record.directly_fixed_errors: + directly_fixed_errors_code_count[error.code] += 1 + code_message[error.code] = error.msg + for error in record.manually_fixed_errors: + manually_fixed_errors_code_count[error.code] += 1 + code_message[error.code] = error.msg + + skipped_errors_statistics = "" + directly_fixed_errors_statistics = "" + manually_fixed_errors_statistics = "" + for code, count in sorted(skipped_errors_code_count.items(), key=lambda x: x[1], reverse=True): + skipped_errors_statistics += f"{count: >5} {code: >10} {code_message[code]}\n" + for code, count in sorted(directly_fixed_errors_code_count.items(), key=lambda x: x[1], reverse=True): + directly_fixed_errors_statistics += f"{count: >5} {code: >10} {code_message[code]}\n" + for code, count in sorted(manually_fixed_errors_code_count.items(), key=lambda x: x[1], reverse=True): + 
manually_fixed_errors_statistics += f"{count: >5} {code: >10} {code_message[code]}\n" + + # Create a table to display the counts and ratios + table = Table(title="Error Fix Statistics") + table.add_column("Type") + table.add_column("Statistics") + table.add_column("Count") + table.add_column("Ratio") + + total_errors_count = skipped_errors_count + directly_fixed_errors_count + manually_fixed_errors_count + table.add_row("Total Errors", "", Text(str(total_errors_count), style="cyan"), "") + table.add_row( + Text("Skipped Errors", style="red"), + skipped_errors_statistics, + Text(str(skipped_errors_count), style="red"), + Text(f"{skipped_errors_count / total_errors_count:.2%}"), + style="red", + ) + table.add_row( + Text("Directly Fixed Errors", style="green"), + directly_fixed_errors_statistics, + Text(str(directly_fixed_errors_count), style="green"), + Text(f"{directly_fixed_errors_count / total_errors_count:.2%}"), + style="green", + ) + table.add_row( + Text("Manually Fixed Errors", style="yellow"), + manually_fixed_errors_statistics, + Text(str(manually_fixed_errors_count), style="yellow"), + Text(f"{manually_fixed_errors_count / total_errors_count:.2%}"), + style="yellow", + ) + + print(table) + operation = Prompt.ask("Start next round? (y/n)", choices=["y", "n"]) + if operation == "n": + break + + +end_time = time.time() +execution_time = end_time - start_time +print(f"Execution time: {execution_time} seconds") + +""" Please commit it by hand... 
and then run the next round +git add -u +git commit --no-verify -v +""" diff --git a/alphaagent/app/benchmark/factor/analysis.py b/alphaagent/app/benchmark/factor/analysis.py new file mode 100755 index 00000000..e91ab0a8 --- /dev/null +++ b/alphaagent/app/benchmark/factor/analysis.py @@ -0,0 +1,225 @@ +import json +import pickle +from pathlib import Path + +import fire +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns + +from alphaagent.components.benchmark.conf import BenchmarkSettings +from alphaagent.components.benchmark.eval_method import FactorImplementEval + + +class BenchmarkAnalyzer: + def __init__(self, settings, only_correct_format=False): + self.settings = settings + self.index_map = self.load_index_map() + self.only_correct_format = only_correct_format + + def load_index_map(self): + index_map = {} + with open(self.settings.bench_data_path, "r") as file: + factor_dict = json.load(file) + for factor_name, data in factor_dict.items(): + index_map[factor_name] = (factor_name, data["Category"], data["Difficulty"]) + return index_map + + def load_data(self, file_path): + file_path = Path(file_path) + if not (file_path.is_file() and file_path.suffix == ".pkl"): + raise ValueError("Invalid file path") + + with file_path.open("rb") as f: + res = pickle.load(f) + + return res + + def process_results(self, results): + final_res = {} + for experiment, path in results.items(): + data = self.load_data(path) + summarized_data = FactorImplementEval.summarize_res(data) + processed_data = self.analyze_data(summarized_data) + final_res[experiment] = processed_data.iloc[-1, :] + return final_res + + def reformat_index(self, display_df): + """ + reform the results from + + .. code-block:: python + + success rate + High_Beta_Factor 0.2 + + to + + .. 
code-block:: python + + success rate + Category Difficulty Factor + 量价 Hard High_Beta_Factor 0.2 + + """ + new_idx = [] + display_df = display_df[display_df.index.isin(self.index_map.keys())] + for idx in display_df.index: + new_idx.append(self.index_map[idx]) + + display_df.index = pd.MultiIndex.from_tuples( + new_idx, + names=["Factor", "Category", "Difficulty"], + ) + display_df = display_df.swaplevel(0, 2).swaplevel(0, 1).sort_index(axis=0) + + return display_df.sort_index( + key=lambda x: [{"Easy": 0, "Medium": 1, "Hard": 2, "New Discovery": 3}.get(i, i) for i in x] + ) + + def result_all_key_order(self, x): + order_v = [] + for i in x: + order_v.append( + { + "Avg Run SR": 0, + "Avg Format SR": 1, + "Avg Correlation": 2, + "Max Correlation": 3, + "Max Accuracy": 4, + "Avg Accuracy": 5, + }.get(i, i), + ) + return order_v + + def analyze_data(self, sum_df): + index = [ + "FactorSingleColumnEvaluator", + "FactorRowCountEvaluator", + "FactorIndexEvaluator", + "FactorEqualValueRatioEvaluator", + "FactorCorrelationEvaluator", + "run factor error", + ] + sum_df = sum_df.reindex(index, axis=0) + sum_df_clean = sum_df.T.groupby(level=0).apply(lambda x: x.reset_index(drop=True)) + + run_error = sum_df_clean["run factor error"].unstack().T.fillna(False).astype(bool) + succ_rate = ~run_error + succ_rate = succ_rate.mean(axis=0).to_frame("success rate") + + succ_rate_f = self.reformat_index(succ_rate) + + # if it rasis Error when running the evaluator, we will get NaN + # Running failures are reguarded to zero score. 
+ format_issue = sum_df_clean[["FactorRowCountEvaluator", "FactorIndexEvaluator"]].apply( + lambda x: np.mean(x.fillna(0.0)), axis=1 + ) + format_succ_rate = format_issue.unstack().T.mean(axis=0).to_frame("success rate") + format_succ_rate_f = self.reformat_index(format_succ_rate) + + corr = sum_df_clean["FactorCorrelationEvaluator"].fillna(0.0) + if self.only_correct_format: + corr = corr.loc[format_issue == 1.0] + + corr_res = corr.unstack().T.mean(axis=0).to_frame("corr(only success)") + corr_res = self.reformat_index(corr_res) + + corr_max = corr.unstack().T.max(axis=0).to_frame("corr(only success)") + corr_max_res = self.reformat_index(corr_max) + + value_max = sum_df_clean["FactorEqualValueRatioEvaluator"] + value_max = value_max.unstack().T.max(axis=0).to_frame("max_value") + value_max_res = self.reformat_index(value_max) + + value_avg = ( + (sum_df_clean["FactorEqualValueRatioEvaluator"] * format_issue) + .unstack() + .T.mean(axis=0) + .to_frame("avg_value") + ) + value_avg_res = self.reformat_index(value_avg) + + result_all = pd.concat( + { + "Avg Correlation": corr_res.iloc[:, 0], + "Avg Format SR": format_succ_rate_f.iloc[:, 0], + "Avg Run SR": succ_rate_f.iloc[:, 0], + "Max Correlation": corr_max_res.iloc[:, 0], + "Max Accuracy": value_max_res.iloc[:, 0], + "Avg Accuracy": value_avg_res.iloc[:, 0], + }, + axis=1, + ) + + df = result_all.sort_index(axis=1, key=self.result_all_key_order).sort_index(axis=0) + print(df) + + print() + print(df.groupby("Category").mean()) + + print() + print(df.mean()) + + # Calculate the mean of each column + mean_values = df.fillna(0.0).mean() + mean_df = pd.DataFrame(mean_values).T + + # Assign the MultiIndex to the DataFrame + mean_df.index = pd.MultiIndex.from_tuples([("-", "-", "Average")], names=["Factor", "Category", "Difficulty"]) + + # Append the mean values to the end of the dataframe + df_w_mean = pd.concat([df, mean_df]).astype("float") + + return df_w_mean + + +class Plotter: + @staticmethod + def 
change_fs(font_size): + plt.rc("font", size=font_size) + plt.rc("axes", titlesize=font_size) + plt.rc("axes", labelsize=font_size) + plt.rc("xtick", labelsize=font_size) + plt.rc("ytick", labelsize=font_size) + plt.rc("legend", fontsize=font_size) + plt.rc("figure", titlesize=font_size) + + @staticmethod + def plot_data(data, file_name, title): + plt.figure(figsize=(10, 10)) + plt.ylabel("Value") + colors = ["#3274A1", "#E1812C", "#3A923A", "#C03D3E"] + plt.bar(data["a"], data["b"], color=colors, capsize=5) + for idx, row in data.iterrows(): + plt.text(idx, row["b"] + 0.01, f"{row['b']:.2f}", ha="center", va="bottom") + plt.suptitle(title, y=0.98) + plt.xticks(rotation=45) + plt.ylim(0, 1) + plt.tight_layout() + plt.savefig(file_name) + + +def main( + path="git_ignore_folder/eval_results/res_promptV220240724-060037.pkl", + round=1, + title="Comparison of Different Methods", + only_correct_format=False, +): + settings = BenchmarkSettings() + benchmark = BenchmarkAnalyzer(settings, only_correct_format=only_correct_format) + results = { + f"{round} round experiment": path, + } + final_results = benchmark.process_results(results) + final_results_df = pd.DataFrame(final_results) + + Plotter.change_fs(20) + plot_data = final_results_df.drop(["Max Accuracy", "Avg Accuracy"], axis=0).T + plot_data = plot_data.reset_index().melt("index", var_name="a", value_name="b") + Plotter.plot_data(plot_data, "./comparison_plot.png", title) + + +if __name__ == "__main__": + fire.Fire(main) diff --git a/alphaagent/app/benchmark/factor/eval.py b/alphaagent/app/benchmark/factor/eval.py new file mode 100755 index 00000000..4c6a8504 --- /dev/null +++ b/alphaagent/app/benchmark/factor/eval.py @@ -0,0 +1,35 @@ +from alphaagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING +from alphaagent.components.benchmark.conf import BenchmarkSettings +from alphaagent.components.benchmark.eval_method import FactorImplementEval +from alphaagent.core.scenario import Scenario +from alphaagent.core.utils 
import import_class +from alphaagent.log import logger +from alphaagent.scenarios.qlib.factor_experiment_loader.json_loader import ( + FactorTestCaseLoaderFromJsonFile, +) + +if __name__ == "__main__": + # 1.read the settings + bs = BenchmarkSettings() + + # 2.read and prepare the eval_data + test_cases = FactorTestCaseLoaderFromJsonFile().load(bs.bench_data_path) + + # 3.declare the method to be tested and pass the arguments. + + scen: Scenario = import_class(FACTOR_PROP_SETTING.scen)() + generate_method = import_class(bs.bench_method_cls)(scen=scen, **bs.bench_method_extra_kwargs) + # 4.declare the eval method and pass the arguments. + eval_method = FactorImplementEval( + method=generate_method, + test_cases=test_cases, + scen=scen, + catch_eval_except=True, + test_round=bs.bench_test_round, + ) + + # 5.run the eval + res = eval_method.eval(eval_method.develop()) + + # 6.save the result + logger.log_object(res) diff --git a/alphaagent/app/benchmark/model/README.md b/alphaagent/app/benchmark/model/README.md new file mode 100755 index 00000000..dabb7e6d --- /dev/null +++ b/alphaagent/app/benchmark/model/README.md @@ -0,0 +1,30 @@ +# Tasks + +## Task Extraction +From paper to task. +```bash +# python rdagent/app/model_implementation/task_extraction.py +# It may based on rdagent/document_reader/document_reader.py +python rdagent/components/task_implementation/model_implementation/task_extraction.py ./PaperImpBench/raw_paper/ +``` + +## Complete workflow +From paper to implementation +``` bash +# Similar to +# rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py +``` + +## Paper benchmark +```bash +# TODO: it does not work well now. 
+python rdagent/app/model_implementation/eval.py +``` + +TODO: +- Create reasonable benchmark + - with uniform input + - manually create task +- Create reasonable evaluation metrics + +## Evolving diff --git a/alphaagent/app/benchmark/model/eval.py b/alphaagent/app/benchmark/model/eval.py new file mode 100755 index 00000000..1299b508 --- /dev/null +++ b/alphaagent/app/benchmark/model/eval.py @@ -0,0 +1,42 @@ +from pathlib import Path + +from alphaagent.components.coder.model_coder import ModelCoSTEER +from alphaagent.components.loader.task_loader import ModelTaskLoaderJson, ModelWsLoader +from alphaagent.scenarios.qlib.experiment.model_experiment import ( + QlibModelExperiment, + QlibModelScenario, +) + +if __name__ == "__main__": + DIRNAME = Path(__file__).absolute().resolve().parent + + from alphaagent.components.coder.model_coder.benchmark.eval import ModelImpValEval + from alphaagent.components.coder.model_coder.one_shot import ModelCodeWriter + + bench_folder = DIRNAME.parent.parent / "components" / "coder" / "model_coder" / "benchmark" + mtl = ModelTaskLoaderJson(str(bench_folder / "model_dict.json")) + + task_l = mtl.load() + + task_l = [t for t in task_l if t.name == "A-DGN"] # FIXME: other models does not work well + + model_experiment = QlibModelExperiment(sub_tasks=task_l) + # mtg = ModelCodeWriter(scen=QlibModelScenario()) + mtg = ModelCoSTEER(scen=QlibModelScenario()) + + model_experiment = mtg.develop(model_experiment) + + # TODO: Align it with the benchmark framework after @wenjun's refine the evaluation part. + # Currently, we just handcraft a workflow for fast evaluation. 
+ + mil = ModelWsLoader(bench_folder / "gt_code") + + mie = ModelImpValEval() + # Evaluation: + eval_l = [] + for impl in model_experiment.sub_workspace_list: + print(impl.target_task) + gt_impl = mil.load(impl.target_task) + eval_l.append(mie.evaluate(gt_impl, impl)) + + print(eval_l) diff --git a/alphaagent/app/cli.py b/alphaagent/app/cli.py new file mode 100755 index 00000000..2c812be9 --- /dev/null +++ b/alphaagent/app/cli.py @@ -0,0 +1,49 @@ +""" +CLI entrance for all alphaagent application. + +This will +- make alphaagent a nice entry and +- autoamtically load dotenv +""" + +from dotenv import load_dotenv + +load_dotenv(".env") +# 1) Make sure it is at the beginning of the script so that it will load dotenv before initializing BaseSettings. +# 2) The ".env" argument is necessary to make sure it loads `.env` from the current directory. + +import subprocess +from importlib.resources import path as rpath + +import fire +from alphaagent.app.qlib_rd_loop.factor_mining import main as mine +from alphaagent.app.qlib_rd_loop.factor_backtest import main as backtest +from alphaagent.app.utils.health_check import health_check +from alphaagent.app.utils.info import collect_info + + +def ui(port=19899, log_dir="./log", debug=False): + """ + start web app to show the log traces. 
+ """ + with rpath("alphaagent.log.ui", "app.py") as app_path: + cmds = ["streamlit", "run", app_path, f"--server.port={port}"] + if log_dir or debug: + cmds.append("--") + if log_dir: + cmds.append(f"--log_dir={log_dir}") + if debug: + cmds.append("--debug") + subprocess.run(cmds) + + +def app(): + fire.Fire( + { + "mine": mine, + "backtest": backtest, + "ui": ui, + "health_check": health_check, + "collect_info": collect_info, + } + ) diff --git a/alphaagent/app/qlib_rd_loop/conf.py b/alphaagent/app/qlib_rd_loop/conf.py new file mode 100755 index 00000000..36410bfb --- /dev/null +++ b/alphaagent/app/qlib_rd_loop/conf.py @@ -0,0 +1,129 @@ +from alphaagent.components.workflow.conf import BasePropSetting +from alphaagent.core.conf import ExtendedSettingsConfigDict + + +class ModelBasePropSetting(BasePropSetting): + model_config = ExtendedSettingsConfigDict(env_prefix="QLIB_MODEL_", protected_namespaces=()) + + # 1) override base settings + scen: str = "alphaagent.scenarios.qlib.experiment.model_experiment.QlibModelScenario" + """Scenario class for Qlib Model""" + + hypothesis_gen: str = "alphaagent.scenarios.qlib.proposal.model_proposal.QlibModelHypothesisGen" + """Hypothesis generation class""" + + hypothesis2experiment: str = "alphaagent.scenarios.qlib.proposal.model_proposal.QlibModelHypothesis2Experiment" + """Hypothesis to experiment class""" + + coder: str = "alphaagent.scenarios.qlib.developer.model_coder.QlibModelCoSTEER" + """Coder class""" + + runner: str = "alphaagent.scenarios.qlib.developer.model_runner.QlibModelRunner" + """Runner class""" + + summarizer: str = "alphaagent.scenarios.qlib.developer.feedback.QlibModelHypothesisExperiment2Feedback" + """Summarizer class""" + + evolving_n: int = 10 + """Number of evolutions""" + + +class FactorBasePropSetting(BasePropSetting): + model_config = ExtendedSettingsConfigDict(env_prefix="QLIB_FACTOR_", protected_namespaces=()) + + # 1) override base settings + scen: str = 
"alphaagent.scenarios.qlib.experiment.factor_experiment.QlibFactorScenario" + """Scenario class for Qlib Factor""" + + hypothesis_gen: str = "alphaagent.scenarios.qlib.proposal.factor_proposal.QlibFactorHypothesisGen" + """Hypothesis generation class""" + + hypothesis2experiment: str = "alphaagent.scenarios.qlib.proposal.factor_proposal.QlibFactorHypothesis2Experiment" + """Hypothesis to experiment class""" + + coder: str = "alphaagent.scenarios.qlib.developer.factor_coder.QlibFactorCoSTEER" + """Coder class""" + + runner: str = "alphaagent.scenarios.qlib.developer.factor_runner.QlibFactorRunner" + """Runner class""" + + summarizer: str = "alphaagent.scenarios.qlib.developer.feedback.QlibFactorHypothesisExperiment2Feedback" + """Summarizer class""" + + evolving_n: int = 10 + """Number of evolutions""" + + +class AlphaAgentFactorBasePropSetting(BasePropSetting): + model_config = ExtendedSettingsConfigDict(env_prefix="QLIB_FACTOR_", protected_namespaces=()) + + # 1) override base settings + scen: str = "alphaagent.scenarios.qlib.experiment.factor_experiment.QlibAlphaAgentScenario" + """Scenario class for Qlib Factor""" + + hypothesis_gen: str = "alphaagent.scenarios.qlib.proposal.factor_proposal.AlphaAgentHypothesisGen" + """Hypothesis generation class""" + + hypothesis2experiment: str = "alphaagent.scenarios.qlib.proposal.factor_proposal.AlphaAgentHypothesis2FactorExpression" + """Hypothesis to experiment class""" + + # coder: str = "alphaagent.scenarios.qlib.developer.factor_coder.QlibFactorCoSTEER" + coder: str = "alphaagent.scenarios.qlib.developer.factor_coder.QlibFactorParser" + """Coder class""" + + runner: str = "alphaagent.scenarios.qlib.developer.factor_runner.QlibFactorRunner" + """Runner class""" + + summarizer: str = "alphaagent.scenarios.qlib.developer.feedback.AlphaAgentQlibFactorHypothesisExperiment2Feedback" + """Summarizer class""" + + evolving_n: int = 5 + """Number of evolutions""" + +class FactorBackTestBasePropSetting(BasePropSetting): + 
model_config = ExtendedSettingsConfigDict(env_prefix="QLIB_FACTOR_", protected_namespaces=()) + + # 1) override base settings + scen: str = "alphaagent.scenarios.qlib.experiment.factor_experiment.QlibAlphaAgentScenario" + """Scenario class for Qlib Factor""" + + hypothesis_gen: str = "alphaagent.scenarios.qlib.proposal.factor_proposal.EmptyHypothesisGen" + """Hypothesis generation class""" + + hypothesis2experiment: str = "alphaagent.scenarios.qlib.proposal.factor_proposal.BacktestHypothesis2FactorExpression" + """Hypothesis to experiment class""" + + coder: str = "alphaagent.scenarios.qlib.developer.factor_coder.QlibFactorCoder" + """Coder class""" + + runner: str = "alphaagent.scenarios.qlib.developer.factor_runner.QlibFactorRunner" + """Runner class""" + + summarizer: str = "alphaagent.scenarios.qlib.developer.feedback.QlibFactorHypothesisExperiment2Feedback" + """Summarizer class""" + + evolving_n: int = 1 + """Number of evolutions""" + + +class FactorFromReportPropSetting(FactorBasePropSetting): + # 1) override the scen attribute + scen: str = "alphaagent.scenarios.qlib.experiment.factor_from_report_experiment.QlibFactorFromReportScenario" + """Scenario class for Qlib Factor from Report""" + + # 2) sub task specific: + report_result_json_file_path: str = "git_ignore_folder/report_list.json" + """Path to the JSON file listing research reports for factor extraction""" + + max_factors_per_exp: int = 10000 + """Maximum number of factors implemented per experiment""" + + is_report_limit_enabled: bool = False + """Limits report processing count if True; processes all if False""" + + +FACTOR_PROP_SETTING = FactorBasePropSetting() +FACTOR_FROM_REPORT_PROP_SETTING = FactorFromReportPropSetting() +MODEL_PROP_SETTING = ModelBasePropSetting() +ALPHA_AGENT_FACTOR_PROP_SETTING = AlphaAgentFactorBasePropSetting() +FACTOR_BACK_TEST_PROP_SETTING = FactorBackTestBasePropSetting() \ No newline at end of file diff --git a/alphaagent/app/qlib_rd_loop/factor_backtest.py 
b/alphaagent/app/qlib_rd_loop/factor_backtest.py new file mode 100755 index 00000000..0dbd4b2e --- /dev/null +++ b/alphaagent/app/qlib_rd_loop/factor_backtest.py @@ -0,0 +1,30 @@ +""" +Factor workflow with session control +""" + +from typing import Any + +import fire + +from alphaagent.app.qlib_rd_loop.conf import FACTOR_BACK_TEST_PROP_SETTING +from alphaagent.components.workflow.alphaagent_loop import BacktestLoop + +def main(path=None, step_n=None, factor_path=None): + """ + Auto R&D Evolving loop for fintech factors. + + You can continue running session by + + .. code-block:: python + + dotenv run -- python alphaagent/app/qlib_rd_loop/factor_backtest.py --factor_path "/path/to/factor_file.csv" $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is a optional paramter + + """ + if path is None: + model_loop = BacktestLoop(FACTOR_BACK_TEST_PROP_SETTING, factor_path=factor_path) + else: + model_loop = BacktestLoop.load(path) + model_loop.run(step_n=step_n) + +if __name__ == "__main__": + fire.Fire(main) \ No newline at end of file diff --git a/alphaagent/app/qlib_rd_loop/factor_from_report.py b/alphaagent/app/qlib_rd_loop/factor_from_report.py new file mode 100755 index 00000000..44a0e8cb --- /dev/null +++ b/alphaagent/app/qlib_rd_loop/factor_from_report.py @@ -0,0 +1,172 @@ +import json +from pathlib import Path +from typing import Any, Tuple + +import fire +from jinja2 import Environment, StrictUndefined + +from alphaagent.app.qlib_rd_loop.conf import FACTOR_FROM_REPORT_PROP_SETTING +from alphaagent.app.qlib_rd_loop.factor import FactorRDLoop +from alphaagent.components.document_reader.document_reader import ( + extract_first_page_screenshot_from_pdf, + load_and_process_pdfs_by_langchain, +) +from alphaagent.core.prompts import Prompts +from alphaagent.core.proposal import Hypothesis +from alphaagent.log import logger +from alphaagent.log.time import measure_time +from alphaagent.oai.llm_utils import APIBackend +from 
alphaagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment +from alphaagent.scenarios.qlib.factor_experiment_loader.pdf_loader import ( + FactorExperimentLoaderFromPDFfiles, +) +from alphaagent.utils.workflow import LoopMeta + +prompts_path = Path(__file__).parent / "prompts.yaml" +prompts = Prompts(file_path=prompts_path) + + +def generate_hypothesis(factor_result: dict, report_content: str) -> str: + """ + Generate a hypothesis based on factor results and report content. + + Args: + factor_result (dict): The results of the factor analysis. + report_content (str): The content of the report. + + Returns: + str: The generated hypothesis. + """ + system_prompt = ( + Environment(undefined=StrictUndefined).from_string(prompts["hypothesis_generation"]["system"]).render() + ) + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(prompts["hypothesis_generation"]["user"]) + .render(factor_descriptions=json.dumps(factor_result), report_content=report_content) + ) + + response = APIBackend().build_messages_and_create_chat_completion( + user_prompt=user_prompt, + system_prompt=system_prompt, + json_mode=True, + ) + + response_json = json.loads(response) + + return Hypothesis( + hypothesis=response_json.get("hypothesis", "No hypothesis provided"), + reason=response_json.get("reason", "No reason provided"), + concise_reason=response_json.get("concise_reason", "No concise reason provided"), + concise_observation=response_json.get("concise_observation", "No concise observation provided"), + concise_justification=response_json.get("concise_justification", "No concise justification provided"), + concise_knowledge=response_json.get("concise_knowledge", "No concise knowledge provided"), + ) + + +def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[QlibFactorExperiment, Hypothesis]: + """ + Extract hypothesis and experiment details from report files. + + Args: + report_file_path (str): Path to the report file. 
+ + Returns: + Tuple[QlibFactorExperiment, Hypothesis]: The extracted experiment and generated hypothesis. + """ + with logger.tag("extract_factors_and_implement"): + with logger.tag("load_factor_tasks"): + exp = FactorExperimentLoaderFromPDFfiles().load(report_file_path) + if exp is None or exp.sub_tasks == []: + return None, None + + with logger.tag("load_pdf_screenshot"): + pdf_screenshot = extract_first_page_screenshot_from_pdf(report_file_path) + logger.log_object(pdf_screenshot) + + docs_dict = load_and_process_pdfs_by_langchain(report_file_path) + + factor_result = { + task.factor_name: { + "description": task.factor_description, + "formulation": task.factor_formulation, + "variables": task.variables, + "resources": task.factor_resources, + } + for task in exp.sub_tasks + } + + report_content = "\n".join(docs_dict.values()) + hypothesis = generate_hypothesis(factor_result, report_content) + return exp, hypothesis + + +class FactorReportLoop(FactorRDLoop, metaclass=LoopMeta): + @measure_time + def __init__(self, report_folder: str = None): + super().__init__(PROP_SETTING=FACTOR_FROM_REPORT_PROP_SETTING) + if report_folder is None: + self.judge_pdf_data_items = json.load( + open(FACTOR_FROM_REPORT_PROP_SETTING.report_result_json_file_path, "r") + ) + else: + self.judge_pdf_data_items = [i for i in Path(report_folder).rglob("*.pdf")] + + self.pdf_file_index = 0 + self.valid_pdf_file_count = 0 + self.current_loop_hypothesis = None + self.current_loop_exp = None + self.steps = ["propose_hypo_exp", "propose", "exp_gen", "coding", "running", "feedback"] + + @measure_time + def propose_hypo_exp(self, prev_out: dict[str, Any]): + with logger.tag("r"): + while True: + if FACTOR_FROM_REPORT_PROP_SETTING.is_report_limit_enabled and self.valid_pdf_file_count > 15: + break + report_file_path = self.judge_pdf_data_items[self.pdf_file_index] + logger.info(f"Processing number {self.pdf_file_index} report: {report_file_path}") + self.pdf_file_index += 1 + exp, hypothesis = 
extract_hypothesis_and_exp_from_reports(str(report_file_path)) + if exp is None: + continue + self.valid_pdf_file_count += 1 + exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]] + exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp] + exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp] + logger.log_object(hypothesis, tag="hypothesis generation") + logger.log_object(exp.sub_tasks, tag="experiment generation") + self.current_loop_hypothesis = hypothesis + self.current_loop_exp = exp + return None + + @measure_time + def propose(self, prev_out: dict[str, Any]): + return self.current_loop_hypothesis + + @measure_time + def exp_gen(self, prev_out: dict[str, Any]): + return self.current_loop_exp + + +def main(report_folder=None, path=None, step_n=None): + """ + Auto R&D Evolving loop for fintech factors (the factors are extracted from finance reports). + + Args: + report_folder (str, optional): The folder contains the report PDF files. Reports will be loaded from this folder. + path (str, optional): The path for loading a session. If provided, the session will be loaded. + step_n (int, optional): Step number to continue running a session. 
+ """ + if path is None and report_folder is None: + model_loop = FactorReportLoop() + elif path is not None: + model_loop = FactorReportLoop.load(path) + else: + model_loop = FactorReportLoop(report_folder=report_folder) + + model_loop.run(step_n=step_n) + + +if __name__ == "__main__": + fire.Fire(main) diff --git a/alphaagent/app/qlib_rd_loop/factor_mining.py b/alphaagent/app/qlib_rd_loop/factor_mining.py new file mode 100755 index 00000000..21c8ef4e --- /dev/null +++ b/alphaagent/app/qlib_rd_loop/factor_mining.py @@ -0,0 +1,83 @@ +""" +Factor workflow with session control +""" + +from typing import Any +import fire +import signal +import sys +import threading +from functools import wraps +import time +import ctypes +import os +from alphaagent.app.qlib_rd_loop.conf import ALPHA_AGENT_FACTOR_PROP_SETTING +from alphaagent.components.workflow.alphaagent_loop import AlphaAgentLoop +from alphaagent.core.exception import FactorEmptyError +from alphaagent.log import logger +from alphaagent.log.time import measure_time +from alphaagent.oai.llm_conf import LLM_SETTINGS + + + + +def force_timeout(): + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + # 优先选择timeout参数 + seconds = LLM_SETTINGS.factor_mining_timeout + def handle_timeout(signum, frame): + logger.error(f"强制终止程序执行,已超过{seconds}秒") + sys.exit(1) + + # 设置信号处理器 + signal.signal(signal.SIGALRM, handle_timeout) + # 设置闹钟 + signal.alarm(seconds) + + try: + result = func(*args, **kwargs) + finally: + # 取消闹钟 + signal.alarm(0) + return result + return wrapper + return decorator + + +@force_timeout() +def main(path=None, step_n=None, direction=None, stop_event=None): + """ + Autonomous alpha factor mining. + + Args: + path: 会话路径 + step_n: 步骤数 + direction: 初始方向 + stop_event: 停止事件 + + You can continue running session by + + .. 
code-block:: python + + dotenv run -- python rdagent/app/qlib_rd_loop/factor_alphaagent.py $LOG_PATH/__session__/1/0_propose --step_n 1 --potential_direction "[Initial Direction (Optional)]" # `step_n` is a optional paramter + + """ + try: + use_local = os.getenv("USE_LOCAL", "True").lower() + use_local = True if use_local in ["true", "1"] else False + logger.info(f"Use {'Local' if use_local else 'Docker container'} to execute factor backtest") + if path is None: + model_loop = AlphaAgentLoop(ALPHA_AGENT_FACTOR_PROP_SETTING, potential_direction=direction, stop_event=stop_event, use_local=use_local) + else: + model_loop = AlphaAgentLoop.load(path, use_local=use_local) + model_loop.run(step_n=step_n, stop_event=stop_event) + except Exception as e: + logger.error(f"执行过程中发生错误: {str(e)}") + raise + finally: + logger.info("程序执行完成或被终止") + +if __name__ == "__main__": + fire.Fire(main) diff --git a/alphaagent/app/utils/health_check.py b/alphaagent/app/utils/health_check.py new file mode 100755 index 00000000..555e71b8 --- /dev/null +++ b/alphaagent/app/utils/health_check.py @@ -0,0 +1,49 @@ +import socket + +import docker + +from alphaagent.log import logger + + +def check_docker() -> None: + try: + client = docker.from_env() + client.images.pull("hello-world") + container = client.containers.run("hello-world", detach=True) + logs = container.logs().decode("utf-8") + print(logs) + container.remove() + logger.info(f"The docker status is normal") + except docker.errors.DockerException as e: + logger.error(f"An error occurred: {e}") + logger.warning( + f"Docker status is exception, please check the docker configuration or reinstall it. Refs: https://docs.docker.com/engine/install/ubuntu/." 
+ ) + + +def is_port_in_use(port): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(("127.0.0.1", port)) == 0 + + +def check_and_list_free_ports(start_port=19899, max_ports=10) -> None: + is_occupied = is_port_in_use(port=start_port) + if is_occupied: + free_ports = [] + for port in range(start_port, start_port + max_ports): + if not is_port_in_use(port): + free_ports.append(port) + logger.warning( + f"Port 19899 is occupied, please replace it with an available port when running the `rdagent ui` command. Available ports: {free_ports}" + ) + else: + logger.info(f"Port 19899 is not occupied, you can run the `rdagent ui` command") + + +def health_check(): + """ + Check that docker is installed correctly, + and that the ports used in the sample README are not occupied. + """ + check_docker() + check_and_list_free_ports() diff --git a/alphaagent/app/utils/info.py b/alphaagent/app/utils/info.py new file mode 100755 index 00000000..3e8b4897 --- /dev/null +++ b/alphaagent/app/utils/info.py @@ -0,0 +1,57 @@ +import importlib.metadata +import platform +import sys +from pathlib import Path + +import docker +import requests +from setuptools_scm import get_version + +from alphaagent.log import logger + + +def sys_info(): + """collect system related info""" + method_list = [ + ["Name of current operating system: ", "system"], + ["Processor architecture: ", "machine"], + ["System, version, and hardware information: ", "platform"], + ["Version number of the system: ", "version"], + ] + for method in method_list: + logger.info(f"{method[0]}{getattr(platform, method[1])()}") + return None + + +def python_info(): + """collect Python related info""" + python_version = sys.version.replace("\n", " ") + logger.info(f"Python version: {python_version}") + return None + + +def docker_info(): + client = docker.from_env() + containers = client.containers.list(all=True) + if containers: + containers.sort(key=lambda c: c.attrs["Created"]) + last_container = 
containers[-1] + logger.info(f"Container ID: {last_container.id}") + logger.info(f"Container Name: {last_container.name}") + logger.info(f"Container Status: {last_container.status}") + logger.info(f"Image ID used by the container: {last_container.image.id}") + logger.info(f"Image tag used by the container: {last_container.image.tags}") + logger.info(f"Container port mapping: {last_container.ports}") + logger.info(f"Container Label: {last_container.labels}") + logger.info(f"Startup Commands: {' '.join(client.containers.get(last_container.id).attrs['Config']['Cmd'])}") + else: + logger.info(f"No run containers.") + + + +def collect_info(): + """Prints information about the system and the installed packages.""" + sys_info() + python_info() + docker_info() + return None diff --git a/alphaagent/components/benchmark/conf.py b/alphaagent/components/benchmark/conf.py new file mode 100755 index 00000000..5d6a0dd1 --- /dev/null +++ b/alphaagent/components/benchmark/conf.py @@ -0,0 +1,33 @@ +from dataclasses import field +from pathlib import Path +from typing import Optional + +from alphaagent.core.conf import ExtendedBaseSettings + +DIRNAME = Path("./") + + +class BenchmarkSettings(ExtendedBaseSettings): + class Config: + env_prefix = "BENCHMARK_" + """Use `BENCHMARK_` as prefix for environment variables""" + + bench_data_path: Path = DIRNAME / "example.json" + """data for benchmark""" + + bench_test_round: int = 10 + """how many rounds to run, each round may cost 10 minutes""" + + bench_test_case_n: Optional[int] = None + """how many test cases to run; If not given, all test cases will be run""" + + bench_method_cls: str = "rdagent.components.coder.factor_coder.FactorCoSTEER" + """method to be used for test cases""" + + bench_method_extra_kwargs: dict = field( + default_factory=dict, + ) + """extra kwargs for the method to be tested except the task list""" + + bench_result_path: Path = DIRNAME / "result" + """result save path""" diff --git 
a/alphaagent/components/benchmark/eval_method.py b/alphaagent/components/benchmark/eval_method.py new file mode 100755 index 00000000..ccd2b260 --- /dev/null +++ b/alphaagent/components/benchmark/eval_method.py @@ -0,0 +1,222 @@ +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Tuple, Union + +import pandas as pd +from tqdm import tqdm + +from alphaagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS +from alphaagent.components.coder.factor_coder.eva_utils import ( + FactorCorrelationEvaluator, + FactorEqualValueRatioEvaluator, + FactorEvaluator, + FactorIndexEvaluator, + FactorRowCountEvaluator, + FactorSingleColumnEvaluator, +) +from alphaagent.components.coder.factor_coder.factor import FactorFBWorkspace +from alphaagent.core.conf import RD_AGENT_SETTINGS +from alphaagent.core.developer import Developer +from alphaagent.core.exception import CoderError +from alphaagent.core.experiment import Experiment, Task, Workspace +from alphaagent.core.scenario import Scenario +from alphaagent.core.utils import multiprocessing_wrapper + +EVAL_RES = Dict[ + str, + List[Tuple[FactorEvaluator, Union[object, CoderError]]], +] + + +class TestCase: + def __init__( + self, + target_task: Task, + ground_truth: Workspace, + ): + self.target_task = target_task + self.ground_truth = ground_truth + + +class TestCases: + def __init__(self, test_case_l: list[TestCase] = []): + # self.test_case_l = [TestCase(task, gt) for task, gt in zip(target_task, ground_truth)] + self.test_case_l = test_case_l + + def __getitem__(self, item): + return self.test_case_l[item] + + def __len__(self): + return len(self.test_case_l) + + def get_exp(self): + return Experiment([case.target_task for case in self.test_case_l]) + + @property + def target_task(self): + return [case.target_task for case in self.test_case_l] + + @property + def ground_truth(self): + return [case.ground_truth for case in self.test_case_l] + + +class BaseEval: + """ + 
The benchmark benchmark evaluation. + """ + + def __init__( + self, + evaluator_l: List[FactorEvaluator], + test_cases: TestCases, + generate_method: Developer, + catch_eval_except: bool = True, + ): + """Parameters + ---------- + test_cases : TestCases + cases to be evaluated, ground truth are included in the test cases. + evaluator_l : List[FactorEvaluator] + A list of evaluators to evaluate the generated code. + catch_eval_except : bool + If we want to debug the evaluators, we recommend to set the this parameter to True. + """ + self.evaluator_l = evaluator_l + self.test_cases = test_cases + self.generate_method = generate_method + self.catch_eval_except = catch_eval_except + + def load_cases_to_eval( + self, + path: Union[Path, str], + **kwargs, + ) -> List[Workspace]: + path = Path(path) + fi_l = [] + for tc in self.test_cases: + try: + fi = FactorFBWorkspace.from_folder(tc.task, path, **kwargs) + fi_l.append(fi) + except FileNotFoundError: + print("Fail to load test case for factor: ", tc.task.factor_name) + return fi_l + + def eval_case( + self, + case_gt: Workspace, + case_gen: Workspace, + ) -> List[Union[Tuple[FactorEvaluator, object], Exception]]: + """Parameters + ---------- + case_gt : FactorImplementation + + case_gen : FactorImplementation + + + Returns + ------- + List[Union[Tuple[FactorEvaluator, object],Exception]] + for each item + If the evaluation run successfully, return the evaluate results. Otherwise, return the exception. 
+ """ + eval_res = [] + for ev in self.evaluator_l: + try: + case_gen.raise_exception = True + eval_res.append((ev, ev.evaluate(implementation=case_gen, gt_implementation=case_gt))) + # if the corr ev is successfully evaluated and achieve the best performance, then break + except CoderError as e: + return e + except Exception as e: + # exception when evaluation + if self.catch_eval_except: + eval_res.append((ev, e)) + else: + raise e + return eval_res + + +class FactorImplementEval(BaseEval): + def __init__( + self, + test_cases: TestCases, + method: Developer, + *args, + scen: Scenario, + test_round: int = 10, + **kwargs, + ): + online_evaluator_l = [ + FactorSingleColumnEvaluator(scen), + FactorRowCountEvaluator(scen), + FactorIndexEvaluator(scen), + FactorEqualValueRatioEvaluator(scen), + FactorCorrelationEvaluator(hard_check=False, scen=scen), + ] + super().__init__(online_evaluator_l, test_cases, method, *args, **kwargs) + self.test_round = test_round + + def develop(self): + gen_factor_l_all_rounds = [] + for _ in tqdm(range(self.test_round), desc="Rounds of Eval"): + print("\n========================================================") + print(f"Eval {_}-th times...") + print("========================================================\n") + try: + gen_factor_l = self.generate_method.develop(self.test_cases.get_exp()) + except KeyboardInterrupt: + # TODO: Why still need to save result after KeyboardInterrupt? + print("Manually interrupted the evaluation. 
Saving existing results") + break + + if len(gen_factor_l.sub_workspace_list) != len(self.test_cases.ground_truth): + raise ValueError( + "The number of cases to eval should be equal to the number of test cases.", + ) + gen_factor_l_all_rounds.extend(gen_factor_l.sub_workspace_list) + + return gen_factor_l_all_rounds + + def eval(self, gen_factor_l_all_rounds): + test_cases_all_rounds = [] + res = defaultdict(list) + for _ in range(self.test_round): + test_cases_all_rounds.extend(self.test_cases.ground_truth) + eval_res_list = multiprocessing_wrapper( + [ + (self.eval_case, (gt_case, gen_factor)) + for gt_case, gen_factor in zip(test_cases_all_rounds, gen_factor_l_all_rounds) + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + + for gt_case, eval_res, gen_factor in tqdm(zip(test_cases_all_rounds, eval_res_list, gen_factor_l_all_rounds)): + res[gt_case.target_task.factor_name].append((gen_factor, eval_res)) + + return res + + @staticmethod + def summarize_res(res: EVAL_RES) -> pd.DataFrame: + # None: indicate that it raises exception and get no results + sum_res = {} + for factor_name, runs in res.items(): + for fi, err_or_res_l in runs: + # NOTE: str(fi) may not be unique!! Because the workspace can be skipped when hitting the cache. 
+ uniq_key = f"{str(fi)},{id(fi)}" + + key = (factor_name, uniq_key) + val = {} + if isinstance(err_or_res_l, Exception): + val["run factor error"] = str(err_or_res_l.__class__) + else: + val["run factor error"] = None + for ev_obj, err_or_res in err_or_res_l: + if isinstance(err_or_res, Exception): + val[str(ev_obj)] = None + else: + feedback, metric = err_or_res + val[str(ev_obj)] = metric + sum_res[key] = val + + return pd.DataFrame(sum_res) diff --git a/alphaagent/components/benchmark/example.json b/alphaagent/components/benchmark/example.json new file mode 100755 index 00000000..742927da --- /dev/null +++ b/alphaagent/components/benchmark/example.json @@ -0,0 +1,36 @@ +{ + "Turnover_Rate_Factor": { + "description": "A traditional factor based on 20-day average turnover rate, adjusted for market capitalization, which is further improved by applying the information distribution theory.", + "formulation": "\\text{Adjusted Turnover Rate} = \\frac{\\text{mean}(20\\text{-day turnover rate})}{\\text{Market Capitalization}}", + "variables": { + "20-day turnover rate": "Average turnover rate over the past 20 days.", + "Market Capitalization": "Total market value of a company's outstanding shares." 
+ }, + "Category": "Fundamentals", + "Difficulty": "Easy", + "gt_code": "import pandas as pd\n\ndata_f = pd.read_hdf('daily_f.h5')\n\ndata = data_f.reset_index()\nwindow_size = 20\n\nnominator=data.groupby('instrument')[['TurnoverRate_30D']].rolling(window=window_size).mean().reset_index(0, drop=True)\n# transfer to series\nnew=nominator['TurnoverRate_30D']\ndata['Turnover_Rate_Factor']=new/data['TradableACapital']\n\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['Turnover_Rate_Factor']).set_index(data_f.index)\n\n# transfer the result to series\nresult=result['Turnover_Rate_Factor']\nresult.to_hdf(\"result.h5\", key=\"data\")" + }, + "PctTurn20": { + "description": "A factor representing the percentage change in turnover rate over the past 20 trading days, market-value neutralized.", + "formulation": "\\text{PctTurn20} = \\frac{1}{N} \\sum_{i=1}^{N} \\left( \\frac{\\text{Turnover}_{i, t} - \\text{Turnover}_{i, t-20}}{\\text{Turnover}_{i, t-20}} \\right)", + "variables": { + "N": "Number of stocks in the market.", + "Turnover_{i, t}": "Turnover of stock i at day t.", + "Turnover_{i, t-20}": "Turnover of stock i at day t-20." 
+ }, + "Category": "Volume&Price", + "Difficulty": "Medium", + "gt_code": "import pandas as pd\nfrom statsmodels import api as sm\n\ndef fill_mean(s: pd.Series) -> pd.Series:\n return s.fillna(s.mean()).fillna(0.0)\n\ndef market_value_neutralize(s: pd.Series, mv: pd.Series) -> pd.Series:\n s = s.groupby(\"datetime\", group_keys=False).apply(fill_mean)\n mv = mv.groupby(\"datetime\", group_keys=False).apply(fill_mean)\n\n df_f = mv.to_frame(\"MarketValue\")\n df_f[\"const\"] = 1\n X = df_f[[\"MarketValue\", \"const\"]]\n\n # Perform the Ordinary Least Squares (OLS) regression\n model = sm.OLS(s, X)\n results = model.fit()\n\n # Calculate the residuals\n df_f[\"residual\"] = results.resid\n df_f[\"norm_resi\"] = df_f.groupby(level=\"datetime\", group_keys=False)[\"residual\"].apply(\n lambda x: (x - x.mean()) / x.std(),\n )\n return df_f[\"norm_resi\"]\n\n\n# get_turnover\ndf_pv = pd.read_hdf(\"daily_pv.h5\", key=\"data\")\ndf_f = pd.read_hdf(\"daily_f.h5\", key=\"data\")\nturnover = df_pv[\"$money\"] / df_f[\"TradableMarketValue\"]\n\nf = turnover.groupby(\"instrument\").pct_change(periods=20)\n\nf_neutralized = market_value_neutralize(f, df_f[\"TradableMarketValue\"])\n\nf_neutralized.to_hdf(\"result.h5\", key=\"data\")" + }, + "PB_ROE": { + "description": "Constructed using the ranking difference between PB and ROE, with PB and ROE replacing original PB and ROE to obtain reconstructed factor values.", + "formulation": "\\text{rank}(PB\\_t) - rank(ROE_t)", + "variables": { + "\\text{rank}(PB_t)": "Ranking PB on cross-section at time t.", + "\\text{rank}(ROE_t)": "Ranking single-quarter ROE on cross-section at time t." 
+ }, + "Category": "High-Frequency", + "Difficulty": "Hard", + "gt_code": "#!/usr/bin/env python\n\nimport pandas as pd\n\ndata_f = pd.read_hdf('daily_f.h5')\n\ndata = data_f.reset_index()\n\n# Calculate the rank of PB and ROE\ndata['PB_rank'] = data.groupby('datetime')['B/P'].rank()\ndata['ROE_rank'] = data.groupby('datetime')['ROE'].rank()\n\n# Calculate the difference between the ranks\ndata['PB_ROE'] = data['PB_rank'] - data['ROE_rank']\n\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['PB_ROE']).set_index(data_f.index)\n\n# transfer the result to series\nresult=result['PB_ROE']\nresult.to_hdf(\"result.h5\", key=\"data\")" + } +} \ No newline at end of file diff --git a/alphaagent/components/coder/CoSTEER/__init__.py b/alphaagent/components/coder/CoSTEER/__init__.py new file mode 100755 index 00000000..6ee5b6bd --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/__init__.py @@ -0,0 +1,108 @@ +import pickle +from pathlib import Path + +from alphaagent.components.coder.CoSTEER.config import CoSTEERSettings +from alphaagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem +from alphaagent.components.coder.CoSTEER.evolving_agent import FilterFailedRAGEvoAgent +from alphaagent.components.coder.CoSTEER.knowledge_management import ( + CoSTEERKnowledgeBaseV1, + CoSTEERKnowledgeBaseV2, + CoSTEERRAGStrategyV1, + CoSTEERRAGStrategyV2, +) +from alphaagent.core.developer import Developer +from alphaagent.core.evaluation import Evaluator +from alphaagent.core.evolving_agent import EvolvingStrategy +from alphaagent.core.experiment import Experiment +from alphaagent.log import logger + + +class CoSTEER(Developer[Experiment]): + def __init__( + self, + settings: CoSTEERSettings, + eva: Evaluator, + es: EvolvingStrategy, + evolving_version: int, + *args, + with_knowledge: bool = True, + with_feedback: bool = True, + knowledge_self_gen: bool = True, + filter_final_evo: bool = True, + **kwargs, + ) -> None: + 
super().__init__(*args, **kwargs) + self.max_loop = settings.max_loop + self.knowledge_base_path = ( + Path(settings.knowledge_base_path) if settings.knowledge_base_path is not None else None + ) + self.new_knowledge_base_path = ( + Path(settings.new_knowledge_base_path) if settings.new_knowledge_base_path is not None else None + ) + + self.with_knowledge = with_knowledge + self.with_feedback = with_feedback + self.knowledge_self_gen = knowledge_self_gen + self.filter_final_evo = filter_final_evo + self.evolving_strategy = es + self.evaluator = eva + self.evolving_version = evolving_version + + # init knowledge base + self.knowledge_base = self.load_or_init_knowledge_base( + former_knowledge_base_path=self.knowledge_base_path, + component_init_list=[], + ) + # init rag method + self.rag = ( + CoSTEERRAGStrategyV2(self.knowledge_base, settings=settings) + if self.evolving_version == 2 + else CoSTEERRAGStrategyV1(self.knowledge_base, settings=settings) + ) + + def load_or_init_knowledge_base(self, former_knowledge_base_path: Path = None, component_init_list: list = []): + if former_knowledge_base_path is not None and former_knowledge_base_path.exists(): + knowledge_base = pickle.load(open(former_knowledge_base_path, "rb")) + if self.evolving_version == 1 and not isinstance(knowledge_base, CoSTEERKnowledgeBaseV1): + raise ValueError("The former knowledge base is not compatible with the current version") + elif self.evolving_version == 2 and not isinstance( + knowledge_base, + CoSTEERKnowledgeBaseV2, + ): + raise ValueError("The former knowledge base is not compatible with the current version") + else: + knowledge_base = ( + CoSTEERKnowledgeBaseV2( + init_component_list=component_init_list, + ) + if self.evolving_version == 2 + else CoSTEERKnowledgeBaseV1() + ) + return knowledge_base + + def develop(self, exp: Experiment) -> Experiment: + + # init intermediate items + experiment = EvolvingItem.from_experiment(exp) + + self.evolve_agent = FilterFailedRAGEvoAgent( + 
max_loop=self.max_loop, + evolving_strategy=self.evolving_strategy, + rag=self.rag, + with_knowledge=self.with_knowledge, + with_feedback=self.with_feedback, + knowledge_self_gen=self.knowledge_self_gen, + ) + + experiment = self.evolve_agent.multistep_evolve( + experiment, + self.evaluator, + filter_final_evo=self.filter_final_evo, + ) + + # save new knowledge base + if self.new_knowledge_base_path is not None: + pickle.dump(self.knowledge_base, open(self.new_knowledge_base_path, "wb")) + logger.info(f"New knowledge base saved to {self.new_knowledge_base_path}") + exp.sub_workspace_list = experiment.sub_workspace_list + return exp diff --git a/alphaagent/components/coder/CoSTEER/config.py b/alphaagent/components/coder/CoSTEER/config.py new file mode 100755 index 00000000..9a851535 --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/config.py @@ -0,0 +1,39 @@ +from typing import Union + +from alphaagent.core.conf import ExtendedBaseSettings + + +class CoSTEERSettings(ExtendedBaseSettings): + """CoSTEER settings, this setting is supposed not to be used directly!!!""" + + class Config: + env_prefix = "CoSTEER_" + + coder_use_cache: bool = False + """Indicates whether to use cache for the coder""" + + max_loop: int = 10 + """Maximum number of task implementation loops""" + + fail_task_trial_limit: int = 20 + + v1_query_former_trace_limit: int = 5 + v1_query_similar_success_limit: int = 5 + + v2_query_component_limit: int = 1 + v2_query_error_limit: int = 1 + v2_query_former_trace_limit: int = 1 + v2_add_fail_attempt_to_latest_successful_execution: bool = False + v2_error_summary: bool = False + v2_knowledge_sampler: float = 1.0 + + knowledge_base_path: Union[str, None] = None + """Path to the knowledge base""" + + new_knowledge_base_path: Union[str, None] = None + """Path to the new knowledge base""" + + select_threshold: int = 10 + + +CoSTEER_SETTINGS = CoSTEERSettings() diff --git a/alphaagent/components/coder/CoSTEER/evaluators.py 
b/alphaagent/components/coder/CoSTEER/evaluators.py new file mode 100755 index 00000000..a8a93f8f --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/evaluators.py @@ -0,0 +1,112 @@ +from abc import abstractmethod +from typing import List + +from alphaagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem +from alphaagent.core.conf import RD_AGENT_SETTINGS +from alphaagent.core.evaluation import Evaluator, Feedback +from alphaagent.core.evolving_framework import QueriedKnowledge +from alphaagent.core.experiment import Workspace +from alphaagent.core.scenario import Task +from alphaagent.core.utils import multiprocessing_wrapper +from alphaagent.log import logger + + +class CoSTEERSingleFeedback(Feedback): + """This class is a base class for all code generator feedback to single implementation""" + + def __init__( + self, + execution_feedback: str = None, + shape_feedback: str = None, + code_feedback: str = None, + value_feedback: str = None, + final_decision: bool = None, + final_feedback: str = None, + value_generated_flag: bool = None, + final_decision_based_on_gt: bool = None, + ) -> None: + self.execution_feedback = execution_feedback + self.shape_feedback = shape_feedback + self.code_feedback = code_feedback + self.value_feedback = value_feedback + self.final_decision = final_decision + self.final_feedback = final_feedback + self.value_generated_flag = value_generated_flag + self.final_decision_based_on_gt = final_decision_based_on_gt + + def __str__(self) -> str: + return f"""------------------Execution Feedback------------------ +{self.execution_feedback if self.execution_feedback is not None else 'No execution feedback'} +------------------Shape Feedback------------------ +{self.shape_feedback if self.shape_feedback is not None else 'No shape feedback'} +------------------Code Feedback------------------ +{self.code_feedback if self.code_feedback is not None else 'No code feedback'} +------------------Value Feedback------------------ 
+{self.value_feedback if self.value_feedback is not None else 'No value feedback'} +------------------Final Feedback------------------ +{self.final_feedback if self.final_feedback is not None else 'No final feedback'} +------------------Final Decision------------------ +This implementation is {'SUCCESS' if self.final_decision else 'FAIL'}. +""" + + +class CoSTEERMultiFeedback( + Feedback, + List[CoSTEERSingleFeedback], +): + """Feedback contains a list, each element is the corresponding feedback for each factor implementation.""" + + +class CoSTEEREvaluator(Evaluator): + # TODO: + # I think we should have unified interface for all evaluates, for examples. + # So we should adjust the interface of other factors + @abstractmethod + def evaluate( + self, + target_task: Task, + implementation: Workspace, + gt_implementation: Workspace, + **kwargs, + ) -> CoSTEERSingleFeedback: + raise NotImplementedError("Please implement the `evaluator` method") + + +class CoSTEERMultiEvaluator(Evaluator): + def __init__(self, single_evaluator: CoSTEEREvaluator, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.single_evaluator = single_evaluator + + def evaluate( + self, + evo: EvolvingItem, + queried_knowledge: QueriedKnowledge = None, + **kwargs, + ) -> CoSTEERMultiFeedback: + multi_implementation_feedback = multiprocessing_wrapper( + [ + ( + self.single_evaluator.evaluate, + ( + evo.sub_tasks[index], + evo.sub_workspace_list[index], + evo.sub_gt_implementations[index] if evo.sub_gt_implementations is not None else None, + queried_knowledge, + ), + ) + for index in range(len(evo.sub_tasks)) + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + + final_decision = [ + None if single_feedback is None else single_feedback.final_decision + for single_feedback in multi_implementation_feedback + ] + logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") + + for index in range(len(evo.sub_tasks)): + if final_decision[index]: + 
evo.sub_tasks[index].factor_implementation = True + + return multi_implementation_feedback diff --git a/alphaagent/components/coder/CoSTEER/evolvable_subjects.py b/alphaagent/components/coder/CoSTEER/evolvable_subjects.py new file mode 100755 index 00000000..675af691 --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/evolvable_subjects.py @@ -0,0 +1,34 @@ +from alphaagent.core.evolving_framework import EvolvableSubjects +from alphaagent.core.experiment import Experiment, FBWorkspace +from alphaagent.core.scenario import Task +from alphaagent.log import logger + + +class EvolvingItem(Experiment, EvolvableSubjects): + """ + Intermediate item of factor implementation. + """ + + def __init__( + self, + sub_tasks: list[Task], + sub_gt_implementations: list[FBWorkspace] = None, + ): + Experiment.__init__(self, sub_tasks=sub_tasks) + self.corresponding_selection: list = None + if sub_gt_implementations is not None and len( + sub_gt_implementations, + ) != len(self.sub_tasks): + self.sub_gt_implementations = None + logger.warning( + "The length of sub_gt_implementations is not equal to the length of sub_tasks, set sub_gt_implementations to None", + ) + else: + self.sub_gt_implementations = sub_gt_implementations + + @classmethod + def from_experiment(cls, exp: Experiment) -> Experiment: + ei = cls(sub_tasks=exp.sub_tasks) + ei.based_experiments = exp.based_experiments + ei.experiment_workspace = exp.experiment_workspace + return ei diff --git a/alphaagent/components/coder/CoSTEER/evolving_agent.py b/alphaagent/components/coder/CoSTEER/evolving_agent.py new file mode 100755 index 00000000..17ae67a9 --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/evolving_agent.py @@ -0,0 +1,18 @@ +from alphaagent.components.coder.CoSTEER.evaluators import CoSTEERSingleFeedback +from alphaagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem +from alphaagent.core.evolving_agent import RAGEvoAgent +from alphaagent.core.evolving_framework import EvolvableSubjects + 
+ +class FilterFailedRAGEvoAgent(RAGEvoAgent): + def filter_evolvable_subjects_by_feedback( + self, evo: EvolvableSubjects, feedback: CoSTEERSingleFeedback + ) -> EvolvableSubjects: + assert isinstance(evo, EvolvingItem) + assert isinstance(feedback, list) + assert len(evo.sub_workspace_list) == len(feedback) + + for index in range(len(evo.sub_workspace_list)): + if evo.sub_workspace_list[index] is not None and feedback[index] and not feedback[index].final_decision: + evo.sub_workspace_list[index].clear() + return evo diff --git a/alphaagent/components/coder/CoSTEER/evolving_strategy.py b/alphaagent/components/coder/CoSTEER/evolving_strategy.py new file mode 100755 index 00000000..2f26abd4 --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/evolving_strategy.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from abc import abstractmethod +from pathlib import Path + +from alphaagent.components.coder.CoSTEER.config import CoSTEERSettings +from alphaagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem +from alphaagent.components.coder.CoSTEER.knowledge_management import ( + CoSTEERQueriedKnowledge, +) +from alphaagent.components.coder.CoSTEER.scheduler import random_select +from alphaagent.core.conf import RD_AGENT_SETTINGS +from alphaagent.core.evaluation import Scenario +from alphaagent.core.evolving_framework import EvolvingStrategy, QueriedKnowledge +from alphaagent.core.experiment import Workspace +from alphaagent.core.prompts import Prompts +from alphaagent.core.scenario import Task +from alphaagent.core.utils import multiprocessing_wrapper + +implement_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + +class MultiProcessEvolvingStrategy(EvolvingStrategy): + def __init__(self, scen: Scenario, settings: CoSTEERSettings): + super().__init__(scen) + self.settings = settings + + @abstractmethod + def implement_one_task( + self, + target_task: Task, + queried_knowledge: QueriedKnowledge = None, + ) -> Workspace: + 
raise NotImplementedError + + def select_one_round_tasks( + self, + to_be_finished_task_index: list, + evo: EvolvingItem, + selected_num: int, + queried_knowledge: CoSTEERQueriedKnowledge, + scen: Scenario, + ) -> list: + """Since scheduler is not essential, we implement a simple random selection here.""" + return random_select(to_be_finished_task_index, evo, selected_num, queried_knowledge, scen) + + @abstractmethod + def assign_code_list_to_evo(self, code_list: list, evo: EvolvingItem) -> None: + """ + Assign the code list to the evolving item. + + The code list is aligned with the evolving item's sub-tasks. + If a task is not implemented, put a None in the list. + """ + raise NotImplementedError + + def evolve( + self, + *, + evo: EvolvingItem, + queried_knowledge: CoSTEERQueriedKnowledge | None = None, + **kwargs, + ) -> EvolvingItem: + # 1.找出需要evolve的task + to_be_finished_task_index = [] + for index, target_task in enumerate(evo.sub_tasks): + target_task_desc = target_task.get_task_information() + if target_task_desc in queried_knowledge.success_task_to_knowledge_dict: + evo.sub_workspace_list[index] = queried_knowledge.success_task_to_knowledge_dict[ + target_task_desc + ].implementation + elif ( + target_task_desc not in queried_knowledge.success_task_to_knowledge_dict + and target_task_desc not in queried_knowledge.failed_task_info_set + ): + to_be_finished_task_index.append(index) + + # 2. 
选择selection方法 + # if the number of factors to be implemented is larger than the limit, we need to select some of them + + if self.settings.select_threshold < len(to_be_finished_task_index): + # Select a fixed number of factors if the total exceeds the threshold + to_be_finished_task_index = self.select_one_round_tasks( + to_be_finished_task_index, evo, self.settings.select_threshold, queried_knowledge, self.scen + ) + + result = multiprocessing_wrapper( + [ + (self.implement_one_task, (evo.sub_tasks[target_index], queried_knowledge)) + for target_index in to_be_finished_task_index + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + code_list = [None for _ in range(len(evo.sub_tasks))] + for index, target_index in enumerate(to_be_finished_task_index): + code_list[target_index] = result[index] + + evo = self.assign_code_list_to_evo(code_list, evo) + evo.corresponding_selection = to_be_finished_task_index + + return evo diff --git a/alphaagent/components/coder/CoSTEER/knowledge_management.py b/alphaagent/components/coder/CoSTEER/knowledge_management.py new file mode 100755 index 00000000..61b036ef --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/knowledge_management.py @@ -0,0 +1,916 @@ +from __future__ import annotations + +import copy +import json +import random +import re +from itertools import combinations +from pathlib import Path +from typing import Union + +from jinja2 import Environment, StrictUndefined + +from alphaagent.components.coder.CoSTEER.config import CoSTEERSettings +from alphaagent.components.coder.CoSTEER.evaluators import CoSTEERSingleFeedback +from alphaagent.components.knowledge_management.graph import ( + UndirectedGraph, + UndirectedNode, +) +from alphaagent.core.evolving_agent import Feedback +from alphaagent.core.evolving_framework import ( + EvolvableSubjects, + EvolvingKnowledgeBase, + EvoStep, + Knowledge, + QueriedKnowledge, + RAGStrategy, +) +from alphaagent.core.experiment import FBWorkspace +from alphaagent.core.prompts import 
Prompts +from alphaagent.core.scenario import Task +from alphaagent.log import logger +from alphaagent.oai.llm_utils import ( + APIBackend, + calculate_embedding_distance_between_str_list, +) + + +class CoSTEERKnowledge(Knowledge): + def __init__( + self, + target_task: Task, + implementation: FBWorkspace, + feedback: Feedback, + ) -> None: + self.target_task = target_task + self.implementation = implementation.copy() + self.feedback = feedback + + def get_implementation_and_feedback_str(self) -> str: + return f"""------------------implementation code:------------------ +{self.implementation.code} +------------------implementation feedback:------------------ +{self.feedback!s} +""" + + +class CoSTEERQueriedKnowledge(QueriedKnowledge): + def __init__(self, success_task_to_knowledge_dict: dict = {}, failed_task_info_set: set = set()) -> None: + self.success_task_to_knowledge_dict = success_task_to_knowledge_dict + self.failed_task_info_set = failed_task_info_set + + +class CoSTEERKnowledgeBaseV1(EvolvingKnowledgeBase): + def __init__(self, path: str | Path = None) -> None: + self.implementation_trace: dict[str, CoSTEERKnowledge] = dict() + self.success_task_info_set: set[str] = set() + + self.task_to_embedding = dict() + super().__init__(path) + + def query(self) -> CoSTEERQueriedKnowledge | None: + """ + Query the knowledge base to get the queried knowledge. So far is handled in RAG strategy. 
+ """ + raise NotImplementedError + + +class CoSTEERQueriedKnowledgeV1(CoSTEERQueriedKnowledge): + def __init__( + self, + *args, + task_to_former_failed_traces: dict = {}, + task_to_similar_task_successful_knowledge: dict = {}, + **kwargs, + ) -> None: + self.task_to_former_failed_traces = task_to_former_failed_traces + self.task_to_similar_task_successful_knowledge = task_to_similar_task_successful_knowledge + super().__init__(*args, **kwargs) + + +class CoSTEERRAGStrategyV1(RAGStrategy): + def __init__(self, knowledgebase: CoSTEERKnowledgeBaseV1, settings: CoSTEERSettings) -> None: + super().__init__(knowledgebase) + self.current_generated_trace_count = 0 + self.settings = settings + + def generate_knowledge( + self, + evolving_trace: list[EvoStep], + *, + return_knowledge: bool = False, + ) -> Knowledge | None: + raise NotImplementedError( + "This method should be considered as an un-implemented method because we encourage everyone to use v2." + ) + if len(evolving_trace) == self.current_generated_trace_count: + return + else: + for trace_index in range( + self.current_generated_trace_count, + len(evolving_trace), + ): + evo_step = evolving_trace[trace_index] + implementations = evo_step.evolvable_subjects + feedback = evo_step.feedback + for task_index in range(len(implementations.sub_tasks)): + target_task = implementations.sub_tasks[task_index] + target_task_information = target_task.get_task_information() + implementation = implementations.sub_workspace_list[task_index] + single_feedback = feedback[task_index] + if single_feedback is None: + continue + single_knowledge = CoSTEERKnowledge( + target_task=target_task, + implementation=implementation, + feedback=single_feedback, + ) + if target_task_information not in self.knowledgebase.success_task_info_set: + self.knowledgebase.implementation_trace.setdefault( + target_task_information, + [], + ).append(single_knowledge) + + if single_feedback.final_decision == True: + 
self.knowledgebase.success_task_info_set.add( + target_task_information, + ) + self.current_generated_trace_count = len(evolving_trace) + + def query( + self, + evo: EvolvableSubjects, + evolving_trace: list[EvoStep], + ) -> CoSTEERQueriedKnowledge | None: + raise NotImplementedError( + "This method should be considered as an un-implemented method because we encourage everyone to use v2." + ) + v1_query_former_trace_limit = self.settings.v1_query_former_trace_limit + v1_query_similar_success_limit = self.settings.v1_query_similar_success_limit + fail_task_trial_limit = self.settings.fail_task_trial_limit + + queried_knowledge = CoSTEERQueriedKnowledgeV1() + for target_task in evo.sub_tasks: + target_task_information = target_task.get_task_information() + if target_task_information in self.knowledgebase.success_task_info_set: + queried_knowledge.success_task_to_knowledge_dict[target_task_information] = ( + self.knowledgebase.implementation_trace[target_task_information][-1] + ) + elif ( + len( + self.knowledgebase.implementation_trace.setdefault( + target_task_information, + [], + ), + ) + >= fail_task_trial_limit + ): + queried_knowledge.failed_task_info_set.add(target_task_information) + else: + queried_knowledge.task_to_former_failed_traces[target_task_information] = ( + self.knowledgebase.implementation_trace.setdefault( + target_task_information, + [], + )[-v1_query_former_trace_limit:] + ) + + knowledge_base_success_task_list = list( + self.knowledgebase.success_task_info_set, + ) + similarity = calculate_embedding_distance_between_str_list( + [target_task_information], + knowledge_base_success_task_list, + )[0] + similar_indexes = sorted( + range(len(similarity)), + key=lambda i: similarity[i], + reverse=True, + )[:v1_query_similar_success_limit] + similar_successful_knowledge = [ + self.knowledgebase.implementation_trace.setdefault( + knowledge_base_success_task_list[index], + [], + )[-1] + for index in similar_indexes + ] + 
queried_knowledge.task_to_similar_task_successful_knowledge[target_task_information] = ( + similar_successful_knowledge + ) + return queried_knowledge + + +class CoSTEERQueriedKnowledgeV2(CoSTEERQueriedKnowledgeV1): + # Aggregation of knowledge + def __init__( + self, + task_to_former_failed_traces: dict = {}, + task_to_similar_task_successful_knowledge: dict = {}, + task_to_similar_error_successful_knowledge: dict = {}, + **kwargs, + ) -> None: + self.task_to_similar_error_successful_knowledge = task_to_similar_error_successful_knowledge + super().__init__( + task_to_former_failed_traces=task_to_former_failed_traces, + task_to_similar_task_successful_knowledge=task_to_similar_task_successful_knowledge, + **kwargs, + ) + + +class CoSTEERRAGStrategyV2(RAGStrategy): + prompt = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + def __init__(self, knowledgebase: CoSTEERKnowledgeBaseV2, settings: CoSTEERSettings) -> None: + super().__init__(knowledgebase) + self.current_generated_trace_count = 0 + self.settings = settings + + def generate_knowledge( + self, + evolving_trace: list[EvoStep], + *, + return_knowledge: bool = False, + ) -> Knowledge | None: + if len(evolving_trace) == self.current_generated_trace_count: + return None + + else: + for trace_index in range(self.current_generated_trace_count, len(evolving_trace)): + evo_step = evolving_trace[trace_index] + implementations = evo_step.evolvable_subjects + feedback = evo_step.feedback + for task_index in range(len(implementations.sub_tasks)): + target_task = implementations.sub_tasks[task_index] + target_task_information = target_task.get_task_information() + implementation = implementations.sub_workspace_list[task_index] + single_feedback: CoSTEERSingleFeedback = feedback[task_index] + if implementation is None or single_feedback is None: + continue + single_knowledge = CoSTEERKnowledge( + target_task=target_task, + implementation=implementation, + feedback=single_feedback, + ) + if ( + 
target_task_information not in self.knowledgebase.success_task_to_knowledge_dict + and implementation is not None + ): + self.knowledgebase.working_trace_knowledge.setdefault(target_task_information, []).append( + single_knowledge, + ) # save to working trace + if single_feedback.final_decision == True: + self.knowledgebase.success_task_to_knowledge_dict.setdefault( + target_task_information, + single_knowledge, + ) + # Do summary for the last step and update the knowledge graph + self.knowledgebase.update_success_task( + target_task_information, + ) + else: + # generate error node and store into knowledge base + error_analysis_result = [] + if not single_feedback.value_generated_flag: + error_analysis_result = self.analyze_error( + single_feedback.execution_feedback, + feedback_type="execution", + ) + else: + error_analysis_result = self.analyze_error( + single_feedback.value_feedback, + feedback_type="value", + ) + self.knowledgebase.working_trace_error_analysis.setdefault( + target_task_information, + [], + ).append( + error_analysis_result, + ) # save to working trace error record, for graph update + + self.current_generated_trace_count = len(evolving_trace) + return None + + def query(self, evo: EvolvableSubjects, evolving_trace: list[EvoStep]) -> CoSTEERQueriedKnowledge | None: + conf_knowledge_sampler = self.settings.v2_knowledge_sampler + queried_knowledge_v2 = CoSTEERQueriedKnowledgeV2( + success_task_to_knowledge_dict=self.knowledgebase.success_task_to_knowledge_dict, + ) + + queried_knowledge_v2 = self.former_trace_query( + evo, + queried_knowledge_v2, + self.settings.v2_query_former_trace_limit, + self.settings.v2_add_fail_attempt_to_latest_successful_execution, + ) + queried_knowledge_v2 = self.component_query( + evo, + queried_knowledge_v2, + self.settings.v2_query_component_limit, + knowledge_sampler=conf_knowledge_sampler, + ) + queried_knowledge_v2 = self.error_query( + evo, + queried_knowledge_v2, + self.settings.v2_query_error_limit, + 
knowledge_sampler=conf_knowledge_sampler, + ) + return queried_knowledge_v2 + + def analyze_component( + self, + target_task_information, + ) -> list[UndirectedNode]: # Hardcode: certain component nodes + all_component_nodes = self.knowledgebase.graph.get_all_nodes_by_label_list(["component"]) + if not len(all_component_nodes): + return [] + all_component_content = "" + for _, component_node in enumerate(all_component_nodes): + all_component_content += f"{component_node.content}, \n" + analyze_component_system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(self.prompt["analyze_component_prompt_v1_system"]) + .render( + all_component_content=all_component_content, + ) + ) + + analyze_component_user_prompt = target_task_information + try: + component_no_list = json.loads( + APIBackend().build_messages_and_create_chat_completion( + system_prompt=analyze_component_system_prompt, + user_prompt=analyze_component_user_prompt, + json_mode=True, + ), + )["component_no_list"] + return [all_component_nodes[index - 1] for index in sorted(list(set(component_no_list)))] + except: + logger.warning("Error when analyzing components.") + analyze_component_user_prompt = "Your response is not a valid component index list." 
+ + return [] + + def analyze_error( + self, + single_feedback, + feedback_type="execution", + ) -> list[ + UndirectedNode | str + ]: # Hardcode: Raised errors, existed error nodes + not existed error nodes(here, they are strs) + if feedback_type == "execution": + match = re.search( + r'File "(?P.+)", line (?P\d+), in (?P.+)\n\s+(?P.+)\n(?P\w+): (?P.+)', + single_feedback, + ) + if match: + error_details = match.groupdict() + # last_traceback = f'File "{error_details["file"]}", line {error_details["line"]}, in {error_details["function"]}\n {error_details["error_line"]}' + error_type = error_details["error_type"] + error_line = error_details["error_line"] + error_contents = [f"ErrorType: {error_type}" + "\n" + f"Error line: {error_line}"] + else: + error_contents = ["Undefined Error"] + elif feedback_type == "value": # value check error + value_check_types = r"The source dataframe and the ground truth dataframe have different rows count.|The source dataframe and the ground truth dataframe have different index.|Some values differ by more than the tolerance of 1e-6.|No sufficient correlation found when shifting up|Something wrong happens when naming the multi indices of the dataframe." 
+ error_contents = re.findall(value_check_types, single_feedback) + else: + error_contents = ["Undefined Error"] + + all_error_nodes = self.knowledgebase.graph.get_all_nodes_by_label_list(["error"]) + if not len(all_error_nodes): + return error_contents + else: + error_list = [] + for error_content in error_contents: + for error_node in all_error_nodes: + if error_content == error_node.content: + error_list.append(error_node) + else: + error_list.append(error_content) + if error_list[-1] in error_list[:-1]: + error_list.pop() + + return error_list + + def former_trace_query( + self, + evo: EvolvableSubjects, + queried_knowledge_v2: CoSTEERQueriedKnowledgeV2, + v2_query_former_trace_limit: int = 5, + v2_add_fail_attempt_to_latest_successful_execution: bool = False, + ) -> Union[CoSTEERQueriedKnowledge, set]: + """ + Query the former trace knowledge of the working trace, and find all the failed task information which tried more than fail_task_trial_limit times + """ + fail_task_trial_limit = self.settings.fail_task_trial_limit + + for target_task in evo.sub_tasks: + target_task_information = target_task.get_task_information() + if ( + target_task_information not in self.knowledgebase.success_task_to_knowledge_dict + and target_task_information in self.knowledgebase.working_trace_knowledge + and len(self.knowledgebase.working_trace_knowledge[target_task_information]) >= fail_task_trial_limit + ): + queried_knowledge_v2.failed_task_info_set.add(target_task_information) + + if ( + target_task_information not in self.knowledgebase.success_task_to_knowledge_dict + and target_task_information not in queried_knowledge_v2.failed_task_info_set + and target_task_information in self.knowledgebase.working_trace_knowledge + ): + former_trace_knowledge = copy.copy( + self.knowledgebase.working_trace_knowledge[target_task_information], + ) + # in former trace query we will delete the right trace in the following order:[..., value_generated_flag is True, value_generated_flag is 
False, ...] + # because we think this order means a deterioration of the trial (like a wrong gradient descent) + current_index = 1 + while current_index < len(former_trace_knowledge): + if ( + not former_trace_knowledge[current_index].feedback.value_generated_flag + and former_trace_knowledge[current_index - 1].feedback.value_generated_flag + ): + former_trace_knowledge.pop(current_index) + else: + current_index += 1 + + latest_attempt = None + if v2_add_fail_attempt_to_latest_successful_execution: + # When the last successful execution is not the last one in the working trace, it means we have tried to correct it. We should tell the agent this fail trial to avoid endless loop in the future. + if ( + len(former_trace_knowledge) > 0 + and len(self.knowledgebase.working_trace_knowledge[target_task_information]) > 1 + and self.knowledgebase.working_trace_knowledge[target_task_information].index( + former_trace_knowledge[-1] + ) + < len(self.knowledgebase.working_trace_knowledge[target_task_information]) - 1 + ): + latest_attempt = self.knowledgebase.working_trace_knowledge[target_task_information][-1] + + queried_knowledge_v2.task_to_former_failed_traces[target_task_information] = ( + former_trace_knowledge[-v2_query_former_trace_limit:], + latest_attempt, + ) + else: + queried_knowledge_v2.task_to_former_failed_traces[target_task_information] = ([], None) + + return queried_knowledge_v2 + + def component_query( + self, + evo: EvolvableSubjects, + queried_knowledge_v2: CoSTEERQueriedKnowledgeV2, + v2_query_component_limit: int = 5, + knowledge_sampler: float = 1.0, + ) -> CoSTEERQueriedKnowledge | None: + for target_task in evo.sub_tasks: + target_task_information = target_task.get_task_information() + if ( + target_task_information in self.knowledgebase.success_task_to_knowledge_dict + or target_task_information in queried_knowledge_v2.failed_task_info_set + ): + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] = [] + else: + if 
target_task_information not in self.knowledgebase.task_to_component_nodes: + self.knowledgebase.task_to_component_nodes[target_task_information] = self.analyze_component( + target_task_information, + ) + + component_analysis_result = self.knowledgebase.task_to_component_nodes[target_task_information] + + if len(component_analysis_result) > 1: + task_des_node_list = self.knowledgebase.graph_query_by_intersection( + component_analysis_result, + constraint_labels=["task_description"], + ) + single_component_constraint = (v2_query_component_limit // len(component_analysis_result)) + 1 + else: + task_des_node_list = [] + single_component_constraint = v2_query_component_limit + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] = [] + for component_node in component_analysis_result: + # Reverse iterate, a trade-off with intersection search + count = 0 + for task_des_node in self.knowledgebase.graph_query_by_node( + node=component_node, + step=1, + constraint_labels=["task_description"], + block=True, + )[::-1]: + if task_des_node not in task_des_node_list: + task_des_node_list.append(task_des_node) + count += 1 + if count >= single_component_constraint: + break + + for node in task_des_node_list: + for searched_node in self.knowledgebase.graph_query_by_node( + node=node, + step=50, + constraint_labels=[ + "task_success_implement", + ], + block=True, + ): + if searched_node.label == "task_success_implement": + target_knowledge = self.knowledgebase.node_to_implementation_knowledge_dict[ + searched_node.id + ] + if ( + target_knowledge + not in queried_knowledge_v2.task_to_similar_task_successful_knowledge[ + target_task_information + ] + ): + queried_knowledge_v2.task_to_similar_task_successful_knowledge[ + target_task_information + ].append(target_knowledge) + + # finally add embedding related knowledge + knowledge_base_success_task_list = list(self.knowledgebase.success_task_to_knowledge_dict) + + similarity = 
calculate_embedding_distance_between_str_list( + [target_task_information], + knowledge_base_success_task_list, + )[0] + similar_indexes = sorted( + range(len(similarity)), + key=lambda i: similarity[i], + reverse=True, + ) + embedding_similar_successful_knowledge = [ + self.knowledgebase.success_task_to_knowledge_dict[knowledge_base_success_task_list[index]] + for index in similar_indexes + ] + for knowledge in embedding_similar_successful_knowledge: + if ( + knowledge + not in queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] + ): + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information].append( + knowledge + ) + + if knowledge_sampler > 0: + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] = [ + knowledge + for knowledge in queried_knowledge_v2.task_to_similar_task_successful_knowledge[ + target_task_information + ] + if random.uniform(0, 1) <= knowledge_sampler + ] + + # Make sure no less than half of the knowledge are from GT + queried_knowledge_list = queried_knowledge_v2.task_to_similar_task_successful_knowledge[ + target_task_information + ] + queried_from_gt_knowledge_list = [ + knowledge + for knowledge in queried_knowledge_list + if knowledge.feedback is not None and knowledge.feedback.final_decision_based_on_gt == True + ] + queried_without_gt_knowledge_list = [ + knowledge + for knowledge in queried_knowledge_list + if knowledge.feedback is not None and knowledge.feedback.final_decision_based_on_gt == False + ] + queried_from_gt_knowledge_count = max( + min((v2_query_component_limit // 2 + 1), len(queried_from_gt_knowledge_list)), + v2_query_component_limit - len(queried_without_gt_knowledge_list), + ) + queried_knowledge_v2.task_to_similar_task_successful_knowledge[target_task_information] = ( + queried_from_gt_knowledge_list[:queried_from_gt_knowledge_count] + + queried_without_gt_knowledge_list[: v2_query_component_limit - 
queried_from_gt_knowledge_count] + ) + + return queried_knowledge_v2 + + def error_query( + self, + evo: EvolvableSubjects, + queried_knowledge_v2: CoSTEERQueriedKnowledgeV2, + v2_query_error_limit: int = 5, + knowledge_sampler: float = 1.0, + ) -> CoSTEERQueriedKnowledge | None: + for task_index, target_task in enumerate(evo.sub_tasks): + target_task_information = target_task.get_task_information() + queried_knowledge_v2.task_to_similar_error_successful_knowledge[target_task_information] = [] + if ( + target_task_information in self.knowledgebase.success_task_to_knowledge_dict + or target_task_information in queried_knowledge_v2.failed_task_info_set + ): + queried_knowledge_v2.task_to_similar_error_successful_knowledge[target_task_information] = [] + else: + queried_knowledge_v2.task_to_similar_error_successful_knowledge[target_task_information] = [] + if ( + target_task_information in self.knowledgebase.working_trace_error_analysis + and len(self.knowledgebase.working_trace_error_analysis[target_task_information]) > 0 + and len(queried_knowledge_v2.task_to_former_failed_traces[target_task_information]) > 0 + ): + queried_last_trace = queried_knowledge_v2.task_to_former_failed_traces[target_task_information][0][ + -1 + ] + target_index = self.knowledgebase.working_trace_knowledge[target_task_information].index( + queried_last_trace, + ) + last_knowledge_error_analysis_result = self.knowledgebase.working_trace_error_analysis[ + target_task_information + ][target_index] + else: + last_knowledge_error_analysis_result = [] + + error_nodes = [] + for error_node in last_knowledge_error_analysis_result: + if not isinstance(error_node, UndirectedNode): + error_node = self.knowledgebase.graph_get_node_by_content(content=error_node) + if error_node is None: + continue + error_nodes.append(error_node) + + if len(error_nodes) > 1: + task_trace_node_list = self.knowledgebase.graph_query_by_intersection( + error_nodes, + constraint_labels=["task_trace"], + 
output_intersection_origin=True, + ) + single_error_constraint = (v2_query_error_limit // len(error_nodes)) + 1 + else: + task_trace_node_list = [] + single_error_constraint = v2_query_error_limit + for error_node in error_nodes: + # Reverse iterate, a trade-off with intersection search + count = 0 + for task_trace_node in self.knowledgebase.graph_query_by_node( + node=error_node, + step=1, + constraint_labels=["task_trace"], + block=True, + )[::-1]: + if task_trace_node not in task_trace_node_list: + task_trace_node_list.append([[error_node], task_trace_node]) + count += 1 + if count >= single_error_constraint: + break + + # for error_node in last_knowledge_error_analysis_result: + # if not isinstance(error_node, UndirectedNode): + # error_node = self.knowledgebase.graph_get_node_by_content(content=error_node) + # if error_node is None: + # continue + # for searched_node in self.knowledgebase.graph_query_by_node( + # node=error_node, + # step=1, + # constraint_labels=["task_trace"], + # block=True, + # ): + # if searched_node not in [node[0] for node in task_trace_node_list]: + # task_trace_node_list.append((searched_node, error_node.content)) + + same_error_success_knowledge_pair_list = [] + same_error_success_node_set = set() + for error_node_list, trace_node in task_trace_node_list: + for searched_trace_success_node in self.knowledgebase.graph_query_by_node( + node=trace_node, + step=50, + constraint_labels=[ + "task_trace", + "task_success_implement", + "task_description", + ], + block=True, + ): + if ( + searched_trace_success_node not in same_error_success_node_set + and searched_trace_success_node.label == "task_success_implement" + ): + same_error_success_node_set.add(searched_trace_success_node) + + trace_knowledge = self.knowledgebase.node_to_implementation_knowledge_dict[trace_node.id] + success_knowledge = self.knowledgebase.node_to_implementation_knowledge_dict[ + searched_trace_success_node.id + ] + error_content = "" + for index, error_node in 
enumerate(error_node_list): + error_content += f"{index+1}. {error_node.content}; " + same_error_success_knowledge_pair_list.append( + ( + error_content, + (trace_knowledge, success_knowledge), + ), + ) + + if knowledge_sampler > 0: + same_error_success_knowledge_pair_list = [ + knowledge + for knowledge in same_error_success_knowledge_pair_list + if random.uniform(0, 1) <= knowledge_sampler + ] + + same_error_success_knowledge_pair_list = same_error_success_knowledge_pair_list[:v2_query_error_limit] + queried_knowledge_v2.task_to_similar_error_successful_knowledge[target_task_information] = ( + same_error_success_knowledge_pair_list + ) + + return queried_knowledge_v2 + + +class CoSTEERKnowledgeBaseV2(EvolvingKnowledgeBase): + def __init__(self, init_component_list=None, path: str | Path = None) -> None: + """ + Load knowledge, offer brief information of knowledge and common handle interfaces + """ + self.graph: UndirectedGraph = UndirectedGraph(Path.cwd() / "graph.pkl") + logger.info(f"Knowledge Graph loaded, size={self.graph.size()}") + + if init_component_list: + for component in init_component_list: + exist_node = self.graph.get_node_by_content(content=component) + node = exist_node if exist_node else UndirectedNode(content=component, label="component") + self.graph.add_nodes(node=node, neighbors=[]) + + # A dict containing all working trace until they fail or succeed + self.working_trace_knowledge = {} + + # A dict containing error analysis each step aligned with working trace + self.working_trace_error_analysis = {} + + # Add already success task + self.success_task_to_knowledge_dict = {} + + # key:node_id(for task trace and success implement), value:knowledge instance(aka 'CoSTEERKnowledge') + self.node_to_implementation_knowledge_dict = {} + + # store the task description to component nodes + self.task_to_component_nodes = {} + + def get_all_nodes_by_label(self, label: str) -> list[UndirectedNode]: + return self.graph.get_all_nodes_by_label(label) + + def 
update_success_task( + self, + success_task_info: str, + ): # Transfer the success tasks' working trace to knowledge storage & graph + success_task_trace = self.working_trace_knowledge[success_task_info] + success_task_error_analysis_record = ( + self.working_trace_error_analysis[success_task_info] + if success_task_info in self.working_trace_error_analysis + else [] + ) + task_des_node = UndirectedNode(content=success_task_info, label="task_description") + self.graph.add_nodes( + node=task_des_node, + neighbors=self.task_to_component_nodes[success_task_info], + ) # 1st version, we assume that all component nodes are given + for index, trace_unit in enumerate(success_task_trace): # every unit: single_knowledge + neighbor_nodes = [task_des_node] + if index != len(success_task_trace) - 1: + trace_node = UndirectedNode( + content=trace_unit.get_implementation_and_feedback_str(), + label="task_trace", + ) + self.node_to_implementation_knowledge_dict[trace_node.id] = trace_unit + for node_index, error_node in enumerate(success_task_error_analysis_record[index]): + if type(error_node).__name__ == "str": + queried_node = self.graph.get_node_by_content(content=error_node) + if queried_node is None: + new_error_node = UndirectedNode(content=error_node, label="error") + self.graph.add_node(node=new_error_node) + success_task_error_analysis_record[index][node_index] = new_error_node + else: + success_task_error_analysis_record[index][node_index] = queried_node + neighbor_nodes.extend(success_task_error_analysis_record[index]) + self.graph.add_nodes(node=trace_node, neighbors=neighbor_nodes) + else: + success_node = UndirectedNode( + content=trace_unit.get_implementation_and_feedback_str(), + label="task_success_implement", + ) + self.graph.add_nodes(node=success_node, neighbors=neighbor_nodes) + self.node_to_implementation_knowledge_dict[success_node.id] = trace_unit + + def query(self): + pass + + def graph_get_node_by_content(self, content: str) -> UndirectedNode: + return 
self.graph.get_node_by_content(content=content) + + def graph_query_by_content( + self, + content: Union[str, list[str]], + topk_k: int = 5, + step: int = 1, + constraint_labels: list[str] = None, + constraint_node: UndirectedNode = None, + similarity_threshold: float = 0.0, + constraint_distance: float = 0, + block: bool = False, + ) -> list[UndirectedNode]: + """ + search graph by content similarity and connection relationship, return empty list if nodes' chain without node + near to constraint_node + + Parameters + ---------- + constraint_distance + content + topk_k: the upper number of output for each query, if the number of fit nodes is less than topk_k, return all fit nodes's content + step + constraint_labels + constraint_node + similarity_threshold + block: despite the start node, the search can only flow through the constraint_label type nodes + + Returns + ------- + + """ + + return self.graph.query_by_content( + content=content, + topk_k=topk_k, + step=step, + constraint_labels=constraint_labels, + constraint_node=constraint_node, + similarity_threshold=similarity_threshold, + constraint_distance=constraint_distance, + block=block, + ) + + def graph_query_by_node( + self, + node: UndirectedNode, + step: int = 1, + constraint_labels: list[str] = None, + constraint_node: UndirectedNode = None, + constraint_distance: float = 0, + block: bool = False, + ) -> list[UndirectedNode]: + """ + search graph by connection, return empty list if nodes' chain without node near to constraint_node + Parameters + ---------- + node : start node + step : the max steps will be searched + constraint_labels : the labels of output nodes + constraint_node : the node that the output nodes must connect to + constraint_distance : the max distance between output nodes and constraint_node + block: despite the start node, the search can only flow through the constraint_label type nodes + + Returns + ------- + A list of nodes + + """ + nodes = self.graph.query_by_node( + node=node, + 
step=step, + constraint_labels=constraint_labels, + constraint_node=constraint_node, + constraint_distance=constraint_distance, + block=block, + ) + return nodes + + def graph_query_by_intersection( + self, + nodes: list[UndirectedNode], + steps: int = 1, + constraint_labels: list[str] = None, + output_intersection_origin: bool = False, + ) -> list[UndirectedNode] | list[list[list[UndirectedNode], UndirectedNode]]: + """ + search graph by node intersection, node intersected by a higher frequency has a prior order in the list + Parameters + ---------- + nodes : node list + step : the max steps will be searched + constraint_labels : the labels of output nodes + output_intersection_origin: output the list that contains the node which form this intersection node + + Returns + ------- + A list of nodes + + """ + node_count = len(nodes) + assert node_count >= 2, "nodes length must >=2" + intersection_node_list = [] + if output_intersection_origin: + origin_list = [] + for k in range(node_count, 1, -1): + possible_combinations = combinations(nodes, k) + for possible_combination in possible_combinations: + node_list = list(possible_combination) + intersection_node_list.extend( + self.graph.get_nodes_intersection(node_list, steps=steps, constraint_labels=constraint_labels), + ) + if output_intersection_origin: + for _ in range(len(intersection_node_list)): + origin_list.append(node_list) + intersection_node_list_sort_by_freq = [] + for index, node in enumerate(intersection_node_list): + if node not in intersection_node_list_sort_by_freq: + if output_intersection_origin: + intersection_node_list_sort_by_freq.append([origin_list[index], node]) + else: + intersection_node_list_sort_by_freq.append(node) + + return intersection_node_list_sort_by_freq diff --git a/alphaagent/components/coder/CoSTEER/prompts.yaml b/alphaagent/components/coder/CoSTEER/prompts.yaml new file mode 100755 index 00000000..78f4b627 --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/prompts.yaml @@ 
-0,0 +1,10 @@ + +analyze_component_prompt_v1_system: |- + User is getting a new task that might consist of the components below (given in component_index: component_description): + {{all_component_content}} + + You should find out what components does the new task have, and put their indices in a list. + Please response the critic in the json format. Here is an example structure for the JSON output, please strictly follow the format: + { + "component_no_list": the list containing indices of components. + } \ No newline at end of file diff --git a/alphaagent/components/coder/CoSTEER/scheduler.py b/alphaagent/components/coder/CoSTEER/scheduler.py new file mode 100755 index 00000000..ee3c63c9 --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/scheduler.py @@ -0,0 +1,25 @@ +import random + +from alphaagent.components.coder.CoSTEER.evolvable_subjects import EvolvingItem +from alphaagent.components.coder.CoSTEER.knowledge_management import ( + CoSTEERQueriedKnowledge, +) +from alphaagent.core.evaluation import Scenario +from alphaagent.log import logger + + +def random_select( + to_be_finished_task_index: list, + evo: EvolvingItem, + selected_num: int, + queried_knowledge: CoSTEERQueriedKnowledge, + scen: Scenario, +): + + to_be_finished_task_index = random.sample( + to_be_finished_task_index, + selected_num, + ) + + logger.info(f"The random selection is: {to_be_finished_task_index}") + return to_be_finished_task_index diff --git a/alphaagent/components/coder/CoSTEER/task.py b/alphaagent/components/coder/CoSTEER/task.py new file mode 100755 index 00000000..3a85c435 --- /dev/null +++ b/alphaagent/components/coder/CoSTEER/task.py @@ -0,0 +1,7 @@ +from alphaagent.core.experiment import Task + + +class CoSTEERTask(Task): + def __init__(self, base_code: str = None, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.base_code = base_code diff --git a/alphaagent/components/coder/data_science/ensemble/__init__.py 
b/alphaagent/components/coder/data_science/ensemble/__init__.py new file mode 100755 index 00000000..2e5b8b4d --- /dev/null +++ b/alphaagent/components/coder/data_science/ensemble/__init__.py @@ -0,0 +1,19 @@ +# from alphaagent.components.coder.CoSTEER import CoSTEER +# from alphaagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from alphaagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from alphaagent.core.scenario import Scenario + + +# class ModelEnsembleCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# ModelEnsembleCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = ModelEnsembleMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=1, scen=scen, **kwargs) diff --git a/alphaagent/components/coder/data_science/feature_process/__init__.py b/alphaagent/components/coder/data_science/feature_process/__init__.py new file mode 100755 index 00000000..9b0f25c1 --- /dev/null +++ b/alphaagent/components/coder/data_science/feature_process/__init__.py @@ -0,0 +1,19 @@ +# from alphaagent.components.coder.CoSTEER import CoSTEER +# from alphaagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from alphaagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from alphaagent.core.scenario import Scenario + + +# class FeatureCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# FeatureCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = FeatureMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=1, scen=scen, 
**kwargs) diff --git a/alphaagent/components/coder/data_science/model/__init__.py b/alphaagent/components/coder/data_science/model/__init__.py new file mode 100755 index 00000000..fb0455c0 --- /dev/null +++ b/alphaagent/components/coder/data_science/model/__init__.py @@ -0,0 +1,19 @@ +# from alphaagent.components.coder.CoSTEER import CoSTEER +# from alphaagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from alphaagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from alphaagent.core.scenario import Scenario + + +# class ModelCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# ModelCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = ModelMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=1, scen=scen, **kwargs) diff --git a/alphaagent/components/coder/data_science/raw_data_loader/__init__.py b/alphaagent/components/coder/data_science/raw_data_loader/__init__.py new file mode 100755 index 00000000..b64cdf71 --- /dev/null +++ b/alphaagent/components/coder/data_science/raw_data_loader/__init__.py @@ -0,0 +1,19 @@ +# from alphaagent.components.coder.CoSTEER import CoSTEER +# from alphaagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +# from alphaagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +# from alphaagent.core.scenario import Scenario + + +# class DataLoaderCoSTEER(CoSTEER): +# def __init__( +# self, +# scen: Scenario, +# *args, +# **kwargs, +# ) -> None: +# eva = CoSTEERMultiEvaluator( +# DataLoaderCoSTEEREvaluator(scen=scen), scen=scen +# ) # Please specify whether you agree running your eva in parallel or not +# es = DataLoaderMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + +# super().__init__(*args, 
class FactorCoSTEER(CoSTEER):
    """CoSTEER coder whose evolving strategy is ``FactorMultiProcessEvolvingStrategy``."""

    def __init__(
        self,
        scen: Scenario,
        *args,
        **kwargs,
    ) -> None:
        # The settings object and the evaluator stack are the same for every
        # factor coder in this module; only the evolving strategy differs.
        factor_settings = FACTOR_COSTEER_SETTINGS
        super().__init__(
            *args,
            settings=factor_settings,
            eva=CoSTEERMultiEvaluator(FactorEvaluatorForCoder(scen=scen), scen=scen),
            es=FactorMultiProcessEvolvingStrategy(scen=scen, settings=factor_settings),
            evolving_version=2,
            scen=scen,
            **kwargs,
        )


class FactorParser(CoSTEER):
    """CoSTEER coder whose evolving strategy is ``FactorParsingStrategy``."""

    def __init__(
        self,
        scen: Scenario,
        *args,
        **kwargs,
    ) -> None:
        factor_settings = FACTOR_COSTEER_SETTINGS
        super().__init__(
            *args,
            settings=factor_settings,
            eva=CoSTEERMultiEvaluator(FactorEvaluatorForCoder(scen=scen), scen=scen),
            es=FactorParsingStrategy(scen=scen, settings=factor_settings),
            evolving_version=2,
            scen=scen,
            **kwargs,
        )


class FactorCoder(CoSTEER):
    """CoSTEER coder whose evolving strategy is ``FactorRunningStrategy``."""

    def __init__(
        self,
        scen: Scenario,
        *args,
        **kwargs,
    ) -> None:
        factor_settings = FACTOR_COSTEER_SETTINGS
        super().__init__(
            *args,
            settings=factor_settings,
            eva=CoSTEERMultiEvaluator(FactorEvaluatorForCoder(scen=scen), scen=scen),
            es=FactorRunningStrategy(scen=scen, settings=factor_settings),
            evolving_version=2,
            scen=scen,
            **kwargs,
        )
class FactorEvaluator:
    """Base class of the factor evaluators in this module.

    The constructor has the same shape as the generic evaluator's, but the
    class is kept separate on purpose to stress that the two are different.
    """

    def __init__(self, scen=None) -> None:
        # Scenario object (may be None); subclasses use it to build prompts.
        self.scen = scen

    @abstractmethod
    def evaluate(
        self,
        target_task: Task,
        implementation: Workspace,
        gt_implementation: Workspace,
        **kwargs,
    ) -> Tuple[str, object]:
        """Evaluate ``implementation`` (optionally against ``gt_implementation``).

        Subclasses can obtain the dataframes to compare with::

            _, gen_df = implementation.execute()
            _, gt_df = gt_implementation.execute()

        Returns
        -------
        Tuple[str, object]
            - str: text description of the evaluation result
            - object: a comparable metric (bool, int, float ...), or None when
              the evaluator only produces text

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("Please implement the `evaluator` method")

    def _get_df(self, gt_implementation: Workspace, implementation: Workspace):
        """Execute both workspaces and return ``(gt_df, gen_df)``.

        A Series result is converted to a one-column frame ("gt_factor" for the
        ground truth, "source_factor" for the generated one) and DataFrame
        results are sorted by index. Any other result (e.g. None) is passed
        through untouched. ``gt_df`` is None when ``gt_implementation`` is None.
        """

        def _run_and_normalize(workspace, column_name):
            _, result = workspace.execute()
            if isinstance(result, pd.Series):
                result = result.to_frame(column_name)
            return result.sort_index() if isinstance(result, pd.DataFrame) else result

        # The ground truth is executed first, then the generated implementation.
        gt_df = None if gt_implementation is None else _run_and_normalize(gt_implementation, "gt_factor")
        gen_df = _run_and_normalize(implementation, "source_factor")
        return gt_df, gen_df

    def __str__(self) -> str:
        return type(self).__name__
class FactorInfEvaluator(FactorEvaluator):
    """Checks the generated factor values for positive or negative infinity."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
    ) -> Tuple[str, object]:
        """Return ``(feedback, passed)``; ``passed`` is True only when no cell is +/-inf."""
        _, gen_df = self._get_df(gt_implementation, implementation)
        if gen_df is None:
            return (
                "The source dataframe is None. Please check the implementation.",
                False,
            )

        infinite_cells = gen_df.isin([float("inf"), -float("inf")])
        inf_total = infinite_cells.sum().sum()
        if inf_total != 0:
            return (
                f"The source dataframe has {inf_total} infinite values. Please check the implementation.",
                False,
            )
        return "The source dataframe does not have any infinite values.", True


class FactorSingleColumnEvaluator(FactorEvaluator):
    """Checks that the generated dataframe has exactly one column."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
    ) -> Tuple[str, object]:
        """Return ``(feedback, passed)``; ``passed`` is True only for a one-column frame."""
        _, gen_df = self._get_df(gt_implementation, implementation)
        if gen_df is None:
            return (
                "The source dataframe is None. Please check the implementation.",
                False,
            )

        column_count = len(gen_df.columns)
        if column_count != 1:
            return (
                "The source dataframe has more than one column. Please check the implementation. We only evaluate the first column.",
                False,
            )
        return "The source dataframe has only one column which is correct.", True
class FactorDatetimeDailyEvaluator(FactorEvaluator):
    """Checks that the generated dataframe carries a parseable, non-minute-level datetime index."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
    ) -> Tuple[str, object]:
        """Return ``(feedback, passed)`` for the datetime-index check.

        ``passed`` is False when the frame is None, has no "datetime" index
        level, the level cannot be parsed by ``pd.to_datetime``, or a
        one-minute gap is found between consecutive index entries.
        """
        # The return annotation used to be ``Tuple[str | object]``: a one-element
        # tuple type, and a TypeError at import time on Python < 3.10.
        _, gen_df = self._get_df(gt_implementation, implementation)
        if gen_df is None:
            return "The source dataframe is None. Skip the evaluation of the datetime format.", False

        if "datetime" not in gen_df.index.names:
            return "The source dataframe does not have a datetime index. Please check the implementation.", False

        try:
            # Parsed once and reused below instead of converting the level twice.
            datetimes = pd.to_datetime(gen_df.index.get_level_values("datetime"))
        except Exception:
            return (
                f"The source dataframe has a datetime index but it is not in the correct format (maybe a regular string or other objects). Please check the implementation.\n The head of the output dataframe is: \n{gen_df.head()}",
                False,
            )

        time_diff = datetimes.to_series().diff().dropna().unique()
        if pd.Timedelta(minutes=1) in time_diff:
            return (
                "The generated dataframe is not daily. The implementation is definitely wrong. Please check the implementation.",
                False,
            )
        return "The generated dataframe is daily.", True


class FactorRowCountEvaluator(FactorEvaluator):
    """Compares the row counts of the generated and the ground-truth dataframes."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
    ) -> Tuple[str, object]:
        """Return ``(feedback, ratio)`` with ``ratio = min(rows) / max(rows)``.

        The metric is False when the generated frame is None and None when the
        ground-truth frame is None (no comparison possible). Feedback text is
        empty when the ratio is above 0.99.
        """
        gt_df, gen_df = self._get_df(gt_implementation, implementation)
        if gen_df is None:
            return (
                "The source dataframe is None. Please check the implementation.",
                False,
            )
        if gt_df is None:
            # Without a ground-truth frame there is nothing to compare against;
            # previously this path crashed with TypeError on len(None).
            return (
                "The ground truth dataframe is None. Skip the evaluation of the row count.",
                None,
            )

        gen_rows, gt_rows = len(gen_df), len(gt_df)
        larger = max(gen_rows, gt_rows)
        # Two empty frames have identical row counts; avoid dividing by zero.
        ratio = min(gen_rows, gt_rows) / larger if larger > 0 else 1.0

        if ratio <= 0.99:
            feedback = (
                f"The ratio of rows count in the source dataframe to the ground truth dataframe is {ratio:.2f}. "
                + "Please verify the implementation. "
            )
        else:
            feedback = ""
        return feedback, ratio
class FactorMissingValuesEvaluator(FactorEvaluator):
    """Compares the total number of missing values in the two dataframes."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
    ) -> Tuple[str, object]:
        """Return ``(feedback, passed)``; ``passed`` is True when both NaN totals match.

        The metric is False when the generated frame is None and None when the
        ground-truth frame is None (no comparison possible).
        """
        gt_df, gen_df = self._get_df(gt_implementation, implementation)
        if gen_df is None:
            return (
                "The source dataframe is None. Please check the implementation.",
                False,
            )
        if gt_df is None:
            # Previously this path crashed with AttributeError on None.isna().
            return (
                "The ground truth dataframe is None. Skip the evaluation of the missing values.",
                None,
            )

        # Each total is computed once and reused in the comparison and the message.
        gen_missing = gen_df.isna().sum().sum()
        gt_missing = gt_df.isna().sum().sum()
        if gen_missing == gt_missing:
            return "Both dataframes have the same missing values.", True
        return (
            f"The dataframes do not have the same missing values. The source dataframe has {gen_missing} missing values, while the ground truth dataframe has {gt_missing} missing values. Please check the implementation.",
            False,
        )


class FactorEqualValueRatioEvaluator(FactorEvaluator):
    """Measures the share of cells equal to the ground truth within a 1e-6 tolerance."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
    ) -> Tuple[str, object]:
        """Return ``(feedback, acc_rate)``.

        ``acc_rate`` is the fraction of cells whose absolute difference from
        the ground truth is below 1e-6, or -1 when the comparison cannot be
        carried out (missing frame or failing arithmetic).
        """
        gt_df, gen_df = self._get_df(gt_implementation, implementation)
        if gen_df is None:
            return (
                "The source dataframe is None. Please check the implementation.",
                -1,
            )
        if gt_df is None:
            return (
                "The ground truth dataframe is None. Skip the evaluation of the equal value ratio.",
                -1,
            )

        try:
            # NOTE(review): DataFrame.sub aligns on index and columns, so frames with
            # different column labels compare as all-NaN (nothing "close") - confirm
            # that callers pass identically labelled frames.
            close_values = gen_df.sub(gt_df).abs().lt(1e-6)
            acc_rate = close_values.astype(int).sum().sum() / close_values.size
            # Only the first column decides the wording of the feedback.
            first_column_all_close = close_values.all().iloc[0]
        except Exception as exc:
            # The former bare ``except`` left ``acc_rate`` unbound, so every failure
            # here surfaced as an UnboundLocalError instead of a feedback message.
            return (
                f"Failed to compare the source dataframe with the ground truth dataframe ({exc!r}). Please check the implementation.",
                -1,
            )

        if first_column_all_close:
            return (
                "All values in the dataframes are equal within the tolerance of 1e-6.",
                acc_rate,
            )
        return (
            "Some values differ by more than the tolerance of 1e-6. Check for rounding errors or differences in the calculation methods.",
            acc_rate,
        )
class FactorValueEvaluator(FactorEvaluator):
    """Runs the individual value checks and condenses them into one verdict."""

    def evaluate(
        self,
        implementation: Workspace,
        gt_implementation: Workspace,
        version: int = 1,  # 1 for qlib factors and 2 for kaggle factors
        **kwargs,
    ) -> Tuple:
        """Evaluate the generated factor values.

        Parameters
        ----------
        implementation : Workspace
            Workspace whose output is being judged.
        gt_implementation : Workspace
            Ground-truth workspace, or None; the comparison checks only run
            when it is not None.
        version : int
            1 enables the single-column and daily-datetime checks; 2 compares
            the output column count with ``self.scen.input_shape`` instead.

        Returns
        -------
        Tuple
            ``(conclusion_str, decision)`` where ``conclusion_str`` joins all
            feedback messages with newlines and ``decision`` is True, False or
            None (None meaning the value checks alone are inconclusive).
        """
        conclusions = []

        # Initialize result variables
        # NOTE(review): ``row_result`` is set to 0 here and overwritten with None a
        # few lines below, so the 0 is never observed.
        row_result = 0
        index_result = 0
        output_format_result = None
        equal_value_ratio_result = 0
        high_correlation_result = False
        row_result = None

        # import pdb; pdb.set_trace()

        # Version 1: report whether the output has a single column (feedback only,
        # the boolean result is discarded). Version 2: only complain when the output
        # has more columns than the scenario's input.
        if version == 1:
            feedback_str, _ = FactorSingleColumnEvaluator(self.scen).evaluate(implementation, gt_implementation)
            conclusions.append(feedback_str)
        elif version == 2:
            input_shape = self.scen.input_shape
            _, gen_df = self._get_df(gt_implementation, implementation)
            # NOTE(review): ``gen_df`` is not checked for None before ``.shape`` is read.
            if gen_df.shape[-1] > input_shape[-1]:
                conclusions.append(
                    "Output dataframe has more columns than input feature which is not acceptable in feature processing tasks. Please check the implementation to avoid generating too many columns. Consider this implementation as a failure."
                )

        feedback_str, inf_evaluate_res = FactorInfEvaluator(self.scen).evaluate(implementation, gt_implementation)
        conclusions.append(feedback_str)

        # The LLM-based output-format check is currently disabled:
        # feedback_str, _ = FactorOutputFormatEvaluator(self.scen).evaluate(implementation, gt_implementation)
        # conclusions.append(feedback_str)
        if version == 1:
            feedback_str, daily_check_result = FactorDatetimeDailyEvaluator(self.scen).evaluate(
                implementation, gt_implementation
            )
            conclusions.append(feedback_str)
        else:
            daily_check_result = None

        # Comparisons against the ground truth; skipped entirely without one.
        if gt_implementation is not None:
            feedback_str, row_result = FactorRowCountEvaluator(self.scen).evaluate(implementation, gt_implementation)
            conclusions.append(feedback_str)

            feedback_str, index_result = FactorIndexEvaluator(self.scen).evaluate(implementation, gt_implementation)
            conclusions.append(feedback_str)

            # Despite its name, ``output_format_result`` holds the missing-values verdict.
            feedback_str, output_format_result = FactorMissingValuesEvaluator(self.scen).evaluate(
                implementation, gt_implementation
            )
            conclusions.append(feedback_str)

            feedback_str, equal_value_ratio_result = FactorEqualValueRatioEvaluator(self.scen).evaluate(
                implementation, gt_implementation
            )
            conclusions.append(feedback_str)

            # Correlation is only computed when the two indexes overlap almost fully.
            if index_result > 0.99:
                feedback_str, high_correlation_result = FactorCorrelationEvaluator(
                    hard_check=True, scen=self.scen
                ).evaluate(implementation, gt_implementation)
            else:
                high_correlation_result = False
                feedback_str = "The source dataframe and the ground truth dataframe have different index. Give up comparing the values and correlation because it's useless"
            conclusions.append(feedback_str)

        # Combine all conclusions into a single string
        conclusion_str = "\n".join(conclusions)

        # NOTE(review): ``and`` binds tighter than ``or``, so this reads as
        # ``(gt is not None and ratio > 0.99) or high_correlation_result``.
        if gt_implementation is not None and (equal_value_ratio_result > 0.99) or high_correlation_result:
            decision_from_value_check = True
        elif (
            row_result is not None
            and row_result <= 0.99
            or output_format_result is False
            or daily_check_result is False
            or inf_evaluate_res is False
        ):
            # Any hard failure among the individual checks yields a negative verdict.
            decision_from_value_check = False
        else:
            # Neither clearly right nor clearly wrong: leave the decision open.
            decision_from_value_check = None
        return conclusion_str, decision_from_value_check
class FactorFinalDecisionEvaluator(FactorEvaluator):
    """Asks the LLM for a final verdict based on execution, value and code feedback."""

    def evaluate(
        self,
        target_task: FactorTask,
        execution_feedback: str,
        value_feedback: str,
        code_feedback: str,
        **kwargs,
    ) -> Tuple:
        """Return ``(final_decision, final_feedback)`` parsed from the LLM's JSON reply.

        Raises
        ------
        ValueError
            If the reply is not valid JSON.
        KeyError
            If 'final_decision' or 'final_feedback' is still missing after
            three attempts.
        """
        if self.scen is not None:
            scenario_desc = self.scen.get_scenario_all_desc(target_task, filtered_tag="feature")
        else:
            scenario_desc = "No scenario description."
        system_prompt = (
            Environment(undefined=StrictUndefined)
            .from_string(evaluate_prompts["evaluator_final_decision_v1_system"])
            .render(scenario=scenario_desc)
        )

        if value_feedback is not None:
            value_text = value_feedback
        else:
            value_text = "No Ground Truth Value provided, so no evaluation on value is performed."

        user_template = Environment(undefined=StrictUndefined).from_string(
            evaluate_prompts["evaluator_final_decision_v1_user"],
        )
        feedback_to_render = execution_feedback
        # At most 10 rounds: while the prompt is over the token limit, keep only the
        # second half of the execution feedback and render again.
        for _ in range(10):
            user_prompt = user_template.render(
                factor_information=target_task.get_task_information(),
                execution_feedback=feedback_to_render,
                code_feedback=code_feedback,
                value_feedback=value_text,
            )
            token_count = APIBackend().build_messages_and_calculate_token(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
            )
            if token_count <= LLM_SETTINGS.chat_token_limit:
                break
            feedback_to_render = feedback_to_render[len(feedback_to_render) // 2 :]

        # TODO: with retry_context(retry_n=3, except_list=[KeyError]):
        max_attempts = 3
        for attempt in range(max_attempts):
            try:
                # The chat cache is bypassed on retries, and the attempt number is
                # used as seed so a retry does not replay the same answer.
                backend = APIBackend() if attempt == 0 else APIBackend(use_chat_cache=False)
                raw_reply = backend.build_messages_and_create_chat_completion(
                    user_prompt=user_prompt,
                    system_prompt=system_prompt,
                    reasoning_flag=False,
                    json_mode=True,
                    seed=attempt,
                )
                parsed = json.loads(raw_reply)
                decision_raw = parsed["final_decision"]
                feedback_text = parsed["final_feedback"]
                return str(decision_raw).lower() in ["true", "1"], feedback_text
            except json.JSONDecodeError as e:
                raise ValueError("Failed to decode JSON response from API.") from e
            except KeyError as e:
                if attempt + 1 >= max_attempts:
                    raise KeyError(
                        "Response from API is missing 'final_decision' or 'final_feedback' key after multiple attempts."
                    ) from e

        return None, None
class FactorEvaluatorForCoder(CoSTEEREvaluator):
    """v1 evaluator for a single factor implementation.

    It chains the value, code and final-decision evaluators from the shared
    evaluation utilities.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        scen = self.scen
        self.value_evaluator = FactorValueEvaluator(scen)
        self.code_evaluator = FactorCodeEvaluator(scen)
        self.final_decision_evaluator = FactorFinalDecisionEvaluator(scen)

    def evaluate(
        self,
        target_task: FactorTask,
        implementation: Workspace,
        gt_implementation: Workspace = None,
        queried_knowledge: QueriedKnowledge = None,
        **kwargs,
    ) -> FactorSingleFeedback:
        """Produce the feedback object for one factor implementation.

        Returns None when ``implementation`` is None, the stored feedback when
        the task is already known as successful, and a canned negative feedback
        when the task is in the failed set. Otherwise the implementation is
        executed and evaluated.
        """
        if implementation is None:
            return None

        task_info = target_task.get_task_information()
        if queried_knowledge is not None:
            if task_info in queried_knowledge.success_task_to_knowledge_dict:
                return queried_knowledge.success_task_to_knowledge_dict[task_info].feedback
            if task_info in queried_knowledge.failed_task_info_set:
                return FactorSingleFeedback(
                    execution_feedback="This task has failed too many times, skip implementation.",
                    value_generated_flag=False,
                    code_feedback="This task has failed too many times, skip code evaluation.",
                    value_feedback="This task has failed too many times, skip value evaluation.",
                    final_decision=False,
                    final_feedback="This task has failed too many times, skip final decision evaluation.",
                    final_decision_based_on_gt=False,
                )

        feedback = FactorSingleFeedback()

        # 1. Execute the implementation, collapse long runs of numbers in the log
        #    and drop every line mentioning "warning".
        raw_log, gen_df = implementation.execute()
        raw_log = re.sub(r"(?<=\D)(,\s+-?\d+\.\d+){50,}(?=\D)", ", ", raw_log)
        kept_lines = []
        for log_line in raw_log.split("\n"):
            if "warning" not in log_line.lower():
                kept_lines.append(log_line)
        feedback.execution_feedback = "\n".join(kept_lines)

        # 2. Value feedback, only when a factor value was actually produced.
        if gen_df is not None:
            feedback.value_generated_flag = True
            feedback.value_feedback, value_decision = self.value_evaluator.evaluate(
                implementation=implementation, gt_implementation=gt_implementation, version=target_task.version
            )
        else:
            feedback.value_feedback = "No factor value generated, skip value evaluation."
            feedback.value_generated_flag = False
            value_decision = None

        feedback.final_decision_based_on_gt = gt_implementation is not None

        if value_decision is True:
            # A passing value check needs no code critique, to avoid confusion.
            feedback.code_feedback = "Final decision is True and there are no code critics."
            feedback.final_decision = value_decision
            feedback.final_feedback = "Value evaluation passed, skip final decision evaluation."
            return feedback

        # Both remaining outcomes (failed or undecided) need the code critique.
        feedback.code_feedback, _ = self.code_evaluator.evaluate(
            target_task=target_task,
            implementation=implementation,
            execution_feedback=feedback.execution_feedback,
            value_feedback=feedback.value_feedback,
            gt_implementation=gt_implementation,
        )
        if value_decision is False:
            feedback.final_decision = value_decision
            feedback.final_feedback = "Value evaluation failed, skip final decision evaluation."
        else:
            # Undecided value check: let the final-decision evaluator settle it.
            feedback.final_decision, feedback.final_feedback = self.final_decision_evaluator.evaluate(
                target_task=target_task,
                execution_feedback=feedback.execution_feedback,
                value_feedback=feedback.value_feedback,
                code_feedback=feedback.code_feedback,
            )
        return feedback


# TODO: not implemented yet; the body below is a placeholder and returns None.
def shorten_prompt(tpl: str, render_kwargs: dict, shorten_key: str, max_trail: int = 10) -> str:
    """Shorten a prompt that is too long.

    The prompt must not be truncated directly; instead the value stored under
    ``shorten_key`` is the part that should be shortened.
    """
    # TODO: this should replace most of code in
    # - FactorFinalDecisionEvaluator.evaluate
    # - FactorCodeEvaluator.evaluate
def error_summary(
    self,
    target_task: FactorTask,
    queried_former_failed_knowledge_to_render: list,
    queried_similar_error_knowledge_to_render: list,
) -> str:
    """Ask the LLM to summarize the errors of the latest failed attempt.

    The system prompt is built from the scenario description, the task
    information and the last entry of ``queried_former_failed_knowledge_to_render``;
    the user prompt lists the similar-error knowledge. While the prompt does
    not fit under the chat token limit, the last similar-error entry is
    dropped and the user prompt is rendered again (at most 10 renders).

    Returns
    -------
    str
        The chat completion returned by the backend.
    """
    error_summary_system_prompt = (
        Environment(undefined=StrictUndefined)
        .from_string(implement_prompts["evolving_strategy_error_summary_v2_system"])
        .render(
            scenario=self.scen.get_scenario_all_desc(target_task),
            factor_information_str=target_task.get_task_information(),
            code_and_feedback=queried_former_failed_knowledge_to_render[-1].get_implementation_and_feedback_str(),
        )
        .strip("\n")
    )

    # The template does not change between rounds, so it is parsed once.
    user_template = Environment(undefined=StrictUndefined).from_string(
        implement_prompts["evolving_strategy_error_summary_v2_user"]
    )
    for _ in range(10):  # max attempt to reduce the length of error_summary_user_prompt
        error_summary_user_prompt = user_template.render(
            queried_similar_error_knowledge=queried_similar_error_knowledge_to_render,
        ).strip("\n")
        if (
            APIBackend().build_messages_and_calculate_token(
                user_prompt=error_summary_user_prompt, system_prompt=error_summary_system_prompt
            )
            < LLM_SETTINGS.chat_token_limit
        ):
            break
        if len(queried_similar_error_knowledge_to_render) == 0:
            # Nothing left to trim: further rounds would only re-render and
            # re-count the very same prompt, so stop here.
            break
        queried_similar_error_knowledge_to_render = queried_similar_error_knowledge_to_render[:-1]

    error_summary_critics = APIBackend(
        use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache
    ).build_messages_and_create_chat_completion(
        user_prompt=error_summary_user_prompt, system_prompt=error_summary_system_prompt, json_mode=False
    )
    return error_summary_critics
and FACTOR_COSTEER_SETTINGS.v2_error_summary + and len(queried_similar_error_knowledge_to_render) != 0 + and len(queried_former_failed_knowledge_to_render) != 0 + ): + error_summary_critics = self.error_summary( + target_task, + queried_former_failed_knowledge_to_render, + queried_similar_error_knowledge_to_render, + ) + else: + error_summary_critics = None + # 构建user_prompt。开始写代码 + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string( + implement_prompts["evolving_strategy_factor_implementation_v2_user"], + ) + .render( + # factor_information_str=target_factor_task_information, + # queried_similar_successful_knowledge=queried_similar_successful_knowledge_to_render, + # queried_similar_error_knowledge=queried_similar_error_knowledge_to_render, + # error_summary_critics=error_summary_critics, + # latest_attempt_to_latest_successful_execution=latest_attempt_to_latest_successful_execution, + factor_information_str=target_task.get_task_description(), + queried_similar_error_knowledge=queried_similar_error_knowledge_to_render, + error_summary_critics=error_summary_critics, + similar_successful_factor_description=queried_similar_successful_knowledge_to_render[0].target_task.get_task_description(), + similar_successful_expression=self.extract_expr(queried_similar_successful_knowledge_to_render[0].implementation.code), + latest_attempt_to_latest_successful_execution=latest_attempt_to_latest_successful_execution, + ) + .strip("\n") + ) + if ( + APIBackend().build_messages_and_calculate_token(user_prompt=user_prompt, system_prompt=system_prompt) + < LLM_SETTINGS.chat_token_limit + ): + break + elif len(queried_former_failed_knowledge_to_render) > 1: + queried_former_failed_knowledge_to_render = queried_former_failed_knowledge_to_render[1:] + elif len(queried_similar_successful_knowledge_to_render) > len( + queried_similar_error_knowledge_to_render, + ): + queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge_to_render[:-1] + 
elif len(queried_similar_error_knowledge_to_render) > 0: + queried_similar_error_knowledge_to_render = queried_similar_error_knowledge_to_render[:-1] + for _ in range(10): + try: + code = json.loads( + APIBackend( + use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache + ).build_messages_and_create_chat_completion( + user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True + ) + )["code"] + return code + except json.decoder.JSONDecodeError: + pass + else: + return "" # return empty code if failed to get code after 10 attempts + + def assign_code_list_to_evo(self, code_list, evo): + for index in range(len(evo.sub_tasks)): + if code_list[index] is None: + continue + if evo.sub_workspace_list[index] is None: + evo.sub_workspace_list[index] = FactorFBWorkspace(target_task=evo.sub_tasks[index]) + evo.sub_workspace_list[index].inject_code(**{"factor.py": code_list[index]}) + return evo + + + +alphaagent_implement_prompts = Prompts(file_path=Path(__file__).parent / "prompts_alphaagent.yaml") +class FactorParsingStrategy(MultiProcessEvolvingStrategy): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.num_loop = 0 + self.haveSelected = False + + def extract_expr(self, code_str: str) -> str: + """从代码字符串中提取expr表达式""" + # 使用正则表达式匹配expr = "xxx"或expr = 'xxx'的模式 + pattern = r'expr\s*=\s*["\']([^"\']*)["\']' + match = re.search(pattern, code_str) + if match: + return match.group(1) + else: + return "" + + + def implement_one_task( + self, + target_task: FactorTask, + queried_knowledge: CoSTEERQueriedKnowledge, + ) -> str: + """ + 实现单个因子任务的代码生成逻辑 + + 该函数有两种工作模式: + 1. 首次执行时:直接使用模板生成代码 + 2. 
之前有报错时:提供报错信息和成功/失败案例给LLM,由其重写表达式 + + Args: + target_task: 要实现的目标因子任务 + queried_knowledge: 查询到的知识库,包含相似的成功案例和失败案例 + + Returns: + str: 生成的因子代码 + """ + # 获取目标任务信息 + target_factor_task_information = target_task.get_task_information() + + # 获取相似的成功实现案例列表 + queried_similar_successful_knowledge = ( + queried_knowledge.task_to_similar_task_successful_knowledge[target_factor_task_information] + if queried_knowledge is not None + else [] + ) # A list, [success task implement knowledge] + + # 获取相似的错误实现案例字典(如果使用V2版本的知识管理) + if isinstance(queried_knowledge, CoSTEERQueriedKnowledgeV2): + queried_similar_error_knowledge = ( + queried_knowledge.task_to_similar_error_successful_knowledge[target_factor_task_information] + if queried_knowledge is not None + else {} + ) # A dict, {{error_type:[[error_imp_knowledge, success_imp_knowledge],...]},...} + else: + queried_similar_error_knowledge = {} + + # 获取此任务之前的失败实现列表 + queried_former_failed_knowledge = ( + queried_knowledge.task_to_former_failed_traces[target_factor_task_information][0] + if queried_knowledge is not None + else [] + ) + + queried_former_failed_knowledge_to_render = queried_former_failed_knowledge + + # 首次执行时:直接使用模板生成代码 + if len(queried_former_failed_knowledge) == 0: + rendered_code = code_template.render( + expression=target_task.factor_expression, + factor_name=target_task.factor_name + ) + return rendered_code + + # 之前有报错时:提供报错信息和案例给LLM,重写表达式 + else: + # 获取最近一次尝试到最近一次成功执行的信息 + latest_attempt_to_latest_successful_execution = queried_knowledge.task_to_former_failed_traces[ + target_factor_task_information + ][1] + + # 构建系统提示 + system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string( + alphaagent_implement_prompts["evolving_strategy_factor_implementation_v1_system"], + ) + .render( + scenario=self.scen.get_scenario_all_desc(target_task, filtered_tag="feature"), + # former_expression=self.extract_expr(queried_former_failed_knowledge_to_render[-1].implementation.code), + # 
former_feedback=queried_former_failed_knowledge_to_render[-1].feedback, + ) + ) + queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge + queried_similar_error_knowledge_to_render = queried_similar_error_knowledge + + # 动态调整提示长度,防止超出token限制 + for _ in range(10): # 最多尝试10次减少用户提示长度 + # 生成错误摘要(可选功能) + if ( + isinstance(queried_knowledge, CoSTEERQueriedKnowledgeV2) + and FACTOR_COSTEER_SETTINGS.v2_error_summary + and len(queried_similar_error_knowledge_to_render) != 0 + and len(queried_former_failed_knowledge_to_render) != 0 + ): + error_summary_critics = self.error_summary( + target_task, + queried_former_failed_knowledge_to_render, + queried_similar_error_knowledge_to_render, + ) + else: + error_summary_critics = None + + # 构建用户提示 + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string( + alphaagent_implement_prompts["evolving_strategy_factor_implementation_v2_user"], + ) + .render( + factor_information_str=target_task.get_task_description(), + queried_similar_error_knowledge=queried_similar_error_knowledge_to_render, + former_expression=self.extract_expr(queried_former_failed_knowledge_to_render[-1].implementation.code), + former_feedback=queried_former_failed_knowledge_to_render[-1].feedback, + error_summary_critics=error_summary_critics, + similar_successful_factor_description=queried_similar_successful_knowledge_to_render[-1].target_task.get_task_description(), + similar_successful_expression=self.extract_expr(queried_similar_successful_knowledge_to_render[-1].implementation.code), + latest_attempt_to_latest_successful_execution=latest_attempt_to_latest_successful_execution, + ) + .strip("\n") + ) + + # 检查token数量是否超限,若超限则逐步减少要渲染的知识 + if ( + APIBackend().build_messages_and_calculate_token(user_prompt=user_prompt, system_prompt=system_prompt) + < LLM_SETTINGS.chat_token_limit + ): + break + elif len(queried_former_failed_knowledge_to_render) > 1: + # 减少历史失败案例 + queried_former_failed_knowledge_to_render = 
queried_former_failed_knowledge_to_render[1:] + elif len(queried_similar_successful_knowledge_to_render) > len( + queried_similar_error_knowledge_to_render, + ): + # 减少成功案例 + queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge_to_render[:-1] + elif len(queried_similar_error_knowledge_to_render) > 0: + # 减少错误案例 + queried_similar_error_knowledge_to_render = queried_similar_error_knowledge_to_render[:-1] + + # 尝试最多10次从LLM获取表达式 + for _ in range(10): + try: + # 调用API获取新的表达式 + expr = json.loads( + APIBackend( + use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache + ).build_messages_and_create_chat_completion( + user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True, reasoning_flag=False + ) + )["expr"] + + # 使用新表达式渲染代码模板 + rendered_code = code_template.render( + expression=expr, + factor_name=target_task.factor_name + ) + return rendered_code + + except json.decoder.JSONDecodeError: + # JSON解析失败时继续尝试 + pass + + def assign_code_list_to_evo(self, code_list, evo): + for index in range(len(evo.sub_tasks)): + if code_list[index] is None: + continue + if evo.sub_workspace_list[index] is None: + evo.sub_workspace_list[index] = FactorFBWorkspace(target_task=evo.sub_tasks[index]) + evo.sub_workspace_list[index].inject_code(**{"factor.py": code_list[index]}) + return evo + + + +class FactorRunningStrategy(MultiProcessEvolvingStrategy): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.num_loop = 0 + self.haveSelected = False + + + def implement_one_task( + self, + target_task: FactorTask, + queried_knowledge: CoSTEERQueriedKnowledge, + ) -> str: + + rendered_code = code_template.render( + expression=target_task.factor_expression, + factor_name=target_task.factor_name + ) + return rendered_code + + + def assign_code_list_to_evo(self, code_list, evo): + for index in range(len(evo.sub_tasks)): + if code_list[index] is None: + continue + if evo.sub_workspace_list[index] is None: + 
evo.sub_workspace_list[index] = FactorFBWorkspace(target_task=evo.sub_tasks[index]) + evo.sub_workspace_list[index].inject_code(**{"factor.py": code_list[index]}) + return evo + + + def evolve( + self, + *, + evo: EvolvingItem, + queried_knowledge: CoSTEERQueriedKnowledge | None = None, + **kwargs, + ) -> EvolvingItem: + # 1.找出需要evolve的task + to_be_finished_task_index = [] + for index, target_task in enumerate(evo.sub_tasks): + to_be_finished_task_index.append(index) + + result = multiprocessing_wrapper( + [ + (self.implement_one_task, (evo.sub_tasks[target_index], queried_knowledge)) + for target_index in to_be_finished_task_index + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + code_list = [None for _ in range(len(evo.sub_tasks))] + for index, target_index in enumerate(to_be_finished_task_index): + code_list[target_index] = result[index] + + evo = self.assign_code_list_to_evo(code_list, evo) + evo.corresponding_selection = to_be_finished_task_index + + return evo diff --git a/alphaagent/components/coder/factor_coder/expr_parser.py b/alphaagent/components/coder/factor_coder/expr_parser.py new file mode 100755 index 00000000..8dd09e1b --- /dev/null +++ b/alphaagent/components/coder/factor_coder/expr_parser.py @@ -0,0 +1,314 @@ +from pyparsing import Word, alphas, alphanums, infixNotation, opAssoc, oneOf, Optional, delimitedList, Forward, Group +from pyparsing import ParseException +from pyparsing import Regex, Combine, Literal +import sys +import re +import numpy as np + +# 引入pyparsing自带的cache功能 +# 加快function_call = var + '(' + Optional(delimitedList(expr)) + ')'这种嵌套式的pyparsing解析器 +from pyparsing import ParserElement +ParserElement.enablePackrat() + +sys.setrecursionlimit(5000) # 设置更高的递归深度限制 + +# 定义基本元素 +var = ( + Combine(Optional(Literal("$")) + Word(alphas, alphanums + "_")) +).setName("variable") +# var = Word(alphas, alphanums + "_") + +# 定义数字的正则表达式 +# 正则表达式匹配整数和小数,可以有正负号,以及科学计数法 +number_pattern = r"[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?" 
+number = Regex(number_pattern) + +# 定义操作符 +mul_div = oneOf("* /", useRegex=True) +add_minus = oneOf("+ -") +comparison_op = oneOf("> < >= <= == !=") +logical_and = oneOf("&& &") +logical_or = oneOf("|| |") +conditional_op = ("?", ":") + + +def is_number(s): + try: + float(s) + return True + except ValueError: + return False + +# 展平嵌套的 ParseResults 为字符串 +def flatten_nested_tokens(tokens): + # import pdb; pdb.set_trace() + flattened = [] + for token in tokens: + if isinstance(token, str): + flattened.append(token) + elif isinstance(token, list): + flattened.extend(flatten_nested_tokens(token)) + else: # ParseResults + flattened.extend(flatten_nested_tokens(token.asList())) + return flattened + + + + +def parse_arith_op(s, loc, tokens): + # tokens[0] 包含整个运算表达式的分解 + # 因为操作符定义为左结合,我们可以从左到右递归处理tokens列表 + def recursive_build_expression(tokens): + if len(tokens) == 3: + A, op, B = tokens + # 构建表达式 + return build_expression(A, op, B) + else: + left = tokens[:-2] + op = tokens[-2] + right = tokens[-1] + left_expr = recursive_build_expression(left) + return build_expression(left_expr, op, right) + + def build_expression(A, op, B): + A = ''.join(flatten_nested_tokens([A])) + B = ''.join(flatten_nested_tokens([B])) + A_is_number = is_number(A) + B_is_number = is_number(B) + + ## 任意一个操作数都是数字 + if A_is_number or B_is_number: + return f"{A}{op}{B}" + + ## 两个操作数都是pd变量 + else: + if op == '+': + return f'ADD({A}, {B})' + # return f'np.add({A}, {B})' + elif op == '-': + return f'SUBTRACT({A}, {B})' + # return f'np.subtract({A}, {B})' + elif op == '*': + return f'MULTIPLY({A}, {B})' + # return f'np.multiply({A}, {B})' + elif op == '/': + return f'DIVIDE({A}, {B})' + # return f'np.divide({A}, {B})' + else: + raise NotImplementedError(f'arith op \'{op}\' is not implemented') + # 操作数2是BENCHMARKINDEX (pd.Series),而操作数1不是BENCHMARKINDEX (pd.Series)的情况下,Series必须要放在第二操作数,否则会报错 + # if 'BENCHMARKINDEX' in A and 'BENCHMARKINDEX' not in B: + # if op == '+': + # return f'({B}).add({A}, axis=0)' + # 
elif op == '-': + # return f'(-1*{(B)}).add({A}, axis=0)' + # elif op == '*': + # return f'({B}).mul({A}, axis=0)' + # elif op == '/': + # return f'(1/{(B)}).mul({A}, axis=0)' + # else: + # raise NotImplementedError(f'arith op \'{op}\' is not implemented') + # else: + # if op == '+': + # return f'({A}).add({B}, axis=0)' + # elif op == '-': + # return f'({A}).sub({B}, axis=0)' + # elif op == '*': + # return f'({A}).mul({B}, axis=0)' + # elif op == '/': + # return f'({A}).div({B}, axis=0)' + # else: + # raise NotImplementedError(f'arith op \'{op}\' is not implemented') + + return recursive_build_expression(tokens[0]) + +# def parse_arith_op(s, loc, tokens): +# A = ''.join(flatten_nested_tokens(tokens[0][0])) +# op = ''.join(flatten_nested_tokens(tokens[0][1])) +# B = ''.join(flatten_nested_tokens(tokens[0][2])) + +# # 检查操作数是否存在 +# if A == '' or B == '': +# raise ParseException(s, loc, f"运算符 '{op}' 缺少操作数") + +# # 检查操作数是否为数字 +# A_is_number = is_number(A) +# B_is_number = is_number(B) + +# # 根据操作数类型选择操作 + +# ## 任意一个操作数都是数字 +# if A_is_number or B_is_number: +# return f"{A}{op}{B}" + +# ## 两个操作数都是pd变量 +# else: +# # 操作数2是BENCHMARKINDEX (pd.Series),而操作数1不是BENCHMARKINDEX (pd.Series)的情况下,Series必须要放在第二操作数,否则会报错 +# if 'BENCHMARKINDEX' in A and 'BENCHMARKINDEX' not in B: +# if op == '+': +# return f'({B}).add({A}, axis=0)' +# elif op == '-': +# return f'(-1*{(B)}).add({A}, axis=0)' +# elif op == '*': +# return f'({B}).mul({A}, axis=0)' +# elif op == '/': +# return f'(1/{(B)}).mul({A}, axis=0)' +# else: +# raise NotImplementedError(f'arith op \'{op}\' is not implemented') +# else: +# if op == '+': +# return f'({A}).add({B}, axis=0)' +# elif op == '-': +# return f'({A}).sub({B}, axis=0)' +# elif op == '*': +# return f'({A}).mul({B}, axis=0)' +# elif op == '/': +# return f'({A}).div({B}, axis=0)' +# else: +# raise NotImplementedError(f'arith op \'{op}\' is not implemented') + + +# 定义条件表达式的解析函数 +def parse_conditional_expression(s, loc, tokens): + A, B, C = tokens[0][0], 
tokens[0][2], tokens[0][4] + # 将 A, B, C 转换为字符串 + A = ''.join(flatten_nested_tokens(A)) + B = ''.join(flatten_nested_tokens(B)) + C = ''.join(flatten_nested_tokens(C)) + + # 将结果转换为带有datetime和instrument双重索引的Series + return f"pd.Series(np.where({A}, {B}, {C}), index=({A}).index)" + +# 定义逻辑运算符的解析函数 +def parse_logical_expression(s, loc, tokens): + # tokens[0] 包含整个表达式的分解,可能包括嵌套的列表 + # 由于操作符定义为左结合,我们可以递归地展开tokens列表 + def recursive_flatten(tokens): + if len(tokens) == 1: + return ''.join(flatten_nested_tokens([tokens[0]])) + else: + left = tokens[0] + operator = tokens[1] + # right = tokens[2] + left_str = ''.join(flatten_nested_tokens([left])) + right_str = recursive_flatten(tokens[2:]) + if operator in ["||", "|"]: + return f"OR({left_str}, {right_str})" + # return f"({left_str}) | ({right_str})" + elif operator in ["&&", "&"]: + return f"AND({left_str}, {right_str})" + # return f"({left_str}) & ({right_str})" + + return recursive_flatten(tokens[0]) + + +# 定义函数调用解析函数 +def parse_function_call(s, loc, tokens): + # unary_operator = tokens[0] + function_name = tokens[0] + arguments = tokens[2:-1] + # import pdb; pdb.set_trace() + + + # 处理参数列表中的每个参数 + arguments_flat = [] + # import pdb; pdb.set_trace() + for arg in arguments: + if isinstance(arg, str): + arguments_flat.append(arg) + else: + # 如果参数是嵌套的表达式或函数调用,递归处理 + flattened_arg = ''.join(flatten_nested_tokens(arg)) + arguments_flat.append(flattened_arg) + arguments_str = ','.join(arguments_flat) + return f"{function_name}({arguments_str})" + +# 先定义一个 Forward 对象以便在定义 function_call 时引用 +expr = Forward() + +# 定义函数调用 +## 定义可选的一元操作符,这里使用 oneOf 选择器来匹配 "+" 或 "-" +unary_op = Optional(oneOf("+ -")).setParseAction(lambda t: t[0] if t else '') +function_call = var + '(' + Optional(delimitedList(expr)) + ')' # 使用 expr +function_call.setParseAction(parse_function_call) +nested_expr = Group('(' + expr + ')') +# sign_var = unary_op + var + +# 更新操作数,以包含函数调用 +operand = Group(unary_op + (function_call | var | number | nested_expr | expr)) + 
+# unary_operand = oneOf("+ -") + operand +# unary_operand.setParseAction(lambda tokens: ''.join(tokens)) +# operand = (unary_operand | function_call | var | number ) + +# 使用新的 flatten_nested_tokens 函数 +def parse_entire_expression(s, loc, tokens): + # import pdb; pdb.set_trace() + return ''.join(flatten_nested_tokens(tokens)) + + +def check_for_invalid_operators(expression): + valid_operators = {"(", ")", ",", "+", "-", "*", "/", "&&", "||", "&", "|", ">", "<", ">=", "<=", "==", "!=", "?", ":", "."} + # 使用正则表达式查找所有的运算符 + pattern = r'([+\-*/,><=!&|^`~@#%\\;{}[\]"\'\\]+)' # ([|&=]{3,})| + found_operators_tuples = re.findall(pattern, expression) + found_operators = [operator for tup in found_operators_tuples for operator in tup if operator] + invalid_operators = set(found_operators) - valid_operators + + if invalid_operators: + raise Exception(f"无效的运算符: \"{''.join(invalid_operators)}\"") + + +# 现在更新 expr 的定义 +expr <<= infixNotation(operand, + [ + (mul_div, 2, opAssoc.LEFT, parse_arith_op), + (add_minus, 2, opAssoc.LEFT, parse_arith_op), + (comparison_op, 2, opAssoc.LEFT), + (logical_and, 2, opAssoc.LEFT, parse_logical_expression), + (logical_or, 2, opAssoc.LEFT, parse_logical_expression), + (conditional_op, 3, opAssoc.RIGHT, parse_conditional_expression) + ]) + + +def check_parentheses_balance(expr): + if expr.count('(') != expr.count(')'): + raise ParseException(f"表达式括号未闭合") + +# 定义整个表达式的解析规则 +expr.setParseAction(parse_entire_expression) # check_parentheses_balance, +# expr.setDebug() + +def parse_expression(factor_expression): + check_parentheses_balance(factor_expression) + check_for_invalid_operators(factor_expression) + print("factor_expression: ", factor_expression) + + parsed_data_function = expr.parseString(factor_expression)[0] + return parsed_data_function + + + +def parse_symbol(expr, columns): + replace_map = {} + replace_map.update({ + "TRUE": "True", + "true": "True", + "FALSE": "False", + "false": "False", + "NAN": "np.nan", + "NaN": "np.nan", + "nan": 
"np.nan", + "NULL": "np.nan", + "null": "np.nan" + }) + for col in columns: + replace_map.update({col: col.replace('$', '')}) + # replace_map.update({col.replace('$', '').upper(): col.replace('$', '')}) + + for var, var_df in replace_map.items(): + expr = expr.replace(var, var_df) + return expr + +if __name__ == '__main__': + parse_expression("RANK(DELTA($open, 1) - DELTA($open, 1)) / (1e-8 + 1)") \ No newline at end of file diff --git a/alphaagent/components/coder/factor_coder/factor.py b/alphaagent/components/coder/factor_coder/factor.py new file mode 100755 index 00000000..6f1fd235 --- /dev/null +++ b/alphaagent/components/coder/factor_coder/factor.py @@ -0,0 +1,225 @@ +from __future__ import annotations + +import subprocess +import uuid +from pathlib import Path +from typing import Tuple, Union + +import pandas as pd +from filelock import FileLock + +from alphaagent.components.coder.CoSTEER.task import CoSTEERTask +from alphaagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS +from alphaagent.core.exception import CodeFormatError, CustomRuntimeError, NoOutputError +from alphaagent.core.experiment import Experiment, FBWorkspace +from alphaagent.core.utils import cache_with_pickle +from alphaagent.oai.llm_utils import md5_hash + + +class FactorTask(CoSTEERTask): + # TODO: generalized the attributes into the Task + # - factor_* -> * + def __init__( + self, + factor_name, + factor_description, + factor_formulation, + factor_expression = None, + *args, + variables: dict = {}, + resource: str = None, + factor_implementation: bool = False, + **kwargs, + ) -> None: + self.factor_name = ( + factor_name # TODO: remove it in the later version. 
Keep it only for pickle version compatibility + ) + self.factor_description = factor_description + self.factor_formulation = factor_formulation + self.factor_expression = factor_expression + self.variables = variables + self.factor_resources = resource + self.factor_implementation = factor_implementation + super().__init__(name=factor_name, *args, **kwargs) + + def get_task_information(self): + return f"""factor_name: {self.factor_name} +factor_description: {self.factor_description} +factor_formulation: {self.factor_formulation} +variables: {str(self.variables)}""" + + + def get_task_description(self): + return f"""factor_name: {self.factor_name} +factor_description: {self.factor_description}""" + + def get_task_information_and_implementation_result(self): + return { + "factor_name": self.factor_name, + "factor_description": self.factor_description, + "factor_formulation": self.factor_formulation, + "factor_expression": self.factor_expression, + "variables": str(self.variables), + "factor_implementation": str(self.factor_implementation), + } + + @staticmethod + def from_dict(dict): + return FactorTask(**dict) + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}[{self.factor_name}]>" + + +class FactorFBWorkspace(FBWorkspace): + """ + This class is used to implement a factor by writing the code to a file. + Input data and output factor value are also written to files. + """ + + # TODO: (Xiao) think raising errors may get better information for processing + FB_EXEC_SUCCESS = "Execution succeeded without error." + FB_CODE_NOT_SET = "code is not set." + FB_EXECUTION_SUCCEEDED = "Execution succeeded without error." + FB_OUTPUT_FILE_NOT_FOUND = "\nExpected output file not found." + FB_OUTPUT_FILE_FOUND = "\nExpected output file found." 
+ + def __init__( + self, + *args, + raise_exception: bool = False, + **kwargs, + ) -> None: + super().__init__(*args, **kwargs) + self.raise_exception = raise_exception + + def hash_func(self, data_type: str = "Debug") -> str: + return ( + md5_hash(data_type + self.code_dict["factor.py"]) + if ("factor.py" in self.code_dict and not self.raise_exception) + else None + ) + + @cache_with_pickle(hash_func) + def execute(self, data_type: str = "Debug") -> Tuple[str, pd.DataFrame]: + """ + execute the implementation and get the factor value by the following steps: + 1. make the directory in workspace path + 2. write the code to the file in the workspace path + 3. link all the source data to the workspace path folder + if call_factor_py is True: + 4. execute the code + else: + 4. generate a script from template to import the factor.py dump get the factor value to result.h5 + 5. read the factor value from the output file in the workspace path folder + returns the execution feedback as a string and the factor value as a pandas dataframe + + + Regarding the cache mechanism: + 1. We will store the function's return value to ensure it behaves as expected. + - The cached information will include a tuple with the following: (execution_feedback, executed_factor_value_dataframe, Optional[Exception]) + + """ + super().execute() + if self.code_dict is None or "factor.py" not in self.code_dict: + if self.raise_exception: + raise CodeFormatError(self.FB_CODE_NOT_SET) + else: + return self.FB_CODE_NOT_SET, None + with FileLock(self.workspace_path / "execution.lock"): + if self.target_task.version == 1: + source_data_path = ( + Path( + FACTOR_COSTEER_SETTINGS.data_folder_debug, + ) + if data_type == "Debug" # FIXME: (yx) don't think we should use a debug tag for this. 
+ else Path( + FACTOR_COSTEER_SETTINGS.data_folder, + ) + ) + + source_data_path.mkdir(exist_ok=True, parents=True) + code_path = self.workspace_path / f"factor.py" + + self.link_all_files_in_folder_to_workspace(source_data_path, self.workspace_path) + + execution_feedback = self.FB_EXECUTION_SUCCEEDED + execution_success = False + execution_error = None + + if self.target_task.version == 1: + execution_code_path = code_path + elif self.target_task.version == 2: + execution_code_path = self.workspace_path / f"{uuid.uuid4()}.py" + execution_code_path.write_text((Path(__file__).parent / "factor_execution_template.txt").read_text()) + + try: + subprocess.check_output( + f"{FACTOR_COSTEER_SETTINGS.python_bin} {execution_code_path}", + shell=True, + cwd=self.workspace_path, + stderr=subprocess.STDOUT, + timeout=FACTOR_COSTEER_SETTINGS.file_based_execution_timeout, + ) + execution_success = True + except subprocess.CalledProcessError as e: + import site + + execution_feedback = ( + e.output.decode() + .replace(str(execution_code_path.parent.absolute()), r"/path/to") + .replace(str(site.getsitepackages()[0]), r"/path/to/site-packages") + ) + if len(execution_feedback) > 2000: + execution_feedback = ( + execution_feedback[:1000] + "....hidden long error message...." + execution_feedback[-1000:] + ) + if self.raise_exception: + raise CustomRuntimeError(execution_feedback) + else: + execution_error = CustomRuntimeError(execution_feedback) + except subprocess.TimeoutExpired: + execution_feedback += f"Execution timeout error and the timeout is set to {FACTOR_COSTEER_SETTINGS.file_based_execution_timeout} seconds." 
+ if self.raise_exception: + raise CustomRuntimeError(execution_feedback) + else: + execution_error = CustomRuntimeError(execution_feedback) + + workspace_output_file_path = self.workspace_path / "result.h5" + if workspace_output_file_path.exists() and execution_success: + try: + executed_factor_value_dataframe = pd.read_hdf(workspace_output_file_path) + execution_feedback += self.FB_OUTPUT_FILE_FOUND + except Exception as e: + execution_feedback += f"Error found when reading hdf file: {e}"[:1000] + executed_factor_value_dataframe = None + else: + execution_feedback += self.FB_OUTPUT_FILE_NOT_FOUND + executed_factor_value_dataframe = None + if self.raise_exception: + raise NoOutputError(execution_feedback) + else: + execution_error = NoOutputError(execution_feedback) + + return execution_feedback, executed_factor_value_dataframe + + def __str__(self) -> str: + # NOTE: + # If the code cache works, the workspace will be None. + return f"File Factor[{self.target_task.factor_name}]: {self.workspace_path}" + + def __repr__(self) -> str: + return self.__str__() + + @staticmethod + def from_folder(task: FactorTask, path: Union[str, Path], **kwargs): + path = Path(path) + code_dict = {} + for file_path in path.iterdir(): + if file_path.suffix == ".py": + code_dict[file_path.name] = file_path.read_text() + return FactorFBWorkspace(target_task=task, code_dict=code_dict, **kwargs) + + +FactorExperiment = Experiment +FeatureExperiment = Experiment diff --git a/alphaagent/components/coder/factor_coder/factor_ast.py b/alphaagent/components/coder/factor_coder/factor_ast.py new file mode 100755 index 00000000..2d372821 --- /dev/null +++ b/alphaagent/components/coder/factor_coder/factor_ast.py @@ -0,0 +1,506 @@ +from pyparsing import Word, alphas, alphanums, infixNotation, opAssoc, oneOf, Optional, delimitedList, Forward, Group +from pyparsing import ParserElement, ParseException, ParseResults +from pyparsing import Regex, Combine, Literal +from dataclasses import dataclass +from 
typing import List, Union, Optional as Opt +from collections import defaultdict +import sys +import pandas as pd + +# Enable packrat parsing for better performance +ParserElement.enablePackrat() + +# Set higher recursion limit for complex expressions +sys.setrecursionlimit(4000) + +# AST Node classes +@dataclass +class Node: + def tree_str(self, level: int = 0) -> str: + """Return a tree-like string representation with given indent level.""" + indent = " " * level + return f"{indent}{self._node_str()}" + + def _node_str(self) -> str: + """Basic string representation of the node for tree view.""" + return str(self) + + def print_tree(self): + """Print the AST in a tree structure.""" + print(self.tree_str()) + +@dataclass +class VarNode(Node): + name: str + + def __str__(self): + return self.name + + def _node_str(self): + return f"VAR({self.name})" + +@dataclass +class NumberNode(Node): + value: float + + def __str__(self): + return str(self.value) + + def _node_str(self): + return f"NUM({self.value})" + +@dataclass +class FunctionNode(Node): + name: str + args: List[Node] + + def __str__(self): + args_str = ", ".join(str(arg) for arg in self.args) + return f"{self.name}({args_str})" + + def _node_str(self): + return f"FUNC({self.name})" + + def tree_str(self, level: int = 0) -> str: + indent = " " * level + result = [f"{indent}{self._node_str()}"] + for arg in self.args: + result.append(arg.tree_str(level + 1)) + return "\n".join(result) + +@dataclass +class BinaryOpNode(Node): + op: str + left: Node + right: Node + + def __str__(self): + return f"({str(self.left)} {self.op} {str(self.right)})" + + def _node_str(self): + return f"OP({self.op})" + + def tree_str(self, level: int = 0) -> str: + indent = " " * level + result = [f"{indent}{self._node_str()}"] + result.append(self.left.tree_str(level + 1)) + result.append(self.right.tree_str(level + 1)) + return "\n".join(result) + +@dataclass +class ConditionalNode(Node): + condition: Node + true_expr: Node + 
false_expr: Node + + def __str__(self): + return f"({str(self.condition)} ? {str(self.true_expr)} : {str(self.false_expr)})" + + def _node_str(self): + return "CONDITIONAL" + + def tree_str(self, level: int = 0) -> str: + indent = " " * level + result = [f"{indent}{self._node_str()}"] + result.append(self.condition.tree_str(level + 1)) + result.append(self.true_expr.tree_str(level + 1)) + result.append(self.false_expr.tree_str(level + 1)) + return "\n".join(result) + +# Basic elements definition +var = Combine(Optional(Literal("$")) + Word(alphas, alphanums + "_")) +number = Regex(r"[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?") + +# Operators definition +mul_div = oneOf("* /") +add_sub = oneOf("+ -") +comparison = oneOf("> < >= <= == !=") +logical_and = oneOf("&& &") +logical_or = oneOf("|| |") +conditional = ("?", ":") + +def create_var_node(tokens): + return VarNode(tokens[0]) + +def create_number_node(tokens): + return NumberNode(float(tokens[0])) + +def create_function_node(tokens): + name = tokens[0] # function name + args = tokens[2:-1] # skip parentheses + + def unwrap(arg): + if isinstance(arg, (list, ParseResults)): + if len(arg) == 1: + return unwrap(arg[0]) + return [unwrap(x) for x in arg][0] # first element + return arg + + processed_args = [unwrap(arg) for arg in args] + # All args should be Node classes + assert all(isinstance(arg, Node) for arg in processed_args), f"Invalid args: {processed_args}" + return FunctionNode(name, processed_args) + +def create_binary_op_node(tokens): + tokens = tokens[0] + def unwrap(arg): + if isinstance(arg, (list, ParseResults)): + if len(arg) == 1: + return unwrap(arg[0]) + return [unwrap(x) for x in arg] + return arg + + if len(tokens) == 3: + return BinaryOpNode(tokens[1], unwrap(tokens[0]), unwrap(tokens[2])) + + result = unwrap(tokens[0]) + for i in range(1, len(tokens)-1, 2): + result = BinaryOpNode(tokens[i], result, unwrap(tokens[i+1])) + return result + +def create_conditional_node(tokens): + tokens = tokens[0] + 
@dataclass
class SubtreeMatch:
    """A pair of equal subtrees, one taken from each of two expression trees.

    Fix: ``__str__`` used the bare names ``root1`` / ``root2``, which are not
    defined inside the method, so printing a match raised NameError. It now
    reads the instance attributes.
    """
    root1: Node  # root of the matching subtree in the first tree
    root2: Node  # root of the matching subtree in the second tree
    size: int    # size of the matched subtree (number of nodes)

    def __str__(self):
        return f"Match(size={self.size}):\n Tree1: {str(self.root1)}\n Tree2: {str(self.root2)}"
def match_alphazoo(prop_expr, factor_df):
    """Find the library expression sharing the largest common subtree with ``prop_expr``.

    Args:
        prop_expr: Expression string to compare against the library.
        factor_df: Table iterated with ``iterrows()``; every row must unpack
            into exactly two values, of which the second is the expression
            string (the first is unused here).

    Returns:
        tuple: ``(size, subtree, expression)`` for the best match, where
        ``subtree`` is the matching subtree taken from ``prop_expr``'s tree.
        ``(0, None, None)`` when nothing matched.

    Rows whose comparison raises are reported with ``print`` and skipped.
    Ties keep the earliest row because only strictly larger matches replace
    the current best.
    """
    best_size, best_subtree, best_alpha = 0, None, None
    for _row_label, (_name, alpha_expr) in factor_df.iterrows():
        try:
            candidate = compare_expressions(prop_expr, alpha_expr)
            if candidate is not None and candidate.size > best_size:
                best_size = candidate.size
                best_subtree = candidate.root1
                best_alpha = alpha_expr
        except Exception as e:
            print(f"Error comparing alpha \"{alpha_expr}\": \n {e}")
    return best_size, best_subtree, best_alpha
def count_number_nodes(node: Node) -> int:
    """Return how many NumberNode instances occur in the tree rooted at ``node``.

    Args:
        node: Root of an AST or of any sub-tree.

    Returns:
        int: Number of NumberNode leaves. A FunctionNode contributes only
        through its arguments; VarNode leaves and unrecognised objects count
        as zero.
    """
    # Leaves are answered directly.
    if isinstance(node, NumberNode):
        return 1
    if isinstance(node, VarNode):
        return 0

    # Interior nodes: pick the children, then sum over them.
    if isinstance(node, FunctionNode):
        children = node.args
    elif isinstance(node, BinaryOpNode):
        children = (node.left, node.right)
    elif isinstance(node, ConditionalNode):
        children = (node.condition, node.true_expr, node.false_expr)
    else:
        return 0
    return sum(map(count_number_nodes, children))
def count_all_nodes(expr: str) -> int:
    """Parse ``expr`` and return the total number of nodes in its AST.

    Args:
        expr: A string representing a mathematical expression.

    Returns:
        int: The node count reported by ``count_nodes`` for the parsed tree
        (every node is counted, not only numeric constants).
    """
    return count_nodes(parse_expression(expr))
# Example usage: parse one expression and print three metrics about it.
if __name__ == "__main__":
    expr1 = "(($close - TS_MIN($low, 14)) / (TS_MAX($high, 14) - TS_MIN($low, 14) + 1e-8))"
    count = count_free_args(expr1)
    print(f"Number of NumberNode instances in expression: {count}")  # Expected 4: the literal 14 appears three times, plus 1e-8
    count = count_unique_vars(expr1)
    print(f"Number of unique variables in expression: {count}")  # Expected 3: $close, $low and $high
    count = count_all_nodes(expr1)
    print(f"Number of Node instances in expression: {count}")
def datatype_adapter(func):
    """Decorator that lets DataFrame-based operators accept NumPy arrays and scalars.

    Positional-argument handling (unchanged from the original):
      * one ``np.ndarray``              -> wrapped in a DataFrame;
      * one ``float``/``int``           -> wrapped in a 1x1 DataFrame and the
                                           first result value returned as float;
      * two args, exactly one ndarray   -> that one is wrapped in a DataFrame;
      * anything else                   -> passed through untouched.

    Apart from the scalar case the wrapped function's result is returned
    as-is; it is NOT converted back to a NumPy array.

    Fixes:
      * keyword arguments are now forwarded (the old ``wrapper(*args)`` raised
        TypeError for calls such as ``f(x, p=5)``);
      * ``functools.wraps`` preserves the wrapped function's name/docstring;
      * the scalar result is extracted through NumPy rather than ``float()``
        of a one-element Series.
    """
    import functools  # local import: keeps this block self-contained

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if len(args) == 1:
            (only,) = args
            if isinstance(only, np.ndarray):
                return func(pd.DataFrame(only), **kwargs)
            if isinstance(only, (float, int)):
                result = func(pd.DataFrame([only]), **kwargs)
                # First (only) value of the result, whatever container holds it.
                return float(np.asarray(result).ravel()[0])
        elif len(args) == 2:
            first, second = args
            first_is_array = isinstance(first, np.ndarray)
            second_is_array = isinstance(second, np.ndarray)
            # Typical calls look like func(df, p); convert whichever side is
            # the lone ndarray and leave the other argument alone.
            if first_is_array and not second_is_array:
                return func(pd.DataFrame(first), second, **kwargs)
            if second_is_array and not first_is_array:
                return func(first, pd.DataFrame(second), **kwargs)
        return func(*args, **kwargs)

    return wrapper
@datatype_adapter
def PERCENTILE(df: pd.DataFrame, q: float, p: int = None):
    """Per-instrument quantile, either over a trailing window or over the whole group.

    Args:
        df (pd.DataFrame): Input data, grouped by ``'instrument'``.
        q (float): Quantile to compute; must lie in [0, 1].
        p (int, optional): Trailing window size. When given, a rolling
            quantile with ``min_periods=1`` is computed; when omitted, each
            instrument's quantile over all of its rows is used.

    Returns:
        The result of ``groupby('instrument').transform(...)`` on ``df``.
    """
    assert 0 <= q <= 1, "分位数 q 必须在 [0, 1] 之间"

    def trailing_window(series):
        return series.rolling(p, min_periods=1).quantile(q)

    def whole_group(series):
        return series.quantile(q)

    per_instrument = whole_group if p is None else trailing_window
    return df.groupby('instrument').transform(per_instrument)
def MAX(x:pd.DataFrame, y:pd.DataFrame=None, z:pd.DataFrame=None):
    """Element-wise maximum of up to three operands, or cross-sectional maximum of one.

    This definition replaces the earlier one-argument ``MAX(df)`` in the same
    module, which made ``MAX(df)`` raise TypeError. ``y`` is now optional so
    both call styles work:

      * ``MAX(x)``       -> ``x.groupby('datetime').max()``, as in the earlier definition;
      * ``MAX(x, y)``    -> ``np.maximum(x, y)``;
      * ``MAX(x, y, z)`` -> ``np.maximum(np.maximum(x, y), z)``.
    """
    others = [operand for operand in (y, z) if operand is not None]
    if not others:
        return x.groupby('datetime').max()
    result = x
    for operand in others:
        result = np.maximum(result, operand)
    return result


def MIN(x:pd.DataFrame, y:pd.DataFrame=None, z:pd.DataFrame=None):
    """Element-wise minimum of up to three operands, or cross-sectional minimum of one.

    Mirrors ``MAX``: ``MIN(x)`` computes ``x.groupby('datetime').min()``,
    while two or three operands are folded with ``np.minimum``.
    """
    others = [operand for operand in (y, z) if operand is not None]
    if not others:
        return x.groupby('datetime').min()
    result = x
    for operand in others:
        result = np.minimum(result, operand)
    return result
def TS_COVARIANCE(df1:pd.DataFrame, df2:pd.DataFrame, p:int=5):
    """Rolling covariance between two series, computed per instrument.

    Args:
        df1: pandas object grouped by ``'instrument'``.
        df2: Either a pandas object carrying an ``'instrument'`` index level,
            or a 1-D ``np.ndarray`` used as a fixed reference sequence.
        p: Rolling window size. Ignored when ``df2`` is an ndarray: the window
            is then the length of the array.

    Returns:
        Rolling covariance with ``min_periods=2``.

    Fixes for the ndarray case:
      * ``np.cov`` returns the 2x2 covariance matrix, but ``rolling.apply``
        needs a scalar, so the off-diagonal entry is now returned;
      * the branch was previously skipped when ``p == len(df2)``, sending an
        ndarray into the pandas-only path, which calls ``df2.xs``.
    """
    if isinstance(df2, np.ndarray):
        # The reference sequence dictates the window length.
        p = len(df2)

        def cov(window):
            # Early windows are shorter than p; pair them with the matching
            # prefix of the reference sequence.
            return np.cov(window, df2[:len(window)])[0, 1]

        return df1.groupby('instrument').transform(lambda x: x.rolling(p, min_periods=2).apply(cov, raw=True))
    else:
        def rolling_cov(group, df2, p):
            # Instrument handled by the current group
            instrument = group.name
            # Matching slice of df2 for that instrument
            df2_group = df2.xs(instrument, level='instrument')
            # Rolling covariance of the two slices
            return group.rolling(p, min_periods=2).cov(df2_group)

        # Compute each instrument separately with groupby + apply
        result = df1.groupby('instrument').apply(lambda x: rolling_cov(x, df2, p))
        # apply prepends the group key to the index; drop it and restore order
        result = result.reset_index(level=0, drop=True).sort_index()
        return result
@datatype_adapter
def DECAYLINEAR(df:pd.DataFrame, p:int=5):
    """Rolling linearly-weighted average, computed per instrument.

    Weights grow linearly 1, 2, ..., so the last element of each window gets
    the largest weight.

    Args:
        df (pd.DataFrame): Input data, grouped by ``'instrument'``.
        p (int): Rolling window size.

    Returns:
        pd.DataFrame: The linearly-weighted average of each rolling window.

    Fix: windows shorter than ``p`` (allowed by ``min_periods=1``) used
    weights normalised for the full window, so they summed to less than 1 and
    the first ``p - 1`` rows of every instrument were scaled down. Short
    windows now renormalise their weights. Full windows are unchanged.
    """
    assert isinstance(p, int), ValueError(f"DECAYLINEAR仅接收正整数参数n,接收到{type(p).__name__}")
    decay_weights = np.arange(1, p+1, 1)
    decay_weights = decay_weights / decay_weights.sum()

    def calculate_decaylinear(window):
        weights = decay_weights[:len(window)]
        if len(window) < p:
            # Partial window: make the truncated weights sum to 1 again.
            weights = weights / weights.sum()
        return (window * weights).sum()

    return df.groupby('instrument').transform(lambda x: x.rolling(p, min_periods=1).apply(calculate_decaylinear, raw=True))
def rolling_beta(df1_group, df2_group, p):
    """Rolling regression slope of ``df1_group`` on ``df2_group``.

    For every position with at least ``p`` rows of history, the last ``p``
    values of ``df1_group`` are passed to ``calculate_beta`` as ``y``. The
    ``x`` window is the same slice of ``df2_group`` when both inputs have
    equal shape; otherwise it is always the first ``p`` values of
    ``df2_group``. Earlier positions stay NaN.

    Returns:
        pd.Series: Slopes indexed like ``df1_group``.
    """
    n_rows = len(df1_group)
    betas = np.full(n_rows, np.nan)  # positions without a full window keep NaN

    for end in range(p, n_rows + 1):
        start = end - p
        window_y = df1_group.iloc[start:end].values
        if df1_group.shape != df2_group.shape:
            # Shapes differ: always use the first p values of df2_group.
            window_x = df2_group.iloc[:p].values
        else:
            window_x = df2_group.iloc[start:end].values
        betas[end - 1] = calculate_beta(window_y, window_x)

    # Hand back a Series aligned with the input group.
    return pd.Series(betas, index=df1_group.index)
def calculate_residuals(y, x):
    """Fit ``y ~ slope * x + offset`` by least squares and return the last residual.

    Args:
        y: Observed values.
        x: Explanatory values, same length as ``y``.

    Returns:
        Actual minus fitted value at the final position.
    """
    # Design matrix with a column of ones so an intercept is fitted.
    design = np.vstack([x, np.ones(len(x))]).T
    coefficients = np.linalg.lstsq(design, y, rcond=None)[0]
    slope, offset = coefficients
    fitted = slope * x + offset
    return (y - fitted)[-1]
def FLOOR(df:pd.DataFrame):
    """Round every element down to the nearest integer value.

    ``np.floor`` is applied to the input directly instead of through
    ``df.apply``. DataFrame and Series inputs give the same result as before,
    and NumPy arrays and plain scalars, which have no ``.apply`` and
    previously raised AttributeError, are now accepted too.
    """
    return np.floor(df)
def _as_bool(operand):
    """Coerce an operand to booleans: ``astype`` when available, ``np.bool_`` otherwise."""
    if hasattr(operand, "astype"):
        return operand.astype(np.bool_)
    return np.bool_(operand)


def AND(df1, df2):
    """Element-wise logical AND of two operands after converting both to booleans.

    Operands exposing ``astype`` (DataFrame, Series, ndarray, NumPy scalars)
    are handled exactly as before. Plain Python scalars, which used to raise
    AttributeError, are now accepted as well.
    """
    return np.bitwise_and(_as_bool(df1), _as_bool(df2))


def OR(df1, df2):
    """Element-wise logical OR of two operands after converting both to booleans.

    Accepts the same operand kinds as ``AND``.
    """
    return np.bitwise_or(_as_bool(df1), _as_bool(df2))
def _calculate_rolling_mean(group_data):
    """Moving average for a single group where the window length may differ per row.

    Args:
        group_data: 3-tuple ``(price_group, window_group, group_name)``.

    Returns:
        tuple: ``(group_name, result)`` where ``result`` is a float Series
        indexed like ``price_group``.
    """
    price_group, window_group, group_name = group_data
    result = pd.Series(index=price_group.index, dtype=float)

    for i in range(len(price_group)):
        # NOTE(review): int(...values) presumes each row of window_group holds
        # exactly one value -- confirm against the callers.
        curr_window = int(window_group.iloc[i].values)
        # Window sizes below 1 are clamped to 1.
        if curr_window < 1:
            curr_window = 1
        if i < curr_window:
            # Not enough history for a full window: average everything so far.
            result.iloc[i] = price_group.iloc[:i+1].mean()
        else:
            # Average the last curr_window rows, ending at row i.
            result.iloc[i] = price_group.iloc[i-curr_window+1:i+1].mean()

    return group_name, result
float)): + # 如果window是固定值,使用原来的逻辑 + return price_df.groupby('instrument').transform(lambda x: x.rolling(int(window), min_periods=1).mean()) + else: + window.index = price_df.index + # 准备并行计算的数据 + groups_data = [ + (price_group, + window.xs(group_name, level='instrument'), + group_name) + for group_name, price_group in price_df.groupby('instrument') + ] + + # 并行计算 + results = Parallel(n_jobs=n_jobs)( + delayed(_calculate_rolling_mean)(group_data) + for group_data in groups_data + ) + + # 合并结果 + final_result = pd.concat([result for _, result in sorted(results, key=lambda x: x[0])]) + return final_result + +@datatype_adapter +def BB_UPPER(price_df, window, n_jobs=-1): + """ + 计算布林带上轨,支持动态窗口大小和并行计算 + + 参数: + price_df: pd.DataFrame - 价格数据 + window: int 或 pd.DataFrame - 窗口大小 + n_jobs: int - 并行计算的作业数,默认为-1 + """ + + if isinstance(window, (int, float)): + # 固定窗口大小的标准差计算 + middle_band = BB_MIDDLE(price_df, window, n_jobs) + std = price_df.groupby('instrument').transform(lambda x: x.rolling(int(window), min_periods=1).std()) + else: + window.index = price_df.index + middle_band = BB_MIDDLE(price_df, window, n_jobs) + # 准备并行计算的数据 + groups_data = [ + (price_group, + window.xs(group_name, level='instrument'), + group_name) + for group_name, price_group in price_df.groupby('instrument') + ] + + # 并行计算标准差 + results = Parallel(n_jobs=n_jobs)( + delayed(_calculate_rolling_std)(group_data) + for group_data in groups_data + ) + + # 合并结果 + std = pd.concat([result for _, result in sorted(results, key=lambda x: x[0])]) + + return middle_band + std + +@datatype_adapter +def BB_LOWER(price_df, window, n_jobs=-1): + """ + 计算布林带下轨,支持动态窗口大小和并行计算 + + 参数: + price_df: pd.DataFrame - 价格数据 + window: int 或 pd.DataFrame - 窗口大小 + n_jobs: int - 并行计算的作业数,默认为-1 + """ + + if isinstance(window, (int, float)): + # 固定窗口大小的标准差计算 + middle_band = BB_MIDDLE(price_df, window, n_jobs) + std = price_df.groupby('instrument').transform(lambda x: x.rolling(int(window), min_periods=1).std()) + else: + window.index = 
price_df.index + middle_band = BB_MIDDLE(price_df, window, n_jobs) + # 准备并行计算的数据 + groups_data = [ + (price_group, + window.xs(group_name, level='instrument'), + group_name) + for group_name, price_group in price_df.groupby('instrument') + ] + + # 并行计算标准差 + results = Parallel(n_jobs=n_jobs)( + delayed(_calculate_rolling_std)(group_data) + for group_data in groups_data + ) + + # 合并结果 + std = pd.concat([result for _, result in sorted(results, key=lambda x: x[0])]) + + return middle_band - std diff --git a/alphaagent/components/coder/factor_coder/prompts.yaml b/alphaagent/components/coder/factor_coder/prompts.yaml new file mode 100755 index 00000000..d5bc8b9c --- /dev/null +++ b/alphaagent/components/coder/factor_coder/prompts.yaml @@ -0,0 +1,204 @@ + +evaluator_code_feedback_v1_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + User will provide you the information of the factor. + + Your job is to check whether user's code is align with the factor and the scenario. + The user will provide the source python code and the execution error message if execution failed. + The user might provide you the ground truth code for you to provide the critic. You should not leak the ground truth code to the user in any form but you can use it to provide the critic. + + User has also compared the factor values calculated by the user's code and the ground truth code. The user will provide you some analyze result comparing two output. You may find some error in the code which caused the difference between the two output. + + If the ground truth code is provided, your critic should only consider checking whether the user's code is align with the ground truth code since the ground truth is definitely correct. + If the ground truth code is not provided, your critic should consider checking whether the user's code is reasonable and correct. + + Notice that your critics are not for user to debug the code. 
They are sent to the coding agent to correct the code. So don't give any following items for the user to check like "Please check the code line XXX". + + Your suggestion should not include any code, just some clear and short suggestions. Please point out very critical issues in your response, ignore non-important issues to avoid confusion. If no big issue found in the code, you can respond with "No critics found". + + You should provide the suggestion to each of your critics to help the user improve the code. Please respond with the critics in the following format. Here is an example structure for the output: + critic 1: The critic message to critic 1 + critic 2: The critic message to critic 2 + +evaluator_code_feedback_v1_user: |- + --------------Factor information:--------------- + {{ factor_information }} + --------------Python code:--------------- + {{ code }} + --------------Execution feedback:--------------- + {{ execution_feedback }} + {% if value_feedback is not none %} + --------------Factor value feedback:--------------- + {{ value_feedback }} + {% endif %} + {% if gt_code is not none %} + --------------Ground truth Python code:--------------- + {{ gt_code }} + {% endif %} + +evolving_strategy_factor_implementation_v1_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + Your code is expected to align with the scenario in any form, which means the user needs to get the exact factor values with your code as expected. + + To help you write the correct code, the user might provide multiple pieces of information that help you write the correct code: + 1. The user might provide you the correct code to similar factors. You should learn from this code to write the correct code. + 2. The user might provide you the failed former code and the corresponding feedback to the code. The feedback covers the execution, the code and the factor value. You should analyze the feedback and try to correct the latest code. + 3.
The user might provide you the suggestion to the latest fail code and some similar fail to correct pairs. Each pair contains the fail code with similar error and the corresponding corrected version code. You should learn from these suggestion to write the correct code. + + Your must write your code based on your former latest attempt below which consists of your former code and code feedback, you should read the former attempt carefully and must not modify the right part of your former code. + + {% if queried_former_failed_knowledge|length != 0 %} + --------------Your former latest attempt:--------------- + =====Code to the former implementation===== + {{ queried_former_failed_knowledge[-1].implementation.code }} + =====Feedback to the former implementation===== + {{ queried_former_failed_knowledge[-1].feedback }} + {% endif %} + + Please response the code in the following format, using `\n` to separate the code, without any other content. Here is an example structure for the JSON output: + { + "code": "The Python code as a string." + } + +evolving_strategy_factor_implementation_v2_user: |- + --------------Target factor information:--------------- + {{ factor_information_str }} + + {% if queried_similar_error_knowledge|length != 0 %} + {% if error_summary_critics is none %} + Recall your last failure, your implementation met some errors. + When doing other tasks, you met some similar errors but you finally solve them. 
Here are some examples: + {% for error_content, similar_error_knowledge in queried_similar_error_knowledge %} + --------------Factor information to similar error ({{error_content}}):--------------- + {{ similar_error_knowledge[0].target_task.get_task_information() }} + =====Code with similar error ({{error_content}}):===== + {{ similar_error_knowledge[0].implementation.code }} + =====Success code to former code with similar error ({{error_content}}):===== + {{ similar_error_knowledge[1].implementation.code }} + {% endfor %} + {% else %} + Recall your last failure, your implementation met some errors. + After reviewing some similar errors and their solutions, here are some suggestions for you to correct your code: + {{error_summary_critics}} + {% endif %} + {% endif %} + {% if queried_similar_successful_knowledge|length != 0 %} + Here are some success implements of similar component tasks, take them as references: + --------------Correct code to similar factors:--------------- + {% for similar_successful_knowledge in queried_similar_successful_knowledge %} + =====Factor {{loop.index}}:===== + {{ similar_successful_knowledge.target_task.get_task_information() }} + =====Code:===== + {{ similar_successful_knowledge.implementation.code }} + {% endfor %} + {% endif %} + {% if latest_attempt_to_latest_successful_execution is not none %} + You have tried to correct your former failed code but still met some errors. 
Here is the latest attempt to the latest successful execution, try not to get the same error to your new code: + =====Your latest attempt===== + {{ latest_attempt_to_latest_successful_execution.implementation.code }} + =====Feedback to your latest attempt===== + {{ latest_attempt_to_latest_successful_execution.feedback }} + {% endif %} + +evolving_strategy_error_summary_v2_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + User is doing the following task: + {{factor_information_str}} + + You have written some code but it meets errors like the following: + {{code_and_feedback}} + + The user has found some tasks that met similar errors, and their final correct solutions. + Please refer to these similar errors and their solutions, provide some clear, short and accurate critics that might help you solve the issues in your code. + + You suggestion should not include any code, just some clear and short suggestions. Please point out very critical issues in your response, ignore non-important issues to avoid confusion. If no big issue found in the code, you can response "No critics found". + + Please response the critic in the following format. 
Here is an example structure for the output: + critic 1: The critic message to critic 1 + critic 2: The critic message to critic 2 + +evolving_strategy_error_summary_v2_user: |- + {% if queried_similar_error_knowledge|length != 0 %} + {% for error_content, similar_error_knowledge in queried_similar_error_knowledge %} + --------------Factor information to similar error ({{error_content}}):--------------- + {{ similar_error_knowledge[0].target_task.get_task_information() }} + =====Code with similar error ({{error_content}}):===== + {{ similar_error_knowledge[0].implementation.code }} + =====Success code to former code with similar error ({{error_content}}):===== + {{ similar_error_knowledge[1].implementation.code }} + {% endfor %} + {% endif %} + + +select_implementable_factor_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + Your job is to help the user select the easiest-to-implement factors. Some factors may be difficult to implement due to a lack of information or excessive complexity. The user will provide the number of factors you should pick and information about the factors, including their descriptions, formulas, and variable explanations. + User will provide you the former attempt to implement the factor and the feedback to the implementation. You need to carefully review your previous attempts. Some factors have been repeatedly tried without success. You should consider discarding these factors. + Please analyze the difficulty of each factor, provide the reasons, and respond with the indices of the selected implementable factors in JSON format. Here is an example structure for the JSON output: + { + "Analysis": "Analyze the difficulty of each factor and provide the reason why the factor can be implemented or not.",
+ "selected_factor": "The indices of the selected factors in the list, like [0, 2, 3]. The length should be the number of factors left after filtering." + } + +select_implementable_factor_user: |- + Number of factors you should pick: {{ factor_num }} + {% for factor_info in sub_tasks %} + =============Factor index:{{factor_info[0]}}:============= + =====Factor name:===== + {{ factor_info[1].factor_name }} + =====Factor description:===== + {{ factor_info[1].factor_description }} + =====Factor formulation:===== + {{ factor_info[1].factor_formulation }} + {% if factor_info[2]|length != 0 %} + --------------Your former attempt:--------------- + {% for former_attempt in factor_info[2] %} + =====Code to attempt {{ loop.index }}===== + {{ former_attempt.implementation.code }} + =====Feedback to attempt {{ loop.index }}===== + {{ former_attempt.feedback }} + {% endfor %} + {% endif %} + {% endfor %} + +evaluator_output_format_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + User will provide you the format of the output. Please help to check whether the output aligns with the format. + Please respond in the JSON format. Here is an example structure for the JSON output: + { + "output_format_decision": true, + "output_format_feedback": "The output format is correct." + } + + +evaluator_final_decision_v1_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + User has finished evaluation and got some feedback from the evaluator. + The evaluator ran the code, got the factor value dataframe, and provided feedback regarding the user's code and code output. You should analyze the feedback and consider the scenario and factor description to give a final decision about the evaluation result. The final decision concludes whether the factor is implemented correctly; if the final decision is False, give detailed feedback containing the reason and a suggestion.
+ + The implementation final decision is considered in the following logic: + 1. If the value and the ground truth value are exactly the same under a small tolerance, the implementation is considered correct. + 2. If the value and the ground truth value have a high correlation on ic or rank ic, the implementation is considered correct. + 3. If no ground truth value is provided, the implementation is considered correct if the code executes successfully (assuming the data provided is correct). Any exceptions, including those actively raised, are considered faults of the code. Additionally, the code feedback must align with the scenario and factor description. + + Please respond in JSON format, without any other content. Here is an example structure for the JSON output, please strictly follow the format: + { + "final_decision": true, + "final_feedback": "The final feedback message, A SINGLE LINE OF TEXT" + } + +evaluator_final_decision_v1_user: |- + --------------Factor information:--------------- + {{ factor_information }} + --------------Execution feedback:--------------- + {{ execution_feedback }} + --------------Code feedback:--------------- + {{ code_feedback }} + --------------Factor value feedback:--------------- + {{ value_feedback }} diff --git a/alphaagent/components/coder/factor_coder/prompts_alphaagent.yaml b/alphaagent/components/coder/factor_coder/prompts_alphaagent.yaml new file mode 100755 index 00000000..02d3ffb2 --- /dev/null +++ b/alphaagent/components/coder/factor_coder/prompts_alphaagent.yaml @@ -0,0 +1,456 @@ +evaluator_code_feedback_v1_system: |- + User is trying to implement some factors with expression in the following scenario: + {{ scenario }} + + **Only the following operations are allowed in expression:** + ### **Cross-sectional Functions** + - **RANK(A)**: Ranking of each element in the cross-sectional dimension of A. + - **ZSCORE(A)**: Z-score of each element in the cross-sectional dimension of A.
+ - **MEAN(A)**: Mean value of each element in the cross-sectional dimension of A. + - **STD(A)**: Standard deviation in the cross-sectional dimension of A. + - **SKEW(A)**: Skewness in the cross-sectional dimension of A. + - **KURT(A)**: Kurtosis in the cross-sectional dimension of A. + - **MAX(A)**: Maximum value in the cross-sectional dimension of A. + - **MIN(A)**: Minimum value in the cross-sectional dimension of A. + - **MEDIAN(A)**: Median value in the cross-sectional dimension of A + - **SCALE(A, target_sum)**: Scale the absolute values in the cross-section to sum to target_sum. + + ### **Time-Series Functions** + - **DELTA(A, n)**: Change in value of A over n periods. + - **DELAY(A, n)**: Value of A delayed by n periods. + - **TS_MEAN(A, n)**: Mean value of sequence A over the past n days. + - **TS_SUM(A, n)**: Sum of sequence A over the past n days. + - **TS_RANK(A, n)**: Time-series rank of the last value of A in the past n days. + - **TS_ZSCORE(A, n)**: Z-score for each sequence in A over the past n days. + - **TS_MEDIAN(A, n)**: Median value of sequence A over the past n days. + - **TS_PCTCHANGE(A, p)**: Percentage change in the value of sequence A over p periods. + - **TS_MIN(A, n)**: Minimum value of A in the past n days. + - **TS_MAX(A, n)**: Maximum value of A in the past n days. + - **TS_ARGMAX(A, n)**: The index (relative to the current time) of the maximum value of A over the past n days. + - **TS_ARGMIN(A, n)**: The index (relative to the current time) of the minimum value of A over the past n days. + - **TS_QUANTILE(A, p, q)**: Rolling quantile of sequence A over the past p periods, where q is the quantile value between 0 and 1. + - **TS_STD(A, n)**: Standard deviation of sequence A over the past n days. + - **TS_VAR(A, p)**: Rolling variance of sequence A over the past p periods. + - **TS_CORR(A, B, n)**: Correlation coefficient between sequences A and B over the past n days. 
+ - **TS_COVARIANCE(A, B, n)**: Covariance between sequences A and B over the past n days. + - **TS_MAD(A, n)**: Rolling Median Absolute Deviation of sequence A over the past n days. + - **PERCENTILE(A, q, p)**: Quantile of sequence A, where q is the quantile value between 0 and 1. If p is provided, it calculates the rolling quantile over the past p periods. + - **HIGHDAY(A, n)**: Number of days since the highest value of A in the past n days. + - **LOWDAY(A, n)**: Number of days since the lowest value of A in the past n days. + - **SUMAC(A, n)**: Cumulative sum of A over the past n days. + + ### **Moving Averages and Smoothing Functions** + - **SMA(A, n, m)**: Simple moving average of A over n periods with modifier m. + - **WMA(A, n)**: Weighted moving average of A over n periods, with weights decreasing from 0.9 to 0.9^(n). + - **EMA(A, n)**: Exponential moving average of A over n periods, where the decay factor is 2/(n+1). + - **DECAYLINEAR(A, d)**: Linearly weighted moving average of A over d periods, with weights increasing from 1 to d. + + ### **Mathematical Operations** + - **PROD(A, n)**: Product of values in A over the past n days. Use `*` for general multiplication. + - **LOG(A)**: Natural logarithm of each element in A. + - **SQRT(A)**: Square root of each element in A. + - **POW(A, n)**: Raise each element in A to the power of n. + - **SIGN(A)**: Sign of each element in A, one of 1, 0, or -1. + - **EXP(A)**: Exponential of each element in A. + - **ABS(A)**: Absolute value of A. + - **MAX(A, B)**: Maximum value between A and B. + - **MIN(A, B)**: Minimum value between A and B. + - **INV(A)**: Reciprocal (1/x) of each element in sequence A. + - **FLOOR(A)**: Floor of each element in sequence A. + + ### **Conditional and Logical Functions** + - **COUNT(C, n)**: Count of samples satisfying condition C in the past n periods. Here, C is a logical expression, e.g., `$close > $open`. 
+ - **SUMIF(A, n, C)**: Sum of A over the past n periods if condition C is met. Here, C is a logical expression. + - **FILTER(A, C)**: Filtering multi-column sequence A based on condition C. Here, C is presented in a logical expression form, with the same size as A. + - **(C1)&&(C2)**: Logical operation "and". Both C1 and C2 are logical expressions, such as A > B. + - **(C1)||(C2)**: Logical operation "or". Both C1 and C2 are logical expressions, such as A > B. + - **(C1)?(A):(B)**: Logical operation "If condition C1 holds, then A, otherwise B". C1 is a logical expression, such as A > B. + + ### **Regression and Residual Functions** + - **SEQUENCE(n)**: A single-column sequence of length n, ranging from 1 to integer n. `SEQUENCE()` should always be nested in `REGBETA()` or `REGRESI()` as argument B. + - **REGBETA(A, B, n)**: Regression coefficient of A on B using the past n samples, where A MUST be a multi-column sequence and B a single-column or multi-column sequence. + - **REGRESI(A, B, n)**: Residual of regression of A on B using the past n samples, where A MUST be a multi-column sequence and B a single-column or multi-column sequence. + + ### **Technical Indicators** + - **RSI(A, n)**: Relative Strength Index of sequence A over n periods. Measures momentum by comparing the magnitude of recent gains to recent losses. + - **MACD(A, short_window, long_window)**: Moving Average Convergence Divergence (MACD) of sequence A, calculated as the difference between the short-term (short_window) and long-term (long_window) exponential moving averages. + - **BB_MIDDLE(A, n)**: Middle Bollinger Band, calculated as the n-period simple moving average of sequence A. + - **BB_UPPER(A, n)**: Upper Bollinger Band, calculated as middle band plus two standard deviations of sequence A over n periods. + - **BB_LOWER(A, n)**: Lower Bollinger Band, calculated as middle band minus two standard deviations of sequence A over n periods. 
+ + + + Note that: + - Only the variables provided in data (e.g., `$open`), arithmetic operators (`+, -, *, /`), logical operators (`&&, ||`), and the operations above are allowed in the factor expression. + - Make sure your factor expression contains at least one variable within the dataframe columns (e.g., $open), combined with registered operations above. Do NOT use any undeclared variable (e.g., `n`, `w_1`) and undefined symbols (e.g., `=`) in the expression. + - Pay attention to the distinction between operations with the TS prefix (e.g., `TS_STD()`) and those without (e.g., `STD()`). + + + User will provide you the information of the factor. + + Your job is to check whether user's factor expression aligns with the factor description and whether the factor can be correctly calculated. The factor expression was rendered into a python jinja2 template and then was executed. The user will provide the execution error message if execution failed. + + Your comments should examine whether the user's factor expression conveys a meaning similar to that of the factor description. Minor discrepancies between the factor formulation and the expression are acceptable. E.g., differences in window size or the implementation of non-core elements are OK. There's no need to nitpick. + + Notice that your comments are not for user to debug the expression. They are sent to the coding agent to correct the expression. So don't give any following items for the user to check like "Please check the code line XXX". + + Your suggestion should not include any code, just some clear and short suggestions. Please point out very critical issues in your response, ignore non-important issues to avoid confusion. + + If there is no big issue found in the expression, you need to respond with "No comment found" without any other comment. + + You should provide the suggestion to each of your comments to help the user improve the expression. Please respond with the comments in the following format.
Here is an example structure for the output: + comment 1: The comment message 1 + comment 2: The comment message 2 + +evaluator_code_feedback_v1_user: |- + --------------Factor information:--------------- + {{ factor_information }} + --------------Factor Expression in the Python template:--------------- + {{ code }} + --------------Execution feedback:--------------- + {{ execution_feedback }} + {% if value_feedback is not none %} + --------------Factor value feedback:--------------- + {{ value_feedback }} + {% endif %} + {% if gt_code is not none %} + {% endif %} + +evolving_strategy_factor_implementation_v1_system: |- + User is trying to implement some factors by writing factor expressions in the following scenario: + {{ scenario }} + + An executable factor expression is expected to align with the factor description if it is possible to implement using available data and operations. + + To help you write the correct expressions, the user might provide multiple pieces of information that help you write the correct expression: + 1. The user might provide you the correct expression to similar factors. You should learn from these expressions to write the correct expression. + 2. The user might provide you the failed former expression and the corresponding feedback to the expression. The feedback covers the execution, the expression and the factor value. You should analyze the feedback and try to correct the latest expression. + 3. The user might provide you the suggestion to the latest failed expression and some similar fail-to-correct pairs. Each pair contains the failed expression with a similar error and the corresponding corrected version of the expression. You should learn from these suggestions to write the correct expression. + 4. Other parts of the code snippet are a fixed jinja2 template. The expression in your response will replace the former one and be executed.
+ + + **Your task is to correct or rewrite your expression based on your former latest attempt below which consists of your former expression and code feedback.** + + **Only the following operations are allowed in expression:** + ### **Cross-sectional Functions** + - **RANK(A)**: Ranking of each element in the cross-sectional dimension of A. + - **ZSCORE(A)**: Z-score of each element in the cross-sectional dimension of A. + - **MEAN(A)**: Mean value of each element in the cross-sectional dimension of A. + - **STD(A)**: Standard deviation in the cross-sectional dimension of A. + - **SKEW(A)**: Skewness in the cross-sectional dimension of A. + - **KURT(A)**: Kurtosis in the cross-sectional dimension of A. + - **MAX(A)**: Maximum value in the cross-sectional dimension of A. + - **MIN(A)**: Minimum value in the cross-sectional dimension of A. + - **MEDIAN(A)**: Median value in the cross-sectional dimension of A + - **SCALE(A, target_sum)**: Scale the absolute values in the cross-section to sum to target_sum. + + ### **Time-Series Functions** + - **DELTA(A, n)**: Change in value of A over n periods. + - **DELAY(A, n)**: Value of A delayed by n periods. + - **TS_MEAN(A, n)**: Mean value of sequence A over the past n days. + - **TS_SUM(A, n)**: Sum of sequence A over the past n days. + - **TS_RANK(A, n)**: Time-series rank of the last value of A in the past n days. + - **TS_ZSCORE(A, n)**: Z-score for each sequence in A over the past n days. + - **TS_MEDIAN(A, n)**: Median value of sequence A over the past n days. + - **TS_PCTCHANGE(A, p)**: Percentage change in the value of sequence A over p periods. + - **TS_MIN(A, n)**: Minimum value of A in the past n days. + - **TS_MAX(A, n)**: Maximum value of A in the past n days. + - **TS_ARGMAX(A, n)**: The index (relative to the current time) of the maximum value of A over the past n days. + - **TS_ARGMIN(A, n)**: The index (relative to the current time) of the minimum value of A over the past n days. 
+ - **TS_QUANTILE(A, p, q)**: Rolling quantile of sequence A over the past p periods, where q is the quantile value between 0 and 1. + - **TS_STD(A, n)**: Standard deviation of sequence A over the past n days. + - **TS_VAR(A, p)**: Rolling variance of sequence A over the past p periods. + - **TS_CORR(A, B, n)**: Correlation coefficient between sequences A and B over the past n days. + - **TS_COVARIANCE(A, B, n)**: Covariance between sequences A and B over the past n days. + - **TS_MAD(A, n)**: Rolling Median Absolute Deviation of sequence A over the past n days. + - **PERCENTILE(A, q, p)**: Quantile of sequence A, where q is the quantile value between 0 and 1. If p is provided, it calculates the rolling quantile over the past p periods. + - **HIGHDAY(A, n)**: Number of days since the highest value of A in the past n days. + - **LOWDAY(A, n)**: Number of days since the lowest value of A in the past n days. + - **SUMAC(A, n)**: Cumulative sum of A over the past n days. + + ### **Moving Averages and Smoothing Functions** + - **SMA(A, n, m)**: Simple moving average of A over n periods with modifier m. + - **WMA(A, n)**: Weighted moving average of A over n periods, with weights decreasing from 0.9 to 0.9^(n). + - **EMA(A, n)**: Exponential moving average of A over n periods, where the decay factor is 2/(n+1). + - **DECAYLINEAR(A, d)**: Linearly weighted moving average of A over d periods, with weights increasing from 1 to d. + + ### **Mathematical Operations** + - **PROD(A, n)**: Product of values in A over the past n days. Use `*` for general multiplication. + - **LOG(A)**: Natural logarithm of each element in A. + - **SQRT(A)**: Square root of each element in A. + - **POW(A, n)**: Raise each element in A to the power of n. + - **SIGN(A)**: Sign of each element in A, one of 1, 0, or -1. + - **EXP(A)**: Exponential of each element in A. + - **ABS(A)**: Absolute value of A. + - **MAX(A, B)**: Maximum value between A and B. 
+ - **MIN(A, B)**: Minimum value between A and B. + - **INV(A)**: Reciprocal (1/x) of each element in sequence A. + - **FLOOR(A)**: Floor of each element in sequence A. + + ### **Conditional and Logical Functions** + - **COUNT(C, n)**: Count of samples satisfying condition C in the past n periods. Here, C is a logical expression, e.g., `$close > $open`. + - **SUMIF(A, n, C)**: Sum of A over the past n periods if condition C is met. Here, C is a logical expression. + - **FILTER(A, C)**: Filtering multi-column sequence A based on condition C. Here, C is presented in a logical expression form, with the same size as A. + - **(C1)&&(C2)**: Logical operation "and". Both C1 and C2 are logical expressions, such as A > B. + - **(C1)||(C2)**: Logical operation "or". Both C1 and C2 are logical expressions, such as A > B. + - **(C1)?(A):(B)**: Logical operation "If condition C1 holds, then A, otherwise B". C1 is a logical expression, such as A > B. + + ### **Regression and Residual Functions** + - **SEQUENCE(n)**: A single-column sequence of length n, ranging from 1 to integer n. `SEQUENCE()` should always be nested in `REGBETA()` or `REGRESI()` as argument B. + - **REGBETA(A, B, n)**: Regression coefficient of A on B using the past n samples, where A MUST be a multi-column sequence and B a single-column or multi-column sequence. + - **REGRESI(A, B, n)**: Residual of regression of A on B using the past n samples, where A MUST be a multi-column sequence and B a single-column or multi-column sequence. + + ### **Technical Indicators** + - **RSI(A, n)**: Relative Strength Index of sequence A over n periods. Measures momentum by comparing the magnitude of recent gains to recent losses. + - **MACD(A, short_window, long_window)**: Moving Average Convergence Divergence (MACD) of sequence A, calculated as the difference between the short-term (short_window) and long-term (long_window) exponential moving averages. 
+ - **BB_MIDDLE(A, n)**: Middle Bollinger Band, calculated as the n-period simple moving average of sequence A. + - **BB_UPPER(A, n)**: Upper Bollinger Band, calculated as middle band plus two standard deviations of sequence A over n periods. + - **BB_LOWER(A, n)**: Lower Bollinger Band, calculated as middle band minus two standard deviations of sequence A over n periods. + + + + Note that: + - Only the variables provided in data (e.g., `$open`), arithmetic operators (`+, -, *, /`), logical operators (`&&, ||`), and the operations above are allowed in the factor expression. + - Make sure your factor expression contains at least one variable within the dataframe columns (e.g., $open), combined with registered operations above. Do NOT use any undeclared variable (e.g., `n`, `w_1`) and undefined symbols (e.g., `=`) in the expression. + - Pay attention to the distinction between operations with the TS prefix (e.g., TS_STD()) and those without (e.g., `STD()`). + + Please response the corrected expression in the following json format. Here is the structure for the JSON output: + { + "expr": "[CORRECTED_FACTOR_EXPRESSION]" + } + + + + +evolving_strategy_factor_implementation_v2_user: |- + --------------Target factor information:--------------- + {{ factor_information_str }} + + + {% if former_expression is not none %} + --------------Your former latest attempt:--------------- + =====Expression to the former implementation===== + {{ former_expression }} + + =====Feedback to the former implementation===== + {{ former_feedback }} + {% endif %} + + {% if queried_similar_error_knowledge|length != 0 %} + {% if error_summary_critics is none %} + Recall your last failure, your implementation met some errors. + When doing other tasks, you met some similar errors but you finally solve them. 
Here are some examples: + {% for error_content, similar_error_knowledge in queried_similar_error_knowledge %} + --------------Factor information to similar error ({{error_content}}):--------------- + {{ similar_error_knowledge[0].target_task.get_task_information() }} + =====Code with similar error ({{error_content}}):===== + {{ similar_error_knowledge[0].implementation.code }} + =====Success code to former code with similar error ({{error_content}}):===== + {{ similar_error_knowledge[1].implementation.code }} + {% endfor %} + {% else %} + Recall your last failure: your implementation met some errors. + After reviewing some similar errors and their solutions, here are some suggestions for you to correct your code: + {{error_summary_critics}} + {% endif %} + {% endif %} + + {% if similar_successful_factor_description is not none %} + Here are some successful implementations of similar component tasks; take them as references: + --------------Correct code to similar factors:--------------- + =====Factor Description:===== + {{ similar_successful_factor_description }} + =====Factor Expression:===== + {{ similar_successful_expression }} + {% endif %} + {% if latest_attempt_to_latest_successful_execution is not none %} + You have tried to correct your former failed expression but still met some errors.
Here is the latest attempt since the latest successful execution; try not to repeat the same error in your new code: + =====Your latest attempt===== + {{ latest_attempt_to_latest_successful_execution.implementation.code }} + =====Feedback to your latest attempt===== + {{ latest_attempt_to_latest_successful_execution.feedback }} + {% endif %} + +evolving_strategy_error_summary_v2_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + User is doing the following task: + {{factor_information_str}} + + You have written some code but it meets errors like the following: + {{code_and_feedback}} + + The user has found some tasks that met similar errors, and their final correct solutions. + Please refer to these similar errors and their solutions, and provide some clear, short and accurate critiques that might help you solve the issues in your code. + + Your suggestions should not include any code, just some clear and short suggestions. Please point out very critical issues in your response, and ignore non-important issues to avoid confusion. If no big issue is found in the code, you can respond "No critics found". + + Please respond with the critiques in the following format.
Here is an example structure for the output: + critic 1: The critic message to critic 1 + critic 2: The critic message to critic 2 + +evolving_strategy_error_summary_v2_user: |- + {% if queried_similar_error_knowledge|length != 0 %} + {% for error_content, similar_error_knowledge in queried_similar_error_knowledge %} + --------------Factor information to similar error ({{error_content}}):--------------- + {{ similar_error_knowledge[0].target_task.get_task_information() }} + =====Code with similar error ({{error_content}}):===== + {{ similar_error_knowledge[0].implementation.code }} + =====Success code to former code with similar error ({{error_content}}):===== + {{ similar_error_knowledge[1].implementation.code }} + {% endfor %} + {% endif %} + + +select_implementable_factor_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + Your job is to help the user select the easiest-to-implement factors. Some factors may be difficult to implement due to a lack of information or excessive complexity. The user will provide the number of factors you should pick and information about the factors, including their descriptions, formulas, and variable explanations. + The user will also provide the former attempts to implement each factor and the feedback on those implementations. You need to carefully review your previous attempts. Some factors have been repeatedly tried without success. You should consider discarding these factors. + Please analyze the difficulty of each factor, provide the reasons, and respond with the indices of the selected implementable factors in JSON format. Here is an example structure for the JSON output: + { + "Analysis": "Analyze the difficulty of each factor and provide the reason why the factor can be implemented or not.",
+ "selected_factor": "The indices of the selected factors in the list, like [0, 2, 3]. The length should be the number of factors left after filtering." + } + +select_implementable_factor_user: |- + Number of factors you should pick: {{ factor_num }} + {% for factor_info in sub_tasks %} + =============Factor index:{{factor_info[0]}}:============= + =====Factor name:===== + {{ factor_info[1].factor_name }} + =====Factor description:===== + {{ factor_info[1].factor_description }} + =====Factor formulation:===== + {{ factor_info[1].factor_formulation }} + {% if factor_info[2]|length != 0 %} + --------------Your former attempt:--------------- + {% for former_attempt in factor_info[2] %} + =====Code to attempt {{ loop.index }}===== + {{ former_attempt.implementation.code }} + =====Feedback to attempt {{ loop.index }}===== + {{ former_attempt.feedback }} + {% endfor %} + {% endif %} + {% endfor %} + +evaluator_output_format_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + User will provide you the format of the output. Please help to check whether the output is aligned with the format. + Please respond in the JSON format. Here is an example structure for the JSON output: + { + "output_format_decision": true, + "output_format_feedback": "The output format is correct." + } + + +evaluator_final_decision_v1_system: |- + User is trying to implement some factors in the following scenario: + {{ scenario }} + User has finished evaluation and got some feedback from the evaluator. + The evaluator ran the code, obtained the factor value dataframe, and provided feedback regarding the user's code and its output. You should analyze the feedback and consider the scenario and factor description to give a final decision about the evaluation result. The final decision concludes whether the factor is implemented correctly and, if the final decision is False, gives detailed feedback containing the reason and a suggestion.
+ + The implementation is considered correct if the code executes successfully (assuming the data provided is correct). Any exceptions, including those actively raised, are considered faults of the code. Additionally, the code feedback must align with the scenario and factor description. + + Please respond with the final decision in JSON format. Here is an example structure for the JSON output, please strictly follow the format: + { + "final_decision": true, + "final_feedback": "The final feedback message, A SINGLE LINE OF TEXT" + } + +evaluator_final_decision_v1_user: |- + --------------Factor information:--------------- + {{ factor_information }} + --------------Execution feedback:--------------- + {{ execution_feedback }} + --------------Code feedback:--------------- + {{ code_feedback }} + --------------Factor value feedback:--------------- + {{ value_feedback }} + + +function_lib_description: |- + Do NOT use any undeclared variables (such as `n`) in your expression. Only the variables provided in data (e.g., `$open`), arithmetic operators (`+, -, *, /`), logical operators (`&&, ||`), and the operations listed below are allowed in the expression. + The following operations are allowed in the expression: + ### **Ranking and Normalization Functions** + - **RANK(A)**: Ranking of each element in the cross-sectional dimension of A. + - **ZSCORE(A)**: Z-score of each element in the cross-sectional dimension of A. + - **MEAN(A)**: Mean value of each element in the cross-sectional dimension of A. + - **STD(A)**: Standard deviation in the cross-sectional dimension of A. + - **SKEW(A)**: Skewness in the cross-sectional dimension of A. + - **KURT(A)**: Kurtosis in the cross-sectional dimension of A. + - **MAX(A)**: Maximum value in the cross-sectional dimension of A. + - **MIN(A)**: Minimum value in the cross-sectional dimension of A. + - **MEDIAN(A)**: Median value in the cross-sectional dimension of A.
+ - **SCALE(A, target_sum)**: Scale the absolute values in the cross-section to sum to target_sum. + - **TS_RANK(A, n)**: Time-series rank of the last value of A in the past n days. + - **TS_ZSCORE(A, n)**: Z-score for each sequence in A over the past n days. + + ### **Statistical Functions** + - **TS_STD(A, n)**: Standard deviation of sequence A over the past n days. + - **TS_VAR(A, p)**: Rolling variance of sequence A over the past p periods. + - **TS_CORR(A, B, n)**: Correlation coefficient between sequences A and B over the past n days. + - **TS_COVARIANCE(A, B, n)**: Covariance between sequences A and B over the past n days. + - **PERCENTILE(A, q, p)**: Quantile of sequence A, where q is the quantile value between 0 and 1. If p is provided, it calculates the rolling quantile over the past p periods. + + ### **Time-Series Functions** + - **DELTA(A, n)**: Change in value of A over n periods. + - **DELAY(A, n)**: Value of A delayed by n periods. + - **TS_MEAN(A, n)**: Mean value of sequence A over the past n days. + - **TS_SUM(A, n)**: Sum of sequence A over the past n days. + - **TS_MEDIAN(A, n)**: Median value of sequence A over the past n days. + - **TS_MAD(A, n)**: Rolling Median Absolute Deviation of sequence A over the past n days. + - **TS_PCTCHANGE(A, p)**: Percentage change in the value of sequence A over p periods. + - **TS_MIN(A, n)**: Minimum value of A in the past n days. + - **TS_MAX(A, n)**: Maximum value of A in the past n days. + - **TS_ARGMAX(A, n)**: The index (relative to the current time) of the maximum value of A over the past n days. + - **TS_ARGMIN(A, n)**: The index (relative to the current time) of the minimum value of A over the past n days. + - **TS_QUANTILE(A, p, q)**: Rolling quantile of sequence A over the past p periods, where q is the quantile value between 0 and 1. + - **HIGHDAY(A, n)**: Number of days since the highest value of A in the past n days. 
+ - **LOWDAY(A, n)**: Number of days since the lowest value of A in the past n days. + - **SUMAC(A, n)**: Cumulative sum of A over the past n days. + ### **Moving Averages and Smoothing Functions** + - **SMA(A, n, m)**: Simple moving average of A over n periods with modifier m. + - **WMA(A, n)**: Weighted moving average of A over n periods, with weights decreasing from 0.9 to 0.9^(n). + - **EMA(A, n)**: Exponential moving average of A over n periods, where the decay factor is 2/(n+1). + - **DECAYLINEAR(A, d)**: Linearly weighted moving average of A over d periods, with weights increasing from 1 to d. + + ### **Mathematical Operations** + - **PROD(A, n)**: Product of values in A over the past n days. Use `*` for general multiplication. + - **LOG(A)**: Natural logarithm of each element in A. + - **SQRT(A)**: Square root of each element in A. + - **POW(A, n)**: Raise each element in A to the power of n. + - **SIGN(A)**: Sign of each element in A, one of 1, 0, or -1. + - **EXP(A)**: Exponential of each element in A. + - **ABS(A)**: Absolute value of A. + - **MAX(A, B)**: Pair-wise maximum value between A and B. + - **MIN(A, B)**: Pair-wise minimum value between A and B. + - **INV(A)**: Reciprocal (1/x) of each element in sequence A. + - **ADD(A, B)**: Add A and B element-wise. + - **SUBTRACT(A, B)**: Subtract B from A element-wise. + - **MULTIPLY(A, B)**: Multiply A and B element-wise. + - **DIVIDE(A, B)**: Divide A by B element-wise. + - **AND(A, B)**: Logical AND operation between A and B. + - **OR(A, B)**: Logical OR operation between A and B. + + ### **Conditional and Logical Functions** + - **COUNT(C, n)**: Count of samples satisfying condition C in the past n periods. Here, C is a logical expression, e.g., `$close > $open`. + - **SUMIF(A, n, C)**: Sum of A over the past n periods if condition C is met. Here, C is a logical expression. + - **FILTER(A, C)**: Filtering multi-column sequence A based on condition C. 
Here, C is presented in a logical expression form, with the same size as A. + - **(C1)&&(C2)**: Logical operation "and". Both C1 and C2 are logical expressions, such as A > B. + - **(C1)||(C2)**: Logical operation "or". Both C1 and C2 are logical expressions, such as A > B. + - **(C1)?(A):(B)**: Logical operation "If condition C1 holds, then A, otherwise B". C1 is a logical expression, such as A > B. + + ### **Regression and Residual Functions** + - **SEQUENCE(n)**: A single-column sequence of length n, ranging from 1 to integer n. `SEQUENCE()` should always be nested in `REGBETA()` or `REGRESI()` as argument B. + - **REGBETA(A, B, n)**: Regression coefficient of A on B using the past n samples, where A MUST be a multi-column sequence and B a single-column or multi-column sequence. + - **REGRESI(A, B, n)**: Residual of regression of A on B using the past n samples, where A MUST be a multi-column sequence and B a single-column or multi-column sequence. + + ### **Technical Indicators** + - **RSI(A, n)**: Relative Strength Index of sequence A over n periods. Measures momentum by comparing the magnitude of recent gains to recent losses. + - **MACD(A, short_window, long_window)**: Moving Average Convergence Divergence (MACD) of sequence A, calculated as the difference between the short-term (short_window) and long-term (long_window) exponential moving averages. + - **BB_MIDDLE(A, n)**: Middle Bollinger Band, calculated as the n-period simple moving average of sequence A. + - **BB_UPPER(A, n)**: Upper Bollinger Band, calculated as middle band plus two standard deviations of sequence A over n periods. + - **BB_LOWER(A, n)**: Lower Bollinger Band, calculated as middle band minus two standard deviations of sequence A over n periods. 
+ diff --git a/alphaagent/components/coder/factor_coder/template.jinjia2 b/alphaagent/components/coder/factor_coder/template.jinjia2 new file mode 100755 index 00000000..36215aae --- /dev/null +++ b/alphaagent/components/coder/factor_coder/template.jinjia2 @@ -0,0 +1,31 @@ + +import pandas as pd +import numpy as np +import os +from alphaagent.components.coder.factor_coder.expr_parser import parse_expression, parse_symbol +from alphaagent.components.coder.factor_coder.function_lib import * + + +def calculate_factor(expr: str, name: str): + # stock dataframe + df = pd.read_hdf('./daily_pv.h5', key='data') + + expr = parse_symbol(expr, df.columns) + expr = parse_expression(expr) + + # replace '$var' by 'df['var'] to extract var's actual values + for col in df.columns: + expr = expr.replace(col[1:], f"df[\'{col}\']") + + df[name] = eval(expr) + result = df[name].astype(np.float64) + + if os.path.exists('result.h5'): + os.remove('result.h5') + result.to_hdf('result.h5', key='data') + +if __name__ == '__main__': + # Input factor expression. Do NOT use the variable format like "df['$xxx']" in factor expressions. Instead, you should use "$xxx". 
+ expr = "{{ expression }}" # Your output factor expression will be filled in here + name = "{{ factor_name }}" # Your output factor name will be filled in here + calculate_factor(expr, name) \ No newline at end of file diff --git a/alphaagent/components/coder/factor_coder/template_debug.jinjia2 b/alphaagent/components/coder/factor_coder/template_debug.jinjia2 new file mode 100755 index 00000000..14955e8d --- /dev/null +++ b/alphaagent/components/coder/factor_coder/template_debug.jinjia2 @@ -0,0 +1,38 @@ +import pandas as pd +import numpy as np +import os +from alphaagent.components.coder.factor_coder.expr_parser import parse_expression, parse_symbol +from alphaagent.components.coder.factor_coder.function_lib import * + + +def calculate_factor(expr: str, name: str): + # Stock DataFrame + # df.columns: ['$open', '$close', '$high', '$low', '$volume', '$amount', '$turn', '$pettm', '$pbmrq'] + df = pd.read_hdf('/home/tangziyi/RD-Agent/alphaagent/scenarios/qlib/experiment/factor_data_template/daily_pv_all.h5', key='data') + + print('expr: ', expr) + expr = parse_symbol(expr, df.columns) + print('expr: ', expr) + expr = parse_expression(expr) + + print('expr: ', expr) + # replace 'var_name' by 'df['$var_name'] + for col in df.columns: + expr = expr.replace(col[1:], f"df[\'{col}\']") + + print('expr: ', expr) + df[name] = eval(expr) + result = df[name].astype(np.float64) + print(result) + print(type(result)) + print(result.shape) + + if os.path.exists('result.h5'): + os.remove('result.h5') + result.to_hdf('result.h5', key='data') + +if __name__ == '__main__': + # Input factor expression. Do NOT use the variable format like "df['$xxx']" in factor expressions. Instead, you should use "$xxx". 
+ expr = "{{ expression }}" + name = "{{ factor_name }}" + calculate_factor(expr, name) diff --git a/alphaagent/components/coder/factor_coder/test.py b/alphaagent/components/coder/factor_coder/test.py new file mode 100755 index 00000000..d5d9640b --- /dev/null +++ b/alphaagent/components/coder/factor_coder/test.py @@ -0,0 +1,16 @@ +from jinja2 import Template + +# Step 1: 读取模板内容 +with open('/home/tangziyi/RD-Agent/alphaagent/components/coder/factor_coder/template_debug.jinjia2', 'r') as f: + template_content = f.read() + +# Step 2: 渲染模板 +template = Template(template_content) +rendered_code = template.render( + expression="ZSCORE( (TS_STD($return,20) < TS_QUANTILE(TS_STD($return,20),60,0.3)) ? (1.5/(TS_STD($return,20)+1e-8)) : (1/(TS_STD($return,20)+1e-8)) )", # "DELAY($high + $low / 2, 5)", + factor_name="FACTOR_1" + ) + +# Step 3: 打印渲染后的代码 +print(rendered_code) +exec(rendered_code) \ No newline at end of file diff --git a/alphaagent/components/coder/model_coder/__init__.py b/alphaagent/components/coder/model_coder/__init__.py new file mode 100755 index 00000000..b1d3e8dd --- /dev/null +++ b/alphaagent/components/coder/model_coder/__init__.py @@ -0,0 +1,21 @@ +from alphaagent.components.coder.CoSTEER import CoSTEER +from alphaagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS +from alphaagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator +from alphaagent.components.coder.model_coder.evaluators import ModelCoSTEEREvaluator +from alphaagent.components.coder.model_coder.evolving_strategy import ( + ModelMultiProcessEvolvingStrategy, +) +from alphaagent.core.scenario import Scenario + + +class ModelCoSTEER(CoSTEER): + def __init__( + self, + scen: Scenario, + *args, + **kwargs, + ) -> None: + eva = CoSTEERMultiEvaluator(ModelCoSTEEREvaluator(scen=scen), scen=scen) + es = ModelMultiProcessEvolvingStrategy(scen=scen, settings=CoSTEER_SETTINGS) + + super().__init__(*args, settings=CoSTEER_SETTINGS, eva=eva, es=es, evolving_version=2, 
scen=scen, **kwargs) diff --git a/alphaagent/components/coder/model_coder/benchmark/eval.py b/alphaagent/components/coder/model_coder/benchmark/eval.py new file mode 100755 index 00000000..9646b9ca --- /dev/null +++ b/alphaagent/components/coder/model_coder/benchmark/eval.py @@ -0,0 +1,71 @@ +# TODO: inherent from the benchmark base class +import torch + +from alphaagent.components.coder.model_coder.model import ModelFBWorkspace + + +def get_data_conf(init_val): + # TODO: design this step in the workflow + in_dim = 1000 + in_channels = 128 + exec_config = {"model_eval_param_init": init_val} + node_feature = torch.randn(in_dim, in_channels) + edge_index = torch.randint(0, in_dim, (2, 2000)) + return (node_feature, edge_index), exec_config + + +class ModelImpValEval: + """ + Evaluate the similarity of the model structure by changing the input and observe the output. + + Assumption: + - If the model structure is similar, the output will change in similar way when we change the input. + + Challenge: + - The key difference between it and implementing models is that we have parameters in the layers (Model operators often have no parameters or are given parameters). + - we try to initialize the model param in similar value. So only the model structure is different. + + Comparing the correlation of following sequences + - modelA[init1](input1).hidden_out1, modelA[init1](input2).hidden_out1, ... + - modelB[init1](input1).hidden_out1, modelB[init1](input2).hidden_out1, ... + + For each hidden output, we can calculate a correlation. The average correlation will be the metrics. + """ + + def evaluate(self, gt: ModelFBWorkspace, gen: ModelFBWorkspace): + round_n = 10 + + eval_pairs: list[tuple] = [] + + # run different input value + for _ in range(round_n): + # run different model initial parameters. 
+ for init_val in [-0.2, -0.1, 0.1, 0.2]: + _, gt_res = gt.execute(input_value=init_val, param_init_value=init_val) + _, res = gen.execute(input_value=init_val, param_init_value=init_val) + eval_pairs.append((res, gt_res)) + + # flat and concat the output + res_batch, gt_res_batch = [], [] + for res, gt_res in eval_pairs: + res_batch.append(res.reshape(-1)) + gt_res_batch.append(gt_res.reshape(-1)) + res_batch = torch.stack(res_batch) + gt_res_batch = torch.stack(gt_res_batch) + + res_batch = res_batch.detach().numpy() + gt_res_batch = gt_res_batch.detach().numpy() + + # pearson correlation of each hidden output + def norm(x): + return (x - x.mean(axis=0)) / x.std(axis=0) + + dim_corr = (norm(res_batch) * norm(gt_res_batch)).mean(axis=0) # the correlation of each hidden output + + # aggregate all the correlation + avr_corr = dim_corr.mean() + # FIXME: + # It is too high(e.g. 0.944) . + # Check if it is not a good evaluation!! + # Maybe all the same initial params will results in extreamly high correlation without regard to the model structure. + return avr_corr diff --git a/alphaagent/components/coder/model_coder/benchmark/gt_code/A-DGN.py b/alphaagent/components/coder/model_coder/benchmark/gt_code/A-DGN.py new file mode 100755 index 00000000..5c65bf12 --- /dev/null +++ b/alphaagent/components/coder/model_coder/benchmark/gt_code/A-DGN.py @@ -0,0 +1,134 @@ +import math +from typing import Any, Callable, Dict, Optional, Union + +import torch +from torch import Tensor +from torch.nn import Parameter +from torch_geometric.nn.conv import GCNConv, MessagePassing +from torch_geometric.nn.inits import zeros +from torch_geometric.nn.resolver import activation_resolver +from torch_geometric.typing import Adj + + +class AntiSymmetricConv(torch.nn.Module): + r"""The anti-symmetric graph convolutional operator from the + `"Anti-Symmetric DGN: a stable architecture for Deep Graph Networks" + `_ paper. + + .. 
math:: + \mathbf{x}^{\prime}_i = \mathbf{x}_i + \epsilon \cdot \sigma \left( + (\mathbf{W}-\mathbf{W}^T-\gamma \mathbf{I}) \mathbf{x}_i + + \Phi(\mathbf{X}, \mathcal{N}_i) + \mathbf{b}\right), + + where :math:`\Phi(\mathbf{X}, \mathcal{N}_i)` denotes a + :class:`~torch.nn.conv.MessagePassing` layer. + + Args: + in_channels (int): Size of each input sample. + phi (MessagePassing, optional): The message passing module + :math:`\Phi`. If set to :obj:`None`, will use a + :class:`~torch_geometric.nn.conv.GCNConv` layer as default. + (default: :obj:`None`) + num_iters (int, optional): The number of times the anti-symmetric deep + graph network operator is called. (default: :obj:`1`) + epsilon (float, optional): The discretization step size + :math:`\epsilon`. (default: :obj:`0.1`) + gamma (float, optional): The strength of the diffusion :math:`\gamma`. + It regulates the stability of the method. (default: :obj:`0.1`) + act (str, optional): The non-linear activation function :math:`\sigma`, + *e.g.*, :obj:`"tanh"` or :obj:`"relu"`. (default: :class:`"tanh"`) + act_kwargs (Dict[str, Any], optional): Arguments passed to the + respective activation function defined by :obj:`act`. + (default: :obj:`None`) + bias (bool, optional): If set to :obj:`False`, the layer will not learn + an additive bias. 
(default: :obj:`True`) + + Shapes: + - **input:** + node features :math:`(|\mathcal{V}|, F_{in})`, + edge indices :math:`(2, |\mathcal{E}|)`, + edge weights :math:`(|\mathcal{E}|)` *(optional)* + - **output:** node features :math:`(|\mathcal{V}|, F_{in})` + """ + + def __init__( + self, + in_channels: int, + phi: Optional[MessagePassing] = None, + num_iters: int = 1, + epsilon: float = 0.1, + gamma: float = 0.1, + act: Union[str, Callable, None] = "tanh", + act_kwargs: Optional[Dict[str, Any]] = None, + bias: bool = True, + ): + super().__init__() + + self.in_channels = in_channels + self.num_iters = num_iters + self.gamma = gamma + self.epsilon = epsilon + self.act = activation_resolver(act, **(act_kwargs or {})) + + if phi is None: + phi = GCNConv(in_channels, in_channels, bias=False) + + self.W = Parameter(torch.empty(in_channels, in_channels)) + self.register_buffer("eye", torch.eye(in_channels)) + self.phi = phi + + if bias: + self.bias = Parameter(torch.empty(in_channels)) + else: + self.register_parameter("bias", None) + + self.reset_parameters() + + def reset_parameters(self): + r"""Resets all learnable parameters of the module.""" + torch.nn.init.kaiming_uniform_(self.W, a=math.sqrt(5)) + self.phi.reset_parameters() + zeros(self.bias) + + def forward(self, x: Tensor, edge_index: Adj, *args, **kwargs) -> Tensor: + r"""Runs the forward pass of the module.""" + antisymmetric_W = self.W - self.W.t() - self.gamma * self.eye + + for _ in range(self.num_iters): + h = self.phi(x, edge_index, *args, **kwargs) + h = x @ antisymmetric_W.t() + h + + if self.bias is not None: + h += self.bias + + if self.act is not None: + h = self.act(h) + + x = x + self.epsilon * h + + return x + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(" + f"{self.in_channels}, " + f"phi={self.phi}, " + f"num_iters={self.num_iters}, " + f"epsilon={self.epsilon}, " + f"gamma={self.gamma})" + ) + + +model_cls = AntiSymmetricConv + + +if __name__ == "__main__": + 
node_features = torch.load("node_features.pt") + edge_index = torch.load("edge_index.pt") + + # Model instantiation and forward pass + model = AntiSymmetricConv(in_channels=node_features.size(-1)) + output = model(node_features, edge_index) + + # Save output to a file + torch.save(output, "gt_output.pt") diff --git a/alphaagent/components/coder/model_coder/benchmark/gt_code/dirgnn.py b/alphaagent/components/coder/model_coder/benchmark/gt_code/dirgnn.py new file mode 100755 index 00000000..d22cb898 --- /dev/null +++ b/alphaagent/components/coder/model_coder/benchmark/gt_code/dirgnn.py @@ -0,0 +1,89 @@ +import copy + +import torch +from torch import Tensor +from torch_geometric.nn.conv import MessagePassing + + +class DirGNNConv(torch.nn.Module): + r"""A generic wrapper for computing graph convolution on directed + graphs as described in the `"Edge Directionality Improves Learning on + Heterophilic Graphs" `_ paper. + :class:`DirGNNConv` will pass messages both from source nodes to target + nodes and from target nodes to source nodes. + + Args: + conv (MessagePassing): The underlying + :class:`~torch_geometric.nn.conv.MessagePassing` layer to use. + alpha (float, optional): The alpha coefficient used to weight the + aggregations of in- and out-edges as part of a convex combination. + (default: :obj:`0.5`) + root_weight (bool, optional): If set to :obj:`True`, the layer will add + transformed root node features to the output. 
+ (default: :obj:`True`) + """ + + def __init__( + self, + conv: MessagePassing, + alpha: float = 0.5, + root_weight: bool = True, + ): + super().__init__() + + self.alpha = alpha + self.root_weight = root_weight + + self.conv_in = copy.deepcopy(conv) + self.conv_out = copy.deepcopy(conv) + + if hasattr(conv, "add_self_loops"): + self.conv_in.add_self_loops = False + self.conv_out.add_self_loops = False + if hasattr(conv, "root_weight"): + self.conv_in.root_weight = False + self.conv_out.root_weight = False + + if root_weight: + self.lin = torch.nn.Linear(conv.in_channels, conv.out_channels) + else: + self.lin = None + + self.reset_parameters() + + def reset_parameters(self): + r"""Resets all learnable parameters of the module.""" + self.conv_in.reset_parameters() + self.conv_out.reset_parameters() + if self.lin is not None: + self.lin.reset_parameters() + + def forward(self, x: Tensor, edge_index: Tensor) -> Tensor: + """""" # noqa: D419 + x_in = self.conv_in(x, edge_index) + x_out = self.conv_out(x, edge_index.flip([0])) + + out = self.alpha * x_out + (1 - self.alpha) * x_in + + if self.root_weight: + out = out + self.lin(x) + + return out + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.conv_in}, alpha={self.alpha})" + + +model_cls = DirGNNConv + + +if __name__ == "__main__": + node_features = torch.load("node_features.pt") + edge_index = torch.load("edge_index.pt") + + # Model instantiation and forward pass + model = DirGNNConv(MessagePassing()) + output = model(node_features, edge_index) + + # Save output to a file + torch.save(output, "gt_output.pt") diff --git a/alphaagent/components/coder/model_coder/benchmark/gt_code/gpsconv.py b/alphaagent/components/coder/model_coder/benchmark/gt_code/gpsconv.py new file mode 100755 index 00000000..305b30e3 --- /dev/null +++ b/alphaagent/components/coder/model_coder/benchmark/gt_code/gpsconv.py @@ -0,0 +1,198 @@ +import inspect +from typing import Any, Dict, Optional + +import torch +import 
torch.nn.functional as F +from torch import Tensor +from torch.nn import Dropout, Linear, Sequential +from torch_geometric.nn.attention import PerformerAttention +from torch_geometric.nn.conv import MessagePassing +from torch_geometric.nn.inits import reset +from torch_geometric.nn.resolver import activation_resolver, normalization_resolver +from torch_geometric.typing import Adj +from torch_geometric.utils import to_dense_batch + + +class GPSConv(torch.nn.Module): + r"""The general, powerful, scalable (GPS) graph transformer layer from the + `"Recipe for a General, Powerful, Scalable Graph Transformer" + `_ paper. + + The GPS layer is based on a 3-part recipe: + + 1. Inclusion of positional (PE) and structural encodings (SE) to the input + features (done in a pre-processing step via + :class:`torch_geometric.transforms`). + 2. A local message passing layer (MPNN) that operates on the input graph. + 3. A global attention layer that operates on the entire graph. + + .. note:: + + For an example of using :class:`GPSConv`, see + `examples/graph_gps.py + `_. + + Args: + channels (int): Size of each input sample. + conv (MessagePassing, optional): The local message passing layer. + heads (int, optional): Number of multi-head-attentions. + (default: :obj:`1`) + dropout (float, optional): Dropout probability of intermediate + embeddings. (default: :obj:`0.`) + act (str or Callable, optional): The non-linear activation function to + use. (default: :obj:`"relu"`) + act_kwargs (Dict[str, Any], optional): Arguments passed to the + respective activation function defined by :obj:`act`. + (default: :obj:`None`) + norm (str or Callable, optional): The normalization function to + use. (default: :obj:`"batch_norm"`) + norm_kwargs (Dict[str, Any], optional): Arguments passed to the + respective normalization function defined by :obj:`norm`. + (default: :obj:`None`) + attn_type (str): Global attention type, :obj:`multihead` or + :obj:`performer`. 
(default: :obj:`multihead`) + attn_kwargs (Dict[str, Any], optional): Arguments passed to the + attention layer. (default: :obj:`None`) + """ + + def __init__( + self, + channels: int, + conv: Optional[MessagePassing], + heads: int = 1, + dropout: float = 0.0, + act: str = "relu", + act_kwargs: Optional[Dict[str, Any]] = None, + norm: Optional[str] = "batch_norm", + norm_kwargs: Optional[Dict[str, Any]] = None, + attn_type: str = "multihead", + attn_kwargs: Optional[Dict[str, Any]] = None, + ): + super().__init__() + + self.channels = channels + self.conv = conv + self.heads = heads + self.dropout = dropout + self.attn_type = attn_type + + attn_kwargs = attn_kwargs or {} + if attn_type == "multihead": + self.attn = torch.nn.MultiheadAttention( + channels, + heads, + batch_first=True, + **attn_kwargs, + ) + elif attn_type == "performer": + self.attn = PerformerAttention( + channels=channels, + heads=heads, + **attn_kwargs, + ) + else: + # TODO: Support BigBird + raise ValueError(f"{attn_type} is not supported") + + self.mlp = Sequential( + Linear(channels, channels * 2), + activation_resolver(act, **(act_kwargs or {})), + Dropout(dropout), + Linear(channels * 2, channels), + Dropout(dropout), + ) + + norm_kwargs = norm_kwargs or {} + self.norm1 = normalization_resolver(norm, channels, **norm_kwargs) + self.norm2 = normalization_resolver(norm, channels, **norm_kwargs) + self.norm3 = normalization_resolver(norm, channels, **norm_kwargs) + + self.norm_with_batch = False + if self.norm1 is not None: + signature = inspect.signature(self.norm1.forward) + self.norm_with_batch = "batch" in signature.parameters + + def reset_parameters(self): + r"""Resets all learnable parameters of the module.""" + if self.conv is not None: + self.conv.reset_parameters() + self.attn._reset_parameters() + reset(self.mlp) + if self.norm1 is not None: + self.norm1.reset_parameters() + if self.norm2 is not None: + self.norm2.reset_parameters() + if self.norm3 is not None: + 
self.norm3.reset_parameters() + + def forward( + self, + x: Tensor, + edge_index: Adj, + batch: Optional[torch.Tensor] = None, + **kwargs, + ) -> Tensor: + r"""Runs the forward pass of the module.""" + hs = [] + if self.conv is not None: # Local MPNN. + h = self.conv(x, edge_index, **kwargs) + h = F.dropout(h, p=self.dropout, training=self.training) + h = h + x + if self.norm1 is not None: + if self.norm_with_batch: + h = self.norm1(h, batch=batch) + else: + h = self.norm1(h) + hs.append(h) + + # Global attention transformer-style model. + h, mask = to_dense_batch(x, batch) + + if isinstance(self.attn, torch.nn.MultiheadAttention): + h, _ = self.attn(h, h, h, key_padding_mask=~mask, need_weights=False) + elif isinstance(self.attn, PerformerAttention): + h = self.attn(h, mask=mask) + + h = h[mask] + h = F.dropout(h, p=self.dropout, training=self.training) + h = h + x # Residual connection. + if self.norm2 is not None: + if self.norm_with_batch: + h = self.norm2(h, batch=batch) + else: + h = self.norm2(h) + hs.append(h) + + out = sum(hs) # Combine local and global outputs. 
+ + out = out + self.mlp(out) + if self.norm3 is not None: + if self.norm_with_batch: + out = self.norm3(out, batch=batch) + else: + out = self.norm3(out) + + return out + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}({self.channels}, " + f"conv={self.conv}, heads={self.heads}, " + f"attn_type={self.attn_type})" + ) + + +model_cls = GPSConv + + +if __name__ == "__main__": + node_features = torch.load("node_features.pt") + edge_index = torch.load("edge_index.pt") + + # Model instantiation and forward pass + model = GPSConv(channels=node_features.size(-1), conv=MessagePassing()) + output = model(node_features, edge_index) + + # Save output to a file + torch.save(output, "gt_output.pt") diff --git a/alphaagent/components/coder/model_coder/benchmark/gt_code/linkx.py b/alphaagent/components/coder/model_coder/benchmark/gt_code/linkx.py new file mode 100755 index 00000000..fc71e86d --- /dev/null +++ b/alphaagent/components/coder/model_coder/benchmark/gt_code/linkx.py @@ -0,0 +1,187 @@ +import math + +import torch +from torch import Tensor +from torch.nn import BatchNorm1d, Parameter +from torch_geometric.nn import inits +from torch_geometric.nn.conv import MessagePassing +from torch_geometric.nn.models import MLP +from torch_geometric.typing import Adj, OptTensor +from torch_geometric.utils import spmm + + +class SparseLinear(MessagePassing): + def __init__(self, in_channels: int, out_channels: int, bias: bool = True): + super().__init__(aggr="add") + self.in_channels = in_channels + self.out_channels = out_channels + + self.weight = Parameter(torch.empty(in_channels, out_channels)) + if bias: + self.bias = Parameter(torch.empty(out_channels)) + else: + self.register_parameter("bias", None) + + self.reset_parameters() + + def reset_parameters(self): + inits.kaiming_uniform(self.weight, fan=self.in_channels, a=math.sqrt(5)) + inits.uniform(self.in_channels, self.bias) + + def forward( + self, + edge_index: Adj, + edge_weight: OptTensor = None, + ) 
-> Tensor: + # propagate_type: (weight: Tensor, edge_weight: OptTensor) + out = self.propagate(edge_index, weight=self.weight, edge_weight=edge_weight) + + if self.bias is not None: + out = out + self.bias + + return out + + def message(self, weight_j: Tensor, edge_weight: OptTensor) -> Tensor: + if edge_weight is None: + return weight_j + else: + return edge_weight.view(-1, 1) * weight_j + + def message_and_aggregate(self, adj_t: Adj, weight: Tensor) -> Tensor: + return spmm(adj_t, weight, reduce=self.aggr) + + +class LINKX(torch.nn.Module): + r"""The LINKX model from the `"Large Scale Learning on Non-Homophilous + Graphs: New Benchmarks and Strong Simple Methods" + `_ paper. + + .. math:: + \mathbf{H}_{\mathbf{A}} &= \textrm{MLP}_{\mathbf{A}}(\mathbf{A}) + + \mathbf{H}_{\mathbf{X}} &= \textrm{MLP}_{\mathbf{X}}(\mathbf{X}) + + \mathbf{Y} &= \textrm{MLP}_{f} \left( \sigma \left( \mathbf{W} + [\mathbf{H}_{\mathbf{A}}, \mathbf{H}_{\mathbf{X}}] + + \mathbf{H}_{\mathbf{A}} + \mathbf{H}_{\mathbf{X}} \right) \right) + + .. note:: + + For an example of using LINKX, see `examples/linkx.py `_. + + Args: + num_nodes (int): The number of nodes in the graph. + in_channels (int): Size of each input sample, or :obj:`-1` to derive + the size from the first input(s) to the forward method. + hidden_channels (int): Size of each hidden sample. + out_channels (int): Size of each output sample. + num_layers (int): Number of layers of :math:`\textrm{MLP}_{f}`. + num_edge_layers (int, optional): Number of layers of + :math:`\textrm{MLP}_{\mathbf{A}}`. (default: :obj:`1`) + num_node_layers (int, optional): Number of layers of + :math:`\textrm{MLP}_{\mathbf{X}}`. (default: :obj:`1`) + dropout (float, optional): Dropout probability of each hidden + embedding. 
(default: :obj:`0.0`) + """ + + def __init__( + self, + num_nodes: int, + in_channels: int, + hidden_channels: int, + out_channels: int, + num_layers: int, + num_edge_layers: int = 1, + num_node_layers: int = 1, + dropout: float = 0.0, + ): + super().__init__() + + self.num_nodes = num_nodes + self.in_channels = in_channels + self.out_channels = out_channels + self.num_edge_layers = num_edge_layers + + self.edge_lin = SparseLinear(num_nodes, hidden_channels) + + if self.num_edge_layers > 1: + self.edge_norm = BatchNorm1d(hidden_channels) + channels = [hidden_channels] * num_edge_layers + self.edge_mlp = MLP(channels, dropout=0.0, act_first=True) + else: + self.edge_norm = None + self.edge_mlp = None + + channels = [in_channels] + [hidden_channels] * num_node_layers + self.node_mlp = MLP(channels, dropout=0.0, act_first=True) + + self.cat_lin1 = torch.nn.Linear(hidden_channels, hidden_channels) + self.cat_lin2 = torch.nn.Linear(hidden_channels, hidden_channels) + + channels = [hidden_channels] * num_layers + [out_channels] + self.final_mlp = MLP(channels, dropout=dropout, act_first=True) + + self.reset_parameters() + + def reset_parameters(self): + r"""Resets all learnable parameters of the module.""" + self.edge_lin.reset_parameters() + if self.edge_norm is not None: + self.edge_norm.reset_parameters() + if self.edge_mlp is not None: + self.edge_mlp.reset_parameters() + self.node_mlp.reset_parameters() + self.cat_lin1.reset_parameters() + self.cat_lin2.reset_parameters() + self.final_mlp.reset_parameters() + + def forward( + self, + x: OptTensor, + edge_index: Adj, + edge_weight: OptTensor = None, + ) -> Tensor: + """""" # noqa: D419 + out = self.edge_lin(edge_index, edge_weight) + + if self.edge_norm is not None and self.edge_mlp is not None: + out = out.relu_() + out = self.edge_norm(out) + out = self.edge_mlp(out) + + out = out + self.cat_lin1(out) + + if x is not None: + x = self.node_mlp(x) + out = out + x + out = out + self.cat_lin2(x) + + return 
self.final_mlp(out.relu_()) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(num_nodes={self.num_nodes}, " + f"in_channels={self.in_channels}, " + f"out_channels={self.out_channels})" + ) + + +model_cls = LINKX + +if __name__ == "__main__": + node_features = torch.load("node_features.pt") + edge_index = torch.load("edge_index.pt") + + # Model instantiation and forward pass + model = LINKX( + num_nodes=node_features.size(0), + in_channels=node_features.size(1), + hidden_channels=node_features.size(1), + out_channels=node_features.size(1), + num_layers=1, + ) + output = model(node_features, edge_index) + + # Save output to a file + torch.save(output, "gt_output.pt") diff --git a/alphaagent/components/coder/model_coder/benchmark/gt_code/pmlp.py b/alphaagent/components/coder/model_coder/benchmark/gt_code/pmlp.py new file mode 100755 index 00000000..3dadd76d --- /dev/null +++ b/alphaagent/components/coder/model_coder/benchmark/gt_code/pmlp.py @@ -0,0 +1,118 @@ +from typing import Optional + +import torch +import torch.nn.functional as F +from torch import Tensor +from torch_geometric.nn import SimpleConv +from torch_geometric.nn.dense.linear import Linear + + +class PMLP(torch.nn.Module): + r"""The P(ropagational)MLP model from the `"Graph Neural Networks are + Inherently Good Generalizers: Insights by Bridging GNNs and MLPs" + `_ paper. + :class:`PMLP` is identical to a standard MLP during training, but then + adopts a GNN architecture during testing. + + Args: + in_channels (int): Size of each input sample. + hidden_channels (int): Size of each hidden sample. + out_channels (int): Size of each output sample. + num_layers (int): The number of layers. + dropout (float, optional): Dropout probability of each hidden + embedding. (default: :obj:`0.`) + norm (bool, optional): If set to :obj:`False`, will not apply batch + normalization. (default: :obj:`True`) + bias (bool, optional): If set to :obj:`False`, the module + will not learn additive biases. 
(default: :obj:`True`) + """ + + def __init__( + self, + in_channels: int, + hidden_channels: int, + out_channels: int, + num_layers: int, + dropout: float = 0.0, + norm: bool = True, + bias: bool = True, + ): + super().__init__() + + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.out_channels = out_channels + self.num_layers = num_layers + self.dropout = dropout + self.bias = bias + + self.lins = torch.nn.ModuleList() + self.lins.append(Linear(in_channels, hidden_channels, self.bias)) + for _ in range(self.num_layers - 2): + lin = Linear(hidden_channels, hidden_channels, self.bias) + self.lins.append(lin) + self.lins.append(Linear(hidden_channels, out_channels, self.bias)) + + self.norm = None + if norm: + self.norm = torch.nn.BatchNorm1d( + hidden_channels, + affine=False, + track_running_stats=False, + ) + + self.conv = SimpleConv(aggr="mean", combine_root="self_loop") + + self.reset_parameters() + + def reset_parameters(self): + r"""Resets all learnable parameters of the module.""" + for lin in self.lins: + torch.nn.init.xavier_uniform_(lin.weight, gain=1.414) + if self.bias: + torch.nn.init.zeros_(lin.bias) + + def forward( + self, + x: torch.Tensor, + edge_index: Optional[Tensor] = None, + ) -> torch.Tensor: + """""" # noqa: D419 + if not self.training and edge_index is None: + raise ValueError(f"'edge_index' needs to be present during " f"inference in '{self.__class__.__name__}'") + + for i in range(self.num_layers): + x = x @ self.lins[i].weight.t() + if not self.training: + x = self.conv(x, edge_index) + if self.bias: + x = x + self.lins[i].bias + if i != self.num_layers - 1: + if self.norm is not None: + x = self.norm(x) + x = x.relu() + x = F.dropout(x, p=self.dropout, training=self.training) + + return x + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.in_channels}, " f"{self.out_channels}, num_layers={self.num_layers})" + + +model_cls = PMLP + +if __name__ == "__main__": + node_features = 
torch.load("node_features.pt") + edge_index = torch.load("edge_index.pt") + + # Model instantiation and forward pass + model = PMLP( + in_channels=node_features.size(-1), + hidden_channels=node_features.size(-1), + out_channels=node_features.size(-1), + num_layers=1, + ) + output = model(node_features, edge_index) + + # Save output to a file + torch.save(output, "gt_output.pt") diff --git a/alphaagent/components/coder/model_coder/benchmark/gt_code/visnet.py b/alphaagent/components/coder/model_coder/benchmark/gt_code/visnet.py new file mode 100755 index 00000000..67cff208 --- /dev/null +++ b/alphaagent/components/coder/model_coder/benchmark/gt_code/visnet.py @@ -0,0 +1,1191 @@ +import math +from typing import Optional, Tuple + +import torch +from torch import Tensor +from torch.autograd import grad +from torch.nn import Embedding, LayerNorm, Linear, Parameter +from torch_geometric.nn import MessagePassing, radius_graph +from torch_geometric.utils import scatter + + +class CosineCutoff(torch.nn.Module): + r"""Appies a cosine cutoff to the input distances. + + .. math:: + \text{cutoffs} = + \begin{cases} + 0.5 * (\cos(\frac{\text{distances} * \pi}{\text{cutoff}}) + 1.0), + & \text{if } \text{distances} < \text{cutoff} \\ + 0, & \text{otherwise} + \end{cases} + + Args: + cutoff (float): A scalar that determines the point at which the cutoff + is applied. + """ + + def __init__(self, cutoff: float) -> None: + super().__init__() + self.cutoff = cutoff + + def forward(self, distances: Tensor) -> Tensor: + r"""Applies a cosine cutoff to the input distances. + + Args: + distances (torch.Tensor): A tensor of distances. + + Returns: + cutoffs (torch.Tensor): A tensor where the cosine function + has been applied to the distances, + but any values that exceed the cutoff are set to 0. 
+ """ + cutoffs = 0.5 * ((distances * math.pi / self.cutoff).cos() + 1.0) + cutoffs = cutoffs * (distances < self.cutoff).float() + return cutoffs + + +class ExpNormalSmearing(torch.nn.Module): + r"""Applies exponential normal smearing to the input distances. + + .. math:: + \text{smeared\_dist} = \text{CosineCutoff}(\text{dist}) + * e^{-\beta * (e^{\alpha * (-\text{dist})} - \text{means})^2} + + Args: + cutoff (float, optional): A scalar that determines the point at which + the cutoff is applied. (default: :obj:`5.0`) + num_rbf (int, optional): The number of radial basis functions. + (default: :obj:`128`) + trainable (bool, optional): If set to :obj:`False`, the means and betas + of the RBFs will not be trained. (default: :obj:`True`) + """ + + def __init__( + self, + cutoff: float = 5.0, + num_rbf: int = 128, + trainable: bool = True, + ) -> None: + super().__init__() + self.cutoff = cutoff + self.num_rbf = num_rbf + self.trainable = trainable + + self.cutoff_fn = CosineCutoff(cutoff) + self.alpha = 5.0 / cutoff + + means, betas = self._initial_params() + if trainable: + self.register_parameter("means", Parameter(means)) + self.register_parameter("betas", Parameter(betas)) + else: + self.register_buffer("means", means) + self.register_buffer("betas", betas) + + def _initial_params(self) -> Tuple[Tensor, Tensor]: + r"""Initializes the means and betas for the radial basis functions.""" + start_value = torch.exp(torch.tensor(-self.cutoff)) + means = torch.linspace(start_value, 1, self.num_rbf) + betas = torch.tensor([(2 / self.num_rbf * (1 - start_value)) ** -2] * self.num_rbf) + return means, betas + + def reset_parameters(self): + r"""Resets the means and betas to their initial values.""" + means, betas = self._initial_params() + self.means.data.copy_(means) + self.betas.data.copy_(betas) + + def forward(self, dist: Tensor) -> Tensor: + r"""Applies the exponential normal smearing to the input distance. + + Args: + dist (torch.Tensor): A tensor of distances. 
+ """ + dist = dist.unsqueeze(-1) + smeared_dist = self.cutoff_fn(dist) * (-self.betas * ((self.alpha * (-dist)).exp() - self.means) ** 2).exp() + return smeared_dist + + +class Sphere(torch.nn.Module): + r"""Computes spherical harmonics of the input data. + + This module computes the spherical harmonics up to a given degree + :obj:`lmax` for the input tensor of 3D vectors. + The vectors are assumed to be given in Cartesian coordinates. + See `here `_ + for mathematical details. + + Args: + lmax (int, optional): The maximum degree of the spherical harmonics. + (default: :obj:`2`) + """ + + def __init__(self, lmax: int = 2) -> None: + super().__init__() + self.lmax = lmax + + def forward(self, edge_vec: Tensor) -> Tensor: + r"""Computes the spherical harmonics of the input tensor. + + Args: + edge_vec (torch.Tensor): A tensor of 3D vectors. + """ + return self._spherical_harmonics( + self.lmax, + edge_vec[..., 0], + edge_vec[..., 1], + edge_vec[..., 2], + ) + + @staticmethod + def _spherical_harmonics( + lmax: int, + x: Tensor, + y: Tensor, + z: Tensor, + ) -> Tensor: + r"""Computes the spherical harmonics up to degree :obj:`lmax` of the + input vectors. + + Args: + lmax (int): The maximum degree of the spherical harmonics. + x (torch.Tensor): The x coordinates of the vectors. + y (torch.Tensor): The y coordinates of the vectors. + z (torch.Tensor): The z coordinates of the vectors. 
+ """ + sh_1_0, sh_1_1, sh_1_2 = x, y, z + + if lmax == 1: + return torch.stack([sh_1_0, sh_1_1, sh_1_2], dim=-1) + + sh_2_0 = math.sqrt(3.0) * x * z + sh_2_1 = math.sqrt(3.0) * x * y + y2 = y.pow(2) + x2z2 = x.pow(2) + z.pow(2) + sh_2_2 = y2 - 0.5 * x2z2 + sh_2_3 = math.sqrt(3.0) * y * z + sh_2_4 = math.sqrt(3.0) / 2.0 * (z.pow(2) - x.pow(2)) + + if lmax == 2: + return torch.stack( + [ + sh_1_0, + sh_1_1, + sh_1_2, + sh_2_0, + sh_2_1, + sh_2_2, + sh_2_3, + sh_2_4, + ], + dim=-1, + ) + + raise ValueError(f"'lmax' needs to be 1 or 2 (got {lmax})") + + +class VecLayerNorm(torch.nn.Module): + r"""Applies layer normalization to the input data. + + This module applies a custom layer normalization to a tensor of vectors. + The normalization can either be :obj:`"max_min"` normalization, or no + normalization. + + Args: + hidden_channels (int): The number of hidden channels in the input. + trainable (bool): If set to :obj:`True`, the normalization weights are + trainable parameters. + norm_type (str, optional): The type of normalization to apply, one of + :obj:`"max_min"` or :obj:`None`. (default: :obj:`"max_min"`) + """ + + def __init__( + self, + hidden_channels: int, + trainable: bool, + norm_type: Optional[str] = "max_min", + ) -> None: + super().__init__() + + self.hidden_channels = hidden_channels + self.norm_type = norm_type + self.eps = 1e-12 + + weight = torch.ones(self.hidden_channels) + if trainable: + self.register_parameter("weight", Parameter(weight)) + else: + self.register_buffer("weight", weight) + + self.reset_parameters() + + def reset_parameters(self): + r"""Resets the normalization weights to their initial values.""" + torch.nn.init.ones_(self.weight) + + def max_min_norm(self, vec: Tensor) -> Tensor: + r"""Applies max-min normalization to the input tensor. + + .. 
math:: + \text{dist} = ||\text{vec}||_2 + \text{direct} = \frac{\text{vec}}{\text{dist}} + \text{max\_val} = \max(\text{dist}) + \text{min\_val} = \min(\text{dist}) + \text{delta} = \text{max\_val} - \text{min\_val} + \text{dist} = \frac{\text{dist} - \text{min\_val}}{\text{delta}} + \text{normed\_vec} = \max(0, \text{dist}) \cdot \text{direct} + + Args: + vec (torch.Tensor): The input tensor. + """ + dist = torch.norm(vec, dim=1, keepdim=True) + + if (dist == 0).all(): + return torch.zeros_like(vec) + + dist = dist.clamp(min=self.eps) + direct = vec / dist + + max_val, _ = dist.max(dim=-1) + min_val, _ = dist.min(dim=-1) + delta = (max_val - min_val).view(-1) + delta = torch.where(delta == 0, torch.ones_like(delta), delta) + dist = (dist - min_val.view(-1, 1, 1)) / delta.view(-1, 1, 1) + + return dist.relu() * direct + + def forward(self, vec: Tensor) -> Tensor: + r"""Applies the layer normalization to the input tensor. + + Args: + vec (torch.Tensor): The input tensor. + """ + if vec.size(1) == 3: + if self.norm_type == "max_min": + vec = self.max_min_norm(vec) + return vec * self.weight.unsqueeze(0).unsqueeze(0) + elif vec.size(1) == 8: + vec1, vec2 = torch.split(vec, [3, 5], dim=1) + if self.norm_type == "max_min": + vec1 = self.max_min_norm(vec1) + vec2 = self.max_min_norm(vec2) + vec = torch.cat([vec1, vec2], dim=1) + return vec * self.weight.unsqueeze(0).unsqueeze(0) + + raise ValueError(f"'{self.__class__.__name__}' only support 3 or 8 " f"channels (got {vec.size(1)})") + + +class Distance(torch.nn.Module): + r"""Computes the pairwise distances between atoms in a molecule. + + This module computes the pairwise distances between atoms in a molecule, + represented by their positions :obj:`pos`. + The distances are computed only between points that are within a certain + cutoff radius. + + Args: + cutoff (float): The cutoff radius beyond + which distances are not computed. 
+ max_num_neighbors (int, optional): The maximum number of neighbors + considered for each point. (default: :obj:`32`) + add_self_loops (bool, optional): If set to :obj:`False`, will not + include self-loops. (default: :obj:`True`) + """ + + def __init__( + self, + cutoff: float, + max_num_neighbors: int = 32, + add_self_loops: bool = True, + ) -> None: + super().__init__() + self.cutoff = cutoff + self.max_num_neighbors = max_num_neighbors + self.add_self_loops = add_self_loops + + def forward( + self, + pos: Tensor, + batch: Tensor, + ) -> Tuple[Tensor, Tensor, Tensor]: + r"""Computes the pairwise distances between atoms in the molecule. + + Args: + pos (torch.Tensor): The positions of the atoms in the molecule. + batch (torch.Tensor): A batch vector, which assigns each node to a + specific example. + + Returns: + edge_index (torch.Tensor): The indices of the edges in the graph. + edge_weight (torch.Tensor): The distances between connected nodes. + edge_vec (torch.Tensor): The vector differences between connected + nodes. + """ + edge_index = radius_graph( + pos, + r=self.cutoff, + batch=batch, + loop=self.add_self_loops, + max_num_neighbors=self.max_num_neighbors, + ) + edge_vec = pos[edge_index[0]] - pos[edge_index[1]] + + if self.add_self_loops: + mask = edge_index[0] != edge_index[1] + edge_weight = torch.zeros(edge_vec.size(0), device=edge_vec.device) + edge_weight[mask] = torch.norm(edge_vec[mask], dim=-1) + else: + edge_weight = torch.norm(edge_vec, dim=-1) + + return edge_index, edge_weight, edge_vec + + +class NeighborEmbedding(MessagePassing): + r"""The :class:`NeighborEmbedding` module from the `"Enhancing Geometric + Representations for Molecules with Equivariant Vector-Scalar Interactive + Message Passing" `_ paper. + + Args: + hidden_channels (int): The number of hidden channels in the node + embeddings. + num_rbf (int): The number of radial basis functions. + cutoff (float): The cutoff distance. + max_z (int, optional): The maximum atomic numbers. 
+ (default: :obj:`100`) + """ + + def __init__( + self, + hidden_channels: int, + num_rbf: int, + cutoff: float, + max_z: int = 100, + ) -> None: + super().__init__(aggr="add") + self.embedding = Embedding(max_z, hidden_channels) + self.distance_proj = Linear(num_rbf, hidden_channels) + self.combine = Linear(hidden_channels * 2, hidden_channels) + self.cutoff = CosineCutoff(cutoff) + + self.reset_parameters() + + def reset_parameters(self): + r"""Resets the parameters of the module.""" + self.embedding.reset_parameters() + torch.nn.init.xavier_uniform_(self.distance_proj.weight) + torch.nn.init.xavier_uniform_(self.combine.weight) + self.distance_proj.bias.data.zero_() + self.combine.bias.data.zero_() + + def forward( + self, + z: Tensor, + x: Tensor, + edge_index: Tensor, + edge_weight: Tensor, + edge_attr: Tensor, + ) -> Tensor: + r"""Computes the neighborhood embedding of the nodes in the graph. + + Args: + z (torch.Tensor): The atomic numbers. + x (torch.Tensor): The node features. + edge_index (torch.Tensor): The indices of the edges. + edge_weight (torch.Tensor): The weights of the edges. + edge_attr (torch.Tensor): The edge features. + + Returns: + x_neighbors (torch.Tensor): The neighborhood embeddings of the + nodes. + """ + mask = edge_index[0] != edge_index[1] + if not mask.all(): + edge_index = edge_index[:, mask] + edge_weight = edge_weight[mask] + edge_attr = edge_attr[mask] + + C = self.cutoff(edge_weight) + W = self.distance_proj(edge_attr) * C.view(-1, 1) + + x_neighbors = self.embedding(z) + x_neighbors = self.propagate(edge_index, x=x_neighbors, W=W) + x_neighbors = self.combine(torch.cat([x, x_neighbors], dim=1)) + return x_neighbors + + def message(self, x_j: Tensor, W: Tensor) -> Tensor: + return x_j * W + + +class EdgeEmbedding(torch.nn.Module): + r"""The :class:`EdgeEmbedding` module from the `"Enhancing Geometric + Representations for Molecules with Equivariant Vector-Scalar Interactive + Message Passing" `_ paper. 
+ + Args: + num_rbf (int): The number of radial basis functions. + hidden_channels (int): The number of hidden channels in the node + embeddings. + """ + + def __init__(self, num_rbf: int, hidden_channels: int) -> None: + super().__init__() + self.edge_proj = Linear(num_rbf, hidden_channels) + self.reset_parameters() + + def reset_parameters(self): + r"""Resets the parameters of the module.""" + torch.nn.init.xavier_uniform_(self.edge_proj.weight) + self.edge_proj.bias.data.zero_() + + def forward( + self, + edge_index: Tensor, + edge_attr: Tensor, + x: Tensor, + ) -> Tensor: + r"""Computes the edge embeddings of the graph. + + Args: + edge_index (torch.Tensor): The indices of the edges. + edge_attr (torch.Tensor): The edge features. + x (torch.Tensor): The node features. + + Returns: + out_edge_attr (torch.Tensor): The edge embeddings. + """ + x_j = x[edge_index[0]] + x_i = x[edge_index[1]] + return (x_i + x_j) * self.edge_proj(edge_attr) + + +class ViS_MP(MessagePassing): + r"""The message passing module without vertex geometric features of the + equivariant vector-scalar interactive graph neural network (ViSNet) + from the `"Enhancing Geometric Representations for Molecules with + Equivariant Vector-Scalar Interactive Message Passing" + `_ paper. + + Args: + num_heads (int): The number of attention heads. + hidden_channels (int): The number of hidden channels in the node + embeddings. + cutoff (float): The cutoff distance. + vecnorm_type (str, optional): The type of normalization to apply to the + vectors. + trainable_vecnorm (bool): Whether the normalization weights are + trainable. + last_layer (bool, optional): Whether this is the last layer in the + model. 
(default: :obj:`False`) + """ + + def __init__( + self, + num_heads: int, + hidden_channels: int, + cutoff: float, + vecnorm_type: Optional[str], + trainable_vecnorm: bool, + last_layer: bool = False, + ) -> None: + super().__init__(aggr="add", node_dim=0) + + if hidden_channels % num_heads != 0: + raise ValueError( + f"The number of hidden channels (got {hidden_channels}) must " + f"be evenly divisible by the number of attention heads " + f"(got {num_heads})" + ) + + self.num_heads = num_heads + self.hidden_channels = hidden_channels + self.head_dim = hidden_channels // num_heads + self.last_layer = last_layer + + self.layernorm = LayerNorm(hidden_channels) + self.vec_layernorm = VecLayerNorm( + hidden_channels, + trainable=trainable_vecnorm, + norm_type=vecnorm_type, + ) + + self.act = torch.nn.SiLU() + self.attn_activation = torch.nn.SiLU() + + self.cutoff = CosineCutoff(cutoff) + + self.vec_proj = Linear(hidden_channels, hidden_channels * 3, False) + + self.q_proj = Linear(hidden_channels, hidden_channels) + self.k_proj = Linear(hidden_channels, hidden_channels) + self.v_proj = Linear(hidden_channels, hidden_channels) + self.dk_proj = Linear(hidden_channels, hidden_channels) + self.dv_proj = Linear(hidden_channels, hidden_channels) + + self.s_proj = Linear(hidden_channels, hidden_channels * 2) + if not self.last_layer: + self.f_proj = Linear(hidden_channels, hidden_channels) + self.w_src_proj = Linear(hidden_channels, hidden_channels, False) + self.w_trg_proj = Linear(hidden_channels, hidden_channels, False) + + self.o_proj = Linear(hidden_channels, hidden_channels * 3) + + self.reset_parameters() + + @staticmethod + def vector_rejection(vec: Tensor, d_ij: Tensor) -> Tensor: + r"""Computes the component of :obj:`vec` orthogonal to :obj:`d_ij`. + + Args: + vec (torch.Tensor): The input vector. + d_ij (torch.Tensor): The reference vector. 
+ """ + vec_proj = (vec * d_ij.unsqueeze(2)).sum(dim=1, keepdim=True) + return vec - vec_proj * d_ij.unsqueeze(2) + + def reset_parameters(self): + r"""Resets the parameters of the module.""" + self.layernorm.reset_parameters() + self.vec_layernorm.reset_parameters() + torch.nn.init.xavier_uniform_(self.q_proj.weight) + self.q_proj.bias.data.zero_() + torch.nn.init.xavier_uniform_(self.k_proj.weight) + self.k_proj.bias.data.zero_() + torch.nn.init.xavier_uniform_(self.v_proj.weight) + self.v_proj.bias.data.zero_() + torch.nn.init.xavier_uniform_(self.o_proj.weight) + self.o_proj.bias.data.zero_() + torch.nn.init.xavier_uniform_(self.s_proj.weight) + self.s_proj.bias.data.zero_() + + if not self.last_layer: + torch.nn.init.xavier_uniform_(self.f_proj.weight) + self.f_proj.bias.data.zero_() + torch.nn.init.xavier_uniform_(self.w_src_proj.weight) + torch.nn.init.xavier_uniform_(self.w_trg_proj.weight) + + torch.nn.init.xavier_uniform_(self.vec_proj.weight) + torch.nn.init.xavier_uniform_(self.dk_proj.weight) + self.dk_proj.bias.data.zero_() + torch.nn.init.xavier_uniform_(self.dv_proj.weight) + self.dv_proj.bias.data.zero_() + + def forward( + self, + x: Tensor, + vec: Tensor, + edge_index: Tensor, + r_ij: Tensor, + f_ij: Tensor, + d_ij: Tensor, + ) -> Tuple[Tensor, Tensor, Optional[Tensor]]: + r"""Computes the residual scalar and vector features of the nodes and + scalar featues of the edges. + + Args: + x (torch.Tensor): The scalar features of the nodes. + vec (torch.Tensor):The vector features of the nodes. + edge_index (torch.Tensor): The indices of the edges. + r_ij (torch.Tensor): The distances between connected nodes. + f_ij (torch.Tensor): The scalar features of the edges. + d_ij (torch.Tensor): The unit vectors of the edges + + Returns: + dx (torch.Tensor): The residual scalar features of the nodes. + dvec (torch.Tensor): The residual vector features of the nodes. 
+ df_ij (torch.Tensor, optional): The residual scalar features of the + edges, or :obj:`None` if this is the last layer. + """ + x = self.layernorm(x) + vec = self.vec_layernorm(vec) + + q = self.q_proj(x).reshape(-1, self.num_heads, self.head_dim) + k = self.k_proj(x).reshape(-1, self.num_heads, self.head_dim) + v = self.v_proj(x).reshape(-1, self.num_heads, self.head_dim) + dk = self.act(self.dk_proj(f_ij)) + dk = dk.reshape(-1, self.num_heads, self.head_dim) + dv = self.act(self.dv_proj(f_ij)) + dv = dv.reshape(-1, self.num_heads, self.head_dim) + + vec1, vec2, vec3 = torch.split(self.vec_proj(vec), self.hidden_channels, dim=-1) + vec_dot = (vec1 * vec2).sum(dim=1) + + x, vec_out = self.propagate(edge_index, q=q, k=k, v=v, dk=dk, dv=dv, vec=vec, r_ij=r_ij, d_ij=d_ij) + + o1, o2, o3 = torch.split(self.o_proj(x), self.hidden_channels, dim=1) + dx = vec_dot * o2 + o3 + dvec = vec3 * o1.unsqueeze(1) + vec_out + if not self.last_layer: + df_ij = self.edge_updater(edge_index, vec=vec, d_ij=d_ij, f_ij=f_ij) + return dx, dvec, df_ij + else: + return dx, dvec, None + + def message( + self, q_i: Tensor, k_j: Tensor, v_j: Tensor, vec_j: Tensor, dk: Tensor, dv: Tensor, r_ij: Tensor, d_ij: Tensor + ) -> Tuple[Tensor, Tensor]: + attn = (q_i * k_j * dk).sum(dim=-1) + attn = self.attn_activation(attn) * self.cutoff(r_ij).unsqueeze(1) + + v_j = v_j * dv + v_j = (v_j * attn.unsqueeze(2)).view(-1, self.hidden_channels) + + s1, s2 = torch.split(self.act(self.s_proj(v_j)), self.hidden_channels, dim=1) + vec_j = vec_j * s1.unsqueeze(1) + s2.unsqueeze(1) * d_ij.unsqueeze(2) + + return v_j, vec_j + + def edge_update(self, vec_i: Tensor, vec_j: Tensor, d_ij: Tensor, f_ij: Tensor) -> Tensor: + w1 = self.vector_rejection(self.w_trg_proj(vec_i), d_ij) + w2 = self.vector_rejection(self.w_src_proj(vec_j), -d_ij) + w_dot = (w1 * w2).sum(dim=1) + df_ij = self.act(self.f_proj(f_ij)) * w_dot + return df_ij + + def aggregate( + self, + features: Tuple[Tensor, Tensor], + index: Tensor, + ptr: 
class ViS_MP_Vertex(ViS_MP):
    r"""Message passing module with vertex geometric features of the
    equivariant vector-scalar interactive graph neural network (ViSNet), from
    the paper "Enhancing Geometric Representations for Molecules with
    Equivariant Vector-Scalar Interactive Message Passing".

    Compared with :class:`ViS_MP`, non-final layers use a ``f_proj`` with
    twice as many output channels and two extra bias-free projections
    (``t_src_proj`` / ``t_trg_proj``) that feed a second term of the edge
    update.

    Args:
        num_heads (int): The number of attention heads.
        hidden_channels (int): The number of hidden channels in the node
            embeddings.
        cutoff (float): The cutoff distance.
        vecnorm_type (str, optional): The type of normalization to apply to the
            vectors.
        trainable_vecnorm (bool): Whether the normalization weights are
            trainable.
        last_layer (bool, optional): Whether this is the last layer in the
            model. (default: :obj:`False`)
    """

    def __init__(
        self,
        num_heads: int,
        hidden_channels: int,
        cutoff: float,
        vecnorm_type: Optional[str],
        trainable_vecnorm: bool,
        last_layer: bool = False,
    ) -> None:
        super().__init__(num_heads, hidden_channels, cutoff, vecnorm_type, trainable_vecnorm, last_layer)

        if not self.last_layer:
            # Creation order is kept as-is so random initialisation is unchanged.
            self.f_proj = Linear(hidden_channels, hidden_channels * 2)
            self.t_src_proj = Linear(hidden_channels, hidden_channels, bias=False)
            self.t_trg_proj = Linear(hidden_channels, hidden_channels, bias=False)

        self.reset_parameters()

    def reset_parameters(self):
        r"""Resets the parameters of the module."""
        super().reset_parameters()

        if self.last_layer:
            return

        # This method can run before the ``t_*`` projections have been
        # assigned (it is also invoked at the end of ``__init__``), so each
        # projection is only re-initialised once it exists.
        for attr in ("t_src_proj", "t_trg_proj"):
            if hasattr(self, attr):
                torch.nn.init.xavier_uniform_(getattr(self, attr).weight)

    def edge_update(self, vec_i: Tensor, vec_j: Tensor, d_ij: Tensor, f_ij: Tensor) -> Tensor:
        r"""Computes the residual edge features from two rejection products.

        The activated ``f_proj(f_ij)`` is split into two halves of
        ``hidden_channels`` each; the first scales the ``w``-projection
        product and the second scales the ``t``-projection product.
        """
        w_target = self.vector_rejection(self.w_trg_proj(vec_i), d_ij)
        w_source = self.vector_rejection(self.w_src_proj(vec_j), -d_ij)
        w_product = (w_target * w_source).sum(dim=1)

        # NOTE(review): both ``t`` terms read ``vec_i`` (the ``w`` terms use
        # ``vec_i`` / ``vec_j``). Kept exactly as in the original; confirm
        # against the reference implementation that this is intended.
        t_target = self.vector_rejection(self.t_trg_proj(vec_i), d_ij)
        t_source = self.vector_rejection(self.t_src_proj(vec_i), -d_ij)
        t_product = (t_target * t_source).sum(dim=1)

        edge_gates = self.act(self.f_proj(f_ij))
        w_gate, t_gate = torch.split(edge_gates, self.hidden_channels, dim=-1)

        return w_gate * w_product + t_gate * t_product
+ (default: :obj:`False`) + """ + + def __init__( + self, + lmax: int = 1, + vecnorm_type: Optional[str] = None, + trainable_vecnorm: bool = False, + num_heads: int = 8, + num_layers: int = 6, + hidden_channels: int = 128, + num_rbf: int = 32, + trainable_rbf: bool = False, + max_z: int = 100, + cutoff: float = 5.0, + max_num_neighbors: int = 32, + vertex: bool = False, + ) -> None: + super().__init__() + + self.lmax = lmax + self.vecnorm_type = vecnorm_type + self.trainable_vecnorm = trainable_vecnorm + self.num_heads = num_heads + self.num_layers = num_layers + self.hidden_channels = hidden_channels + self.num_rbf = num_rbf + self.trainable_rbf = trainable_rbf + self.max_z = max_z + self.cutoff = cutoff + self.max_num_neighbors = max_num_neighbors + + self.embedding = Embedding(max_z, hidden_channels) + self.distance = Distance(cutoff, max_num_neighbors=max_num_neighbors) + self.sphere = Sphere(lmax=lmax) + self.distance_expansion = ExpNormalSmearing(cutoff, num_rbf, trainable_rbf) + self.neighbor_embedding = NeighborEmbedding(hidden_channels, num_rbf, cutoff, max_z) + self.edge_embedding = EdgeEmbedding(num_rbf, hidden_channels) + + self.vis_mp_layers = torch.nn.ModuleList() + vis_mp_kwargs = dict( + num_heads=num_heads, + hidden_channels=hidden_channels, + cutoff=cutoff, + vecnorm_type=vecnorm_type, + trainable_vecnorm=trainable_vecnorm, + ) + vis_mp_class = ViS_MP if not vertex else ViS_MP_Vertex + for _ in range(num_layers - 1): + layer = vis_mp_class(last_layer=False, **vis_mp_kwargs) + self.vis_mp_layers.append(layer) + self.vis_mp_layers.append(vis_mp_class(last_layer=True, **vis_mp_kwargs)) + + self.out_norm = LayerNorm(hidden_channels) + self.vec_out_norm = VecLayerNorm( + hidden_channels, + trainable=trainable_vecnorm, + norm_type=vecnorm_type, + ) + + self.reset_parameters() + + def reset_parameters(self): + r"""Resets the parameters of the module.""" + self.embedding.reset_parameters() + self.distance_expansion.reset_parameters() + 
class GatedEquivariantBlock(torch.nn.Module):
    r"""Applies a gated equivariant operation to scalar features and vector
    features, from the paper "Enhancing Geometric Representations for
    Molecules with Equivariant Vector-Scalar Interactive Message Passing".

    Args:
        hidden_channels (int): The number of hidden channels in the node
            embeddings.
        out_channels (int): The number of output channels.
        intermediate_channels (int, optional): The number of channels in the
            intermediate layer, or :obj:`None` to use the same number as
            :obj:`hidden_channels`. (default: :obj:`None`)
        scalar_activation (bool, optional): Whether to apply a scalar
            activation function to the output node features.
            (default: :obj:`False`)
    """

    def __init__(
        self,
        hidden_channels: int,
        out_channels: int,
        intermediate_channels: Optional[int] = None,
        scalar_activation: bool = False,
    ) -> None:
        super().__init__()
        self.out_channels = out_channels

        mid_channels = hidden_channels if intermediate_channels is None else intermediate_channels

        self.vec1_proj = Linear(hidden_channels, hidden_channels, bias=False)
        self.vec2_proj = Linear(hidden_channels, out_channels, bias=False)

        # Consumes [scalar features, vector norms] and emits the new scalar
        # features together with the gate for the vector branch.
        self.update_net = torch.nn.Sequential(
            Linear(hidden_channels * 2, mid_channels),
            torch.nn.SiLU(),
            Linear(mid_channels, out_channels * 2),
        )

        self.act = torch.nn.SiLU() if scalar_activation else None

        self.reset_parameters()

    def reset_parameters(self):
        r"""Resets the parameters of the module."""
        for projection in (self.vec1_proj, self.vec2_proj):
            torch.nn.init.xavier_uniform_(projection.weight)

        # Indices 0 and 2 are the two Linear layers around the SiLU.
        for position in (0, 2):
            linear = self.update_net[position]
            torch.nn.init.xavier_uniform_(linear.weight)
            linear.bias.data.zero_()

    def forward(self, x: Tensor, v: Tensor) -> Tuple[Tensor, Tensor]:
        r"""Applies a gated equivariant operation to node features and vector
        features.

        Args:
            x (torch.Tensor): The scalar features of the nodes.
            v (torch.Tensor): The vector features of the nodes.

        Returns:
            The updated scalar features and the gated vector features.
        """
        vec_norm = torch.norm(self.vec1_proj(v), dim=-2)
        vec_projected = self.vec2_proj(v)

        fused = self.update_net(torch.cat([x, vec_norm], dim=-1))
        x, gate = torch.split(fused, self.out_channels, dim=-1)
        v = gate.unsqueeze(1) * vec_projected

        if self.act is not None:
            x = self.act(x)

        return x, v
+ (default: :obj:`100`) + """ + + def __init__( + self, + atomref: Optional[Tensor] = None, + max_z: int = 100, + ) -> None: + super().__init__() + + if atomref is None: + atomref = torch.zeros(max_z, 1) + else: + atomref = torch.as_tensor(atomref) + + if atomref.ndim == 1: + atomref = atomref.view(-1, 1) + + self.register_buffer("initial_atomref", atomref) + self.atomref = Embedding(len(atomref), 1) + + self.reset_parameters() + + def reset_parameters(self): + r"""Resets the parameters of the module.""" + self.atomref.weight.data.copy_(self.initial_atomref) + + def forward(self, x: Tensor, z: Tensor) -> Tensor: + r"""Adds atom reference values to atomic energies. + + Args: + x (torch.Tensor): The atomic energies. + z (torch.Tensor): The atomic numbers. + """ + return x + self.atomref(z) + + +class ViSNet(torch.nn.Module): + r"""A :pytorch:`PyTorch` module that implements the equivariant + vector-scalar interactive graph neural network (ViSNet) from the + `"Enhancing Geometric Representations for Molecules with Equivariant + Vector-Scalar Interactive Message Passing" + `_ paper. + + Args: + lmax (int, optional): The maximum degree of the spherical harmonics. + (default: :obj:`1`) + vecnorm_type (str, optional): The type of normalization to apply to the + vectors. (default: :obj:`None`) + trainable_vecnorm (bool, optional): Whether the normalization weights + are trainable. (default: :obj:`False`) + num_heads (int, optional): The number of attention heads. + (default: :obj:`8`) + num_layers (int, optional): The number of layers in the network. + (default: :obj:`6`) + hidden_channels (int, optional): The number of hidden channels in the + node embeddings. (default: :obj:`128`) + num_rbf (int, optional): The number of radial basis functions. + (default: :obj:`32`) + trainable_rbf (bool, optional): Whether the radial basis function + parameters are trainable. (default: :obj:`False`) + max_z (int, optional): The maximum atomic numbers. 
+ (default: :obj:`100`) + cutoff (float, optional): The cutoff distance. (default: :obj:`5.0`) + max_num_neighbors (int, optional): The maximum number of neighbors + considered for each atom. (default: :obj:`32`) + vertex (bool, optional): Whether to use vertex geometric features. + (default: :obj:`False`) + atomref (torch.Tensor, optional): A tensor of atom reference values, + or :obj:`None` if not provided. (default: :obj:`None`) + reduce_op (str, optional): The type of reduction operation to apply + (:obj:`"sum"`, :obj:`"mean"`). (default: :obj:`"sum"`) + mean (float, optional): The mean of the output distribution. + (default: :obj:`0.0`) + std (float, optional): The standard deviation of the output + distribution. (default: :obj:`1.0`) + derivative (bool, optional): Whether to compute the derivative of the + output with respect to the positions. (default: :obj:`False`) + """ + + def __init__( + self, + lmax: int = 1, + vecnorm_type: Optional[str] = None, + trainable_vecnorm: bool = False, + num_heads: int = 8, + num_layers: int = 6, + hidden_channels: int = 128, + num_rbf: int = 32, + trainable_rbf: bool = False, + max_z: int = 100, + cutoff: float = 5.0, + max_num_neighbors: int = 32, + vertex: bool = False, + atomref: Optional[Tensor] = None, + reduce_op: str = "sum", + mean: float = 0.0, + std: float = 1.0, + derivative: bool = False, + ) -> None: + super().__init__() + + self.representation_model = ViSNetBlock( + lmax=lmax, + vecnorm_type=vecnorm_type, + trainable_vecnorm=trainable_vecnorm, + num_heads=num_heads, + num_layers=num_layers, + hidden_channels=hidden_channels, + num_rbf=num_rbf, + trainable_rbf=trainable_rbf, + max_z=max_z, + cutoff=cutoff, + max_num_neighbors=max_num_neighbors, + vertex=vertex, + ) + + self.output_model = EquivariantScalar(hidden_channels=hidden_channels) + self.prior_model = Atomref(atomref=atomref, max_z=max_z) + self.reduce_op = reduce_op + self.derivative = derivative + + self.register_buffer("mean", torch.tensor(mean)) + 
# Module-level alias for the model class defined in this file.
# NOTE(review): presumably the name an external loader looks up — confirm.
model_cls = ViSNet


if __name__ == "__main__":
    # Script entry point: load two tensors from the current working directory,
    # run a default-configured ViSNet on them and store the result.
    node_features = torch.load("node_features.pt")
    edge_index = torch.load("edge_index.pt")

    # Model instantiation and forward pass
    # NOTE(review): ViSNet.forward is declared as forward(z, pos, batch) with
    # no defaults, so this two-argument call raises TypeError as written.
    # The inputs this model needs (atomic numbers, positions, batch vector)
    # are not the node_features / edge_index pair loaded above — confirm what
    # the script is meant to feed the model.
    model = ViSNet()
    output = model(node_features, edge_index)

    # Save output to a file
    # NOTE(review): forward returns a (y, dy) tuple, so a tuple would be saved.
    torch.save(output, "gt_output.pt")
b/alphaagent/components/coder/model_coder/benchmark/model_dict.json @@ -0,0 +1,80 @@ +{ + "PMLP": { + "description": "`PMLP` is identical to a standard MLP during training, but then adopts a GNN architecture (add message passing) during testing.", + "formulation": "\\hat{y}_u = \\psi(\\text{MP}(\\{h^{(l-1)}_v\\}_{v \\in N_u \\cup \\{u\\}}))", + "variables": { + "\\hat{y}_u": "The predicted output for node u", + "\\psi": "A function representing the feed-forward process, consisting of a linear feature transformation followed by a non-linear activation", + "\\text{MP}": "Message Passing operation that aggregates neighbored information", + "h^{(l-1)}_v": "The feature representation of node v at layer (l-1)", + "N_u": "The set of neighbored nodes centered at node u" + }, + "key": "pmlp", + "model_type": "TimeSeries" + }, + "LINKX": { + "description": "A scalable model for node classification that separately embeds adjacency and node features, combines them with MLPs, and applies simple transformations.", + "formulation": "Y = MLP_f(\\sigma(W[h_A; h_X] + h_A + h_X))", + "variables": { + "Y": "The output predictions", + "\\sigma": "Non-linear activation function", + "W": "Learned weight matrix", + "h_A": "Embedding of the adjacency matrix", + "h_X": "Embedding of the node features", + "MLP_f": "Final multilayer perceptron for prediction" + }, + "key": "linkx", + "model_type": "TimeSeries" + }, + "GPSConv": { + "description": "A scalable and powerful graph transformer with linear complexity, capable of handling large graphs with state-of-the-art results across diverse benchmarks.", + "formulation": "X^{(l+1)} = \\text{MPNN}^{(l)}(X^{(l)}, A) + \\text{GlobalAttn}^{(l)}(X^{(l)})", + "variables": { + "X^{(l)}": "The node features at layer l", + "A": "The adjacency matrix of the graph", + "X^{(l+1)}": "The updated node features at layer l+1", + "MPNN^{(l)}": "The message-passing neural network function at layer l", + "GlobalAttn^{(l)}": "The global attention function at layer 
l" + }, + "key": "gpsconv", + "model_type": "TimeSeries" + }, + "ViSNet": { + "description": "ViSNet is an equivariant geometry-enhanced graph neural network designed for efficient molecular modeling. It utilizes a Vector-Scalar interactive message passing mechanism to extract and utilize geometric features with low computational costs, achieving state-of-the-art performance on multiple molecular dynamics benchmarks.", + "formulation": "\\text{ViSNet}(G) = \\sum_{u \\in G} f(\\mathbf{h}_u, \\mathbf{e}_u, \\mathbf{v}_u)", + "variables": { + "\\mathbf{h}_u": "Node embedding for atom u", + "\\mathbf{e}_u": "Edge embedding associated with atom u", + "\\mathbf{v}_u": "Direction unit vector for atom u" + }, + "key": "visnet", + "model_type": "TimeSeries" + }, + "Dir-GNN": { + "description": "A framework for deep learning on directed graphs that extends MPNNs to incorporate edge directionality.", + "formulation": "x^{(k)}_i = COM^{(k)}\\left(x^{(k-1)}_i, m^{(k)}_{i,\\leftarrow}, m^{(k)}_{i,\\rightarrow}\\right)", + "variables": { + "x^{(k)}_i": "The feature representation of node i at layer k", + "m^{(k)}_{i,\\leftarrow}": "The aggregated incoming messages to node i at layer k", + "m^{(k)}_{i,\\rightarrow}": "The aggregated outgoing messages from node i at layer k" + }, + "key": "dirgnn", + "model_type": "TimeSeries" + }, + "A-DGN": { + "description": "A framework for stable and non-dissipative DGN design, conceived through the lens of ordinary differential equations (ODEs). 
It ensures long-range information preservation between nodes and prevents gradient vanishing or explosion during training.", + "formulation": "\\frac{\\partial x_u(t)}{\\partial t} = \\sigma(W^T x_u(t) + \\Phi(X(t), N_u) + b)", + "variables": { + "x_u(t)": "The state of node u at time t", + "\\frac{\\partial x_u(t)}{\\partial t}": "The rate of change of the state of node u at time t", + "\\sigma": "A monotonically non-decreasing activation function", + "W": "A weight matrix", + "b": "A bias vector", + "\\Phi(X(t), N_u)": "The aggregation function for the states of the nodes in the neighborhood of u", + "X(t)": "The node feature matrix of the whole graph at time t", + "N_u": "The set of neighboring nodes of u" + }, + "key": "A-DGN", + "model_type": "TimeSeries" + } +} \ No newline at end of file diff --git a/alphaagent/components/coder/model_coder/eva_utils.py b/alphaagent/components/coder/model_coder/eva_utils.py new file mode 100755 index 00000000..06741068 --- /dev/null +++ b/alphaagent/components/coder/model_coder/eva_utils.py @@ -0,0 +1,188 @@ +import json +from pathlib import Path +from typing import Tuple + +import numpy as np +from jinja2 import Environment, StrictUndefined + +from alphaagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask +from alphaagent.core.evaluation import Evaluator +from alphaagent.core.experiment import Task, Workspace +from alphaagent.core.prompts import Prompts +from alphaagent.oai.llm_conf import LLM_SETTINGS +from alphaagent.oai.llm_utils import APIBackend + +evaluate_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + +def shape_evaluator(prediction: np.ndarray, target_shape: Tuple = None) -> Tuple[str, bool]: + if target_shape is None or prediction is None: + return ( + "No output generated from the model. 
def value_evaluator(
    prediction: np.ndarray,
    target: np.ndarray,
    tolerance: float = 0.1,
) -> Tuple[str, bool]:
    """Compare a model output with the ground-truth output.

    Args:
        prediction: Output produced by the implementation under test, or
            ``None`` when the implementation produced nothing.
        target: Ground-truth output, or ``None`` when no ground truth exists.
        tolerance: The mean absolute difference must be strictly below this
            value for the output to be accepted. Defaults to ``0.1``, the
            previously hard-coded threshold.

    Returns:
        A ``(feedback, decision)`` pair: a human-readable feedback string and
        a plain ``bool`` that is ``True`` only when both arrays are present,
        comparable, and closer than ``tolerance``.
    """
    if prediction is None:
        return "No output generated from the model. Skip value evaluation", False
    if target is None:
        return "No ground truth output provided. Value evaluation is impractical", False

    try:
        # Mean absolute difference; float() yields a plain Python number so the
        # decision below is a real ``bool`` rather than ``numpy.bool_``.
        diff = float(np.mean(np.abs(target - prediction)))
    except ValueError as exc:
        # Shapes that cannot be broadcast used to crash the whole evaluation;
        # report them as a failed comparison instead.
        return (
            f"The value of the output cannot be compared with the ground truth: {exc}",
            False,
        )

    # A NaN difference compares False here and is therefore reported as incorrect.
    passed = diff < tolerance
    verdict = "correct" if passed else "incorrect"
    return (
        f"The value of the output is {verdict}. The mean absolute difference is {diff}.",
        passed,
    )
class ModelFinalEvaluator(Evaluator):
    """Asks the LLM for the final verdict on a model implementation.

    The execution, shape, value and code feedback collected earlier are
    rendered into the ``evaluator_final_feedback`` prompts and the JSON reply
    is turned into a ``(final_feedback, final_decision)`` pair.
    """

    def evaluate(
        self,
        target_task: Task,
        implementation: Workspace,
        gt_implementation: Workspace,
        model_execution_feedback: str,
        model_shape_feedback: str,
        model_value_feedback: str,
        model_code_feedback: str,
    ):
        """Produce the final feedback and decision for ``implementation``.

        Args:
            target_task: The task being implemented; must be a ``ModelTask``.
            implementation: The workspace under evaluation; must be a
                ``ModelFBWorkspace``.
            gt_implementation: Optional ground-truth workspace; only
                type-checked here.
            model_execution_feedback: Execution log. It is repeatedly cut to
                its second half while the prompt exceeds the chat token limit.
            model_shape_feedback: Feedback from the shape check.
            model_value_feedback: Feedback from the value check.
            model_code_feedback: Feedback from the code review.

        Returns:
            ``(final_feedback, final_decision)`` taken from the LLM's JSON
            reply. A string decision of "true"/"false" (any case) is converted
            to the matching ``bool``; other values are passed through as-is.

        Raises:
            KeyError: If the reply lacks ``final_decision`` or
                ``final_feedback``.
        """
        assert isinstance(target_task, ModelTask)
        assert isinstance(implementation, ModelFBWorkspace)
        if gt_implementation is not None:
            assert isinstance(gt_implementation, ModelFBWorkspace)

        system_prompt = (
            Environment(undefined=StrictUndefined)
            .from_string(evaluate_prompts["evaluator_final_feedback"]["system"])
            .render(
                scenario=(
                    self.scen.get_scenario_all_desc(target_task, filtered_tag=target_task.model_type)
                    if self.scen is not None
                    else "No scenario description."
                )
            )
        )

        execution_feedback_to_render = model_execution_feedback

        for _ in range(10):  # 10 times to split the content is enough
            user_prompt = (
                Environment(undefined=StrictUndefined)
                .from_string(
                    evaluate_prompts["evaluator_final_feedback"]["user"],
                )
                .render(
                    model_information=target_task.get_task_information(),
                    model_execution_feedback=execution_feedback_to_render,
                    model_shape_feedback=model_shape_feedback,
                    model_code_feedback=model_code_feedback,
                    model_value_feedback=model_value_feedback,
                )
            )
            if (
                APIBackend().build_messages_and_calculate_token(
                    user_prompt=user_prompt,
                    system_prompt=system_prompt,
                )
                > LLM_SETTINGS.chat_token_limit
            ):
                # Too long: keep only the second half of the execution log and retry.
                execution_feedback_to_render = execution_feedback_to_render[len(execution_feedback_to_render) // 2 :]
            else:
                break

        final_evaluation_dict = json.loads(
            APIBackend().build_messages_and_create_chat_completion(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
                json_mode=True,
            ),
        )

        final_decision = final_evaluation_dict["final_decision"]
        if isinstance(final_decision, str) and final_decision.lower() in ("true", "false"):
            # bool("false") is True (any non-empty string is truthy), so the
            # text has to be compared explicitly.
            final_decision = final_decision.lower() == "true"
            final_evaluation_dict["final_decision"] = final_decision

        return (
            final_evaluation_dict["final_feedback"],
            final_decision,
        )
class ModelCoSTEEREvaluator(CoSTEEREvaluator):
    """Runs a model implementation on fixed inputs and collects all feedback."""

    def evaluate(
        self,
        target_task: Task,
        implementation: Workspace,
        gt_implementation: Workspace,
        queried_knowledge: QueriedKnowledge = None,
        **kwargs,
    ) -> ModelSingleFeedback:
        """Evaluate ``implementation`` against ``target_task``.

        Feedback already stored in ``queried_knowledge`` short-circuits the
        evaluation: a known success returns its stored feedback, a task in
        the failed set returns a canned negative feedback. Otherwise the
        implementation (and the ground truth, if given) is executed with the
        same fixed inputs, and the shape, value, code and final evaluators
        are run in that order.
        """
        task_info = target_task.get_task_information()

        if queried_knowledge is not None:
            if task_info in queried_knowledge.success_task_to_knowledge_dict:
                return queried_knowledge.success_task_to_knowledge_dict[task_info].feedback
            if task_info in queried_knowledge.failed_task_info_set:
                skip_message = "This task has failed too many times, skip implementation."
                return ModelSingleFeedback(
                    execution_feedback=skip_message,
                    shape_feedback=skip_message,
                    value_feedback=skip_message,
                    code_feedback=skip_message,
                    final_feedback=skip_message,
                    final_decision=False,
                )

        assert isinstance(target_task, ModelTask)

        # NOTE: Use fixed input to test the model to avoid randomness
        fixed_inputs = dict(
            batch_size=8,
            num_features=30,
            num_timesteps=40,
            input_value=0.4,
            param_init_value=0.6,
        )

        assert isinstance(implementation, ModelFBWorkspace)
        model_execution_feedback, gen_np_array = implementation.execute(**fixed_inputs)

        gt_np_array = None
        if gt_implementation is not None:
            assert isinstance(gt_implementation, ModelFBWorkspace)
            _, gt_np_array = gt_implementation.execute(**fixed_inputs)

        # Expected output is (batch_size, channels); channels falls back to 1
        # when the scenario does not define ``model_output_channel``.
        output_channels = getattr(self.scen, "model_output_channel", 1)
        shape_feedback, _ = shape_evaluator(gen_np_array, (fixed_inputs["batch_size"], output_channels))
        value_feedback, _ = value_evaluator(gen_np_array, gt_np_array)

        code_feedback, _ = ModelCodeEvaluator(scen=self.scen).evaluate(
            target_task=target_task,
            implementation=implementation,
            gt_implementation=gt_implementation,
            model_execution_feedback=model_execution_feedback,
            model_value_feedback="\n".join([shape_feedback, value_feedback]),
        )
        final_feedback, final_decision = ModelFinalEvaluator(scen=self.scen).evaluate(
            target_task=target_task,
            implementation=implementation,
            gt_implementation=gt_implementation,
            model_execution_feedback=model_execution_feedback,
            model_shape_feedback=shape_feedback,
            model_value_feedback=value_feedback,
            model_code_feedback=code_feedback,
        )

        return ModelSingleFeedback(
            execution_feedback=model_execution_feedback,
            shape_feedback=shape_feedback,
            value_feedback=value_feedback,
            code_feedback=code_feedback,
            final_feedback=final_feedback,
            final_decision=final_decision,
            value_generated_flag=(gen_np_array is not None),
            final_decision_based_on_gt=(gt_implementation is not None),
        )
class ModelMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
    """Evolving strategy that asks the LLM to (re)write ``model.py`` for each model task."""

    def implement_one_task(
        self,
        target_task: ModelTask,
        queried_knowledge: CoSTEERQueriedKnowledge = None,
    ) -> str:
        """Generate the code for one model task.

        Args:
            target_task: The task to implement; its information string is the
                lookup key into ``queried_knowledge``.
            queried_knowledge: Optional knowledge with similar successful
                implementations and former failed traces for this task.

        Returns:
            The value of the ``"code"`` field of the LLM's JSON response.
        """
        model_information_str = target_task.get_task_information()

        queried_similar_successful_knowledge = (
            queried_knowledge.task_to_similar_task_successful_knowledge[model_information_str]
            if queried_knowledge is not None
            else []
        )
        queried_former_failed_knowledge = (
            queried_knowledge.task_to_former_failed_traces[model_information_str]
            if queried_knowledge is not None
            else []
        )

        # NOTE(review): for V2 knowledge the traces are indexed with [0] before
        # rendering - presumably the first element is the trace list; confirm
        # against CoSTEERQueriedKnowledgeV2.
        queried_former_failed_knowledge_to_render = (
            queried_former_failed_knowledge[0]
            if isinstance(queried_knowledge, CoSTEERQueriedKnowledgeV2)
            else queried_former_failed_knowledge
        )

        system_prompt = (
            Environment(undefined=StrictUndefined)
            .from_string(
                coder_prompts["evolving_strategy_model_coder"]["system"],
            )
            .render(
                scenario=self.scen.get_scenario_all_desc(filtered_tag=target_task.model_type),
                queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
                current_code=target_task.base_code,
            )
        )

        queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge
        for _ in range(10):  # max attempt to reduce the length of user_prompt
            user_prompt = (
                Environment(undefined=StrictUndefined)
                .from_string(
                    coder_prompts["evolving_strategy_model_coder"]["user"],
                )
                .render(
                    model_information_str=model_information_str,
                    queried_similar_successful_knowledge=queried_similar_successful_knowledge_to_render,
                    queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
                )
                .strip("\n")
            )
            if (
                APIBackend().build_messages_and_calculate_token(
                    user_prompt=user_prompt,
                    system_prompt=system_prompt,
                )
                < LLM_SETTINGS.chat_token_limit
            ):
                break
            elif len(queried_former_failed_knowledge_to_render) > 1:
                # Drop the oldest failed trace first.
                queried_former_failed_knowledge_to_render = queried_former_failed_knowledge_to_render[1:]
            elif len(queried_similar_successful_knowledge_to_render) > 1:
                queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge_to_render[1:]
            else:
                # Nothing left to trim: further iterations would render and
                # tokenise the identical prompt, so stop with what we have.
                break

        code = json.loads(
            APIBackend(use_chat_cache=CoSTEER_SETTINGS.coder_use_cache).build_messages_and_create_chat_completion(
                user_prompt=user_prompt,
                system_prompt=system_prompt,
                json_mode=True,
            ),
        )["code"]
        return code

    def assign_code_list_to_evo(self, code_list, evo):
        """Inject each generated code string into the matching sub-workspace of ``evo``.

        Entries of ``code_list`` that are ``None`` are skipped; a missing
        workspace is created for its sub-task before the code is injected as
        ``model.py``. Returns the same ``evo`` object.
        """
        for index in range(len(evo.sub_tasks)):
            if code_list[index] is None:
                continue
            if evo.sub_workspace_list[index] is None:
                evo.sub_workspace_list[index] = ModelFBWorkspace(target_task=evo.sub_tasks[index])
            evo.sub_workspace_list[index].inject_code(**{"model.py": code_list[index]})
        return evo
class AntiSymmetricConv(torch.nn.Module):
    r"""Anti-symmetric graph convolution, following the paper
    "Anti-Symmetric DGN: a stable architecture for Deep Graph Networks".

    Each iteration applies

    .. math::
        \mathbf{x}^{\prime}_i = \mathbf{x}_i + \epsilon \cdot \sigma \left(
            (\mathbf{W}-\mathbf{W}^T-\gamma \mathbf{I}) \mathbf{x}_i +
            \Phi(\mathbf{X}, \mathcal{N}_i) + \mathbf{b}\right),

    where :math:`\Phi(\mathbf{X}, \mathcal{N}_i)` is the message passing
    module ``phi``.

    Args:
        in_channels (int): Size of each input sample.
        phi (MessagePassing, optional): Message passing module
            :math:`\Phi`. When :obj:`None`, a bias-free
            :class:`~torch_geometric.nn.conv.GCNConv` mapping ``in_channels``
            to ``in_channels`` is created. (default: :obj:`None`)
        num_iters (int, optional): How many times the update above is
            applied in one forward pass. (default: :obj:`1`)
        epsilon (float, optional): Step size :math:`\epsilon`.
            (default: :obj:`0.1`)
        gamma (float, optional): Diffusion strength :math:`\gamma`
            subtracted on the diagonal. (default: :obj:`0.1`)
        act (str or Callable, optional): Non-linearity :math:`\sigma`, resolved
            through ``activation_resolver``. (default: :obj:`"tanh"`)
        act_kwargs (Dict[str, Any], optional): Extra arguments for the
            activation resolved from ``act``. (default: :obj:`None`)
        bias (bool, optional): Whether to learn the additive bias
            :math:`\mathbf{b}`. (default: :obj:`True`)

    Shapes:
        - **input:**
          node features :math:`(|\mathcal{V}|, F_{in})`,
          edge indices :math:`(2, |\mathcal{E}|)`,
          edge weights :math:`(|\mathcal{E}|)` *(optional)*
        - **output:** node features :math:`(|\mathcal{V}|, F_{in})`
    """

    def __init__(
        self,
        in_channels: int,
        phi: Optional[MessagePassing] = None,
        num_iters: int = 1,
        epsilon: float = 0.1,
        gamma: float = 0.1,
        act: Union[str, Callable, None] = "tanh",
        act_kwargs: Optional[Dict[str, Any]] = None,
        bias: bool = True,
    ):
        super().__init__()

        self.in_channels = in_channels
        self.num_iters = num_iters
        self.gamma = gamma
        self.epsilon = epsilon

        activation_options = act_kwargs or {}
        self.act = activation_resolver(act, **activation_options)

        # Build the default message passing layer before registering anything,
        # keeping the construction order of the sub-modules and parameters.
        message_passing = GCNConv(in_channels, in_channels, bias=False) if phi is None else phi

        self.W = Parameter(torch.empty(in_channels, in_channels))
        self.register_buffer("eye", torch.eye(in_channels))
        self.phi = message_passing

        if not bias:
            self.register_parameter("bias", None)
        else:
            self.bias = Parameter(torch.empty(in_channels))

        self.reset_parameters()

    def reset_parameters(self):
        r"""Re-initialise ``W``, the message passing module and the bias."""
        torch.nn.init.kaiming_uniform_(self.W, a=math.sqrt(5))
        self.phi.reset_parameters()
        zeros(self.bias)

    def forward(self, x: Tensor, edge_index: Adj, *args, **kwargs) -> Tensor:
        r"""Apply ``num_iters`` anti-symmetric updates to ``x`` and return the result.

        Extra positional and keyword arguments are forwarded to ``phi``.
        """
        # W - W^T - gamma * I is computed once and reused by every iteration.
        antisymmetric_W = self.W - self.W.t() - self.gamma * self.eye

        has_bias = self.bias is not None
        has_activation = self.act is not None
        for _ in range(self.num_iters):
            message = self.phi(x, edge_index, *args, **kwargs)
            update = x @ antisymmetric_W.t() + message
            if has_bias:
                update += self.bias
            if has_activation:
                update = self.act(update)
            x = x + self.epsilon * update

        return x

    def __repr__(self) -> str:
        fields = (
            f"{self.in_channels}, "
            f"phi={self.phi}, "
            f"num_iters={self.num_iters}, "
            f"epsilon={self.epsilon}, "
            f"gamma={self.gamma}"
        )
        return f"{self.__class__.__name__}({fields})"
class ModelTask(CoSTEERTask):
    """A model-implementation task: textual description plus hyperparameters and model type."""

    def __init__(
        self,
        name: str,
        description: str,
        architecture: str,
        *args,
        hyperparameters: Dict[str, str],
        formulation: str = None,
        variables: Dict[str, str] = None,
        model_type: Optional[str] = None,
        **kwargs,
    ) -> None:
        """Store the model fields, then delegate ``name`` and the remaining arguments to the base task."""
        # Tabular for tabular model, TimesSeries for time series model, Graph for graph model, XGBoost for XGBoost model
        self.model_type: Optional[str] = model_type
        self.hyperparameters: Dict[str, str] = hyperparameters
        self.variables: Optional[Dict[str, str]] = variables
        self.architecture: str = architecture
        self.formulation: Optional[str] = formulation
        self.description: str = description
        super().__init__(name=name, *args, **kwargs)

    def get_task_information(self):
        """Render the task as ``key: value`` lines.

        ``formulation`` and ``variables`` are only included when they are
        truthy; every other field is always present.
        """
        sections = [
            f"name: {self.name}\n",
            f"description: {self.description}\n",
        ]
        if self.formulation:
            sections.append(f"formulation: {self.formulation}\n")
        sections.append(f"architecture: {self.architecture}\n")
        if self.variables:
            sections.append(f"variables: {self.variables}\n")
        sections.append(f"hyperparameters: {self.hyperparameters}\n")
        sections.append(f"model_type: {self.model_type}\n")
        return "".join(sections)

    @staticmethod
    def from_dict(dict):
        """Build a ``ModelTask`` from a mapping of constructor keyword arguments."""
        return ModelTask(**dict)

    def __repr__(self) -> str:
        return "<{} {}>".format(self.__class__.__name__, self.name)
+ + (version 2) for kaggle we'll make a script to call the fit and predict function in the implementation in file `model.py` after setting the cwd into the directory + """ + + def hash_func( + self, + batch_size: int = 8, + num_features: int = 10, + num_timesteps: int = 4, + num_edges: int = 20, + input_value: float = 1.0, + param_init_value: float = 1.0, + ) -> str: + target_file_name = f"{batch_size}_{num_features}_{num_timesteps}_{input_value}_{param_init_value}" + for code_file_name in sorted(list(self.code_dict.keys())): + target_file_name = f"{target_file_name}_{self.code_dict[code_file_name]}" + return md5_hash(target_file_name) + + @cache_with_pickle(hash_func) + def execute( + self, + batch_size: int = 8, + num_features: int = 10, + num_timesteps: int = 4, + num_edges: int = 20, + input_value: float = 1.0, + param_init_value: float = 1.0, + ): + super().execute() + try: + qtde = QTDockerEnv() if self.target_task.version == 1 else KGDockerEnv() + qtde.prepare() + + if self.target_task.version == 1: + dump_code = f""" +MODEL_TYPE = "{self.target_task.model_type}" +BATCH_SIZE = {batch_size} +NUM_FEATURES = {num_features} +NUM_TIMESTEPS = {num_timesteps} +NUM_EDGES = {num_edges} +INPUT_VALUE = {input_value} +PARAM_INIT_VALUE = {param_init_value} +{(Path(__file__).parent / 'model_execute_template_v1.txt').read_text()} +""" + elif self.target_task.version == 2: + dump_code = (Path(__file__).parent / "model_execute_template_v2.txt").read_text() + + log, results = qtde.dump_python_code_run_and_get_results( + code=dump_code, + dump_file_names=["execution_feedback_str.pkl", "execution_model_output.pkl"], + local_path=str(self.workspace_path), + env={}, + code_dump_file_py_name="model_test", + ) + if results is None: + raise RuntimeError(f"Error in running the model code: {log}") + [execution_feedback_str, execution_model_output] = results + + except Exception as e: + execution_feedback_str = f"Execution error: {e}\nTraceback: {traceback.format_exc()}" + 
execution_model_output = None + + if len(execution_feedback_str) > 2000: + execution_feedback_str = ( + execution_feedback_str[:1000] + "....hidden long error message...." + execution_feedback_str[-1000:] + ) + return execution_feedback_str, execution_model_output + + +ModelExperiment = Experiment diff --git a/alphaagent/components/coder/model_coder/model_execute_template_v1.txt b/alphaagent/components/coder/model_coder/model_execute_template_v1.txt new file mode 100755 index 00000000..46e32367 --- /dev/null +++ b/alphaagent/components/coder/model_coder/model_execute_template_v1.txt @@ -0,0 +1,44 @@ +# MODEL_TYPE = "Tabular" +# BATCH_SIZE = 32 +# NUM_FEATURES = 10 +# NUM_TIMESTEPS = 4 +# NUM_EDGES = 20 +# INPUT_VALUE = 1.0 +# PARAM_INIT_VALUE = 1.0 + +import pickle + +import torch +from model import model_cls + +if MODEL_TYPE == "Tabular": + input_shape = (BATCH_SIZE, NUM_FEATURES) + m = model_cls(num_features=input_shape[1]) + data = torch.full(input_shape, INPUT_VALUE) +elif MODEL_TYPE == "TimeSeries": + input_shape = (BATCH_SIZE, NUM_FEATURES, NUM_TIMESTEPS) + m = model_cls(num_features=input_shape[1], num_timesteps=input_shape[2]) + data = torch.full(input_shape, INPUT_VALUE) +elif MODEL_TYPE == "Graph": + node_feature = torch.randn(BATCH_SIZE, NUM_FEATURES) + edge_index = torch.randint(0, BATCH_SIZE, (2, NUM_EDGES)) + m = model_cls(num_features=NUM_FEATURES) + data = (node_feature, edge_index) +else: + raise ValueError(f"Unsupported model type: {MODEL_TYPE}") + +# Initialize all parameters of `m` to `param_init_value` +for _, param in m.named_parameters(): + param.data.fill_(PARAM_INIT_VALUE) + +# Execute the model +if MODEL_TYPE == "Graph": + out = m(*data) +else: + out = m(data) + +execution_model_output = out.cpu().detach().numpy() +execution_feedback_str = f"Execution successful, output tensor shape: {execution_model_output.shape}" + +pickle.dump(execution_model_output, open("execution_model_output.pkl", "wb")) +pickle.dump(execution_feedback_str, 
class ModelCodeWriter(Developer[ModelExperiment]):
    """One-shot developer: asks the LLM once per sub-task for a ``model.py`` implementation."""

    def develop(self, exp: ModelExperiment) -> ModelExperiment:
        """Generate code for every sub-task of ``exp`` and attach the resulting workspaces.

        Args:
            exp: Experiment whose ``sub_tasks`` are implemented one by one.

        Returns:
            The same experiment with ``sub_workspace_list`` replaced by the
            newly created workspaces, in sub-task order.

        Raises:
            ValueError: If an LLM response contains no fenced Python code block.
        """
        # The prompt file, the compiled templates and the system prompt do not
        # depend on the task, so they are built once instead of per iteration.
        pr = Prompts(file_path=DIRNAME / "prompt.yaml")
        user_prompt_tpl = Environment(undefined=StrictUndefined).from_string(pr["code_implement_user"])
        sys_prompt_tpl = Environment(undefined=StrictUndefined).from_string(pr["code_implement_sys"])
        system_prompt = sys_prompt_tpl.render()

        mti_l = []
        for t in exp.sub_tasks:
            mti = ModelFBWorkspace(t)
            mti.prepare()

            user_prompt = user_prompt_tpl.render(
                name=t.name,
                description=t.description,
                formulation=t.formulation,
                variables=t.variables,
            )

            resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt)

            # Extract the code part from the response. The leading greedy `.*`
            # makes the pattern pick the last ```python fence in the response.
            match = re.search(r".*```[Pp]ython\n(.*)\n```.*", resp, re.DOTALL)
            if match is None:
                # Previously this failed with an AttributeError on `None.group`.
                raise ValueError(
                    f"No fenced Python code block found in the LLM response for task {t.name!r}: {resp[:200]!r}"
                )
            code = match.group(1)
            mti.inject_code(**{"model.py": code})
            mti_l.append(mti)
        exp.sub_workspace_list = mti_l
        return exp
model description:{{description}} + 3. model formulation:{{formulation}} + 4. model variables:{{variables}}. + You must complete the forward function as far as you can do. + Execution Your implemented code will be executed in the follow way: + The the implemented code will be placed in a file like [uuid]/model.py + We'll import the model in the implementation in file `model.py` after setting the cwd into the directory + - from model import model_cls (So you must have a variable named `model_cls` in the file) + - So your implemented code could follow the following pattern + ```Python + class XXXLayer(torch.nn.Module): + ... + model_cls = XXXLayer + ``` + - initialize the model by initializing it `model_cls(input_dim=INPUT_DIM)` + - And then verify the model by comparing the output tensors by feeding specific input tensor. diff --git a/alphaagent/components/coder/model_coder/prompts.yaml b/alphaagent/components/coder/model_coder/prompts.yaml new file mode 100755 index 00000000..d3393305 --- /dev/null +++ b/alphaagent/components/coder/model_coder/prompts.yaml @@ -0,0 +1,168 @@ +extract_model_formulation_system: |- + offer description of the proposed model in this paper, write a latex formula with variable as well as the architecture of the model. 
the format should be like + { + "model_name (The name of the model)": { + "description": "A detailed description of the model", + "formulation": "A LaTeX formula representing the model's formulation", + "architecture": "A detailed description of the model's architecture, e.g., neural network layers or tree structures", + "variables": { + "\\hat{y}_u": "The predicted output for node u", + "variable_name_2": "Description of variable 2", + "variable_name_3": "Description of variable 3" + }, + "hyperparameters": { + "hyperparameter_name_1": "value of hyperparameter 1", + "hyperparameter_name_2": "value of hyperparameter 2", + "hyperparameter_name_3": "value of hyperparameter 3" + }, + "model_type": "Tabular or TimeSeries or Graph or XGBoost" # Should be one of "Tabular", "TimeSeries", "Graph", or "XGBoost" + } + } + Eg. + { + "ABC Model": { + "description": "A detailed description of the model", + "formulation": "A LaTeX formula representing the model's formulation", + "architecture": "A detailed description of the model's architecture, e.g., neural network layers or tree structures", + "variables": { + "\\hat{y}_u": "The predicted output for node u", + "variable_name_2": "Description of variable 2", + "variable_name_3": "Description of variable 3" + }, + "hyperparameters": { + "hyperparameter_name_1": "value of hyperparameter 1", + "hyperparameter_name_2": "value of hyperparameter 2", + "hyperparameter_name_3": "value of hyperparameter 3" + }, + "model_type": "Tabular or TimeSeries or Graph or RandomForest or XGBoost" # If torch & Neural network models are required, the choice should be one of "Tabular", "TimeSeries", or "Graph" + } + } + such format content should be begin with ```json and end with ``` and the content should be in json format. 
+ +evolving_strategy_model_coder: + system: |- + User is trying to implement some pytorch models in the following scenario: + {{ scenario }} + Your code is expected to align the scenario in any form which means The user needs to get the prediction of the model based on the input data. + + To help you write the correct code, the user might provide multiple information that helps you write the correct code: + 1. The user might provide you the correct code to similar models. Your should learn from these code to write the correct code. + 2. The user might provide you the failed former code and the corresponding feedback to the code. The feedback contains to the execution, the code and the model output value. You should analyze the feedback and try to correct the latest code. + 3. The user might provide you the suggestion to the latest fail code and some similar fail to correct pairs. Each pair contains the fail code with similar error and the corresponding corrected version code. You should learn from these suggestion to write the correct code. + + Your must write your code based on your former latest attempt below which consists of your former code and code feedback, you should read the former attempt carefully and must not modify the right part of your former code. + + {% if current_code is not none %} + User has write some code before. You should write the new code based on this code. Here is the latest code: + ```python + {{ current_code }} + ``` + Your code should be very similar to the former code which means your code should be ninety more percent same as the former code! You should not modify the right part of the code. + {% else %} + User has not write any code before. You should write the new code from scratch. 
+ {% endif %} + + {% if queried_former_failed_knowledge|length != 0 %} + --------------Your former latest attempt:--------------- + =====Code to the former implementation===== + {{ queried_former_failed_knowledge[-1].implementation.code }} + =====Feedback to the former implementation===== + {{ queried_former_failed_knowledge[-1].feedback }} + {% endif %} + + Please response the code in the following format, using `\n` to separate the code, without any other content. Here is an example structure for the JSON output: + { + "code": "The Python code as a string." + } + + user: |- + --------------Target model information:--------------- + {{ model_information_str }} + + {% if queried_similar_successful_knowledge|length != 0 %} + --------------Correct code to similar models:--------------- + {% for similar_successful_knowledge in queried_similar_successful_knowledge %} + =====Model {{loop.index}}:===== + {{ similar_successful_knowledge.target_task.get_task_information() }} + =====Code:===== + {{ similar_successful_knowledge.implementation.code }} + {% endfor %} + {% endif %} + + {% if queried_former_failed_knowledge|length != 0 %} + --------------Former failed code:--------------- + {% for former_failed_knowledge in queried_former_failed_knowledge %} + =====Code to implementation {{ loop.index }}===== + {{ former_failed_knowledge.implementation.code }} + =====Feedback to implementation {{ loop.index }}===== + {{ former_failed_knowledge.feedback }} + {% endfor %} + {% endif %} + +evaluator_code_feedback: + system: |- + User is trying to implement some models in the following scenario: + {{ scenario }} + User will provide you the information of the model. + + Your job is to check whether user's code is align with the model information and the scenario. + The user will provide the source python code and the execution error message if execution failed. + The user might provide you the ground truth code for you to provide the critic. 
You should not leak the ground truth code to the user in any form but you can use it to provide the critic. + + User has also compared the output generated by the user's code and the ground truth code. The user will provide you some analysis results comparing two output. You may find some error in the code which caused the difference between the two output. + + If the ground truth code is provided, your critic should only consider checking whether the user's code is align with the ground truth code since the ground truth is definitely correct. + If the ground truth code is not provided, your critic should consider checking whether the user's code is reasonable and correct to the description and to the scenario. + + Notice that your critics are not for user to debug the code. They are sent to the coding agent to correct the code. So don't give any following items for the user to check like "Please check the code line XXX". + + You suggestion should not include any code, just some clear and short suggestions. Please point out very critical issues in your response, ignore non-important issues to avoid confusion. If no big issue found in the code, you can response "No critics found". + + You should provide the suggestion to each of your critic to help the user improve the code. Please response the critic in the following format. 
Here is an example structure for the output: + critic 1: The critic message to critic 1 + critic 2: The critic message to critic 2 + + user: |- + --------------Model information:--------------- + {{ model_information }} + --------------Python code:--------------- + {{ code }} + --------------Execution feedback:--------------- + {{ model_execution_feedback }} + {% if model_value_feedback is not none %} + --------------Model value feedback:--------------- + {{ model_value_feedback }} + {% endif %} + {% if gt_code is not none %} + --------------Ground truth Python code:--------------- + {{ gt_code }} + {% endif %} + + +evaluator_final_feedback: + system: |- + User is trying to implement a model in the following scenario: + {{ scenario }} + User has finished evaluation and got some feedback from the evaluator. + The evaluator ran the code, collected its output, and provided several pieces of feedback regarding the user's code and the code output. You should analyze the feedback and consider the scenario and model description to give a final decision about the evaluation result. The final decision concludes whether the model is implemented correctly and, if the final decision is False, gives detailed feedback containing the reason and a suggestion. + + The implementation final decision is considered in the following logic: + 1. If the value and the ground truth value are exactly the same under a small tolerance, the implementation is considered correct. + 2. If no ground truth value is provided, the implementation is considered correct if the code execution is successful and the code feedback is aligned with the scenario and model description. + + Please respond with the critic in the json format.
Here is an example structure for the JSON output, please strictly follow the format: + { + "final_decision": True, + "final_feedback": "The final feedback message, A SINGLE LINE OF TEXT", + } + user: |- + --------------Model information:--------------- + {{ model_information }} + --------------Model Execution feedback:--------------- + {{ model_execution_feedback }} + --------------Model shape feedback:--------------- + {{ model_shape_feedback }} + --------------Model Code feedback:--------------- + {{ model_code_feedback }} + --------------Model value feedback:--------------- + {{ model_value_feedback }} \ No newline at end of file diff --git a/alphaagent/components/coder/model_coder/task_loader.py b/alphaagent/components/coder/model_coder/task_loader.py new file mode 100755 index 00000000..1d5c7993 --- /dev/null +++ b/alphaagent/components/coder/model_coder/task_loader.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import json +import re +from pathlib import Path + +from alphaagent.components.coder.model_coder.model import ModelExperiment, ModelTask +from alphaagent.components.document_reader.document_reader import ( + load_and_process_pdfs_by_langchain, +) +from alphaagent.components.loader.task_loader import ModelTaskLoader +from alphaagent.core.prompts import Prompts +from alphaagent.log import logger +from alphaagent.oai.llm_utils import APIBackend +from alphaagent.scenarios.qlib.experiment.model_experiment import QlibModelExperiment + +document_process_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + +def extract_model_from_doc(doc_content: str) -> dict: + """ + Extract model information from document content. + + Parameters + ---------- + doc_content : str + Document content. 
+ + Returns + ------- + dict + {model_name: dict{description, formulation, variables}} + """ + session = APIBackend().build_chat_session( + session_system_prompt=document_process_prompts["extract_model_formulation_system"], + ) + current_user_prompt = doc_content + + # Extract model information from document content. + model_dict = {} + + for _ in range(10): + # try to extract model information from the document content, retry at most 10 times. + extract_result_resp = session.build_chat_completion( + user_prompt=current_user_prompt, + json_mode=False, + ) + re_search_res = re.search(r"```json(.*)```", extract_result_resp, re.S) + ret_json_str = re_search_res.group(1) if re_search_res is not None else "" + try: + ret_dict = json.loads(ret_json_str) + parse_success = bool(isinstance(ret_dict, dict)) + except json.JSONDecodeError: + parse_success = False + if ret_json_str is None or not parse_success: + current_user_prompt = "Your response didn't follow the instruction might be wrong json format. Try again." + else: + for name, formulation_and_description in ret_dict.items(): + if name not in model_dict: + model_dict[name] = formulation_and_description + if len(model_dict) == 0: + current_user_prompt = "No model extracted. Please try again." 
def merge_file_to_model_dict_to_model_dict(
    file_to_model_dict: dict[str, dict],
) -> dict:
    """Merge per-file model dictionaries into one dictionary keyed by model name.

    Args:
        file_to_model_dict: ``{file_name: {model_name: model_info}}``.

    Returns:
        ``{model_name: model_info}``. When a model name occurs in several
        files, the entry with the longest ``"formulation"`` is kept (the first
        one wins on ties). A missing or ``None`` formulation counts as length
        zero instead of raising. A model that occurs only once is passed
        through untouched.
    """
    candidates_by_model: dict[str, list] = {}
    for models_in_file in file_to_model_dict.values():
        for model_name, model_info in models_in_file.items():
            candidates_by_model.setdefault(model_name, []).append(model_info)

    def _formulation_length(model_info) -> int:
        # `formulation` is optional, so absent and None both rank lowest.
        return len(model_info.get("formulation") or "")

    model_dict_simple_deduplication = {}
    for model_name, candidates in candidates_by_model.items():
        if len(candidates) > 1:
            model_dict_simple_deduplication[model_name] = max(candidates, key=_formulation_length)
        else:
            model_dict_simple_deduplication[model_name] = candidates[0]
    return model_dict_simple_deduplication
variables}} + return ModelExperimentLoaderFromDict().load(model_dict) diff --git a/alphaagent/components/document_reader/document_reader.py b/alphaagent/components/document_reader/document_reader.py new file mode 100755 index 00000000..ed5a14ae --- /dev/null +++ b/alphaagent/components/document_reader/document_reader.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import io +from pathlib import Path +from typing import TYPE_CHECKING + +import fitz +import requests +from azure.ai.formrecognizer import DocumentAnalysisClient +from azure.core.credentials import AzureKeyCredential +from langchain_community.document_loaders import PyPDFDirectoryLoader, PyPDFLoader +from PIL import Image + +if TYPE_CHECKING: + from langchain_core.documents import Document + +from alphaagent.core.conf import RD_AGENT_SETTINGS + + +def load_documents_by_langchain(path: str) -> list: + """Load documents from the specified path. + + Args: + path (str): The path to the directory or file containing the documents. + + Returns: + list: A list of loaded documents. + """ + if Path(path).is_dir(): + loader = PyPDFDirectoryLoader(path, silent_errors=True) + else: + loader = PyPDFLoader(path) + return loader.load() + + +def process_documents_by_langchain(docs: list[Document]) -> dict[str, str]: + """Process a list of documents and group them by document name. + + Args: + docs (list): A list of documents. + + Returns: + dict: A dictionary where the keys are document names and the values are + the concatenated content of the documents. 
+ """ + content_dict = {} + + for doc in docs: + if Path(doc.metadata["source"]).exists(): + doc_name = str(Path(doc.metadata["source"]).resolve()) + else: + doc_name = doc.metadata["source"] + doc_content = doc.page_content + + if doc_name not in content_dict: + content_dict[str(doc_name)] = doc_content + else: + content_dict[str(doc_name)] += doc_content + + return content_dict + + +def load_and_process_pdfs_by_langchain(path: str) -> dict[str, str]: + return process_documents_by_langchain(load_documents_by_langchain(path)) + + +def load_and_process_one_pdf_by_azure_document_intelligence( + path: Path, + key: str, + endpoint: str, +) -> str: + pages = len(PyPDFLoader(str(path)).load()) + document_analysis_client = DocumentAnalysisClient( + endpoint=endpoint, + credential=AzureKeyCredential(key), + ) + + with path.open("rb") as file: + result = document_analysis_client.begin_analyze_document( + "prebuilt-document", + file, + pages=f"1-{pages}", + ).result() + return result.content + + +def load_and_process_pdfs_by_azure_document_intelligence(path: Path) -> dict[str, str]: + assert RD_AGENT_SETTINGS.azure_document_intelligence_key is not None + assert RD_AGENT_SETTINGS.azure_document_intelligence_endpoint is not None + + content_dict = {} + ab_path = path.resolve() + if ab_path.is_file(): + assert ".pdf" in ab_path.suffixes, "The file must be a PDF file." 
+ proc = load_and_process_one_pdf_by_azure_document_intelligence + content_dict[str(ab_path)] = proc( + ab_path, + RD_AGENT_SETTINGS.azure_document_intelligence_key, + RD_AGENT_SETTINGS.azure_document_intelligence_endpoint, + ) + else: + for file_path in ab_path.rglob("*"): + if file_path.is_file() and ".pdf" in file_path.suffixes: + content_dict[str(file_path)] = load_and_process_one_pdf_by_azure_document_intelligence( + file_path, + RD_AGENT_SETTINGS.azure_document_intelligence_key, + RD_AGENT_SETTINGS.azure_document_intelligence_endpoint, + ) + return content_dict + + +def extract_first_page_screenshot_from_pdf(pdf_path: str) -> Image: + if not Path(pdf_path).exists(): + doc = fitz.open(stream=io.BytesIO(requests.get(pdf_path).content), filetype="pdf") + else: + doc = fitz.open(pdf_path) + page = doc.load_page(0) + pix = page.get_pixmap() + image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) + + return image diff --git a/alphaagent/components/knowledge_management/graph.py b/alphaagent/components/knowledge_management/graph.py new file mode 100755 index 00000000..23d78943 --- /dev/null +++ b/alphaagent/components/knowledge_management/graph.py @@ -0,0 +1,481 @@ +from __future__ import annotations + +import pickle +import random +from collections import deque +from pathlib import Path +from typing import Any, NoReturn + +from alphaagent.components.knowledge_management.vector_base import ( + KnowledgeMetaData, + PDVectorBase, + VectorBase, + cosine, +) +from alphaagent.core.knowledge_base import KnowledgeBase +from alphaagent.oai.llm_utils import APIBackend + +Node = KnowledgeMetaData + + +class UndirectedNode(Node): + def __init__(self, content: str = "", label: str = "", embedding: Any = None) -> None: + super().__init__(content, label, embedding) + self.neighbors: set[UndirectedNode] = set() + assert isinstance(content, str), "content must be a string" + + def add_neighbor(self, node: UndirectedNode) -> None: + self.neighbors.add(node) + 
node.neighbors.add(self) + + def remove_neighbor(self, node: UndirectedNode) -> None: + if node in self.neighbors: + self.neighbors.remove(node) + node.neighbors.remove(self) + + def get_neighbors(self) -> set[UndirectedNode]: + return self.neighbors + + def __str__(self) -> str: + return ( + f"UndirectedNode(id={self.id}, label={self.label}, content={self.content[:100]}, " + f"neighbors={self.neighbors})" + ) + + def __repr__(self) -> str: + return ( + f"UndirectedNode(id={self.id}, label={self.label}, content={self.content[:100]}, " + f"neighbors={self.neighbors})" + ) + + +class Graph(KnowledgeBase): + """ + base Graph class for Knowledge Graph Search + """ + + def __init__(self, path: str | Path | None = None) -> None: + self.nodes = {} + super().__init__(path=path) + + def size(self) -> int: + return len(self.nodes) + + def get_node(self, node_id: str) -> Node | None: + return self.nodes.get(node_id) + + def add_node(self, **kwargs: Any) -> NoReturn: + raise NotImplementedError + + def get_all_nodes(self) -> list[Node]: + return list(self.nodes.values()) + + def get_all_nodes_by_label_list(self, label_list: list[str]) -> list[Node]: + return [node for node in self.nodes.values() if node.label in label_list] + + def find_node(self, content: str, label: str) -> Node | None: + for node in self.nodes.values(): + if node.content == content and node.label == label: + return node + return None + + @staticmethod + def batch_embedding(nodes: list[Node]) -> list[Node]: + contents = [node.content for node in nodes] + # openai create embedding API input's max length is 16 + size = 16 + embeddings = [] + for i in range(0, len(contents), size): + embeddings.extend( + APIBackend().create_embedding(input_content=contents[i : i + size]), + ) + + assert len(nodes) == len(embeddings), "nodes' length must equals embeddings' length" + for node, embedding in zip(nodes, embeddings): + node.embedding = embedding + return nodes + + def __str__(self) -> str: + return 
f"Graph(nodes={self.nodes})" + + +class UndirectedGraph(Graph): + """ + Undirected Graph which edges have no relationship + """ + + def __init__(self, path: str | Path | None = None) -> None: + self.vector_base: VectorBase = PDVectorBase() + super().__init__(path=path) + + def __str__(self) -> str: + return f"UndirectedGraph(nodes={self.nodes})" + + def add_node( + self, + node: UndirectedNode, + neighbor: UndirectedNode = None, + same_node_threshold: float = 0.95, # noqa: ARG002 + ) -> None: + """ + add node and neighbor to the Graph + Parameters + ---------- + same_node_threshold: 0.95 is an empirical value. When two strings only differ in case, the similarity is greater + than 0.95. + node + neighbor + + Returns + ------- + + """ + if self.get_node(node.id): + node = self.get_node(node.id) + elif self.find_node(content=node.content, label=node.label): + node = self.find_node(content=node.content, label=node.label) + else: + # same_node = self.semantic_search(node=node.content, similarity_threshold=same_node_threshold, topk_k=1) + # if len(same_node): + # node = same_node[0] + # else: + node.create_embedding() + self.vector_base.add(document=node) + self.nodes.update({node.id: node}) + + if neighbor is not None: + if self.get_node(neighbor.id): + neighbor = self.get_node(neighbor.id) + elif self.find_node(content=neighbor.content, label=node.label): + neighbor = self.find_node(content=neighbor.content, label=node.label) + else: + # same_node = self.semantic_search(node=neighbor.content, + # similarity_threshold=same_node_threshold, topk_k=1) + # if len(same_node): + # neighbor = same_node[0] + # else: + neighbor.create_embedding() + self.vector_base.add(document=neighbor) + self.nodes.update({neighbor.id: neighbor}) + + node.add_neighbor(neighbor) + + def add_nodes(self, node: UndirectedNode, neighbors: list[UndirectedNode]) -> None: + if not neighbors: + self.add_node(node) + else: + for neighbor in neighbors: + self.add_node(node, neighbor=neighbor) + + def 
get_node(self, node_id: str) -> UndirectedNode: + return self.nodes.get(node_id) + + def get_node_by_content(self, content: str) -> UndirectedNode | None: + """ + Get node by semantic distance + Parameters + ---------- + content + + Returns + ------- + + """ + if content == "Model": + pass + match = self.semantic_search(node=content, similarity_threshold=0.999) + if match: + return match[0] + return None + + def get_nodes_within_steps( + self, + start_node: UndirectedNode, + steps: int = 1, + constraint_labels: list[str] | None = None, + *, + block: bool = False, + ) -> list[UndirectedNode]: + """ + Returns the nodes in the graph whose distance from node is less than or equal to step + """ + visited = set() + queue = deque([(start_node, 0)]) + result = [] + + while queue: + node, current_steps = queue.popleft() + + if current_steps > steps: + break + + if node not in visited: + visited.add(node) + result.append(node) + + for neighbor in sorted( + self.get_node(node.id).neighbors, + key=lambda x: x.content, + ): # to make sure the result is deterministic + if neighbor not in visited and not (block and neighbor.label not in constraint_labels): + queue.append((neighbor, current_steps + 1)) + + if constraint_labels: + result = [node for node in result if node.label in constraint_labels] + if start_node in result: + result.remove(start_node) + return result + + def get_nodes_intersection( + self, + nodes: list[UndirectedNode], + steps: int = 1, + constraint_labels: list[str] | None = None, + ) -> list[UndirectedNode]: + """ + Get the intersection with nodes connected within n steps of nodes + + Parameters + ---------- + nodes + steps + constraint_labels + + Returns + ------- + + """ + min_nodes_count = 2 + assert len(nodes) >= min_nodes_count, "nodes length must >=2" + intersection = None + + for node in nodes: + if intersection is None: + intersection = self.get_nodes_within_steps( + node, + steps=steps, + constraint_labels=constraint_labels, + ) + intersection = 
self.intersection( + nodes1=intersection, + nodes2=self.get_nodes_within_steps( + node, + steps=steps, + constraint_labels=constraint_labels, + ), + ) + return intersection + + def semantic_search( + self, + node: UndirectedNode | str, + similarity_threshold: float = 0.0, + topk_k: int = 5, + ) -> list[UndirectedNode]: + """ + semantic search by node's embedding + + Parameters + ---------- + topk_k + node + similarity_threshold: Returns nodes whose distance score from the input + node is greater than similarity_threshold + + Returns + ------- + + """ + if isinstance(node, str): + node = UndirectedNode(content=node) + docs, scores = self.vector_base.search( + content=node.content, + topk_k=topk_k, + similarity_threshold=similarity_threshold, + ) + return [self.get_node(doc.id) for doc in docs] + + def clear(self) -> None: + self.nodes.clear() + self.vector_base: VectorBase = PDVectorBase() + + def query_by_node( + self, + node: UndirectedNode, + step: int = 1, + constraint_labels: list[str] | None = None, + constraint_node: UndirectedNode | None = None, + constraint_distance: float = 0, + *, + block: bool = False, + ) -> list[UndirectedNode]: + """ + search graph by connection, return empty list if nodes' chain without node near to constraint_node + Parameters + ---------- + node + step + constraint_labels + constraint_node + constraint_distance + block: despite the start node, the search can only flow through the constraint_label type nodes + + Returns + ------- + + """ + nodes = self.get_nodes_within_steps( + start_node=node, + steps=step, + constraint_labels=constraint_labels, + block=block, + ) + if constraint_node is not None: + for n in nodes: + if self.cal_distance(n, constraint_node) > constraint_distance: + return nodes + return [] + return nodes + + def query_by_content( + self, + content: str | list[str], + topk_k: int = 5, + step: int = 1, + constraint_labels: list[str] | None = None, + constraint_node: UndirectedNode | None = None, + 
similarity_threshold: float = 0.0, + constraint_distance: float = 0, + *, + block: bool = False, + ) -> list[UndirectedNode]: + """ + Search graph by content similarity and connection relationship, return empty + list if nodes' chain without node near to constraint_node. + + Parameters + ---------- + constraint_distance : float + The distance between the node and the constraint_node. + content : Union[str, List[str]] + Content to search for. + topk_k: int + The upper number of output for each query. If the number of fit nodes is + less than topk_k, returns all fit nodes' content. + step : int + The maximum distance between the start node and the result node. + constraint_labels : List[str] + The type of nodes that the search can only flow through. + constraint_node : UndirectedNode, optional + The node that the search can only flow through. + similarity_threshold : float + The similarity threshold of the content. + block: bool + Despite the start node, the search can only flow through the constraint_label type nodes. 
+ + Returns + ------- + + """ + + if isinstance(content, str): + content = [content] + + res_list = [] + for query in content: + similar_nodes = self.semantic_search( + content=query, + topk_k=topk_k, + similarity_threshold=similarity_threshold, + ) + + connected_nodes = [] + for node in similar_nodes: + graph_query_node_res = self.query_by_node( + node, + step=step, + constraint_labels=constraint_labels, + constraint_node=constraint_node, + constraint_distance=constraint_distance, + block=block, + ) + connected_nodes.extend( + [node for node in graph_query_node_res if node not in connected_nodes], + ) + if len(connected_nodes) >= topk_k: + break + + res_list.extend( + [node for node in connected_nodes[:topk_k] if node not in res_list], + ) + return res_list + + @staticmethod + def intersection(nodes1: list[UndirectedNode], nodes2: list[UndirectedNode]) -> list[UndirectedNode]: + return [node for node in nodes1 if node in nodes2] + + @staticmethod + def different(nodes1: list[UndirectedNode], nodes2: list[UndirectedNode]) -> list[UndirectedNode]: + return list(set(nodes1).symmetric_difference(set(nodes2))) + + @staticmethod + def cal_distance(node1: UndirectedNode, node2: UndirectedNode) -> float: + return cosine(node1.embedding, node2.embedding) + + @staticmethod + def filter_label(nodes: list[UndirectedNode], labels: list[str]) -> list[UndirectedNode]: + return [node for node in nodes if node.label in labels] + + +def graph_to_edges(graph: dict[str, list[str]]) -> list[tuple[str, str]]: + edges = [] + + for node, neighbors in graph.items(): + for neighbor in neighbors: + if (node, neighbor) in edges or (neighbor, node) in edges: + continue + edges.append((node, neighbor)) + + return edges + + +def assign_random_coordinate_to_node( + nodes: list[str], + scope: float = 1.0, + origin: tuple[float, float] = (0.0, 0.0), +) -> dict[str, tuple[float, float]]: + coordinates = {} + for node in nodes: + x = random.SystemRandom().uniform(0, scope) + origin[0] + y = 
random.SystemRandom().uniform(0, scope) + origin[1] + coordinates[node] = (x, y) + + return coordinates + + +def assign_isometric_coordinate_to_node( + nodes: list, + x_step: float = 1.0, + x_origin: float = 0.0, + y_origin: float = 0.0, +) -> dict: + coordinates = {} + + for i, node in enumerate(nodes): + x = x_origin + i * x_step + y = y_origin + coordinates[node] = (x, y) + + return coordinates + + +def curly_node_coordinate( + coordinates: dict, + center_y: float = 1.0, + r: float = 1.0, +) -> dict: + # noto: this method can only curly < 90 degree, and the curl line is circle. + # the original function is: x**2 + (y-m)**2 = r**2 + for node, coordinate in coordinates.items(): + coordinates[node] = (coordinate[0], center_y + (r**2 - coordinate[0] ** 2) ** 0.5) + return coordinates diff --git a/alphaagent/components/knowledge_management/vector_base.py b/alphaagent/components/knowledge_management/vector_base.py new file mode 100755 index 00000000..677cc32a --- /dev/null +++ b/alphaagent/components/knowledge_management/vector_base.py @@ -0,0 +1,183 @@ +import uuid +from pathlib import Path +from typing import List, Tuple, Union + +import pandas as pd +from scipy.spatial.distance import cosine + +from alphaagent.core.knowledge_base import KnowledgeBase +from alphaagent.log import logger +from alphaagent.oai.llm_utils import APIBackend + + +class KnowledgeMetaData: + def __init__(self, content: str = "", label: str = None, embedding=None, identity=None): + self.label = label + self.content = content + self.id = str(uuid.uuid3(uuid.NAMESPACE_DNS, str(self.content))) if identity is None else identity + self.embedding = embedding + self.trunks = [] + self.trunks_embedding = [] + + def split_into_trunk(self, size: int = 1000, overlap: int = 0): + """ + split content into trunks and create embedding by trunk + Returns + ------- + + """ + + def split_string_into_chunks(string: str, chunk_size: int): + chunks = [] + for i in range(0, len(string), chunk_size): + chunk = 
string[i : i + chunk_size] + chunks.append(chunk) + return chunks + + self.trunks = split_string_into_chunks(self.content, chunk_size=size) + self.trunks_embedding = APIBackend().create_embedding(input_content=self.trunks) + + def create_embedding(self): + """ + create content's embedding + Returns + ------- + + """ + if self.embedding is None: + self.embedding = APIBackend().create_embedding(input_content=self.content) + + def from_dict(self, data: dict): + for key, value in data.items(): + setattr(self, key, value) + return self + + def __repr__(self): + return f"Document(id={self.id}, label={self.label}, data={self.content})" + + +Document = KnowledgeMetaData + + +def contents_to_documents(contents: List[str], label: str = None) -> List[Document]: + # openai create embedding API input's max length is 16 + size = 16 + embedding = [] + for i in range(0, len(contents), size): + embedding.extend(APIBackend().create_embedding(input_content=contents[i : i + size])) + docs = [Document(content=c, label=label, embedding=e) for c, e in zip(contents, embedding)] + return docs + + +class VectorBase(KnowledgeBase): + """ + This class is used for handling vector storage and query + """ + + def add(self, document: Union[Document, List[Document]]): + """ + add new node to vector_df + Parameters + ---------- + document + + Returns + ------- + + """ + pass + + def search(self, content: str, topk_k: int = 5, similarity_threshold: float = 0) -> List[Document]: + """ + search vector_df by node + Parameters + ---------- + similarity_threshold + content + topk_k: return topk_k nearest vector_df + + Returns + ------- + + """ + pass + + +class PDVectorBase(VectorBase): + """ + Implement of VectorBase using Pandas + """ + + def __init__(self, path: Union[str, Path] = None): + self.vector_df = pd.DataFrame(columns=["id", "label", "content", "embedding"]) + super().__init__(path) + + def shape(self): + return self.vector_df.shape + + def add(self, document: Union[Document, 
List[Document]]): + """ + add new node to vector_df + Parameters + ---------- + document + + Returns + ------- + + """ + if isinstance(document, Document): + if document.embedding is None: + document.create_embedding() + docs = [ + { + "id": document.id, + "label": document.label, + "content": document.content, + "trunk": document.content, + "embedding": document.embedding, + } + ] + docs.extend( + [ + { + "id": document.id, + "label": document.label, + "content": document.content, + "trunk": trunk, + "embedding": embedding, + } + for trunk, embedding in zip(document.trunks, document.trunks_embedding) + ] + ) + self.vector_df = pd.concat([self.vector_df, pd.DataFrame(docs)], ignore_index=True) + else: + for doc in document: + self.add(document=doc) + + def search(self, content: str, topk_k: int = 5, similarity_threshold: float = 0) -> Tuple[List[Document], List]: + """ + search vector by node + Parameters + ---------- + similarity_threshold + content + topk_k: return topk_k nearest vector + + Returns + ------- + + """ + if not self.vector_df.shape[0]: + return [], [] + document = Document(content=content) + document.create_embedding() + similarities = self.vector_df["embedding"].apply( + lambda x: 1 - cosine(x, document.embedding) + ) # cosine is cosine distance, 1-similarity + searched_similarities = similarities[similarities > similarity_threshold].nlargest(topk_k) + most_similar_docs = self.vector_df.loc[searched_similarities.index] + docs = [] + for _, similar_docs in most_similar_docs.iterrows(): + docs.append(Document().from_dict(similar_docs.to_dict())) + return docs, searched_similarities.to_list() diff --git a/alphaagent/components/loader/experiment_loader.py b/alphaagent/components/loader/experiment_loader.py new file mode 100755 index 00000000..4a3a23d7 --- /dev/null +++ b/alphaagent/components/loader/experiment_loader.py @@ -0,0 +1,10 @@ +from alphaagent.components.coder.factor_coder.factor import FactorExperiment +from alphaagent.core.experiment import 
Loader + + +class FactorExperimentLoader(Loader[FactorExperiment]): + pass + + +class ModelExperimentLoader(Loader[FactorExperiment]): + pass diff --git a/alphaagent/components/loader/task_loader.py b/alphaagent/components/loader/task_loader.py new file mode 100755 index 00000000..1b7d04fb --- /dev/null +++ b/alphaagent/components/loader/task_loader.py @@ -0,0 +1,94 @@ +import json +from pathlib import Path +from typing import Sequence + +from alphaagent.components.coder.factor_coder.factor import FactorTask +from alphaagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask +from alphaagent.core.experiment import Loader, WsLoader + + +class FactorTaskLoader(Loader[FactorTask]): + pass + + +class ModelTaskLoader(Loader[ModelTask]): + pass + + +class ModelTaskLoaderJson(ModelTaskLoader): + # def __init__(self, json_uri: str, select_model: Optional[str] = None) -> None: + # super().__init__() + # self.json_uri = json_uri + # self.select_model = 'A-DGN' + + # def load(self, *argT, **kwargs) -> Sequence[ModelImplTask]: + # # json is supposed to be in the format of {model_name: dict{model_data}} + # model_dict = json.load(open(self.json_uri, "r")) + # if self.select_model is not None: + # assert self.select_model in model_dict + # model_name = self.select_model + # model_data = model_dict[self.select_model] + # else: + # model_name, model_data = list(model_dict.items())[0] + + # model_impl_task = ModelImplTask( + # name=model_name, + # description=model_data["description"], + # formulation=model_data["formulation"], + # variables=model_data["variables"], + # key=model_name + # ) + + # return [model_impl_task] + + def __init__(self, json_uri: str) -> None: + super().__init__() + self.json_uri = json_uri + + def load(self, *argT, **kwargs) -> Sequence[ModelTask]: + # json is supposed to be in the format of {model_name: dict{model_data}} + model_dict = json.load(open(self.json_uri, "r")) + # FIXME: the model in the json file is not right due to 
extraction error + # We should fix them case by case in the future + # + # formula_info = { + # "name": "Anti-Symmetric Deep Graph Network (A-DGN)", + # "description": "A framework for stable and non-dissipative DGN design. It ensures long-range information preservation between nodes and prevents gradient vanishing or explosion during training.", + # "formulation": r"\mathbf{x}^{\prime}_i = \mathbf{x}_i + \epsilon \cdot \sigma \left( (\mathbf{W}-\mathbf{W}^T-\gamma \mathbf{I}) \mathbf{x}_i + \Phi(\mathbf{X}, \mathcal{N}_i) + \mathbf{b}\right),", + # "variables": { + # r"\mathbf{x}_i": "The state of node i at previous layer", + # r"\epsilon": "The step size in the Euler discretization", + # r"\sigma": "A monotonically non-decreasing activation function", + # r"\Phi": "A graph convolutional operator", + # r"W": "An anti-symmetric weight matrix", + # r"\mathbf{x}^{\prime}_i": "The node feature matrix at layer l-1", + # r"\mathcal{N}_i": "The set of neighbors of node u", + # r"\mathbf{b}": "A bias vector", + # }, + # "key": "A-DGN", + # } + model_impl_task_list = [] + for model_name, model_data in model_dict.items(): + model_impl_task = ModelTask( + name=model_name, + description=model_data["description"], + formulation=model_data["formulation"], + variables=model_data["variables"], + model_type=model_data["model_type"], + ) + model_impl_task_list.append(model_impl_task) + return model_impl_task_list + + +class ModelWsLoader(WsLoader[ModelTask, ModelFBWorkspace]): + def __init__(self, path: Path) -> None: + self.path = Path(path) + + def load(self, task: ModelTask) -> ModelFBWorkspace: + assert task.name is not None + mti = ModelFBWorkspace(task) + mti.prepare() + with open(self.path / f"{task.name}.py", "r") as f: + code = f.read() + mti.inject_code(**{"model.py": code}) + return mti diff --git a/alphaagent/components/proposal/__init__.py b/alphaagent/components/proposal/__init__.py new file mode 100755 index 00000000..d331180d --- /dev/null +++ 
b/alphaagent/components/proposal/__init__.py @@ -0,0 +1,128 @@ +from abc import abstractmethod +from pathlib import Path +from typing import Tuple + +from jinja2 import Environment, StrictUndefined + +from alphaagent.core.experiment import Experiment +from alphaagent.core.prompts import Prompts +from alphaagent.core.proposal import ( + Hypothesis, + Hypothesis2Experiment, + HypothesisGen, + Scenario, + Trace, +) +from alphaagent.oai.llm_utils import APIBackend + +prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + + +class LLMHypothesisGen(HypothesisGen): + def __init__(self, scen: Scenario): + super().__init__(scen) + + # The following methods are scenario related so they should be implemented in the subclass + @abstractmethod + def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: ... + + @abstractmethod + def convert_response(self, response: str) -> Hypothesis: ... + + def gen(self, trace: Trace) -> Hypothesis: + context_dict, json_flag = self.prepare_context(trace) + system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(prompt_dict["hypothesis_gen"]["system_prompt"]) + .render( + targets=self.targets, + scenario=self.scen.get_scenario_all_desc(filtered_tag="hypothesis_and_experiment"), + hypothesis_output_format=context_dict["hypothesis_output_format"], + hypothesis_specification=context_dict["hypothesis_specification"], + ) + ) + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(prompt_dict["hypothesis_gen"]["user_prompt"]) + .render( + targets=self.targets, + hypothesis_and_feedback=context_dict["hypothesis_and_feedback"], + RAG=context_dict["RAG"], + ) + ) + + resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt, json_mode=json_flag) + + hypothesis = self.convert_response(resp) + + return hypothesis + + +class FactorHypothesisGen(LLMHypothesisGen): + def __init__(self, scen: Scenario): + super().__init__(scen) + self.targets = "factors" + + +class 
ModelHypothesisGen(LLMHypothesisGen): + def __init__(self, scen: Scenario): + super().__init__(scen) + self.targets = "model tuning" + + +class FactorAndModelHypothesisGen(LLMHypothesisGen): + def __init__(self, scen: Scenario): + super().__init__(scen) + self.targets = "feature engineering and model building" + + +class LLMHypothesis2Experiment(Hypothesis2Experiment[Experiment]): + @abstractmethod + def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, bool]: ... + + @abstractmethod + def convert_response(self, response: str, trace: Trace) -> Experiment: ... + + def convert(self, hypothesis: Hypothesis, trace: Trace) -> Experiment: + context, json_flag = self.prepare_context(hypothesis, trace) + system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(prompt_dict["hypothesis2experiment"]["system_prompt"]) + .render( + targets=self.targets, + scenario=trace.scen.get_scenario_all_desc(filtered_tag="hypothesis_and_experiment"), + experiment_output_format=context["experiment_output_format"], + ) + ) + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(prompt_dict["hypothesis2experiment"]["user_prompt"]) + .render( + targets=self.targets, + target_hypothesis=context["target_hypothesis"], + hypothesis_and_feedback=context["hypothesis_and_feedback"], + target_list=context["target_list"], + RAG=context["RAG"], + ) + ) + + resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt, json_mode=json_flag) + return self.convert_response(resp, trace) + + +class FactorHypothesis2Experiment(LLMHypothesis2Experiment): + def __init__(self): + super().__init__() + self.targets = "factors" + + +class ModelHypothesis2Experiment(LLMHypothesis2Experiment): + def __init__(self): + super().__init__() + self.targets = "model tuning" + + +class FactorAndModelHypothesis2Experiment(LLMHypothesis2Experiment): + def __init__(self): + super().__init__() + self.targets = "feature engineering and model 
building" diff --git a/alphaagent/components/proposal/prompts.yaml b/alphaagent/components/proposal/prompts.yaml new file mode 100755 index 00000000..e1ff750f --- /dev/null +++ b/alphaagent/components/proposal/prompts.yaml @@ -0,0 +1,48 @@ +hypothesis_gen: + system_prompt: |- + The user is working on generating new hypotheses for the {{targets}} in a data-driven research and development process. + The {{targets}} are used in the following scenario: + {{scenario}} + The user has already proposed several hypotheses and conducted evaluations on them. This information will be provided to you. Your task is to check whether a similar hypothesis has already been generated. + If one exists and you agree with it, feel free to use it. If you disagree, please generate an improved version. + {% if hypothesis_specification %} + To assist you in formulating new hypotheses, the user has provided some additional information: {{hypothesis_specification}}. + **Important:** If the hypothesis_specification outlines the next steps you need to follow, ensure you adhere to those instructions. + {% endif %} + Please generate the output using the following format and specifications: + {{ hypothesis_output_format }} + + user_prompt: |- + {% if hypothesis_and_feedback|length == 0 %}It is the first round of hypothesis generation. The user has no hypothesis on this scenario yet. + {% else %}It is not the first round, the user has made several hypothesis on this scenario and did several evaluation on them. + The former hypothesis and the corresponding feedbacks are as follows (focus on the last one & the new hypothesis that it provides and reasoning to see if you agree): + {{ hypothesis_and_feedback }} + {% endif %} + {% if RAG %} + To assist you in generating new {{targets}}, we have provided the following information: {{RAG}}. + **Note:** The provided RAG is for reference only. + You must carefully assess whether the RAG aligns with the {{targets}}. + If it does not, it should not be used. 
Exercise caution and make your own judgment. + {% endif %} + Also generate the relevant keys for the reasoning and the distilled knowledge that follows. For those keys, in particular for knowledge, explain in the context of the specific scenario to build up domain knowledge in the specific field rather than general knowledge. + +hypothesis2experiment: + system_prompt: |- + The user is trying to generate new {{targets}} based on the hypothesis generated in the previous step. + The {{targets}} are used in certain scenario, the scenario is as follows: + {{ scenario }} + The user will use the {{targets}} generated to do some experiments. The user will provide this information to you: + 1. The target hypothesis you are targeting to generate {{targets}} for. + 2. The hypothesis generated in the previous steps and their corresponding feedbacks. + 3. Former proposed {{targets}} on similar hypothesis. + 4. Some additional information to help you generate new {{targets}}. + Please generate the output, including 1-2 factors implemented on the hypothesis, without any other content, following the format below: + {{ experiment_output_format }} + + user_prompt: |- + The user has made several hypothesis on this scenario and did several evaluation on them. + The target hypothesis you are targeting to generate {{targets}} for is as follows: + {{ target_hypothesis }} + The former hypothesis and the corresponding feedbacks are as follows: + {{ hypothesis_and_feedback }} + Please generate the new {{targets}} based on the information above. 
diff --git a/alphaagent/components/runner/__init__.py b/alphaagent/components/runner/__init__.py new file mode 100755 index 00000000..86029f92 --- /dev/null +++ b/alphaagent/components/runner/__init__.py @@ -0,0 +1,25 @@ +import pickle +import shutil +from pathlib import Path +from typing import Any, Tuple + +from alphaagent.core.developer import Developer +from alphaagent.core.experiment import ASpecificExp, Experiment +from alphaagent.oai.llm_utils import md5_hash + + +class CachedRunner(Developer[ASpecificExp]): + def get_cache_key(self, exp: Experiment, **kwargs) -> str: + all_tasks = [] + for based_exp in exp.based_experiments: + all_tasks.extend(based_exp.sub_tasks) + all_tasks.extend(exp.sub_tasks) + task_info_list = [task.get_task_information() for task in all_tasks] + task_info_str = "\n".join(task_info_list) + return md5_hash(task_info_str) + + def assign_cached_result(self, exp: Experiment, cached_res: Experiment) -> Experiment: + if exp.based_experiments and exp.based_experiments[-1].result is None: + exp.based_experiments[-1].result = cached_res.based_experiments[-1].result + exp.result = cached_res.result + return exp diff --git a/alphaagent/components/workflow/alphaagent_loop.py b/alphaagent/components/workflow/alphaagent_loop.py new file mode 100755 index 00000000..156d3043 --- /dev/null +++ b/alphaagent/components/workflow/alphaagent_loop.py @@ -0,0 +1,229 @@ +""" +Model workflow with session control +It is from `rdagent/app/qlib_rd_loop/model.py` and try to replace `rdagent/app/qlib_rd_loop/RDAgent.py` +""" + +import time +import pandas as pd +from typing import Any + +from alphaagent.components.workflow.conf import BaseFacSetting +from alphaagent.core.developer import Developer +from alphaagent.core.proposal import ( + Hypothesis2Experiment, + HypothesisExperiment2Feedback, + HypothesisGen, + Trace, +) +from alphaagent.core.scenario import Scenario +from alphaagent.core.utils import import_class +from alphaagent.log import logger +from 
alphaagent.log.time import measure_time +from alphaagent.utils.workflow import LoopBase, LoopMeta +from alphaagent.core.exception import FactorEmptyError +import threading + + +import datetime +import pickle +from collections import defaultdict +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable + +from tqdm.auto import tqdm + +from alphaagent.core.exception import CoderError +from alphaagent.log import logger +from functools import wraps + +# 定义装饰器:在函数调用前检查stop_event + + +def stop_event_check(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + if STOP_EVENT is not None and STOP_EVENT.is_set(): + # 当收到停止信号时,可以直接抛出异常或返回特定值,这里示例抛出异常 + raise Exception("Operation stopped due to stop_event flag.") + return func(self, *args, **kwargs) + return wrapper + + +class AlphaAgentLoop(LoopBase, metaclass=LoopMeta): + skip_loop_error = (FactorEmptyError,) + + @measure_time + def __init__(self, PROP_SETTING: BaseFacSetting, potential_direction, stop_event: threading.Event, use_local: bool = True): + with logger.tag("init"): + self.use_local = use_local + logger.info(f"初始化AlphaAgentLoop,使用{'本地环境' if use_local else 'Docker容器'}回测") + scen: Scenario = import_class(PROP_SETTING.scen)(use_local=use_local) + logger.log_object(scen, tag="scenario") + + ### 换成基于初始hypo的,生成完整的hypo + self.hypothesis_generator: HypothesisGen = import_class(PROP_SETTING.hypothesis_gen)(scen, potential_direction) + logger.log_object(self.hypothesis_generator, tag="hypothesis generator") + + ### 换成一次生成10个因子 + self.factor_constructor: Hypothesis2Experiment = import_class(PROP_SETTING.hypothesis2experiment)() + logger.log_object(self.factor_constructor, tag="experiment generation") + + ### 加入代码执行中的 Variables / Functions + self.coder: Developer = import_class(PROP_SETTING.coder)(scen) + logger.log_object(self.coder, tag="coder") + + self.runner: Developer = import_class(PROP_SETTING.runner)(scen) + logger.log_object(self.runner, tag="runner") + + 
self.summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.summarizer)(scen) + logger.log_object(self.summarizer, tag="summarizer") + self.trace = Trace(scen=scen) + + global STOP_EVENT + STOP_EVENT = stop_event + super().__init__() + + @classmethod + def load(cls, path, use_local: bool = True): + """加载现有会话""" + instance = super().load(path) + instance.use_local = use_local + logger.info(f"加载AlphaAgentLoop,使用{'本地环境' if use_local else 'Docker容器'}回测") + return instance + + @measure_time + @stop_event_check + def factor_propose(self, prev_out: dict[str, Any]): + """ + 提出作为构建因子的基础的假设 + """ + with logger.tag("r"): + idea = self.hypothesis_generator.gen(self.trace) + logger.log_object(idea, tag="hypothesis generation") + return idea + + @measure_time + @stop_event_check + def factor_construct(self, prev_out: dict[str, Any]): + """ + 基于假设构造多个不同的因子 + """ + with logger.tag("r"): + factor = self.factor_constructor.convert(prev_out["factor_propose"], self.trace) + logger.log_object(factor.sub_tasks, tag="experiment generation") + return factor + + @measure_time + @stop_event_check + def factor_calculate(self, prev_out: dict[str, Any]): + """ + 根据因子表达式计算过去的因子表(因子值) + """ + with logger.tag("d"): # develop + factor = self.coder.develop(prev_out["factor_construct"]) + logger.log_object(factor.sub_workspace_list, tag="coder result") + return factor + + + @measure_time + @stop_event_check + def factor_backtest(self, prev_out: dict[str, Any]): + """ + 回测因子 + """ + with logger.tag("ef"): # evaluate and feedback + logger.info(f"Start factor backtest (Local: {self.use_local})") + exp = self.runner.develop(prev_out["factor_calculate"], use_local=self.use_local) + if exp is None: + logger.error(f"Factor extraction failed.") + raise FactorEmptyError("Factor extraction failed.") + logger.log_object(exp, tag="runner result") + return exp + + @measure_time + @stop_event_check + def feedback(self, prev_out: dict[str, Any]): + feedback = 
self.summarizer.generate_feedback(prev_out["factor_backtest"], prev_out["factor_propose"], self.trace) + with logger.tag("ef"): # evaluate and feedback + logger.log_object(feedback, tag="feedback") + self.trace.hist.append((prev_out["factor_propose"], prev_out["factor_backtest"], feedback)) + + + + +class BacktestLoop(LoopBase, metaclass=LoopMeta): + skip_loop_error = (FactorEmptyError,) + @measure_time + def __init__(self, PROP_SETTING: BaseFacSetting, factor_path=None): + with logger.tag("init"): + + self.factor_path = factor_path + + scen: Scenario = import_class(PROP_SETTING.scen)() + logger.log_object(scen, tag="scenario") + + self.hypothesis_generator: HypothesisGen = import_class(PROP_SETTING.hypothesis_gen)(scen) + logger.log_object(self.hypothesis_generator, tag="hypothesis generator") + + self.factor_constructor: Hypothesis2Experiment = import_class(PROP_SETTING.hypothesis2experiment)(factor_path=factor_path) + logger.log_object(self.factor_constructor, tag="experiment generation") + + self.coder: Developer = import_class(PROP_SETTING.coder)(scen, with_feedback=False, with_knowledge=False, knowledge_self_gen=False) + logger.log_object(self.coder, tag="coder") + + self.runner: Developer = import_class(PROP_SETTING.runner)(scen) + logger.log_object(self.runner, tag="runner") + + self.summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.summarizer)(scen) + logger.log_object(self.summarizer, tag="summarizer") + self.trace = Trace(scen=scen) + super().__init__() + + def factor_propose(self, prev_out: dict[str, Any]): + """ + Market hypothesis on which factors are built + """ + with logger.tag("r"): + idea = self.hypothesis_generator.gen(self.trace) + logger.log_object(idea, tag="hypothesis generation") + return idea + + + @measure_time + def factor_construct(self, prev_out: dict[str, Any]): + """ + Construct a variety of factors that depend on the hypothesis + """ + with logger.tag("r"): + factor = 
self.factor_constructor.convert(prev_out["factor_propose"], self.trace) + logger.log_object(factor.sub_tasks, tag="experiment generation") + return factor + + @measure_time + def factor_calculate(self, prev_out: dict[str, Any]): + """ + Debug factors and calculate their values + """ + with logger.tag("d"): # develop + factor = self.coder.develop(prev_out["factor_construct"]) + logger.log_object(factor.sub_workspace_list, tag="coder result") + return factor + + + @measure_time + def factor_backtest(self, prev_out: dict[str, Any]): + """ + Conduct Backtesting + """ + with logger.tag("ef"): # evaluate and feedback + exp = self.runner.develop(prev_out["factor_calculate"]) + if exp is None: + logger.error(f"Factor extraction failed.") + raise FactorEmptyError("Factor extraction failed.") + logger.log_object(exp, tag="runner result") + return exp + + @measure_time + def stop(self, prev_out: dict[str, Any]): + exit(0) diff --git a/alphaagent/components/workflow/conf.py b/alphaagent/components/workflow/conf.py new file mode 100755 index 00000000..780f4f3d --- /dev/null +++ b/alphaagent/components/workflow/conf.py @@ -0,0 +1,39 @@ +from alphaagent.core.conf import ExtendedBaseSettings + + +class BasePropSetting(ExtendedBaseSettings): + """ + The common part of the config for RD Loop to propose and development + You can add following config in the subclass to distinguish the environment variables. + """ + + scen: str = "" + knowledge_base: str = "" + knowledge_base_path: str = "" + hypothesis_gen: str = "" + hypothesis2experiment: str = "" + coder: str = "" + runner: str = "" + summarizer: str = "" + + evolving_n: int = 10 + + +class BaseFacSetting(ExtendedBaseSettings): + """ + The common part of the config for Alpha Agent Loop to propose and development + You can add following config in the subclass to distinguish the environment variables. 
+ """ + + scen: str = "" + knowledge_base: str = "" + knowledge_base_path: str = "" + hypothesis_gen: str = "" + construction: str = "" + calculation: str = "" + + coder: str = "" + runner: str = "" + summarizer: str = "" + + evolving_n: int = 10 diff --git a/alphaagent/components/workflow/rd_loop.py b/alphaagent/components/workflow/rd_loop.py new file mode 100755 index 00000000..0e5cd552 --- /dev/null +++ b/alphaagent/components/workflow/rd_loop.py @@ -0,0 +1,79 @@ +""" +Model workflow with session control +It is from `rdagent/app/qlib_rd_loop/model.py` and try to replace `rdagent/app/qlib_rd_loop/RDAgent.py` +""" + +import time +from typing import Any + +from alphaagent.components.workflow.conf import BasePropSetting +from alphaagent.core.developer import Developer +from alphaagent.core.proposal import ( + Hypothesis2Experiment, + HypothesisExperiment2Feedback, + HypothesisGen, + Trace, +) +from alphaagent.core.scenario import Scenario +from alphaagent.core.utils import import_class +from alphaagent.log import logger +from alphaagent.log.time import measure_time +from alphaagent.utils.workflow import LoopBase, LoopMeta + + +class RDLoop(LoopBase, metaclass=LoopMeta): + @measure_time + def __init__(self, PROP_SETTING: BasePropSetting): + with logger.tag("init"): + scen: Scenario = import_class(PROP_SETTING.scen)() + logger.log_object(scen, tag="scenario") + + self.hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.hypothesis_gen)(scen) + logger.log_object(self.hypothesis_gen, tag="hypothesis generator") + + self.hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.hypothesis2experiment)() + logger.log_object(self.hypothesis2experiment, tag="hypothesis2experiment") + self.coder: Developer = import_class(PROP_SETTING.coder)(scen) + logger.log_object(self.coder, tag="coder") + self.runner: Developer = import_class(PROP_SETTING.runner)(scen) + logger.log_object(self.runner, tag="runner") + + self.summarizer: HypothesisExperiment2Feedback = 
import_class(PROP_SETTING.summarizer)(scen) + logger.log_object(self.summarizer, tag="summarizer") + self.trace = Trace(scen=scen) + super().__init__() + + @measure_time + def propose(self, prev_out: dict[str, Any]): + with logger.tag("r"): # research + hypothesis = self.hypothesis_gen.gen(self.trace) + logger.log_object(hypothesis, tag="hypothesis generation") + return hypothesis + + @measure_time + def exp_gen(self, prev_out: dict[str, Any]): + with logger.tag("r"): # research + exp = self.hypothesis2experiment.convert(prev_out["propose"], self.trace) + logger.log_object(exp.sub_tasks, tag="experiment generation") + return exp + + @measure_time + def coding(self, prev_out: dict[str, Any]): + with logger.tag("d"): # develop + exp = self.coder.develop(prev_out["exp_gen"]) + logger.log_object(exp.sub_workspace_list, tag="coder result") + return exp + + @measure_time + def running(self, prev_out: dict[str, Any]): + with logger.tag("ef"): # evaluate and feedback + exp = self.runner.develop(prev_out["coding"]) + logger.log_object(exp, tag="runner result") + return exp + + @measure_time + def feedback(self, prev_out: dict[str, Any]): + feedback = self.summarizer.generate_feedback(prev_out["running"], prev_out["propose"], self.trace) + with logger.tag("ef"): # evaluate and feedback + logger.log_object(feedback, tag="feedback") + self.trace.hist.append((prev_out["propose"], prev_out["running"], feedback)) diff --git a/alphaagent/core/conf.py b/alphaagent/core/conf.py new file mode 100755 index 00000000..eddf5547 --- /dev/null +++ b/alphaagent/core/conf.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +# TODO: use pydantic for other modules in Qlib +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from pydantic.fields import FieldInfo + +from pydantic_settings import ( + BaseSettings, + EnvSettingsSource, + PydanticBaseSettingsSource, + SettingsConfigDict, +) + + +class ExtendedEnvSettingsSource(EnvSettingsSource): + def 
get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]: + # Dynamically gather prefixes from the current and parent classes + prefixes = [self.config.get("env_prefix", "")] + if hasattr(self.settings_cls, "__bases__"): + for base in self.settings_cls.__bases__: + if hasattr(base, "model_config"): + parent_prefix = base.model_config.get("env_prefix") + if parent_prefix and parent_prefix not in prefixes: + prefixes.append(parent_prefix) + for prefix in prefixes: + self.env_prefix = prefix + env_val, field_key, value_is_complex = super().get_field_value(field, field_name) + if env_val is not None: + return env_val, field_key, value_is_complex + + return super().get_field_value(field, field_name) + + +class ExtendedSettingsConfigDict(SettingsConfigDict, total=False): ... + + +class ExtendedBaseSettings(BaseSettings): + + @classmethod + def settings_customise_sources( + cls, + settings_cls: type[BaseSettings], + init_settings: PydanticBaseSettingsSource, # noqa + env_settings: PydanticBaseSettingsSource, # noqa + dotenv_settings: PydanticBaseSettingsSource, # noqa + file_secret_settings: PydanticBaseSettingsSource, # noqa + ) -> tuple[PydanticBaseSettingsSource, ...]: + return (ExtendedEnvSettingsSource(settings_cls),) + + +class RDAgentSettings(ExtendedBaseSettings): + # TODO: (xiao) I think LLMSetting may be a better name. + # TODO: (xiao) I think most of the config should be in oai.config + # Log configs + # TODO: (xiao) think it can be a separate config. 
+ log_trace_path: str | None = None + + # azure document intelligence configs + azure_document_intelligence_key: str = "" + azure_document_intelligence_endpoint: str = "" + # factor extraction conf + max_input_duplicate_factor_group: int = 300 + max_output_duplicate_factor_group: int = 20 + max_kmeans_group_number: int = 40 + + # workspace conf + workspace_path: Path = Path.cwd() / "git_ignore_folder" / "RD-Agent_workspace" + + # multi processing conf + multi_proc_n: int = 1 + + # pickle cache conf + cache_with_pickle: bool = True # whether to use pickle cache + pickle_cache_folder_path_str: str = str( + Path.cwd() / "pickle_cache/", + ) # the path of the folder to store the pickle cache + use_file_lock: bool = ( + True # when calling the function with same parameters, whether to use file lock to avoid + # executing the function multiple times + ) + + +RD_AGENT_SETTINGS = RDAgentSettings() diff --git a/alphaagent/core/developer.py b/alphaagent/core/developer.py new file mode 100755 index 00000000..32809712 --- /dev/null +++ b/alphaagent/core/developer.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Generic + +from alphaagent.core.experiment import ASpecificExp + +if TYPE_CHECKING: + from alphaagent.core.scenario import Scenario + + +class Developer(ABC, Generic[ASpecificExp]): + def __init__(self, scen: Scenario) -> None: + self.scen: Scenario = scen + + @abstractmethod + def develop(self, exp: ASpecificExp) -> ASpecificExp: + """ + Task Generator should take in an experiment. + + Because the schedule of different tasks is crucial for the final performance + due to it affects the learning process. + + """ + error_message = "generate method is not implemented." 
+ raise NotImplementedError(error_message) diff --git a/alphaagent/core/evaluation.py b/alphaagent/core/evaluation.py new file mode 100755 index 00000000..a2e81a22 --- /dev/null +++ b/alphaagent/core/evaluation.py @@ -0,0 +1,26 @@ +from abc import ABC, abstractmethod + +from alphaagent.core.experiment import Task, Workspace +from alphaagent.core.scenario import Scenario + + +class Feedback: + pass + + +class Evaluator(ABC): + def __init__( + self, + scen: Scenario, + ) -> None: + self.scen = scen + + @abstractmethod + def evaluate( + self, + target_task: Task, + implementation: Workspace, + gt_implementation: Workspace, + **kwargs: object, + ) -> None: + raise NotImplementedError diff --git a/alphaagent/core/evolving_agent.py b/alphaagent/core/evolving_agent.py new file mode 100755 index 00000000..b6a5cd53 --- /dev/null +++ b/alphaagent/core/evolving_agent.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any + +from tqdm import tqdm + +if TYPE_CHECKING: + from alphaagent.core.evaluation import Evaluator + from alphaagent.core.evolving_framework import EvolvableSubjects + +from alphaagent.core.evaluation import Feedback +from alphaagent.core.evolving_framework import EvolvingStrategy, EvoStep +from alphaagent.log import logger + + +class EvoAgent(ABC): + def __init__(self, max_loop: int, evolving_strategy: EvolvingStrategy) -> None: + self.max_loop = max_loop + self.evolving_strategy = evolving_strategy + + @abstractmethod + def multistep_evolve( + self, + evo: EvolvableSubjects, + eva: Evaluator | Feedback, + filter_final_evo: bool = False, + ) -> EvolvableSubjects: ... + + @abstractmethod + def filter_evolvable_subjects_by_feedback( + self, + evo: EvolvableSubjects, + feedback: Feedback | None, + ) -> EvolvableSubjects: ... 
+ + +class RAGEvoAgent(EvoAgent): + def __init__( + self, + max_loop: int, + evolving_strategy: EvolvingStrategy, + rag: Any, + with_knowledge: bool = False, + with_feedback: bool = True, + knowledge_self_gen: bool = False, + ) -> None: + super().__init__(max_loop, evolving_strategy) + self.rag = rag + self.evolving_trace: list[EvoStep] = [] + self.with_knowledge = with_knowledge + self.with_feedback = with_feedback + self.knowledge_self_gen = knowledge_self_gen + + def multistep_evolve( + self, + evo: EvolvableSubjects, + eva: Evaluator | Feedback, + filter_final_evo: bool = False, + ) -> EvolvableSubjects: + """多步进化方法,实现了完整的进化循环流程 + + Args: + evo (EvolvableSubjects): 可进化的主体对象 + eva (Evaluator | Feedback): 评估器或反馈对象 + filter_final_evo (bool, optional): 是否在最终结果中过滤进化主体. Defaults to False. + + Returns: + EvolvableSubjects: 进化后的主体对象 + + 进化流程包含以下步骤: + 1. 知识自进化:如果启用,根据进化轨迹生成新知识 + 2. RAG查询:如果启用,使用RAG检索相关知识 + 3. 进化:使用进化策略对主体进行进化 + 4. 打包进化结果:将进化结果和查询到的知识打包 + 5. 评估:如果启用反馈,对进化结果进行评估 + 6. 更新轨迹:将本次进化步骤添加到进化轨迹中 + """ + for _ in tqdm(range(self.max_loop), "Debugging"): + # 1. 知识自进化 - 如果启用了知识自生成且RAG可用,根据进化轨迹生成新知识 + if self.knowledge_self_gen and self.rag is not None: + self.rag.generate_knowledge(self.evolving_trace) + + # 2. RAG查询 - 如果启用了知识检索且RAG可用,查询相关知识 + queried_knowledge = None + if self.with_knowledge and self.rag is not None: + # TODO: 将进化轨迹放在这里实际上并不起作用 + queried_knowledge = self.rag.query(evo, self.evolving_trace) + + # 3. 进化 - 使用进化策略对主体进行进化 + evo = self.evolving_strategy.evolve( + evo=evo, + evolving_trace=self.evolving_trace, + queried_knowledge=queried_knowledge, + ) + + # 记录进化后的代码工作区 + # TODO: 由于设计问题,我们选择忽略这个mypy错误 + logger.log_object(evo.sub_workspace_list, tag="evolving code") # type: ignore[attr-defined] + for sw in evo.sub_workspace_list: # type: ignore[attr-defined] + logger.info(f"evolving code workspace: {sw}") + + # 4. 打包进化结果 - 将进化结果和查询到的知识打包成进化步骤 + es = EvoStep(evo, queried_knowledge) + + # 5. 
评估 - 如果启用了反馈,对进化结果进行评估 + if self.with_feedback: + es.feedback = ( + # TODO: 由于rdagent.core.evaluation.Evaluator的不规则设计, + # 这里未能通过mypy的测试,暂时忽略这个错误 + eva + if isinstance(eva, Feedback) + else eva.evaluate(evo, queried_knowledge=queried_knowledge) # type: ignore[arg-type, call-arg] + ) + logger.log_object(es.feedback, tag="evolving feedback") + + # 6. 更新轨迹 - 将本次进化步骤添加到进化轨迹中 + self.evolving_trace.append(es) + + # 如果启用了反馈且需要过滤,根据最后一次反馈过滤进化主体 + if self.with_feedback and filter_final_evo: + evo = self.filter_evolvable_subjects_by_feedback(evo, self.evolving_trace[-1].feedback) + return evo + + def filter_evolvable_subjects_by_feedback( + self, + evo: EvolvableSubjects, + feedback: Feedback | None, + ) -> EvolvableSubjects: + # Implementation of filter_evolvable_subjects_by_feedback method + pass diff --git a/alphaagent/core/evolving_framework.py b/alphaagent/core/evolving_framework.py new file mode 100755 index 00000000..0393aeca --- /dev/null +++ b/alphaagent/core/evolving_framework.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import copy +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from alphaagent.core.knowledge_base import KnowledgeBase + +if TYPE_CHECKING: + from alphaagent.core.evaluation import Feedback + from alphaagent.core.scenario import Scenario + + +class Knowledge: + pass + + +class QueriedKnowledge: + pass + + +class EvolvingKnowledgeBase(KnowledgeBase): + @abstractmethod + def query( + self, + ) -> QueriedKnowledge | None: + raise NotImplementedError + + +class EvolvableSubjects: + """The target object to be evolved""" + + def clone(self) -> EvolvableSubjects: + return copy.deepcopy(self) + + +class QlibEvolvableSubjects(EvolvableSubjects): ... + + +@dataclass +class EvoStep: + """At a specific step, + based on + - previous trace + - newly RAG knowledge `QueriedKnowledge` + + the EvolvableSubjects is evolved to a new one `EvolvableSubjects`. 
+ + (optional) After evaluation, we get feedback `feedback`. + """ + + evolvable_subjects: EvolvableSubjects + queried_knowledge: QueriedKnowledge | None = None + feedback: Feedback | None = None + + +class EvolvingStrategy(ABC): + def __init__(self, scen: Scenario) -> None: + self.scen = scen + + @abstractmethod + def evolve( + self, + *evo: EvolvableSubjects, + evolving_trace: list[EvoStep] | None = None, + queried_knowledge: QueriedKnowledge | None = None, + **kwargs: Any, + ) -> EvolvableSubjects: + """The evolving trace is a list of (evolvable_subjects, feedback) ordered + according to the time. + + The reason why the parameter is important for the evolving. + - evolving_trace: the historical feedback is important. + - queried_knowledge: queried knowledge + """ + + +class RAGStrategy(ABC): + """Retrieval Augmentation Generation Strategy""" + + def __init__(self, knowledgebase: EvolvingKnowledgeBase) -> None: + self.knowledgebase: EvolvingKnowledgeBase = knowledgebase + + @abstractmethod + def query( + self, + evo: EvolvableSubjects, + evolving_trace: list[EvoStep], + **kwargs: Any, + ) -> QueriedKnowledge | None: + pass + + @abstractmethod + def generate_knowledge( + self, + evolving_trace: list[EvoStep], + *, + return_knowledge: bool = False, + **kwargs: Any, + ) -> Knowledge | None: + """Generating new knowledge based on the evolving trace. + - It is encouraged to query related knowledge before generating new knowledge. + + RAGStrategy should maintain the new knowledge all by itself. + """ diff --git a/alphaagent/core/exception.py b/alphaagent/core/exception.py new file mode 100755 index 00000000..2167ab9d --- /dev/null +++ b/alphaagent/core/exception.py @@ -0,0 +1,50 @@ +class CoderError(Exception): + """ + Exceptions raised when Implementing and running code. + - start: FactorTask => FactorGenerator + - end: Get dataframe after execution + + The more detailed evaluation in dataframe values are managed by the evaluator. 
+ """ + + +class CodeFormatError(CoderError): + """ + The generated code is not found due format error. + """ + + +class CustomRuntimeError(CoderError): + """ + The generated code fail to execute the script. + """ + + +class NoOutputError(CoderError): + """ + The code fail to generate output file. + """ + + +class CustomRunnerError(Exception): + """ + Exceptions raised when running the code output. + """ + + +class FactorEmptyError(Exception): + """ + Exceptions raised when no factor is generated correctly + """ + + +class ModelEmptyError(Exception): + """ + Exceptions raised when no model is generated correctly + """ + + +class KaggleError(Exception): + """ + Exceptions raised when calling Kaggle API + """ diff --git a/alphaagent/core/experiment.py b/alphaagent/core/experiment.py new file mode 100755 index 00000000..a3c5ffc9 --- /dev/null +++ b/alphaagent/core/experiment.py @@ -0,0 +1,226 @@ +from __future__ import annotations + +import os +import platform +import shutil +import uuid +from abc import ABC, abstractmethod +from collections.abc import Sequence +from copy import deepcopy +from pathlib import Path +from typing import Any, Generic, TypeVar + +from alphaagent.core.conf import RD_AGENT_SETTINGS + +""" +This file contains the all the class about organizing the task in RD-Agent. +""" + + +class Task(ABC): + def __init__(self, name: str, version: int = 1) -> None: + """ + The version of the task, default is 1 + Because qlib tasks execution and kaggle tasks execution are different, we need to distinguish them. + TODO: We may align them in the future. + """ + self.version = version + self.name = name + + @abstractmethod + def get_task_information(self) -> str: + """ + Get the task information string to build the unique key + """ + + +ASpecificTask = TypeVar("ASpecificTask", bound=Task) + + +class Workspace(ABC, Generic[ASpecificTask]): + """ + A workspace is a place to store the task implementation. It evolves as the developer implements the task. 
+ To get a snapshot of the workspace, make sure call `copy` to get a copy of the workspace. + """ + + def __init__(self, target_task: ASpecificTask | None = None) -> None: + self.target_task: ASpecificTask | None = target_task + + @abstractmethod + def execute(self, *args: Any, **kwargs: Any) -> object | None: + error_message = "execute method is not implemented." + raise NotImplementedError(error_message) + + @abstractmethod + def copy(self) -> Workspace: + error_message = "copy method is not implemented." + raise NotImplementedError(error_message) + + +ASpecificWS = TypeVar("ASpecificWS", bound=Workspace) + + +class WsLoader(ABC, Generic[ASpecificTask, ASpecificWS]): + @abstractmethod + def load(self, task: ASpecificTask) -> ASpecificWS: + error_message = "load method is not implemented." + raise NotImplementedError(error_message) + + +class FBWorkspace(Workspace): + """ + File-based task workspace + + The implemented task will be a folder which contains related elements. + - Data + - Code Workspace + - Output + - After execution, it will generate the final output as file. + + A typical way to run the pipeline of FBWorkspace will be: + (We didn't add it as a method due to that we may pass arguments into + `prepare` or `execute` based on our requirements.) + + .. 
code-block:: python + + def run_pipeline(self, **files: str): + self.prepare() + self.inject_code(**files) + self.execute() + + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.code_dict: dict[str, Any] = {} + self.code_dict = ( + {} + ) # The code injected into the folder, store them in the variable to reproduce the former result + self.workspace_path: Path = RD_AGENT_SETTINGS.workspace_path / uuid.uuid4().hex + + @property + def code(self) -> str: + code_string = "" + for file_name, code in self.code_dict.items(): + code_string += f"File: {file_name}\n{code}\n" + return code_string + + def prepare(self) -> None: + """ + Prepare the workspace except the injected code + - Data + - Documentation + typical usage of `*args, **kwargs`: + Different methods shares the same data. The data are passed by the arguments. + """ + self.workspace_path.mkdir(parents=True, exist_ok=True) + + @staticmethod + def link_all_files_in_folder_to_workspace(data_path: Path, workspace_path: Path) -> None: + data_path = Path(data_path).absolute() # in case of relative path that will be invalid when we change cwd. + workspace_path = Path(workspace_path) + for data_file_path in data_path.iterdir(): + workspace_data_file_path = workspace_path / data_file_path.name + if workspace_data_file_path.exists(): + workspace_data_file_path.unlink() + if platform.system() == "Linux": + os.symlink(data_file_path, workspace_data_file_path) + if platform.system() == "Windows": + os.link(data_file_path, workspace_data_file_path) + + def inject_code(self, **files: str) -> None: + """ + Inject the code into the folder. 
+ { + : + } + """ + self.prepare() + for k, v in files.items(): + self.code_dict[k] = v + target_file_path = self.workspace_path / k + if not target_file_path.parent.exists(): + target_file_path.parent.mkdir(parents=True, exist_ok=True) + with Path.open(self.workspace_path / k, "w") as f: + f.write(v) + + def get_files(self) -> list[Path]: + """ + Get the environment description. + + To be general, we only return a list of filenames. + How to summarize the environment is the responsibility of the Developer. + """ + return list(self.workspace_path.iterdir()) + + def inject_code_from_folder(self, folder_path: Path) -> None: + """ + Load the workspace from the folder + """ + for file_path in folder_path.rglob("*"): + if file_path.suffix in (".py", ".yaml", ".md"): + relative_path = file_path.relative_to(folder_path) + self.inject_code(**{str(relative_path): file_path.read_text()}) + + def copy(self) -> FBWorkspace: + """ + copy the workspace from the original one + """ + return deepcopy(self) + + def clear(self) -> None: + """ + Clear the workspace + """ + shutil.rmtree(self.workspace_path, ignore_errors=True) + self.code_dict = {} + + def execute(self) -> object | None: + """ + Before each execution, make sure to prepare and inject code + """ + self.prepare() + self.inject_code(**self.code_dict) + return None + + def __str__(self) -> str: + return f"Workspace[{self.workspace_path=}" + ( + "]" if self.target_task is None else f",{self.target_task.name=}]" + ) + + +ASpecificWSForExperiment = TypeVar("ASpecificWSForExperiment", bound=Workspace) +ASpecificWSForSubTasks = TypeVar("ASpecificWSForSubTasks", bound=Workspace) + + +class Experiment( + ABC, + Generic[ASpecificTask, ASpecificWSForExperiment, ASpecificWSForSubTasks], +): + """ + The experiment is a sequence of tasks and the implementations of the tasks after generated by the Developer. 
+ """ + + def __init__( + self, + sub_tasks: Sequence[ASpecificTask], + based_experiments: Sequence[ASpecificWSForExperiment] = [], + ) -> None: + self.sub_tasks: Sequence[ASpecificTask] = sub_tasks + self.sub_workspace_list: list[ASpecificWSForSubTasks | None] = [None] * len(self.sub_tasks) + self.based_experiments: Sequence[ASpecificWSForExperiment] = based_experiments + self.result: object = None # The result of the experiment, can be different types in different scenarios. + self.sub_results: dict[str, float] = {} + self.experiment_workspace: ASpecificWSForExperiment | None = None + + +ASpecificExp = TypeVar("ASpecificExp", bound=Experiment) + +TaskOrExperiment = TypeVar("TaskOrExperiment", Task, Experiment) + + +class Loader(ABC, Generic[TaskOrExperiment]): + @abstractmethod + def load(self, *args: Any, **kwargs: Any) -> TaskOrExperiment: + err_msg = "load method is not implemented." + raise NotImplementedError(err_msg) diff --git a/alphaagent/core/knowledge_base.py b/alphaagent/core/knowledge_base.py new file mode 100755 index 00000000..acb18477 --- /dev/null +++ b/alphaagent/core/knowledge_base.py @@ -0,0 +1,27 @@ +from pathlib import Path + +import dill as pickle # type: ignore[import-untyped] + +from alphaagent.log import logger + + +class KnowledgeBase: + def __init__(self, path: str | Path | None = None) -> None: + self.path = Path(path) if path else None + self.load() + + def load(self) -> None: + if self.path is not None and self.path.exists(): + with self.path.open("rb") as f: + loaded = pickle.load(f) + if isinstance(loaded, dict): + self.__dict__.update({k: v for k, v in loaded.items() if k != "path"}) + else: + self.__dict__.update({k: v for k, v in loaded.__dict__.items() if k != "path"}) + + def dump(self) -> None: + if self.path is not None: + self.path.parent.mkdir(parents=True, exist_ok=True) + pickle.dump(self.__dict__, self.path.open("wb")) + else: + logger.warning("KnowledgeBase path is not set, dump failed.") diff --git 
class Prompts(SingletonBaseClass, dict[str, str]):
    """
    Dictionary of prompts loaded from a YAML file.

    Every top-level key of the YAML document becomes an entry of this dict.

    Parameters
    ----------
    file_path : Path
        YAML file to read (decoded as UTF-8).

    Raises
    ------
    ValueError
        If the file is empty or its top level is not a mapping.
    """

    def __init__(self, file_path: Path) -> None:
        super().__init__()
        with file_path.open(encoding="utf8") as file:
            prompt_yaml_dict = yaml.safe_load(file)

        if prompt_yaml_dict is None:
            error_message = f"Failed to load prompts from {file_path}"
            raise ValueError(error_message)

        # A list or scalar at the top level has no .items(); report it explicitly
        # instead of failing later with an unrelated AttributeError.
        if not isinstance(prompt_yaml_dict, dict):
            error_message = (
                f"Failed to load prompts from {file_path}: "
                f"expected a mapping at the top level, got {type(prompt_yaml_dict).__name__}"
            )
            raise ValueError(error_message)

        for key, value in prompt_yaml_dict.items():
            self[key] = value
class HypothesisFeedback(Feedback):
    """
    Feedback on a hypothesis: the observations, their evaluation, a suggested new
    hypothesis, the reasoning, and the final accept/reject decision.

    The truth value of an instance is its ``decision``.
    """

    def __init__(
        self,
        observations: str,
        hypothesis_evaluation: str,
        new_hypothesis: str,
        reason: str,
        decision: bool,
    ) -> None:
        self.decision = decision
        self.reason = reason
        self.new_hypothesis = new_hypothesis
        self.hypothesis_evaluation = hypothesis_evaluation
        self.observations = observations

    def __bool__(self) -> bool:
        return self.decision

    def __str__(self) -> str:
        # One "Label: value" row per field, joined by newlines, no trailing newline.
        rows = (
            f"Observations: {self.observations}",
            f"Hypothesis Evaluation: {self.hypothesis_evaluation}",
            f"New Hypothesis: {self.new_hypothesis}",
            f"Decision: {self.decision}",
            f"Reason: {self.reason}",
        )
        return "\n".join(rows)
def get_sota_hypothesis_and_experiment(self) -> tuple[Hypothesis | None, Experiment | None]:
    """
    Return the hypothesis and experiment of the most recent accepted history entry.

    ``self.hist`` is scanned from newest to oldest; the first entry whose feedback has
    a truthy ``decision`` wins. Returns ``(None, None)`` when no entry was accepted.
    """
    accepted = next((entry for entry in reversed(self.hist) if entry[2].decision), None)
    if accepted is None:
        return None, None
    hypothesis, experiment, _feedback = accepted
    return hypothesis, experiment
+ For example: `mlflow` of Qlib will be included. + """ + error_message = "generate_feedback method is not implemented." + raise NotImplementedError(error_message) diff --git a/alphaagent/core/scenario.py b/alphaagent/core/scenario.py new file mode 100755 index 00000000..06a85a42 --- /dev/null +++ b/alphaagent/core/scenario.py @@ -0,0 +1,64 @@ +from abc import ABC, abstractmethod + +from alphaagent.core.experiment import Task + + +class Scenario(ABC): + @property + @abstractmethod + def background(self) -> str: + """Background information""" + + # TODO: We have to change all the sub classes to override get_source_data_desc instead of `source_data` + def get_source_data_desc(self, task: Task | None = None) -> str: # noqa: ARG002 + """ + Source data description + + The choice of data may vary based on the specific task at hand. + """ + return "" + + @property + def source_data(self) -> str: + """ + A convenient shortcut for describing source data + """ + return self.get_source_data_desc() + + @property + @abstractmethod + def interface(self) -> str: + """Interface description about how to run the code""" + + @property + @abstractmethod + def output_format(self) -> str: + """Output format description""" + + @property + @abstractmethod + def simulator(self) -> str: + """Simulator description""" + + @property + @abstractmethod + def rich_style_description(self) -> str: + """Rich style description to present""" + + @abstractmethod + def get_scenario_all_desc( + self, + task: Task | None = None, + filtered_tag: str | None = None, + simple_background: bool | None = None, + ) -> str: + """ + Combine all descriptions together + + The scenario description varies based on the task being performed. 
class SingletonBaseClass:
    """
    Because we try to support defining Singleton with `class A(SingletonBaseClass)`
    instead of `A(metaclass=SingletonMeta)` this class becomes necessary.

    One instance is kept per (fully qualified class name, keyword arguments)
    combination. Keyword argument values must therefore be hashable. Only ``__new__``
    is cached: Python still runs ``__init__`` on every construction call.
    """

    # Shared registry: (class name, sorted kwargs) -> instance.
    _instance_dict: ClassVar[dict] = {}

    def __new__(cls, *args: Any, **kwargs: Any) -> Any:
        # Since it's hard to align the difference call using args and kwargs, we strictly ask to use kwargs in Singleton
        if args:
            # TODO: this restriction can be solved.
            exception_message = "Please only use kwargs in Singleton to avoid misunderstanding."
            raise RDAgentException(exception_message)
        # Positional args are rejected above, so the key is built from the class name and kwargs only.
        instance_key = ((-1, f"{cls.__module__}.{cls.__name__}"), *sorted(kwargs.items()))
        # Key on the tuple itself rather than on hash(tuple): two different argument
        # sets whose hashes collide must not share an instance.
        if instance_key not in cls._instance_dict:
            cls._instance_dict[instance_key] = super().__new__(cls)
        return cls._instance_dict[instance_key]

    def __reduce__(self) -> NoReturn:
        """
        NOTE:
        When loading an object from a pickle, the __new__ method does not receive the `kwargs`
        it was initialized with. This makes it difficult to retrieve the correct singleton object.
        Therefore, we have made it unpickable.
        """
        msg = f"Instances of {self.__class__.__name__} cannot be pickled"
        raise pickle.PicklingError(msg)
def import_class(class_path: str) -> Any:
    """
    Import and return the object addressed by a dotted path.

    Parameters
    ----------
    class_path : str
        class path like "scripts.factor_implementation.baselines.naive.one_shot.OneshotFactorGen";
        everything before the last dot is the module, the rest is the attribute name.

    Returns
    -------
    class of `class_path`

    Raises
    ------
    ValueError
        If `class_path` has no module part (no dot, or a leading dot only).
    ImportError
        If the module cannot be imported.
    AttributeError
        If the module has no attribute with the requested name.
    """
    module_path, dot, class_name = class_path.rpartition(".")
    if not dot or not module_path:
        # Report the offending value instead of an unrelated tuple-unpacking error.
        error_message = f"Invalid class path {class_path!r}: expected the form 'package.module.ClassName'."
        raise ValueError(error_message)
    module = importlib.import_module(module_path)
    return getattr(module, class_name)
def cache_with_pickle(hash_func: Callable, post_process_func: Callable | None = None) -> Callable:
    """
    This decorator will cache the return value of the function with pickle.
    The cache key is generated by the hash_func. The hash function returns a string or None.
    If it returns None, the cache will not be used. The cache will be stored in the folder
    specified by RD_AGENT_SETTINGS.pickle_cache_folder_path_str with name hash_key.pkl.
    The post_process_func will be called with the original positional arguments and the
    cached result (keyword ``cached_res``) to give each caller a chance to process the
    cached result. The post_process_func should return the final result.

    When RD_AGENT_SETTINGS.use_file_lock is set, computing and storing a missing entry
    happens under a per-key file lock, and the cache is checked again once the lock is
    held, so concurrent callers of the same key compute the value only once.
    """

    def cache_decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def cache_wrapper(*args: Any, **kwargs: Any) -> Any:
            if not RD_AGENT_SETTINGS.cache_with_pickle:
                return func(*args, **kwargs)

            target_folder = Path(RD_AGENT_SETTINGS.pickle_cache_folder_path_str) / f"{func.__module__}.{func.__name__}"
            target_folder.mkdir(parents=True, exist_ok=True)
            hash_key = hash_func(*args, **kwargs)

            if hash_key is None:
                return func(*args, **kwargs)

            cache_file = target_folder / f"{hash_key}.pkl"
            lock_file = target_folder / f"{hash_key}.lock"

            def load_cached() -> Any:
                # NOTE(security): pickle.load runs arbitrary code; the cache folder must be trusted.
                with cache_file.open("rb") as f:
                    cached_res = pickle.load(f)
                # Only positional arguments are forwarded to post_process_func.
                return post_process_func(*args, cached_res=cached_res) if post_process_func else cached_res

            def compute_and_store() -> Any:
                result = func(*args, **kwargs)
                with cache_file.open("wb") as f:
                    pickle.dump(result, f)
                return result

            if cache_file.exists():
                return load_cached()

            if RD_AGENT_SETTINGS.use_file_lock:
                with FileLock(lock_file):
                    # Another process may have filled the cache while we waited for the lock.
                    if cache_file.exists():
                        return load_cached()
                    # Write while still holding the lock so waiters never recompute.
                    return compute_and_store()

            return compute_and_store()

        return cache_wrapper

    return cache_decorator
class Storage:
    """
    Basic storage to support saving objects.

    # Usage:

    The storage has mainly two kinds of users:
    - The logging end: you can choose any of the following methods to use the object
        - We can use it directly with the native logging storage
        - We can use it with other logging tools; for example, serve as a handler for loggers
    - The view end:
        - Mainly for the subclass of `logging.base.View`
        - It should provide two kinds of ways to provide content
            - offline content provision.
            - online content provision.

    NOTE(review): the methods are marked with `@abstractmethod`, but this class does not
    derive from `ABC`, so the marker is not enforced at instantiation time.
    """

    @abstractmethod
    def log(
        self,
        obj: object,
        name: str = "",
        save_type: Literal["json", "text", "pkl"] = "text",
        timestamp: datetime | None = None,
        **kwargs: dict,
    ) -> str | Path:
        """
        Persist one object.

        Parameters
        ----------
        obj : object
            The object for logging.
        name : str
            The name of the object. For example "a.b.c"
            We may log a lot of objects to a same name
        save_type : Literal["json", "text", "pkl"]
            Serialization format to use.
        timestamp : datetime | None
            Time to associate with the object.
        **kwargs
            Extra, implementation-specific options.

        Returns
        -------
        str | Path
            The storage identifier of the object.
        """
        ...

    @abstractmethod
    def iter_msg(self, watch: bool = False) -> Generator[Message, None, None]:
        """
        Iterate over the stored messages.

        Parameters
        ----------
        watch : bool
            should we watch the new content and display them
        """
        ...
diff --git a/alphaagent/log/logger.py b/alphaagent/log/logger.py new file mode 100755 index 00000000..3a8ed465 --- /dev/null +++ b/alphaagent/log/logger.py @@ -0,0 +1,166 @@ +import os +import sys +from contextlib import contextmanager +from datetime import datetime, timezone +from functools import partial +from logging import LogRecord +from multiprocessing import Pipe +from multiprocessing.connection import Connection +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Generator, Union + +from loguru import logger + +if TYPE_CHECKING: + from loguru import Record + +from psutil import Process + +from alphaagent.core.conf import RD_AGENT_SETTINGS +from alphaagent.core.utils import SingletonBaseClass + +from .storage import FileStorage +from .utils import LogColors, get_caller_info + + +class AgentLog(SingletonBaseClass): + """ + The files are organized based on the tag & PID + Here is an example tag + + .. code-block:: + + a + - b + - c + - 123 + - common_logs.log + - 1322 + - common_logs.log + - 1233 + - .pkl + - d + - 1233-673 ... + - 1233-4563 ... + - 1233-365 ... + + """ + + # TODO: Simplify it to introduce less concepts ( We may merge RDAgentLog, Storage &) + # Solution: Storage => PipeLog, View => PipeLogView, RDAgentLog is an instance of PipeLogger + # PipeLogger.info(...) , PipeLogger.get_resp() to get feedback from frontend. 
@contextmanager
def tag(self, tag: str) -> Generator[None, None, None]:
    """
    Context manager that appends ``tag`` to the current tag for the duration of the block.

    Nested tags are joined with ``.``; the appended part is stripped again on exit,
    including when the block raises.

    Raises
    ------
    ValueError
        If ``tag`` is empty or contains only whitespace.
    """
    if not tag.strip():
        raise ValueError("Tag cannot be empty.")

    # Prefix a dot only when there is already an outer tag to attach to.
    suffix = f".{tag}" if self._tag else tag

    # TODO: It may result in error in mutithreading or co-routine
    self._tag = self._tag + suffix
    try:
        yield
    finally:
        self._tag = self._tag[: -len(suffix)]
def info(self, msg: str, *, tag: str = "", raw: bool = False) -> None:
    """
    Log ``msg`` at INFO level to stderr and to the tag/PID specific ``common_logs.log``.

    Parameters
    ----------
    msg : str
        The message to log.
    tag : str
        Extra tag appended to the current context tag; together with the PID chain
        it selects the sub-folder of ``log_trace_path`` that receives the file log.
    raw : bool
        When True the message is emitted without the timestamp/level/caller prefix.
    """
    # TODO: too much duplicated. due to we have no logger with stream context;
    caller_info = get_caller_info()
    if raw:
        # NOTE(review): logger.remove() without an id appears to drop *all* registered
        # handlers, not only the stderr one — confirm this is intended.
        logger.remove()
        logger.add(sys.stderr, format=lambda r: "{message}")

    # "<context tag>.<tag>.<pid chain>"; strip(".") removes the dots left by empty parts.
    tag = f"{self._tag}.{tag}.{self.get_pids()}".strip(".")
    log_file_path = self.log_trace_path / tag.replace(".", "/") / "common_logs.log"
    if raw:
        file_handler_id = logger.add(log_file_path, format=partial(self.file_format, raw=True))
    else:
        file_handler_id = logger.add(log_file_path, format=self.file_format)

    # Patch the record with the caller captured above before emitting it.
    logger.patch(lambda r: r.update(caller_info)).info(msg)
    # The file handler only lives for this single message.
    logger.remove(file_handler_id)

    if raw:
        # Restore a stderr handler with the default format after the raw message.
        logger.remove()
        logger.add(sys.stderr)
logger.patch(lambda r: r.update(caller_info)).error(msg) + logger.remove(file_handler_id) diff --git a/alphaagent/log/storage.py b/alphaagent/log/storage.py new file mode 100755 index 00000000..0411a5ba --- /dev/null +++ b/alphaagent/log/storage.py @@ -0,0 +1,148 @@ +import json +import pickle +import re +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Generator, Literal, Union, cast + +from .base import Message, Storage + +LOG_LEVEL = Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] + + +class FileStorage(Storage): + """ + The info are logginged to the file systems + + TODO: describe the storage format + """ + + def __init__(self, path: str | Path = "./log/") -> None: + self.path = Path(path) + self.path.mkdir(parents=True, exist_ok=True) + + def log( + self, + obj: object, + name: str = "", + save_type: Literal["json", "text", "pkl"] = "text", + timestamp: datetime | None = None, + **kwargs: Any, + ) -> Union[str, Path]: + # TODO: We can remove the timestamp after we implement PipeLog + if timestamp is None: + timestamp = datetime.now(timezone.utc) + else: + timestamp = timestamp.astimezone(timezone.utc) + + cur_p = self.path / name.replace(".", "/") + cur_p.mkdir(parents=True, exist_ok=True) + + path = cur_p / f"{timestamp.strftime('%Y-%m-%d_%H-%M-%S-%f')}.log" + + if save_type == "json": + path = path.with_suffix(".json") + with path.open("w") as f: + try: + json.dump(obj, f) + except TypeError: + json.dump(json.loads(str(obj)), f) + return path + elif save_type == "pkl": + path = path.with_suffix(".pkl") + with path.open("wb") as f: + pickle.dump(obj, f) + return path + elif save_type == "text": + obj = str(obj) + with path.open("w") as f: + f.write(obj) + return path + + log_pattern = re.compile( + r"(?P\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) \| " + r"(?PDEBUG|INFO|WARNING|ERROR|CRITICAL) *\| " + r"(?P.+:.+:\d+) - " + ) + + def iter_msg(self, watch: bool = False) -> Generator[Message, None, None]: + msg_l 
def truncate(self, time: datetime) -> None:
    """
    Remove every log entry later than ``time`` from all ``*.log`` files under ``self.path``.

    An entry is the text from one ``self.log_pattern`` match up to the next match (or the
    end of the file). For a removed entry whose message contains "Logging object in",
    the file named in the message is deleted as well.

    Parameters
    ----------
    time : datetime
        Cut-off time. It is compared with timezone-aware UTC timestamps, so it has to be
        timezone-aware too.
    """
    # any message later than `time` will be removed
    for file in self.path.glob("**/*.log"):
        # Same encoding as the reader of these files in this class.
        content = file.read_text(encoding="utf-8")

        matches = list(self.log_pattern.finditer(content))
        kept_entries: list[str] = []

        for idx, match in enumerate(matches):
            # An entry ends where the next one starts.
            log_end = matches[idx + 1].start() if idx + 1 < len(matches) else len(content)
            timestamp = datetime.strptime(match.group("timestamp"), "%Y-%m-%d %H:%M:%S.%f").replace(
                tzinfo=timezone.utc
            )

            if timestamp > time:
                msg = content[match.end() : log_end].strip()
                if "Logging object in" in msg:
                    absolute_p = msg.split("Logging object in ")[1]
                    # The object file may already be gone (e.g. after an earlier, interrupted
                    # truncate); that must not abort the clean-up of the remaining entries.
                    Path(absolute_p).unlink(missing_ok=True)
                continue

            kept_entries.append(content[match.start() : log_end])

        # NOTE(review): text before the first match, and files without any match, end up
        # dropped/emptied — same as before; confirm that this is intended.
        file.write_text("".join(kept_entries), encoding="utf-8")
+ +We are not sure how generality of the UI, we can't make decision among following options: +- in general folder like rdagent/log/ui +- It is for specific scenario rdagent/scenarios/ +""" diff --git a/alphaagent/log/ui/app.py b/alphaagent/log/ui/app.py new file mode 100755 index 00000000..4b0a8b71 --- /dev/null +++ b/alphaagent/log/ui/app.py @@ -0,0 +1,1304 @@ +import argparse +import textwrap +from collections import defaultdict +from datetime import datetime, timezone +from importlib.resources import files as rfiles +from pathlib import Path +from typing import Callable, Type +import os +import re +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import streamlit as st +from plotly.subplots import make_subplots +from streamlit import session_state as state +from streamlit_theme import st_theme + +from alphaagent.components.coder.factor_coder.evaluators import FactorSingleFeedback +from alphaagent.components.coder.factor_coder.factor import FactorFBWorkspace, FactorTask +from alphaagent.components.coder.model_coder.evaluators import ModelSingleFeedback +from alphaagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask +from alphaagent.core.proposal import Hypothesis, HypothesisFeedback +from alphaagent.core.scenario import Scenario +from alphaagent.log.base import Message +from alphaagent.log.storage import FileStorage +from alphaagent.log.ui.qlib_report_figure import report_figure +# from alphaagent.scenarios.data_mining.experiment.model_experiment import DMModelScenario +# from alphaagent.scenarios.general_model.scenario import GeneralModelScenario +# from alphaagent.scenarios.kaggle.experiment.scenario import KGScenario +from alphaagent.scenarios.qlib.experiment.factor_experiment import QlibFactorScenario, QlibAlphaAgentScenario +from alphaagent.scenarios.qlib.experiment.factor_from_report_experiment import ( + QlibFactorFromReportScenario, +) +from alphaagent.scenarios.qlib.experiment.model_experiment 
def filter_log_folders(main_log_path):
    """List the sub-folders of ``main_log_path`` that the web page may show.

    A sub-folder is valid only when it directly contains a ``__session__``
    directory; everything else (plain files, folders without it, folders where
    ``__session__`` is a regular file) is ignored.

    Args:
        main_log_path: ``pathlib.Path`` of the directory holding the log folders.

    Returns:
        The valid folders as paths relative to ``main_log_path``, ordered by
        the folder's modification time with the most recent first.
    """

    def _modified_at(relative_folder):
        return os.path.getmtime(os.path.join(main_log_path, relative_folder))

    valid_folders = []
    for entry in main_log_path.iterdir():
        if not entry.is_dir():
            continue
        session_dir = entry / "__session__"
        if session_dir.exists() and session_dir.is_dir():
            valid_folders.append(entry.relative_to(main_log_path))

    return sorted(valid_folders, key=_modified_at, reverse=True)
if state.fs: + while True: + try: + msg = next(state.fs) + if should_display(msg): + tags = msg.tag.split(".") + if "r" not in state.current_tags and "r" in tags: + state.lround += 1 + if "evolving code" not in state.current_tags and "evolving code" in tags: + state.erounds[state.lround] += 1 + + state.current_tags = tags + state.last_msg = msg + + # Update Summary Info + if "model runner result" in tags or "factor runner result" in tags or "runner result" in tags: + # factor baseline exp metrics + if isinstance(state.scenario, QlibFactorScenario) and state.alpha158_metrics is None: + sms = msg.content.based_experiments[0].result.loc[QLIB_SELECTED_METRICS] + sms.name = "alpha158" + state.alpha158_metrics = sms + + if ( + state.lround == 1 + and len(msg.content.based_experiments) > 0 + and msg.content.based_experiments[-1].result is not None + ): + sms = msg.content.based_experiments[-1].result + if isinstance( + state.scenario, (QlibModelScenario, QlibFactorFromReportScenario, QlibFactorScenario) + ): + sms = sms.loc[QLIB_SELECTED_METRICS] + sms.name = f"Baseline" + state.metric_series.append(sms) + + # common metrics + if msg.content.result is not None: + sms = msg.content.result + if isinstance( + state.scenario, (QlibModelScenario, QlibFactorFromReportScenario, QlibFactorScenario) + ): + sms = sms.loc[QLIB_SELECTED_METRICS] + + sms.name = f"Round {state.lround}" + state.metric_series.append(sms) + elif "hypothesis generation" in tags: + state.hypotheses[state.lround] = msg.content + elif "ef" in tags and "feedback" in tags: + state.h_decisions[state.lround] = msg.content.decision + elif "d" in tags: + if "evolving code" in tags: + msg.content = [i for i in msg.content if i] + if "evolving feedback" in tags: + total_len = len(msg.content) + msg.content = [i for i in msg.content if i] + none_num = total_len - len(msg.content) + if len(msg.content) != len(state.msgs[state.lround]["d.evolving code"][-1].content): + st.toast(":red[**Evolving Feedback Length 
def refresh(same_trace: bool = False):
    """Re-open the selected log trace and reset the per-trace session state.

    A fresh message iterator is stored in ``state.fs``. Unless ``same_trace``
    is true, messages are then read up to the first non-string content to
    detect the scenario, which is stored in ``state.scenario`` (``None`` when
    that message is not a ``Scenario``). Finally every per-trace counter and
    collection in ``state`` is reset to its initial value.

    Args:
        same_trace: When true, keep the already detected ``state.scenario``
            and skip scenario detection.

    Returns:
        None. When ``state.log_path`` is unset, only a warning toast is shown.
    """
    if state.log_path is None:
        st.toast(":red[**Please Set Log Path!**]", icon="⚠️")
        return

    trace_path = (main_log_path / state.log_path) if main_log_path else state.log_path
    state.fs = FileStorage(trace_path).iter_msg()

    # detect scenario
    if not same_trace:
        # Drop the last message of whatever trace was loaded before; otherwise,
        # when the new trace yields no displayable message, that stale message
        # would be inspected below as if it belonged to the new trace.
        state.last_msg = None
        get_msgs_until(lambda m: not isinstance(m.content, str))
        if state.last_msg is None or not isinstance(state.last_msg.content, Scenario):
            st.toast(":red[**No Scenario Info detected**]", icon="❗")
            state.scenario = None
        else:
            state.scenario = state.last_msg.content
            st.toast(f":green[**Scenario Info detected**] *{type(state.scenario).__name__}*", icon="✅")

    # Reset everything that was accumulated while reading a trace.
    state.msgs = defaultdict(lambda: defaultdict(list))
    state.lround = 0
    state.erounds = defaultdict(int)
    state.e_decisions = defaultdict(lambda: defaultdict(tuple))
    state.hypotheses = defaultdict(None)
    state.h_decisions = defaultdict(bool)
    state.metric_series = []
    state.last_msg = None
    state.current_tags = []
    state.alpha158_metrics = None
    state.times = defaultdict(lambda: defaultdict(list))
evolving_feedback_window(wsf: FactorSingleFeedback | ModelSingleFeedback): + if isinstance(wsf, FactorSingleFeedback): + ffc, efc, cfc, vfc = st.tabs( + ["**Final Feedback🏁**", "Execution Feedback🖥️", "Code Feedback📄", "Value Feedback🔢"] + ) + with ffc: + st.code(wsf.final_feedback, language="log") + with efc: + st.code(wsf.execution_feedback, language="log") + with cfc: + st.code(wsf.code_feedback, language="log") + with vfc: + st.code(wsf.value_feedback, language="log") + + elif isinstance(wsf, ModelSingleFeedback): + ffc, efc, cfc, msfc, vfc = st.tabs( + [ + "**Final Feedback🏁**", + "Execution Feedback🖥️", + "Code Feedback📄", + "Model Shape Feedback📐", + "Value Feedback🔢", + ] + ) + with ffc: + st.markdown(wsf.final_feedback) + with efc: + st.code(wsf.execution_feedback, language="log") + with cfc: + st.markdown(wsf.code_feedback) + with msfc: + st.markdown(wsf.shape_feedback) + with vfc: + st.markdown(wsf.value_feedback) + + + +def display_hypotheses(hypotheses: dict[int, Hypothesis], decisions: dict[int, bool], round: int = None): + if round is not None: + hypotheses = {round: hypotheses.get(round)} + decisions = {round: decisions.get(round)} + + name_dict = { + "hypothesis": "RD-Agent proposes the hypothesis⬇️", + "concise_justification": "because the reason⬇️", + "concise_observation": "based on the observation⬇️", + "concise_knowledge": "Knowledge⬇️ gained after practice", + } + + # if success_only: + # shd = {k: v.__dict__ for k, v in hypotheses.items() if decisions[k]} + # else: + shd = {k: v.__dict__ for k, v in hypotheses.items()} + + df = pd.DataFrame(shd).T + + if "concise_observation" in df.columns and "concise_justification" in df.columns: + df["concise_observation"], df["concise_justification"] = df["concise_justification"], df["concise_observation"] + df.rename( + columns={"concise_observation": "concise_justification", "concise_justification": "concise_observation"}, + inplace=True, + ) + + if "reason" in df.columns: + df.drop(["reason"], axis=1, 
inplace=True) + + if "concise_reason" in df.columns: + df.drop(["concise_reason"], axis=1, inplace=True) + + df.columns = df.columns.map(lambda x: name_dict.get(x, x)) + + def style_rows(row): + if decisions[row.name]: + return ["color: green;"] * len(row) + return [""] * len(row) + + def style_columns(col): + if col.name != name_dict.get("hypothesis", "hypothesis"): + return ["font-style: italic;"] * len(col) + return ["font-weight: bold;"] * len(col) + + st.markdown(df.style.apply(style_rows, axis=1).apply(style_columns, axis=0).to_html(), unsafe_allow_html=True) + +# def display_hypotheses(hypotheses: dict[int, Hypothesis], decisions: dict[int, bool], success_only: bool = False): +# name_dict = { +# "hypothesis": "RD-Agent proposes the hypothesis⬇️", +# "concise_justification": "because the reason⬇️", +# "concise_observation": "based on the observation⬇️", +# "concise_knowledge": "Knowledge⬇️ gained after practice", +# } +# if success_only: +# shd = {k: v.__dict__ for k, v in hypotheses.items() if decisions[k]} +# else: +# shd = {k: v.__dict__ for k, v in hypotheses.items()} +# df = pd.DataFrame(shd).T + +# if "concise_observation" in df.columns and "concise_justification" in df.columns: +# df["concise_observation"], df["concise_justification"] = df["concise_justification"], df["concise_observation"] +# df.rename( +# columns={"concise_observation": "concise_justification", "concise_justification": "concise_observation"}, +# inplace=True, +# ) +# if "reason" in df.columns: +# df.drop(["reason"], axis=1, inplace=True) +# if "concise_reason" in df.columns: +# df.drop(["concise_reason"], axis=1, inplace=True) + +# df.columns = df.columns.map(lambda x: name_dict.get(x, x)) + +# def style_rows(row): +# if decisions[row.name]: +# return ["color: green;"] * len(row) +# return [""] * len(row) + +# def style_columns(col): +# if col.name != name_dict.get("hypothesis", "hypothesis"): +# return ["font-style: italic;"] * len(col) +# return ["font-weight: bold;"] * len(col) + 
+# # st.dataframe(df.style.apply(style_rows, axis=1).apply(style_columns, axis=0)) +# st.markdown(df.style.apply(style_rows, axis=1).apply(style_columns, axis=0).to_html(), unsafe_allow_html=True) + + +def metrics_window(df: pd.DataFrame, R: int, C: int, *, height: int = 300, colors: list[str] = None): + if len(df.columns) > R*C and R*C <= 8: + df = df[[ + 'IC', 'ICIR', 'Rank IC', 'Rank ICIR', + '1day.excess_return_with_cost.mean', + '1day.excess_return_with_cost.annualized_return', + '1day.excess_return_with_cost.information_ratio', + '1day.excess_return_with_cost.max_drawdown' + ][:R*C]] + + # 去掉前缀 + df.columns = df.columns.str.replace('1day.excess_return_without_cost.', '') + df.columns = df.columns.str.replace('1day.excess_return_with_cost.', '') + + # 创建子图 + fig = make_subplots(rows=R, cols=C, subplot_titles=df.columns) + + def hypothesis_hover_text(h: Hypothesis, d: bool = False): + color = "green" if d else "black" + text = h.hypothesis + lines = textwrap.wrap(text, width=60) + return f"{'
'.join(lines)}
" + + hover_texts = [ + hypothesis_hover_text(state.hypotheses[int(i[6:])], state.h_decisions[int(i[6:])]) + for i in df.index[2:] + if (i != "alpha158" and i.startswith('Round ')) + ] + if state.alpha158_metrics is not None: + hover_texts = ["Baseline: alpha158"] + hover_texts + + # 使用自定义颜色 + custom_colors = colors if colors else ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f'] + + for ci, col in enumerate(df.columns): + row = ci // C + 1 + col_num = ci % C + 1 + fig.add_trace( + go.Scatter( + x=df.index, + y=df[col], + name=col, + mode="lines+markers", + connectgaps=True, + marker=dict( + size=10, + color=custom_colors[col_num-1], + line=dict(width=2, color='white') + ), + line=dict(width=3), + ), + row=row, + col=col_num, + ) + + # 更新布局 + fig.update_layout( + showlegend=False, + height=height, + paper_bgcolor='rgba(0,0,0,0)', + plot_bgcolor='rgba(0,0,0,0)', + margin=dict(l=40, r=40, t=60, b=40), + ) + + # 更新所有子图的样式 + for i in range(1, R + 1): + for j in range(1, C + 1): + fig.update_xaxes( + showgrid=True, + gridwidth=1, + gridcolor='rgba(128,128,128,0.2)', + tickvals=[df.index[0]] + list(df.index[1:]), + ticktext=[f'{df.index[0]}'] + list(df.index[1:]), + row=i, + col=j, + ) + fig.update_yaxes( + showgrid=True, + gridwidth=1, + gridcolor='rgba(128,128,128,0.2)', + row=i, + col=j, + ) + + # 使用卡片容器显示图表 + # st.markdown('
', unsafe_allow_html=True) + # st.markdown('
Performance Metrics', unsafe_allow_html=True) + st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False}) + # st.markdown('
', unsafe_allow_html=True) + + +def summary_window(): + if isinstance(state.scenario, SIMILAR_SCENARIOS): + st.header("Runing Summary📊", divider="rainbow", anchor="_summary") + if state.lround == 0: + return + with st.container(): + # TODO: not fixed height + with st.container(): + bc, cc = st.columns([1, 1], vertical_alignment="center") + with bc: + st.subheader("Metrics📈", anchor="_metrics") + # with cc: + # show_true_only = st.toggle("successful hypotheses", value=False) + + # hypotheses_c, chart_c = st.columns([2, 3]) + chart_c = st.container(border=True) + # hypotheses_c = st.container() + + # with hypotheses_c: + # st.subheader("Hypotheses🏅", anchor="_hypotheses") + # display_hypotheses(state.hypotheses, state.h_decisions, show_true_only) + + with chart_c: + if isinstance(state.scenario, QlibFactorScenario) and state.alpha158_metrics is not None: + df = pd.DataFrame([state.alpha158_metrics] + state.metric_series) + else: + df = pd.DataFrame(state.metric_series) + # if show_true_only and len(state.hypotheses) >= len(state.metric_series): + # if state.alpha158_metrics is not None: + # selected = ["alpha158"] + [i for i in df.index[2:] if state.h_decisions[int(i[6:])]] + # else: + # selected = [i for i in df.index if i == "Baseline" or state.h_decisions[int(i[6:])]] + # df = df.loc[selected] + if df.shape[0] == 1: + st.table(df.iloc[0]) + elif df.shape[0] > 1: + if df.shape[1] == 1: + fig = px.line(df, x=df.index, y=df.columns, markers=True) + fig.update_layout(xaxis_title="Loop Round", yaxis_title=None) + st.plotly_chart(fig) + else: + metrics_window(df, 2, 4, height=600, colors=["red", "blue", "orange", "green"]) + + + +def tabs_hint(): + st.markdown( + "

You can navigate through the tabs using ⬅️ ➡️ or by holding Shift and scrolling with the mouse wheel🖱️.

", + unsafe_allow_html=True, + ) + + +def tasks_window(tasks: list[FactorTask | ModelTask]): + if isinstance(tasks[0], FactorTask): + title = "Factor Agent⚙️" + st.subheader(title, divider="blue", anchor="_factor") + + for ft in tasks: + # 使用 Streamlit 容器创建卡片效果 + with st.container(): + # 添加一些上下边距 + # st.markdown("
", unsafe_allow_html=True) + + # 使用 expander 创建可展开的卡片 + with st.expander(f"### 🔍 **{ft.factor_name}**", expanded=True): + # Description 部分 + st.markdown("##### Description") + st.code(ft.factor_description, language="plaintext") + + # Expression 部分 + st.markdown("##### Expression") + # 使用 success 样式代替 info,显示为绿色背景 + st.code(f"{ft.factor_expression}", language="python") + + # 添加分隔 + st.markdown("
", unsafe_allow_html=True) + + elif isinstance(tasks[0], ModelTask): + st.markdown("**Model Tasks🚩**") + tnames = [m.name for m in tasks] + if sum(len(tn) for tn in tnames) > 100: + tabs_hint() + tabs = st.tabs(tnames) + for i, mt in enumerate(tasks): + with tabs[i]: + st.markdown(f"**Model Type**: {mt.model_type}") + st.markdown(f"**Description**: {mt.description}") + st.latex("Formulation") + st.latex(mt.formulation) + + mks = "| Variable | Description |\n| --- | --- |\n" + if mt.variables: + for v, d in mt.variables.items(): + mks += f"| ${v}$ | {d} |\n" + st.markdown(mks) + + +def research_window(round: int): + with st.container(border=True): + title = "Idea Agent💡" + st.subheader(title, divider="blue", anchor="_idea") + if isinstance(state.scenario, SIMILAR_SCENARIOS): + # pdf image + if pim := state.msgs[round]["r.extract_factors_and_implement.load_pdf_screenshot"]: + for i in range(min(2, len(pim))): + st.image(pim[i].content, use_container_width=True) + + # Hypothesis + if hg := state.msgs[round]["r.hypothesis generation"]: + h: Hypothesis = hg[0].content + + # 创建网格布局的HTML + cards_html = f""" +
+
+
Hypothesis
+
{h.hypothesis}
+
+
+
Justification
+
{h.concise_justification}
+
+
+
Knowledge
+
{h.concise_knowledge}
+
+
+
Specification
+
By combining Intraday Price Velocity with volume and volatility data within a specific time window and analyzing their collective impact on short-term returns, we aim to enhance the model's predictive power and capture a more nuanced understanding of market dynamics, thereby increasing the accuracy of short-term return predictions.
+
+
+ """ + + st.markdown(cards_html, unsafe_allow_html=True) + + if eg := state.msgs[round]["r.experiment generation"]: + tasks_window(eg[0].content) + + + +def feedback_window(): + if isinstance(state.scenario, SIMILAR_SCENARIOS): + with st.container(border=True): + st.subheader("Eval Agent📝", divider="orange", anchor="_eval") + + if state.lround > 0 and isinstance( + state.scenario, (QlibModelScenario, QlibFactorScenario, QlibFactorFromReportScenario) + ): + with st.expander("**Config**", expanded=True): + st.markdown(state.scenario.experiment_setting, unsafe_allow_html=True) + + if fbr := state.msgs[round]["ef.Quantitative Backtesting Chart"]: + # st.markdown("

", unsafe_allow_html=True) + st.markdown("#### PnL Figure📈") + num_fig = len(state.msgs[round]["ef.Quantitative Backtesting Chart"]) + if num_fig > 1: + for i in range(num_fig): + if i == 0: + # 使用 HTML 实现居中 + st.markdown( + "
Baseline
", + unsafe_allow_html=True + ) + fig = report_figure(fbr[i].content) + st.plotly_chart(fig) + if i < num_fig - 1: # 在图表之间添加分割线 + st.divider() + else: + fig = report_figure(fbr[0].content) + st.plotly_chart(fig) + if fbn := state.msgs[round]["ef.runner result"]: + # 添加空行 + st.markdown("

", unsafe_allow_html=True) + st.markdown("#### Runner Result Backtesting Table 📌") + # 获取结果数据 + runner_result_data = fbn[0].content + result = runner_result_data.result + # 将结果转化为 DataFrame + result_df = pd.DataFrame(result) if isinstance(result, pd.Series) else pd.DataFrame(result) + result_df = result_df.reset_index() + result_df.columns = ["Metric", "Value"] + + # 添加Category列来分类指标 + def categorize_metric(metric): + if "without_cost" in metric: + return "Without Cost" + elif "with_cost" in metric: + return "With Cost" + else: + return "Other Metrics" + + result_df['Category'] = result_df['Metric'].apply(categorize_metric) + + # 清理Metric名称 + result_df['Metric'] = result_df['Metric'].apply(lambda x: x.split('.')[-1].replace('_', ' ').title()) + + # 规范化指标名称 + metric_name_map = { + 'Ic': 'IC', + 'Icir': 'ICIR', + 'Rank Ic': 'Rank IC', + 'Rank Icir': 'Rank ICIR', + 'Ffr': 'ffr', + 'Pa': 'pa', + 'Pos': 'pos' + } + result_df['Metric'] = result_df['Metric'].apply(lambda x: metric_name_map.get(x, x)) + + # 设置表格样式 + st.markdown(""" + + """, unsafe_allow_html=True) + + # 创建HTML表格 + table_html = '' + + # 按Category分组添加行 + for category in ['Without Cost', 'With Cost', 'Other Metrics']: + category_data = result_df[result_df['Category'] == category] + if not category_data.empty: + # 添加类别标题行 + table_html += f'' + # 添加该类别的所有指标 + for _, row in category_data.iterrows(): + table_html += f'' + + table_html += '
CategoryMetricValue
{category}
{row["Metric"]}{row["Value"]:.4f}
' + + # 显示表格 + st.markdown(table_html, unsafe_allow_html=True) + if fb := state.msgs[round]["ef.feedback"]: + st.markdown("

", unsafe_allow_html=True) + st.markdown("#### Hypothesis Feedback🔍") + h: HypothesisFeedback = fb[0].content + + # 使用网格布局显示反馈内容 + feedback_html = """ +
+
+
Observations
+
{}
+
+
+
Hypothesis Evaluation
+
{}
+
+
+
New Hypothesis
+
{}
+
+
+
Decision & Reason
+
Decision: {}

Reason: {}
+
+
+ """.format( + h.observations, + h.hypothesis_evaluation, + h.new_hypothesis, + h.decision, + h.reason + ) + st.markdown(feedback_html, unsafe_allow_html=True) + + # if isinstance(state.scenario, KGScenario): + # if fbe := state.msgs[round]["ef.runner result"]: + # submission_path = fbe[0].content.experiment_workspace.workspace_path / "submission.csv" + # st.markdown( + # f":green[**Exp Workspace**]: {str(fbe[0].content.experiment_workspace.workspace_path.absolute())}" + # ) + # try: + # data = submission_path.read_bytes() + # st.download_button( + # label="**Download** submission.csv", + # data=data, + # file_name="submission.csv", + # mime="text/csv", + # ) + # except Exception as e: + # st.markdown(f":red[**Download Button Error**]: {e}") + + +def evolving_window(): + title = "Debugging" if isinstance(state.scenario, SIMILAR_SCENARIOS) else "Development🛠️ (evolving coder)" + st.subheader(title, divider="green", anchor="_debugging") + + # Evolving Status + if state.erounds[round] > 0: + st.markdown("##### **☑️ Evolving Status**") + es = state.e_decisions[round] + e_status_mks = "".join(f"| {ei} " for ei in range(1, state.erounds[round] + 1)) + "|\n" + e_status_mks += "|--" * state.erounds[round] + "|\n" + for ei, estatus in es.items(): + if not estatus: + estatus = (0, 0, 0) + e_status_mks += "| " + "🕙
" * estatus[2] + "✔️
" * estatus[0] + "❌
" * estatus[1] + " " + e_status_mks += "|\n" + st.markdown(e_status_mks, unsafe_allow_html=True) + + # Evolving Tabs + if state.erounds[round] > 0: + if state.erounds[round] > 1: + evolving_round = st.radio( + "**🔄️Evolving Rounds**", + horizontal=True, + options=range(1, state.erounds[round] + 1), + index=state.erounds[round] - 1, + key="show_eround", + ) + else: + evolving_round = 1 + + ws: list[FactorFBWorkspace | ModelFBWorkspace] = state.msgs[round]["d.evolving code"][ + evolving_round - 1 + ].content + + tab_names = [ + w.target_task.factor_name if isinstance(w.target_task, FactorTask) else w.target_task.name for w in ws + ] + if len(state.msgs[round]["d.evolving feedback"]) >= evolving_round: + for j in range(len(ws)): + if state.msgs[round]["d.evolving feedback"][evolving_round - 1].content[j].final_decision: + tab_names[j] += "✔️" + else: + tab_names[j] += "❌" + + if sum(len(tn) for tn in tab_names) > 100: + tabs_hint() + + wtabs = st.tabs(tab_names) + for j, w in enumerate(ws): + with wtabs[j]: + # if 'file_dict' in w.__dict__: + # for k, v in w.file_dict.items(): + # with st.expander(f":green[`{k}`]", expanded=True): + # st.code(v, language="python") + # continue + + + # Evolving Code + st.markdown(f"**Workspace Path**: {w.workspace_path}") + expr = re.search(r"expr\s*=\s*\"(.*?)\"", w.code_dict['factor.py'], re.DOTALL).group(1) + # 只展示表达式而不是整个代码块 + expression = w.target_task.factor_expression + st.markdown(f"- ##### **Expression** ✨: \n```\n{expr}\n```") + + # Evolving Feedback + if len(state.msgs[round]["d.evolving feedback"]) >= evolving_round: + evolving_feedback_window(state.msgs[round]["d.evolving feedback"][evolving_round - 1].content[j]) + + +## [Scenario Description](#_scenario) +toc = """ +## [Summary📊](#_summary) +- [**Metrics📈**](#_metrics) +## [AlphaAgent Loops♾️](#_loops) +- [**Idea Agent💡**](#_idea) +- [**Factor Agent⚙️**](#_factor) +- [**Eval Agent📝**](#_eval) +""" +# Config Sidebar +with st.sidebar: + st.markdown("# **AlphaAgent**✨") + 
st.subheader(":blue[Table of Content]", divider="blue") + st.markdown(toc) + st.subheader(":blue[Control Panel]", divider="blue") + + + + with st.container(border=True): + if main_log_path: + lc1, lc2 = st.columns([1, 2], vertical_alignment="center") + with lc1: + st.markdown(":blue[**Log Path**]") + with lc2: + manually = st.toggle("Manual Input") + if manually: + st.text_input("log path", key="log_path", on_change=refresh, label_visibility="collapsed") + else: + folders = filter_log_folders(main_log_path) + # 按修改时间排序,最新的在最前面 + st.selectbox(f"**Select from `{main_log_path}`**", folders, key="log_path", on_change=refresh) + else: + st.text_input(":blue[**log path**]", key="log_path", on_change=refresh) + + c1, c2 = st.columns([1, 1], vertical_alignment="center") + with c1: + if st.button(":green[**All Loops**]", use_container_width=True): + if not state.fs: + refresh() + get_msgs_until(lambda m: False) + if st.button("**Reset**", use_container_width=True): + refresh(same_trace=True) + with c2: + if st.button(":green[Next Loop]", use_container_width=True): + if not state.fs: + refresh() + get_msgs_until(lambda m: "ef.feedback" in m.tag) + + if st.button("Next Step", use_container_width=True): + if not state.fs: + refresh() + get_msgs_until(lambda m: "d.evolving feedback" in m.tag) + + with st.popover(":orange[**Config⚙️**]", use_container_width=True): + st.multiselect("excluded log tags", ["llm_messages"], ["llm_messages"], key="excluded_tags") + st.multiselect("excluded log types", ["str", "dict", "list"], ["str"], key="excluded_types") + + if args.debug: + debug = st.toggle("debug", value=False) + + if debug: + if st.button("Single Step Run", use_container_width=True): + get_msgs_until() + else: + debug = False + + + st.subheader(":blue[Entrance]", divider="blue") + user_hypothesis = st.text_input("🔍 **Enter an hypothesis you want to verify**", + value=state.get("user_direction", ""), + placeholder="..." 
+ ) + + # 启动/停止按钮 + col1, col2 = st.columns([1, 1]) + with col1: + start_clicked = st.button( + "🚀 Start Mining" if not state.current_task else "⏳ Mining...", + disabled=state.current_task is not None, + use_container_width=True + ) + with col2: + stop_clicked = st.button( + "⏹ Stop Mining", + disabled=state.current_task is None, + use_container_width=True + ) + + # 处理按钮点击事件 + if start_clicked and user_hypothesis: + response = requests.post( + f"{state.api_base}/api/tasks", + json={"direction": user_hypothesis} + ) + if response.status_code == 200: + state.current_task = response.json()["task_id"] + state.user_direction = user_hypothesis + refresh(same_trace=True) + st.rerun() + + if stop_clicked and state.current_task: + print("Stop posted") + response = requests.post( + f"{state.api_base}/api/tasks/{state.current_task}/stop" + ) + + if response.status_code == 200: + st.success("Stop signal sent") + state.current_task = None + print("Stop succeeds") + st.rerun() + + # 删除自动刷新控制代码,仅保留手动刷新按钮 + if state.current_task: + # 手动刷新按钮 - 使用英文 + if st.button("🔄 Refresh Now", use_container_width=True): + refresh(same_trace=True) + get_msgs_until(lambda m: False) + st.rerun() + + +# Debug Info Window +if debug: + with st.expander(":red[**Debug Info**]", expanded=True): + dcol1, dcol2 = st.columns([1, 3]) + with dcol1: + st.markdown( + f"**log path**: {state.log_path}\n\n" + f"**excluded tags**: {state.excluded_tags}\n\n" + f"**excluded types**: {state.excluded_types}\n\n" + f":blue[**message id**]: {sum(sum(len(tmsgs) for tmsgs in rmsgs.values()) for rmsgs in state.msgs.values())}\n\n" + f":blue[**round**]: {state.lround}\n\n" + f":blue[**evolving round**]: {state.erounds[state.lround]}\n\n" + ) + with dcol2: + if state.last_msg: + st.write(state.last_msg) + if isinstance(state.last_msg.content, list): + st.write(state.last_msg.content[0]) + elif not isinstance(state.last_msg.content, str): + st.write(state.last_msg.content.__dict__) + + +if state.log_path and state.fs is None: 
+ refresh() + +# Main Window +# header_c1, header_c3 = st.columns([1, 6], vertical_alignment="center") +# with st.container(): +# with header_c1: +# st.image("https://img-prod-cms-rt-microsoft-com.akamaized.net/cms/api/am/imageFileData/RE1Mu3b?ver=5c31") +# with header_c3: +# st.markdown( +# """ +#

+# RD-Agent:
LLM-based autonomous evolving agents for industrial data-driven R&D +#

def show_times(round: int):
    """Show how long each recorded stage of one loop round took.

    For every stage key in ``state.times[round]`` the span between the first
    and the last recorded timestamp is written with ``st.markdown`` as whole
    minutes plus remaining seconds. A stage with a single timestamp is shown
    as 0 minutes 0 seconds.

    Args:
        round: Loop round whose stage times are displayed.
    """
    for stage, stamps in state.times[round].items():
        if not stamps:
            # An empty list has no first/last timestamp to subtract.
            continue
        elapsed = stamps[-1] - stamps[0]
        # total_seconds() covers the whole span; timedelta.seconds alone is
        # only the sub-day component and silently drops full days.
        minutes, seconds = divmod(int(elapsed.total_seconds()), 60)
        st.markdown(f"**:blue[{stage}]**: :red[**{minutes}**] minutes :orange[**{seconds}**] seconds")


class BaseGraph:
    """Build a plotly figure with one trace per selected DataFrame column.

    NOTE(review): ``_name`` is ``None`` here and is lower-cased during
    initialisation, so this class presumably has to be subclassed with
    ``_name`` set to a trace type name -- confirm against the subclasses.
    """

    _name = None

    def __init__(
        self, df: pd.DataFrame = None, layout: dict = None, graph_kwargs: dict = None, name_dict: dict = None, **kwargs
    ):
        """Store the inputs, then derive the graph type and the trace list.

        :param df: data to plot; its index is used as x and each selected column as y
        :param layout: ``go.Layout`` parameters (defaults to an empty dict)
        :param graph_kwargs: extra keyword arguments for every trace,
            eg: go.Bar(**graph_kwargs) (defaults to an empty dict)
        :param name_dict: mapping of column name to displayed trace name;
            defaults to every column of ``df`` under its own name
        :param kwargs: forwarded to ``_init_parameters``
        """
        self._df = df

        self._layout = {} if layout is None else layout
        self._graph_kwargs = {} if graph_kwargs is None else graph_kwargs
        self._name_dict = name_dict

        self.data = None

        self._init_parameters(**kwargs)
        self._init_data()

    def _init_data(self):
        """Build ``self.data`` from the DataFrame.

        :raises ValueError: if the DataFrame is empty
        """
        if self._df.empty:
            raise ValueError("df is empty.")

        self.data = self._get_data()

    def _init_parameters(self, **kwargs):
        """Derive the graph type from ``_name`` and default the name mapping.

        :param kwargs: accepted but not used here
        """
        # Instantiate graphics parameters: "bar" / "BAR" -> "Bar"
        self._graph_type = self._name.lower().capitalize()

        # Displayed column name
        if self._name_dict is None:
            self._name_dict = {column: column for column in self._df.columns}

    @staticmethod
    def get_instance_with_graph_parameters(graph_type: str = None, **kwargs):
        """Instantiate the graph class called ``graph_type``.

        The class is looked up in ``plotly.graph_objs`` first; if that module
        has no such attribute, ``qlib.contrib.report.graph`` is tried.

        :param graph_type: name of the class to instantiate
        :param kwargs: constructor arguments for that class
        :return: the new graph object
        """
        try:
            module = importlib.import_module("plotly.graph_objs")
            graph_class = getattr(module, graph_type)
        except AttributeError:
            module = importlib.import_module("qlib.contrib.report.graph")
            graph_class = getattr(module, graph_type)
        return graph_class(**kwargs)

    def _get_layout(self) -> go.Layout:
        """
        :return: a ``go.Layout`` built from the stored layout parameters
        """
        return go.Layout(**self._layout)

    def _get_data(self) -> list:
        """
        :return: one graph object per ``name_dict`` entry, in mapping order
        """
        traces = []
        for column, shown_name in self._name_dict.items():
            trace = self.get_instance_with_graph_parameters(
                graph_type=self._graph_type,
                x=self._df.index,
                y=self._df[column],
                name=shown_name,
                **self._graph_kwargs,
            )
            traces.append(trace)
        return traces

    @property
    def figure(self) -> go.Figure:
        """
        :return: a ``go.Figure`` combining the traces and the layout
        """
        fig = go.Figure(data=self.data, layout=self._get_layout())
        # NOTE: Use the default theme from plotly version 3.x, template=None
        fig["layout"].update(template=None)
        return fig
+ + - graph_kwargs: dict, graph kwargs, default {}, used in `go.Bar(**graph_kwargs)` + + :param subplots_kwargs: `plotly.tools.make_subplots` original parameters + + - shared_xaxes: bool, default False + + - shared_yaxes: bool, default False + + - vertical_spacing: float, default 0.3 / rows + + - subplot_titles: list, default [] + If `sub_graph_data` is None, will generate 'subplot_titles' according to `df.columns`, + this field will be discarded + + + - specs: list, see `make_subplots` docs + + - rows: int, Number of rows in the subplot grid, default 1 + If `sub_graph_data` is None, will generate 'rows' according to `df`, this field will be discarded + + - cols: int, Number of cols in the subplot grid, default 1 + If `sub_graph_data` is None, will generate 'cols' according to `df`, this field will be discarded + + + :param kwargs: + + """ + + self._df = df + self._layout = layout + self._sub_graph_layout = sub_graph_layout + + self._kind_map = kind_map + if self._kind_map is None: + self._kind_map = dict(kind="Scatter", kwargs=dict()) + + self._subplots_kwargs = subplots_kwargs + if self._subplots_kwargs is None: + self._init_subplots_kwargs() + + self.__cols = self._subplots_kwargs.get("cols", 2) # pylint: disable=W0238 + self.__rows = self._subplots_kwargs.get( # pylint: disable=W0238 + "rows", math.ceil(len(self._df.columns) / self.__cols) + ) + + self._sub_graph_data = sub_graph_data + if self._sub_graph_data is None: + self._init_sub_graph_data() + + self._init_figure() + + def _init_sub_graph_data(self): + """ + + :return: + """ + self._sub_graph_data = [] + self._subplot_titles = [] + + for i, column_name in enumerate(self._df.columns): + row = math.ceil((i + 1) / self.__cols) + _temp = (i + 1) % self.__cols + col = _temp if _temp else self.__cols + res_name = column_name.replace("_", " ") + _temp_row_data = ( + column_name, + dict( + row=row, + col=col, + name=res_name, + kind=self._kind_map["kind"], + graph_kwargs=self._kind_map["kwargs"], + ), + ) + 
self._sub_graph_data.append(_temp_row_data) + self._subplot_titles.append(res_name) + + def _init_subplots_kwargs(self): + """ + + :return: + """ + # Default cols, rows + _cols = 2 + _rows = math.ceil(len(self._df.columns) / 2) + self._subplots_kwargs = dict() + self._subplots_kwargs["rows"] = _rows + self._subplots_kwargs["cols"] = _cols + self._subplots_kwargs["shared_xaxes"] = False + self._subplots_kwargs["shared_yaxes"] = False + self._subplots_kwargs["vertical_spacing"] = 0.3 / _rows + self._subplots_kwargs["print_grid"] = False + self._subplots_kwargs["subplot_titles"] = self._df.columns.tolist() + + def _init_figure(self): + """ + + :return: + """ + self._figure = make_subplots(**self._subplots_kwargs) + + for column_name, column_map in self._sub_graph_data: + if isinstance(column_name, go.Figure): + _graph_obj = column_name + elif isinstance(column_name, str): + temp_name = column_map.get("name", column_name.replace("_", " ")) + kind = column_map.get("kind", self._kind_map.get("kind", "Scatter")) + _graph_kwargs = column_map.get("graph_kwargs", self._kind_map.get("kwargs", {})) + _graph_obj = BaseGraph.get_instance_with_graph_parameters( + kind, + **dict( + x=self._df.index, + y=self._df[column_name], + name=temp_name, + **_graph_kwargs, + ), + ) + else: + raise TypeError() + + row = column_map["row"] + col = column_map["col"] + + self._figure.add_trace(_graph_obj, row=row, col=col) + + if self._sub_graph_layout is not None: + for k, v in self._sub_graph_layout.items(): + self._figure["layout"][k].update(v) + + # NOTE: Use the default theme from plotly version 3.x: template=None + self._figure["layout"].update(template=None) + self._figure["layout"].update(self._layout) + + @property + def figure(self): + return self._figure + + +def _calculate_maximum(df: pd.DataFrame, is_ex: bool = False): + """ + + :param df: + :param is_ex: + :return: + """ + if is_ex: + end_date = df["cum_ex_return_wo_cost_mdd"].idxmin() + start_date = df.loc[df.index <= 
end_date]["cum_ex_return_wo_cost"].idxmax() + else: + end_date = df["return_wo_mdd"].idxmin() + start_date = df.loc[df.index <= end_date]["cum_return_wo_cost"].idxmax() + return start_date, end_date + + +def _calculate_mdd(series): + """ + Calculate mdd + + :param series: + :return: + """ + return series - series.cummax() + + +def _calculate_report_data(raw_df: pd.DataFrame) -> pd.DataFrame: + """ + + :param df: + :return: + """ + df = raw_df.copy(deep=True) + index_names = df.index.names + df.index = df.index.strftime("%Y-%m-%d") + + report_df = pd.DataFrame() + + report_df["cum_bench"] = df["bench"].cumsum() + report_df["cum_return_wo_cost"] = df["return"].cumsum() + report_df["cum_return_w_cost"] = (df["return"] - df["cost"]).cumsum() + # report_df['cum_return'] - report_df['cum_return'].cummax() + report_df["return_wo_mdd"] = _calculate_mdd(report_df["cum_return_wo_cost"]) + report_df["return_w_cost_mdd"] = _calculate_mdd((df["return"] - df["cost"]).cumsum()) + + report_df["cum_ex_return_wo_cost"] = (df["return"] - df["bench"]).cumsum() + report_df["cum_ex_return_w_cost"] = (df["return"] - df["bench"] - df["cost"]).cumsum() + report_df["cum_ex_return_wo_cost_mdd"] = _calculate_mdd((df["return"] - df["bench"]).cumsum()) + report_df["cum_ex_return_w_cost_mdd"] = _calculate_mdd((df["return"] - df["cost"] - df["bench"]).cumsum()) + # return_wo_mdd , return_w_cost_mdd, cum_ex_return_wo_cost_mdd, cum_ex_return_w + + report_df["turnover"] = df["turnover"] + report_df.sort_index(ascending=True, inplace=True) + + report_df.index.names = index_names + return report_df + + +def report_figure(df: pd.DataFrame) -> list | tuple: + """ + + :param df: + :return: + """ + + # Get data + report_df = _calculate_report_data(df) + + # Maximum Drawdown + max_start_date, max_end_date = _calculate_maximum(report_df) + ex_max_start_date, ex_max_end_date = _calculate_maximum(report_df, True) + + index_name = report_df.index.name + _temp_df = report_df.reset_index() + _temp_df.loc[-1] = 0 
+ _temp_df = _temp_df.shift(1) + _temp_df.loc[0, index_name] = "T0" + _temp_df.set_index(index_name, inplace=True) + _temp_df.iloc[0] = 0 + report_df = _temp_df + + # Create figure + _default_kind_map = dict(kind="Scatter", kwargs={"mode": "lines+markers"}) + _temp_fill_args = {"fill": "tozeroy", "mode": "lines+markers"} + _column_row_col_dict = [ + ("cum_bench", dict(row=1, col=1)), + ("cum_return_wo_cost", dict(row=1, col=1)), + ("cum_return_w_cost", dict(row=1, col=1)), + ("return_wo_mdd", dict(row=2, col=1, graph_kwargs=_temp_fill_args)), + ("return_w_cost_mdd", dict(row=3, col=1, graph_kwargs=_temp_fill_args)), + ("cum_ex_return_wo_cost", dict(row=4, col=1)), + ("cum_ex_return_w_cost", dict(row=4, col=1)), + ("turnover", dict(row=5, col=1)), + ("cum_ex_return_w_cost_mdd", dict(row=6, col=1, graph_kwargs=_temp_fill_args)), + ("cum_ex_return_wo_cost_mdd", dict(row=7, col=1, graph_kwargs=_temp_fill_args)), + ] + + _subplot_layout = dict() + for i in range(1, 8): + # yaxis + _subplot_layout.update({"yaxis{}".format(i): dict(zeroline=True, showline=True, showticklabels=True)}) + _show_line = i == 7 + _subplot_layout.update({"xaxis{}".format(i): dict(showline=_show_line, type="category", tickangle=45)}) + + _layout_style = dict( + height=1200, + title=" ", + shapes=[ + { + "type": "rect", + "xref": "x", + "yref": "paper", + "x0": max_start_date, + "y0": 0.55, + "x1": max_end_date, + "y1": 1, + "fillcolor": "#d3d3d3", + "opacity": 0.3, + "line": { + "width": 0, + }, + }, + { + "type": "rect", + "xref": "x", + "yref": "paper", + "x0": ex_max_start_date, + "y0": 0, + "x1": ex_max_end_date, + "y1": 0.55, + "fillcolor": "#d3d3d3", + "opacity": 0.3, + "line": { + "width": 0, + }, + }, + ], + ) + + _subplot_kwargs = dict( + shared_xaxes=True, + vertical_spacing=0.01, + rows=7, + cols=1, + row_width=[1, 1, 1, 3, 1, 1, 3], + print_grid=False, + ) + figure = SubplotsGraph( + df=report_df, + layout=_layout_style, + sub_graph_data=_column_row_col_dict, + 
subplots_kwargs=_subplot_kwargs, + kind_map=_default_kind_map, + sub_graph_layout=_subplot_layout, + ).figure + return figure diff --git a/alphaagent/log/ui/st_fixed_container.py b/alphaagent/log/ui/st_fixed_container.py new file mode 100755 index 00000000..262f0388 --- /dev/null +++ b/alphaagent/log/ui/st_fixed_container.py @@ -0,0 +1,126 @@ +from typing import Literal + +import streamlit as st +from streamlit.components.v1 import html + +FIXED_CONTAINER_CSS = """ +:root {{ + --background-color: #ffffff; /* Default background color */ +}} +div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) {{ + position: {mode}; + width: inherit; + background-color: inherit; + {position}: {margin}; + z-index: 999; +}} +div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) div[data-testid="stVerticalBlock"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) > div[data-testid="stVerticalBlockBorderWrapper"] {{ + background-color: transparent; + width: 100%; +}} +div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) div[data-testid="stVerticalBlock"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) > div[data-testid="stVerticalBlockBorderWrapper"] div[data-testid="stVerticalBlockBorderWrapper"] {{ + background-color: var(--background-color); +}} +div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) div[data-testid="stVerticalBlock"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) > div[data-testid="element-container"] {{ + display: none; +}} +div[data-testid="stVerticalBlockBorderWrapper"]:has(div.not-fixed-container):not(:has(div[class^='fixed-container-'])) {{ + display: none; +}} +""".strip() + +FIXED_CONTAINER_JS = """ +const root = parent.document.querySelector('.stApp'); +let lastBackgroundColor 
= null; +function updateContainerBackground(currentBackground) { + parent.document.documentElement.style.setProperty('--background-color', currentBackground); + ; +} +function checkForBackgroundColorChange() { + const style = window.getComputedStyle(root); + const currentBackgroundColor = style.backgroundColor; + if (currentBackgroundColor !== lastBackgroundColor) { + lastBackgroundColor = currentBackgroundColor; // Update the last known value + updateContainerBackground(lastBackgroundColor); + } +} +const observerCallback = (mutationsList, observer) => { + for(let mutation of mutationsList) { + if (mutation.type === 'attributes' && (mutation.attributeName === 'class' || mutation.attributeName === 'style')) { + checkForBackgroundColorChange(); + } + } +}; +const main = () => { + checkForBackgroundColorChange(); + const observer = new MutationObserver(observerCallback); + observer.observe(root, { attributes: true, childList: false, subtree: false }); +} +// main(); +document.addEventListener("DOMContentLoaded", main); +""".strip() + + +MARGINS = { + "top": "2.875rem", + "bottom": "0", +} + + +counter = 0 + + +def st_fixed_container( + *, + height: int | None = None, + border: bool | None = None, + mode: Literal["fixed", "sticky"] = "fixed", + position: Literal["top", "bottom"] = "top", + margin: str | None = None, + transparent: bool = False, +): + if margin is None: + margin = MARGINS[position] + global counter + + fixed_container = st.container() + non_fixed_container = st.container() + css = FIXED_CONTAINER_CSS.format( + mode=mode, + position=position, + margin=margin, + id=counter, + ) + with fixed_container: + html(f"", scrolling=False, height=0) + st.markdown(f"", unsafe_allow_html=True) + st.markdown( + f"
", + unsafe_allow_html=True, + ) + with non_fixed_container: + st.markdown( + f"
", + unsafe_allow_html=True, + ) + counter += 1 + + parent_container = fixed_container if transparent else fixed_container.container() + return parent_container.container(height=height, border=border) + + +if __name__ == "__main__": + for i in range(30): + st.write(f"Line {i}") + + # with st_fixed_container(mode="sticky", position="top", border=True): + # with st_fixed_container(mode="sticky", position="bottom", border=True): + # with st_fixed_container(mode="fixed", position="top", border=True): + with st_fixed_container(mode="fixed", position="bottom", border=True): + st.write("This is a fixed container.") + st.write("This is a fixed container.") + st.write("This is a fixed container.") + + st.container(border=True).write("This is a regular container.") + for i in range(30): + st.write(f"Line {i}") diff --git a/alphaagent/log/ui/web.py b/alphaagent/log/ui/web.py new file mode 100755 index 00000000..e1ddf598 --- /dev/null +++ b/alphaagent/log/ui/web.py @@ -0,0 +1,637 @@ +import time +from collections import defaultdict +from copy import deepcopy +from datetime import datetime, timezone +from typing import Callable, Type + +import pandas as pd +import plotly.express as px +import streamlit as st +from streamlit.delta_generator import DeltaGenerator + +from alphaagent.components.coder.factor_coder.evaluators import FactorSingleFeedback +from alphaagent.components.coder.factor_coder.factor import FactorFBWorkspace, FactorTask +from alphaagent.components.coder.model_coder.evaluators import ModelSingleFeedback +from alphaagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask +from alphaagent.core.proposal import Hypothesis, HypothesisFeedback, Trace +from alphaagent.log.base import Message, Storage, View +from alphaagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment +from alphaagent.scenarios.qlib.experiment.model_experiment import ( + QlibModelExperiment, + QlibModelScenario, +) + +st.set_page_config(layout="wide") + 
+TIME_DELAY = 0.001 + + +class WebView(View): + def __init__(self, ui: "StWindow"): + self.ui = ui + # Save logs to your desired data structure + # ... + + def display(self, s: Storage, watch: bool = False): + for msg in s.iter_msg(): # iterate overtime + # NOTE: iter_msg will correctly separate the information. + # TODO: msg may support streaming mode. + self.ui.consume_msg(msg) + + +class StWindow: + def __init__(self, container: "DeltaGenerator"): + self.container = container + + def consume_msg(self, msg: Message): + msg_str = f"{msg.timestamp.astimezone(timezone.utc).isoformat()} | {msg.level} | {msg.caller} - {msg.content}" + self.container.code(msg_str, language="log") + + +class LLMWindow(StWindow): + def __init__(self, container: "DeltaGenerator", session_name: str = "common"): + self.session_name = session_name + self.container = container.expander(f"{self.session_name} message") + + def consume_msg(self, msg: Message): + self.container.chat_message("user").markdown(f"{msg.content}") + + +class ProgressTabsWindow(StWindow): + """ + For windows with stream messages, will refresh when a new tab is created. + """ + + def __init__( + self, + container: "DeltaGenerator", + inner_class: Type[StWindow] = StWindow, + mapper: Callable[[Message], str] = lambda x: x.pid_trace, + ): + self.inner_class = inner_class + self.mapper = mapper + + self.container = container.empty() + self.tab_windows: dict[str, StWindow] = defaultdict(None) + self.tab_caches: dict[str, list[Message]] = defaultdict(list) + + def consume_msg(self, msg: Message): + name = self.mapper(msg) + + if name not in self.tab_windows: + # new tab need to be created, current streamlit container need to be updated. 
+ names = list(self.tab_windows.keys()) + [name] + + if len(names) == 1: + tabs = [self.container.container()] + else: + tabs = self.container.tabs(names) + + for id, name in enumerate(names): + self.tab_windows[name] = self.inner_class(tabs[id]) + + # consume the cache + for name in self.tab_caches: + for msg in self.tab_caches[name]: + self.tab_windows[name].consume_msg(msg) + + self.tab_caches[name].append(msg) + self.tab_windows[name].consume_msg(msg) + + +class ObjectsTabsWindow(StWindow): + def __init__( + self, + container: "DeltaGenerator", + inner_class: Type[StWindow] = StWindow, + mapper: Callable[[object], str] = lambda x: str(x), + tab_names: list[str] | None = None, + ): + self.inner_class = inner_class + self.mapper = mapper + self.container = container + self.tab_names = tab_names + + def consume_msg(self, msg: Message): + if isinstance(msg.content, list): + if self.tab_names: + assert len(self.tab_names) == len( + msg.content + ), "List of objects should have the same length as provided tab names." 
+ objs_dict = {self.tab_names[id]: obj for id, obj in enumerate(msg.content)} + else: + objs_dict = {self.mapper(obj): obj for obj in msg.content} + elif not isinstance(msg.content, dict): + raise ValueError("Message content should be a list or a dict of objects.") + + # two many tabs may cause display problem + tab_names = list(objs_dict.keys()) + tabs = [] + for i in range(0, len(tab_names), 10): + tabs.extend(self.container.tabs(tab_names[i : i + 10])) + + for id, obj in enumerate(objs_dict.values()): + splited_msg = Message( + tag=msg.tag, + level=msg.level, + timestamp=msg.timestamp, + caller=msg.caller, + pid_trace=msg.pid_trace, + content=obj, + ) + self.inner_class(tabs[id]).consume_msg(splited_msg) + + +class RoundTabsWindow(StWindow): + def __init__( + self, + container: "DeltaGenerator", + new_tab_func: Callable[[Message], bool], + inner_class: Type[StWindow] = StWindow, + title: str = "Round tabs", + ): + container.markdown(f"### **{title}**") + self.inner_class = inner_class + self.new_tab_func = new_tab_func + self.round = 0 + + self.current_win = StWindow(container) + self.tabs_c = container.empty() + + def consume_msg(self, msg: Message): + if self.new_tab_func(msg): + self.round += 1 + self.current_win = self.inner_class(self.tabs_c.tabs([str(i) for i in range(1, self.round + 1)])[-1]) + + self.current_win.consume_msg(msg) + + +class HypothesisWindow(StWindow): + def consume_msg(self, msg: Message | Hypothesis): + h: Hypothesis = msg.content if isinstance(msg, Message) else msg + + self.container.markdown("#### **Hypothesis💡**") + self.container.markdown( + f""" +- **Hypothesis**: {h.hypothesis} +- **Reason**: {h.reason}""" + ) + + +class HypothesisFeedbackWindow(StWindow): + def consume_msg(self, msg: Message | HypothesisFeedback): + h: HypothesisFeedback = msg.content if isinstance(msg, Message) else msg + + self.container.markdown("#### **Hypothesis Feedback🔍**") + self.container.markdown( + f""" +- **Observations**: {h.observations} +- 
**Hypothesis Evaluation**: {h.hypothesis_evaluation} +- **New Hypothesis**: {h.new_hypothesis} +- **Decision**: {h.decision} +- **Reason**: {h.reason}""" + ) + + +class FactorTaskWindow(StWindow): + def consume_msg(self, msg: Message | FactorTask): + ft: FactorTask = msg.content if isinstance(msg, Message) else msg + + self.container.markdown(f"**Factor Name**: {ft.factor_name}") + self.container.markdown(f"**Description**: {ft.factor_description}") + self.container.latex(f"Formulation: {ft.factor_formulation}") + + variables_df = pd.DataFrame(ft.variables, index=["Description"]).T + variables_df.index.name = "Variable" + self.container.table(variables_df) + self.container.text(f"Factor resources: {ft.factor_resources}") + + +class ModelTaskWindow(StWindow): + def consume_msg(self, msg: Message | ModelTask): + mt: ModelTask = msg.content if isinstance(msg, Message) else msg + + self.container.markdown(f"**Model Name**: {mt.name}") + self.container.markdown(f"**Model Type**: {mt.model_type}") + self.container.markdown(f"**Description**: {mt.description}") + self.container.latex(f"Formulation: {mt.formulation}") + + variables_df = pd.DataFrame(mt.variables, index=["Value"]).T + variables_df.index.name = "Variable" + self.container.table(variables_df) + + +class FactorFeedbackWindow(StWindow): + def consume_msg(self, msg: Message | FactorSingleFeedback): + fb: FactorSingleFeedback = msg.content if isinstance(msg, Message) else msg + + self.container.markdown( + f"""### :blue[Factor Execution Feedback] +{fb.execution_feedback} +### :blue[Factor Code Feedback] +{fb.code_feedback} +### :blue[Factor Value Feedback] +{fb.value_feedback} +### :blue[Factor Final Feedback] +{fb.final_feedback} +### :blue[Factor Final Decision] +This implementation is {'SUCCESS' if fb.final_decision else 'FAIL'}. 
+""" + ) + + +class ModelFeedbackWindow(StWindow): + def consume_msg(self, msg: Message | ModelSingleFeedback): + mb: ModelSingleFeedback = msg.content if isinstance(msg, Message) else msg + + self.container.markdown( + f"""### :blue[Model Execution Feedback] +{mb.execution_feedback} +### :blue[Model Shape Feedback] +{mb.shape_feedback} +### :blue[Model Value Feedback] +{mb.value_feedback} +### :blue[Model Code Feedback] +{mb.code_feedback} +### :blue[Model Final Feedback] +{mb.final_feedback} +### :blue[Model Final Decision] +This implementation is {'SUCCESS' if mb.final_decision else 'FAIL'}. +""" + ) + + +class WorkspaceWindow(StWindow): + def __init__(self, container: "DeltaGenerator", show_task_info: bool = False): + self.container = container + self.show_task_info = show_task_info + + def consume_msg(self, msg: Message | FactorFBWorkspace | ModelFBWorkspace): + ws: FactorFBWorkspace | ModelFBWorkspace = msg.content if isinstance(msg, Message) else msg + + # no workspace + if ws is None: + return + + # task info + if self.show_task_info: + task_msg = deepcopy(msg) + task_msg.content = ws.target_task + if isinstance(ws, FactorFBWorkspace): + self.container.subheader("Factor Info") + FactorTaskWindow(self.container.container()).consume_msg(task_msg) + else: + self.container.subheader("Model Info") + ModelTaskWindow(self.container.container()).consume_msg(task_msg) + + # task codes + for k, v in ws.code_dict.items(): + self.container.markdown(f"`{k}`") + self.container.code(v, language="python") + + +class QlibFactorExpWindow(StWindow): + def __init__(self, container: DeltaGenerator, show_task_info: bool = False): + self.container = container + self.show_task_info = show_task_info + + def consume_msg(self, msg: Message | QlibFactorExperiment): + exp: QlibFactorExperiment = msg.content if isinstance(msg, Message) else msg + + # factor tasks + if self.show_task_info: + ftm_msg = deepcopy(msg) + ftm_msg.content = [ws for ws in exp.sub_workspace_list if ws] + 
self.container.markdown("**Factor Tasks**") + ObjectsTabsWindow( + self.container.container(), + inner_class=WorkspaceWindow, + mapper=lambda x: x.target_task.factor_name, + ).consume_msg(ftm_msg) + + # result + self.container.markdown("**Results**") + results = pd.DataFrame({f"base_exp_{id}": e.result for id, e in enumerate(exp.based_experiments)}) + results["now"] = exp.result + + self.container.expander("results table").table(results) + + try: + bar_chart = px.bar(results, orientation="h", barmode="group") + self.container.expander("results chart").plotly_chart(bar_chart) + except: + self.container.text("Results are incomplete.") + + +class QlibModelExpWindow(StWindow): + def __init__(self, container: DeltaGenerator, show_task_info: bool = False): + self.container = container + self.show_task_info = show_task_info + + def consume_msg(self, msg: Message | QlibModelExperiment): + exp: QlibModelExperiment = msg.content if isinstance(msg, Message) else msg + + # model tasks + if self.show_task_info: + _msg = deepcopy(msg) + _msg.content = [ws for ws in exp.sub_workspace_list if ws] + self.container.markdown("**Model Tasks**") + ObjectsTabsWindow( + self.container.container(), + inner_class=WorkspaceWindow, + mapper=lambda x: x.target_task.name, + ).consume_msg(_msg) + + # result + self.container.subheader("Results", divider=True) + results = pd.DataFrame({f"base_exp_{id}": e.result for id, e in enumerate(exp.based_experiments)}) + results["now"] = exp.result + + self.container.expander("results table").table(results) + + +class SimpleTraceWindow(StWindow): + def __init__( + self, container: "DeltaGenerator" = st.container(), show_llm: bool = False, show_common_logs: bool = False + ): + super().__init__(container) + self.show_llm = show_llm + self.show_common_logs = show_common_logs + self.pid_trace = "" + self.current_tag = "" + + self.current_win = StWindow(self.container) + self.evolving_tasks: list[str] = [] + + def consume_msg(self, msg: Message): + # divide tag 
levels + if len(msg.tag) > len(self.current_tag): + # write a header about current task, if it is llm message, not write. + if not msg.tag.endswith("llm_messages"): + self.container.header(msg.tag.replace(".", " ➡ "), divider=True) + + self.current_tag = msg.tag + + # set log writer (window) according to msg + if msg.tag.endswith("llm_messages"): + # llm messages logs + if not self.show_llm: + return + if not isinstance(self.current_win, LLMWindow): + self.current_win = LLMWindow(self.container) + elif isinstance(msg.content, Hypothesis): + # hypothesis + self.current_win = HypothesisWindow(self.container) + elif isinstance(msg.content, HypothesisFeedback): + # hypothesis feedback + self.current_win = HypothesisFeedbackWindow(self.container) + elif isinstance(msg.content, QlibFactorExperiment): + self.current_win = QlibFactorExpWindow(self.container) + elif isinstance(msg.content, QlibModelExperiment): + self.current_win = QlibModelExpWindow(self.container) + elif isinstance(msg.content, list): + msg.content = [m for m in msg.content if m] + if len(msg.content) == 0: + return + if isinstance(msg.content[0], FactorTask): + self.current_win = ObjectsTabsWindow( + self.container.expander("Factor Tasks"), FactorTaskWindow, lambda x: x.factor_name + ) + elif isinstance(msg.content[0], ModelTask): + self.current_win = ObjectsTabsWindow( + self.container.expander("Model Tasks"), ModelTaskWindow, lambda x: x.name + ) + + elif isinstance(msg.content[0], FactorFBWorkspace): + self.current_win = ObjectsTabsWindow( + self.container.expander("Factor Workspaces"), + inner_class=WorkspaceWindow, + mapper=lambda x: x.target_task.factor_name, + ) + self.evolving_tasks = [m.target_task.factor_name for m in msg.content] + elif isinstance(msg.content[0], ModelFBWorkspace): + self.current_win = ObjectsTabsWindow( + self.container.expander("Model Workspaces"), + inner_class=WorkspaceWindow, + mapper=lambda x: x.target_task.name, + ) + self.evolving_tasks = [m.target_task.name for m in 
msg.content] + + elif isinstance(msg.content[0], FactorSingleFeedback): + self.current_win = ObjectsTabsWindow( + self.container.expander("Factor Feedbacks"), + inner_class=FactorFeedbackWindow, + tab_names=self.evolving_tasks, + ) + elif isinstance(msg.content[0], ModelSingleFeedback): + self.current_win = ObjectsTabsWindow( + self.container.expander("Model Feedbacks"), + inner_class=ModelFeedbackWindow, + tab_names=self.evolving_tasks, + ) + else: + # common logs + if not self.show_common_logs: + return + self.current_win = StWindow(self.container) + + self.current_win.consume_msg(msg) + + +def mock_msg(obj) -> Message: + return Message(tag="mock", level="INFO", timestamp=datetime.now(), pid_trace="000", caller="mock", content=obj) + + +class TraceObjWindow(StWindow): + def __init__(self, container: "DeltaGenerator" = st.container()): + self.container = container + + def consume_msg(self, msg: Message | Trace): + if isinstance(msg, Message): + trace: Trace = msg.content + else: + trace = msg + + for id, (h, e, hf) in enumerate(trace.hist): + self.container.header(f"Trace History {id}", divider=True) + HypothesisWindow(self.container).consume_msg(mock_msg(h)) + if isinstance(e, QlibFactorExperiment): + QlibFactorExpWindow(self.container).consume_msg(mock_msg(e)) + else: + QlibModelExpWindow(self.container).consume_msg(mock_msg(e)) + HypothesisFeedbackWindow(self.container).consume_msg(mock_msg(hf)) + + +class ResearchWindow(StWindow): + def consume_msg(self, msg: Message): + if msg.tag.endswith("hypothesis generation"): + HypothesisWindow(self.container.container()).consume_msg(msg) + elif msg.tag.endswith("experiment generation"): + if isinstance(msg.content, list): + if isinstance(msg.content[0], FactorTask): + self.container.markdown("**Factor Tasks**") + ObjectsTabsWindow( + self.container.container(), FactorTaskWindow, lambda x: x.factor_name + ).consume_msg(msg) + elif isinstance(msg.content[0], ModelTask): + self.container.markdown("**Model Tasks**") + 
ObjectsTabsWindow(self.container.container(), ModelTaskWindow, lambda x: x.name).consume_msg(msg) + elif msg.tag.endswith("load_pdf_screenshot"): + self.container.image(msg.content) + elif msg.tag.endswith("load_factor_tasks"): + self.container.json(msg.content) + + +class EvolvingWindow(StWindow): + def __init__(self, container: "DeltaGenerator"): + self.container = container + self.evolving_tasks: list[str] = [] + + def consume_msg(self, msg: Message): + if msg.tag.endswith("evolving code"): + if isinstance(msg.content, list): + msg.content = [m for m in msg.content if m] + if len(msg.content) == 0: + return + if isinstance(msg.content[0], FactorFBWorkspace): + self.container.markdown("**Factor Codes**") + ObjectsTabsWindow( + self.container.container(), + inner_class=WorkspaceWindow, + mapper=lambda x: x.target_task.factor_name, + ).consume_msg(msg) + self.evolving_tasks = [m.target_task.factor_name for m in msg.content] + elif isinstance(msg.content[0], ModelFBWorkspace): + self.container.markdown("**Model Codes**") + ObjectsTabsWindow( + self.container.container(), inner_class=WorkspaceWindow, mapper=lambda x: x.target_task.name + ).consume_msg(msg) + self.evolving_tasks = [m.target_task.name for m in msg.content] + elif msg.tag.endswith("evolving feedback"): + if isinstance(msg.content, list): + msg.content = [m for m in msg.content if m] + if len(msg.content) == 0: + return + if isinstance(msg.content[0], FactorSingleFeedback): + self.container.markdown("**Factor Feedbacks🔍**") + ObjectsTabsWindow( + self.container.container(), inner_class=FactorFeedbackWindow, tab_names=self.evolving_tasks + ).consume_msg(msg) + elif isinstance(msg.content[0], ModelSingleFeedback): + self.container.markdown("**Model Feedbacks🔍**") + ObjectsTabsWindow( + self.container.container(), inner_class=ModelFeedbackWindow, tab_names=self.evolving_tasks + ).consume_msg(msg) + + +class DevelopmentWindow(StWindow): + def __init__(self, container: "DeltaGenerator"): + self.E_win = 
class FeedbackWindow(StWindow):
    """Render the feedback stage of a loop: returns chart, hypothesis feedback, experiments."""

    def __init__(self, container: "DeltaGenerator"):
        self.container = container

    def consume_msg(self, msg: Message):
        """Draw ``msg`` according to its tag or to the type of its content.

        A tag ending in "returns" is plotted as a line chart. Otherwise the
        content type picks the window; content of any other type is ignored.
        """
        if msg.tag.endswith("returns"):
            chart = px.line(msg.content)
            self.container.markdown("**Returns📈**")
            self.container.plotly_chart(chart)
            return

        # Checked in this order; the first matching type wins.
        windows_by_content_type = (
            (HypothesisFeedback, HypothesisFeedbackWindow),
            (QlibModelExperiment, QlibModelExpWindow),
            (QlibFactorExperiment, QlibFactorExpWindow),
        )
        for content_type, window_cls in windows_by_content_type:
            if isinstance(msg.content, content_type):
                window_cls(self.container.container(border=True)).consume_msg(msg)
                break


class SingleRDLoopWindow(StWindow):
    """Lay out one R&D loop: research and feedback in the left column, development in the right."""

    def __init__(self, container: "DeltaGenerator"):
        self.container = container
        left, right = self.container.columns([2, 3])
        # Creation order fixes the vertical order inside the left column.
        self.R_win = ResearchWindow(left.container(border=True))
        self.F_win = FeedbackWindow(left.container(border=True))
        self.D_win = DevelopmentWindow(right.container(border=True))

    def consume_msg(self, msg: Message):
        """Forward ``msg`` to the window of the first stage marker found in its dotted tag.

        Markers are tried in the order "r", "d", "ef"; a tag carrying none of
        them is dropped.
        """
        tag_parts = msg.tag.split(".")
        routes = (("r", self.R_win), ("d", self.D_win), ("ef", self.F_win))
        for marker, window in routes:
            if marker in tag_parts:
                window.consume_msg(msg)
                break
top_container.columns([2, 3]) + chart_c = col2.container(border=True, height=500) + chart_c.markdown("**Metrics📈**") + self.chart_c = chart_c.empty() + hypothesis_status_c = col1.container(border=True, height=500) + hypothesis_status_c.markdown("**Hypotheses🏅**") + self.summary_c = hypothesis_status_c.empty() + + self.RDL_win = RoundTabsWindow( + container.container(), + new_tab_func=lambda x: x.tag.endswith("hypothesis generation"), + inner_class=SingleRDLoopWindow, + title="R&D Loops♾️", + ) + + self.hypothesis_decisions = defaultdict(bool) + self.hypotheses: list[Hypothesis] = [] + + self.results = [] + + def consume_msg(self, msg: Message): + if not self.show_llm and "llm_messages" in msg.tag: + return + if not self.show_common_logs and isinstance(msg.content, str): + return + if isinstance(msg.content, dict): + return + if msg.tag.endswith("hypothesis generation"): + self.hypotheses.append(msg.content) + elif msg.tag.endswith("ef.feedback"): + self.hypothesis_decisions[self.hypotheses[-1]] = msg.content.decision + self.summary_c.markdown( + "\n".join( + ( + f"{id+1}. :green[{self.hypotheses[id].hypothesis}]\n\t>*{self.hypotheses[id].concise_reason}*" + if d + else f"{id+1}. 
class LogColors:
    """
    ANSI color codes for use in console output.
    """

    # Every non-callable, non-dunder attribute of this class is reported by
    # `get_all_colors`, so only escape codes may be stored as class attributes.
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    CYAN = "\033[96m"
    WHITE = "\033[97m"
    GRAY = "\033[90m"
    BLACK = "\033[30m"

    BOLD = "\033[1m"
    ITALIC = "\033[3m"

    END = "\033[0m"

    @classmethod
    def get_all_colors(cls: type["LogColors"]) -> list:
        """Return the value of every escape-code attribute (colors, styles and END)."""
        names = dir(cls)
        names = [name for name in names if not name.startswith("__") and not callable(getattr(cls, name))]
        return [getattr(cls, name) for name in names]

    def render(self, text: str, color: str = "", style: str = "") -> str:
        """
        Render text with the given color and style.
        It is not recommended to pass text that is already rendered.

        Parameters
        ----------
        text : str
            The text to wrap.
        color : str
            One of the escape codes defined on this class, or "" for none.
        style : str
            One of the escape codes defined on this class, or "" for none.

        Returns
        -------
        str
            ``style + color + text + END + END``.

        Raises
        ------
        ValueError
            If a non-empty ``color`` or ``style`` is not one of the known codes.
        """
        # This method is called too frequently, which is not good.
        colors = self.get_all_colors()
        # Perhaps color and font should be distinguished here.
        # Reject only codes that are NOT known; the check used to be inverted,
        # raising for every valid color and letting arbitrary strings through.
        if color and color not in colors:
            error_message = f"color should be in: {colors} but now is: {color}"
            raise ValueError(error_message)
        if style and style not in colors:
            error_message = f"style should be in: {colors} but now is: {style}"
            raise ValueError(error_message)

        text = f"{color}{text}{self.END}"

        return f"{style}{text}{self.END}"

    @staticmethod
    def remove_ansi_codes(s: str) -> str:
        """
        Remove ANSI control sequences (e.g. the ones added by `render`) from the string.
        """
        ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
        return ansi_escape.sub("", s)


class CallerInfo(TypedDict):
    """Location of a call site, as produced by `get_caller_info`."""

    function: str
    line: int
    name: Optional[str]


def get_caller_info() -> CallerInfo:
    """Describe the frame two levels above this function.

    Index 0 of the stack is this function and index 1 is whoever called it, so
    index 2 is the caller of that function.

    Returns
    -------
    CallerInfo
        ``line``: line number being executed in that frame; ``name``: the
        ``__name__`` found in the frame's globals; ``function``: the name of
        the code object running in that frame.
    """
    # Get the current stack information
    stack = inspect.stack()
    # Third entry: the caller of the function that called get_caller_info
    caller_info = stack[2]
    frame = caller_info[0]
    info: CallerInfo = {
        "line": caller_info.lineno,
        "name": frame.f_globals["__name__"],  # Get the module name from the frame's globals
        "function": frame.f_code.co_name,  # Get the caller's function name
    }
    return info
question when caching is enabled + use_auto_chat_cache_seed_gen: bool = False + """ + `_create_chat_completion_inner_function` provdies a feature to pass in a seed to affect the cache hash key + We want to enable a auto seed generator to get different default seed for `_create_chat_completion_inner_function` + if seed is not given. + So the cache will only not miss you ask the same question on same round. + """ + init_chat_cache_seed: int = 42 + + # Chat configs + openai_api_key: str = "" # TODO: simplify the key design. + openai_base_url: str = "" + chat_openai_api_key: str = "" + chat_azure_api_base: str = "" + chat_azure_api_version: str = "" + chat_model: str = "gpt-4-turbo" + reasoning_model: str = "" + chat_max_tokens: int = 3000 + chat_temperature: float = 0.5 + chat_stream: bool = True + chat_seed: int | None = None + chat_frequency_penalty: float = 0.0 + chat_presence_penalty: float = 0.0 + chat_token_limit: int = ( + 100000 # 100000 is the maximum limit of gpt4, which might increase in the future version of gpt + ) + default_system_prompt: str = "You are an AI assistant who helps to answer user's questions." 
+ factor_mining_timeout: int = 36000 # 10小时,单位:秒 + + # Embedding configs + embedding_openai_api_key: str = "" + embedding_azure_api_base: str = "" + embedding_azure_api_version: str = "" + embedding_model: str = "" + embedding_max_str_num: int = 50 + embedding_api_key: str = "" + embedding_base_url: str = "" + + # offline llama2 related config + use_llama2: bool = False + llama2_ckpt_dir: str = "Llama-2-7b-chat" + llama2_tokenizer_path: str = "Llama-2-7b-chat/tokenizer.model" + llams2_max_batch_size: int = 8 + + # server served endpoints + use_gcr_endpoint: bool = False + gcr_endpoint_type: str = "llama2_70b" # or "llama3_70b", "phi2", "phi3_4k", "phi3_128k" + + llama2_70b_endpoint: str = "" + llama2_70b_endpoint_key: str = "" + llama2_70b_endpoint_deployment: str = "" + + llama3_70b_endpoint: str = "" + llama3_70b_endpoint_key: str = "" + llama3_70b_endpoint_deployment: str = "" + + phi2_endpoint: str = "" + phi2_endpoint_key: str = "" + phi2_endpoint_deployment: str = "" + + phi3_4k_endpoint: str = "" + phi3_4k_endpoint_key: str = "" + phi3_4k_endpoint_deployment: str = "" + + phi3_128k_endpoint: str = "" + phi3_128k_endpoint_key: str = "" + phi3_128k_endpoint_deployment: str = "" + + gcr_endpoint_temperature: float = 0.7 + gcr_endpoint_top_p: float = 0.9 + gcr_endpoint_do_sample: bool = False + gcr_endpoint_max_token: int = 100 + + chat_model_map: str = "{}" + + +LLM_SETTINGS = LLMSettings() diff --git a/alphaagent/oai/llm_utils.py b/alphaagent/oai/llm_utils.py new file mode 100755 index 00000000..1022e67d --- /dev/null +++ b/alphaagent/oai/llm_utils.py @@ -0,0 +1,853 @@ +from __future__ import annotations + +import hashlib +import inspect +import json +import os +import random +import re +import sqlite3 +import ssl +import time +import urllib.request +import uuid +from copy import deepcopy +from pathlib import Path +from typing import Any, Optional + +import numpy as np +import tiktoken + +from alphaagent.core.utils import LLM_CACHE_SEED_GEN, SingletonBaseClass 
DEFAULT_QLIB_DOT_PATH = Path("./")


def md5_hash(input_string: str) -> str:
    """Return the hexadecimal MD5 digest of ``input_string`` encoded as UTF-8.

    The hash object is created with ``usedforsecurity=False``.
    """
    hash_md5 = hashlib.md5(usedforsecurity=False)
    input_bytes = input_string.encode("utf-8")
    hash_md5.update(input_bytes)
    return hash_md5.hexdigest()


class ConvManager:
    """
    This is a conversation manager of LLM
    It is for convenience of exporting conversation for debugging.

    Conversations are stored as ``<n>.json`` files inside ``path``; ``0.json``
    is always the most recently appended one.
    """

    def __init__(
        self,
        path: Path | str = DEFAULT_QLIB_DOT_PATH / "llm_conv",
        recent_n: int = 10,
    ) -> None:
        """Create the dump directory (parents included) if it does not exist yet.

        Parameters
        ----------
        path : Path | str
            Directory that receives the ``<n>.json`` dumps.
        recent_n : int
            How many of the lowest-numbered dumps are shifted on each rotation.
        """
        self.path = Path(path)
        self.path.mkdir(parents=True, exist_ok=True)
        self.recent_n = recent_n

    def _rotate_files(self) -> None:
        """Shift the ``recent_n`` lowest-numbered dumps up by one, freeing ``0.json``.

        Only files named exactly ``<digits>.json`` take part. The pattern is
        matched against the whole name with an escaped dot, so a stray file such
        as ``10json.json`` or ``3.json.json`` is no longer mistaken for a
        numbered dump and renamed.
        """
        pairs = []
        for f in self.path.glob("*.json"):
            m = re.fullmatch(r"(\d+)\.json", f.name)
            if m is not None:
                pairs.append((int(m.group(1)), f))
        pairs.sort(key=lambda x: x[0])
        # Highest number first, so a file is never renamed onto one still waiting to move.
        for n, f in pairs[: self.recent_n][::-1]:
            target = self.path / f"{n+1}.json"
            if target.exists():
                target.unlink()
            f.rename(target)

    def append(self, conv: tuple[list, str]) -> None:
        """Rotate the existing dumps, then write ``conv`` as JSON to ``0.json``."""
        self._rotate_files()
        with (self.path / "0.json").open("w") as file:
            json.dump(conv, file)
            # TODO: preserve line breaks to make it more convenient to edit the file directly.
+ self.conn = sqlite3.connect(cache_location, timeout=20) + self.c = self.conn.cursor() + if not db_file_exist: + self.c.execute( + """ + CREATE TABLE chat_cache ( + md5_key TEXT PRIMARY KEY, + chat TEXT + ) + """, + ) + self.c.execute( + """ + CREATE TABLE embedding_cache ( + md5_key TEXT PRIMARY KEY, + embedding TEXT + ) + """, + ) + self.c.execute( + """ + CREATE TABLE message_cache ( + conversation_id TEXT PRIMARY KEY, + message TEXT + ) + """, + ) + self.conn.commit() + + def chat_get(self, key: str) -> str | None: + md5_key = md5_hash(key) + self.c.execute("SELECT chat FROM chat_cache WHERE md5_key=?", (md5_key,)) + result = self.c.fetchone() + if result is None: + return None + return result[0] + + def embedding_get(self, key: str) -> list | dict | str | None: + md5_key = md5_hash(key) + self.c.execute("SELECT embedding FROM embedding_cache WHERE md5_key=?", (md5_key,)) + result = self.c.fetchone() + if result is None: + return None + return json.loads(result[0]) + + def chat_set(self, key: str, value: str) -> None: + md5_key = md5_hash(key) + self.c.execute( + "INSERT OR REPLACE INTO chat_cache (md5_key, chat) VALUES (?, ?)", + (md5_key, value), + ) + self.conn.commit() + + def embedding_set(self, content_to_embedding_dict: dict) -> None: + for key, value in content_to_embedding_dict.items(): + md5_key = md5_hash(key) + self.c.execute( + "INSERT OR REPLACE INTO embedding_cache (md5_key, embedding) VALUES (?, ?)", + (md5_key, json.dumps(value)), + ) + self.conn.commit() + + def message_get(self, conversation_id: str) -> list[str]: + self.c.execute("SELECT message FROM message_cache WHERE conversation_id=?", (conversation_id,)) + result = self.c.fetchone() + if result is None: + return [] + return json.loads(result[0]) + + def message_set(self, conversation_id: str, message_value: list[str]) -> None: + self.c.execute( + "INSERT OR REPLACE INTO message_cache (conversation_id, message) VALUES (?, ?)", + (conversation_id, json.dumps(message_value)), + ) + 
class SessionChatHistoryCache(SingletonBaseClass):
    """Thin wrapper that stores and retrieves per-conversation message lists."""

    def __init__(self) -> None:
        """Open the SQLite-backed cache located at ``LLM_SETTINGS.prompt_cache_path``."""
        self.cache = SQliteLazyCache(cache_location=LLM_SETTINGS.prompt_cache_path)

    def message_get(self, conversation_id: str) -> list[str]:
        """Return the messages stored for ``conversation_id`` by the underlying cache."""
        stored = self.cache.message_get(conversation_id)
        return stored

    def message_set(self, conversation_id: str, message_value: list[str]) -> None:
        """Store ``message_value`` as the full history of ``conversation_id``."""
        self.cache.message_set(conversation_id, message_value)


class ChatSession:
    """A multi-turn conversation whose history is kept in ``SessionChatHistoryCache``."""

    def __init__(self, api_backend: Any, conversation_id: str | None = None, system_prompt: str | None = None) -> None:
        """Bind the session to a backend.

        A random UUID is used when ``conversation_id`` is None, and
        ``LLM_SETTINGS.default_system_prompt`` when ``system_prompt`` is None.
        """
        if conversation_id is None:
            conversation_id = str(uuid.uuid4())
        if system_prompt is None:
            system_prompt = LLM_SETTINGS.default_system_prompt
        self.conversation_id = conversation_id
        self.system_prompt = system_prompt
        self.api_backend = api_backend

    def build_chat_completion_message(self, user_prompt: str) -> list[dict[str, Any]]:
        """Return the stored history followed by ``user_prompt`` as a new user turn.

        When nothing is stored yet, the system prompt becomes the first message.
        Nothing is written back to the cache here.
        """
        messages = SessionChatHistoryCache().message_get(self.conversation_id)
        if not messages:
            system_turn = {"role": "system", "content": self.system_prompt}
            messages.append(system_turn)
        user_turn = {"role": "user", "content": user_prompt}
        messages.append(user_turn)
        return messages

    def build_chat_completion_message_and_calculate_token(self, user_prompt: str) -> Any:
        """Build the message list for ``user_prompt`` and return the backend's token count for it."""
        pending = self.build_chat_completion_message(user_prompt)
        return self.api_backend.calculate_token_from_messages(pending)

    def build_chat_completion(self, user_prompt: str, **kwargs: Any) -> str:
        """
        Send ``user_prompt`` within this session and return the reply.

        The user prompt must always be provided. Extra keyword arguments are
        forwarded to the backend. After the call, the history including the
        assistant reply is saved under this conversation id.
        """
        messages = self.build_chat_completion_message(user_prompt)

        with logger.tag(f"session_{self.conversation_id}"):
            response = self.api_backend._try_create_chat_completion_or_embedding(  # noqa: SLF001
                messages=messages,
                chat_completion=True,
                **kwargs,
            )

        assistant_turn = {"role": "assistant", "content": response}
        messages.append(assistant_turn)
        SessionChatHistoryCache().message_set(self.conversation_id, messages)
        return response

    def get_conversation_id(self) -> str:
        """Return the identifier under which this session's history is stored."""
        return self.conversation_id

    def display_history(self) -> None:
        """Placeholder; does nothing yet."""
        # TODO: Realize a beautiful presentation format for history messages
        return None
LLM_SETTINGS.llama3_70b_endpoint_key + self.gcr_endpoint_deployment = LLM_SETTINGS.llama3_70b_endpoint_deployment + self.gcr_endpoint = LLM_SETTINGS.llama3_70b_endpoint + elif gcr_endpoint_type == "phi2": + self.gcr_endpoint_key = LLM_SETTINGS.phi2_endpoint_key + self.gcr_endpoint_deployment = LLM_SETTINGS.phi2_endpoint_deployment + self.gcr_endpoint = LLM_SETTINGS.phi2_endpoint + elif gcr_endpoint_type == "phi3_4k": + self.gcr_endpoint_key = LLM_SETTINGS.phi3_4k_endpoint_key + self.gcr_endpoint_deployment = LLM_SETTINGS.phi3_4k_endpoint_deployment + self.gcr_endpoint = LLM_SETTINGS.phi3_4k_endpoint + elif gcr_endpoint_type == "phi3_128k": + self.gcr_endpoint_key = LLM_SETTINGS.phi3_128k_endpoint_key + self.gcr_endpoint_deployment = LLM_SETTINGS.phi3_128k_endpoint_deployment + self.gcr_endpoint = LLM_SETTINGS.phi3_128k_endpoint + else: + error_message = f"Invalid gcr_endpoint_type: {gcr_endpoint_type}" + raise ValueError(error_message) + self.headers = { + "Content-Type": "application/json", + "Authorization": ("Bearer " + self.gcr_endpoint_key), + } + self.gcr_endpoint_temperature = LLM_SETTINGS.gcr_endpoint_temperature + self.gcr_endpoint_top_p = LLM_SETTINGS.gcr_endpoint_top_p + self.gcr_endpoint_do_sample = LLM_SETTINGS.gcr_endpoint_do_sample + self.gcr_endpoint_max_token = LLM_SETTINGS.gcr_endpoint_max_token + if not os.environ.get("PYTHONHTTPSVERIFY", "") and hasattr(ssl, "_create_unverified_context"): + ssl._create_default_https_context = ssl._create_unverified_context # noqa: SLF001 + self.chat_model_map = json.loads(LLM_SETTINGS.chat_model_map) + self.chat_model = LLM_SETTINGS.chat_model if chat_model is None else chat_model + self.encoder = None + else: + self.use_azure = LLM_SETTINGS.use_azure + self.chat_use_azure_token_provider = LLM_SETTINGS.chat_use_azure_token_provider + self.embedding_use_azure_token_provider = LLM_SETTINGS.embedding_use_azure_token_provider + self.managed_identity_client_id = LLM_SETTINGS.managed_identity_client_id + + # Priority: 
chat_api_key/embedding_api_key > openai_api_key > os.environ.get("OPENAI_API_KEY") + # TODO: Simplify the key design. Consider Pandatic's field alias & priority. + self.chat_api_key = ( + chat_api_key + or LLM_SETTINGS.chat_openai_api_key + or LLM_SETTINGS.openai_api_key + or os.environ.get("OPENAI_API_KEY") + ) + self.embedding_api_key = ( + embedding_api_key + or LLM_SETTINGS.embedding_openai_api_key + or LLM_SETTINGS.openai_api_key + or os.environ.get("OPENAI_API_KEY") + ) + + self.base_url = ( + LLM_SETTINGS.openai_base_url + or os.environ.get("OPENAI_BASE_URL") + ) + + self.embedding_base_url = ( + LLM_SETTINGS.embedding_base_url + or os.environ.get("EMBEDDING_BASE_URL") + ) + + self.embedding_api_key = ( + LLM_SETTINGS.embedding_api_key + or os.environ.get("EMBEDDING_API_KEY") + ) + + + self.chat_model = LLM_SETTINGS.chat_model if chat_model is None else chat_model + self.reasoning_model = LLM_SETTINGS.reasoning_model if reasoning_model is None else reasoning_model + self.chat_model_map = json.loads(LLM_SETTINGS.chat_model_map) + # self.encoder = self._get_encoder() + + self.chat_api_base = LLM_SETTINGS.chat_azure_api_base if chat_api_base is None else chat_api_base + self.chat_api_version = ( + LLM_SETTINGS.chat_azure_api_version if chat_api_version is None else chat_api_version + ) + self.chat_stream = LLM_SETTINGS.chat_stream + self.chat_seed = LLM_SETTINGS.chat_seed + + self.embedding_model = LLM_SETTINGS.embedding_model if embedding_model is None else embedding_model + self.embedding_api_base = ( + LLM_SETTINGS.embedding_azure_api_base if embedding_api_base is None else embedding_api_base + ) + self.embedding_api_version = ( + LLM_SETTINGS.embedding_azure_api_version if embedding_api_version is None else embedding_api_version + ) + + if self.use_azure: + if self.chat_use_azure_token_provider or self.embedding_use_azure_token_provider: + dac_kwargs = {} + if self.managed_identity_client_id is not None: + dac_kwargs["managed_identity_client_id"] = 
self.managed_identity_client_id + credential = DefaultAzureCredential(**dac_kwargs) + token_provider = get_bearer_token_provider( + credential, + "https://cognitiveservices.azure.com/.default", + ) + if self.chat_use_azure_token_provider: + self.chat_client = openai.AzureOpenAI( + azure_ad_token_provider=token_provider, + api_version=self.chat_api_version, + azure_endpoint=self.chat_api_base, + ) + else: + self.chat_client = openai.AzureOpenAI( + api_key=self.chat_api_key, + api_version=self.chat_api_version, + azure_endpoint=self.chat_api_base, + ) + + if self.embedding_use_azure_token_provider: + self.embedding_client = openai.AzureOpenAI( + azure_ad_token_provider=token_provider, + api_version=self.embedding_api_version, + azure_endpoint=self.embedding_api_base, + ) + else: + self.embedding_client = openai.AzureOpenAI( + api_key=self.embedding_api_key, + api_version=self.embedding_api_version, + azure_endpoint=self.embedding_api_base, + ) + else: + self.chat_client = openai.OpenAI(api_key=self.chat_api_key, base_url=self.base_url) + self.embedding_client = openai.OpenAI(api_key=self.embedding_api_key, base_url=self.embedding_base_url) + + self.dump_chat_cache = LLM_SETTINGS.dump_chat_cache if dump_chat_cache is None else dump_chat_cache + self.use_chat_cache = LLM_SETTINGS.use_chat_cache if use_chat_cache is None else use_chat_cache + self.dump_embedding_cache = ( + LLM_SETTINGS.dump_embedding_cache if dump_embedding_cache is None else dump_embedding_cache + ) + self.use_embedding_cache = ( + LLM_SETTINGS.use_embedding_cache if use_embedding_cache is None else use_embedding_cache + ) + if self.dump_chat_cache or self.use_chat_cache or self.dump_embedding_cache or self.use_embedding_cache: + self.cache_file_location = LLM_SETTINGS.prompt_cache_path + self.cache = SQliteLazyCache(cache_location=self.cache_file_location) + + # transfer the config to the class if the config is not supposed to change during the runtime + self.use_llama2 = LLM_SETTINGS.use_llama2 + 
self.use_gcr_endpoint = LLM_SETTINGS.use_gcr_endpoint + self.retry_wait_seconds = LLM_SETTINGS.retry_wait_seconds + + def _get_encoder(self): + """ + tiktoken.encoding_for_model(self.chat_model) does not cover all cases it should consider. + + This function attempts to handle several edge cases. + """ + + # 1) cases + def _azure_patch(model: str) -> str: + """ + When using Azure API, self.chat_model is the deployment name that can be any string. + For example, it may be `gpt-4o_2024-08-06`. But tiktoken.encoding_for_model can't handle this. + """ + return model.replace("_", "-") + + model = self.chat_model + try: + return tiktoken.encoding_for_model(model) + except KeyError: + logger.warning(f"Failed to get encoder. Trying to patch the model name") + for patch_func in [_azure_patch]: + try: + return tiktoken.encoding_for_model(patch_func(model)) + except KeyError: + logger.error(f"Failed to get encoder even after patching with {patch_func.__name__}") + raise + + def build_chat_session( + self, + conversation_id: str | None = None, + session_system_prompt: str | None = None, + ) -> ChatSession: + """ + conversation_id is a 256-bit string created by uuid.uuid4() and is also + the file name under session_cache_folder/ for each conversation + """ + return ChatSession(self, conversation_id, session_system_prompt) + + def build_messages( + self, + user_prompt: str, + system_prompt: str | None = None, + former_messages: list[dict] | None = None, + *, + shrink_multiple_break: bool = False, + ) -> list[dict]: + """ + build the messages to avoid implementing several redundant lines of code + + """ + if former_messages is None: + former_messages = [] + # shrink multiple break will recursively remove multiple breaks(more than 2) + if shrink_multiple_break: + while "\n\n\n" in user_prompt: + user_prompt = user_prompt.replace("\n\n\n", "\n\n") + if system_prompt is not None: + while "\n\n\n" in system_prompt: + system_prompt = system_prompt.replace("\n\n\n", "\n\n") + 
system_prompt = LLM_SETTINGS.default_system_prompt if system_prompt is None else system_prompt + messages = [ + { + "role": "system", + "content": system_prompt, + }, + ] + messages.extend(former_messages[-1 * LLM_SETTINGS.max_past_message_include :]) + messages.append( + { + "role": "user", + "content": user_prompt, + }, + ) + return messages + + def build_messages_and_create_chat_completion( + self, + user_prompt: str, + system_prompt: str | None = None, + former_messages: list | None = None, + chat_cache_prefix: str = "", + *, + shrink_multiple_break: bool = False, + **kwargs: Any, + ) -> str: + if former_messages is None: + former_messages = [] + messages = self.build_messages( + user_prompt, + system_prompt, + former_messages, + shrink_multiple_break=shrink_multiple_break, + ) + return self._try_create_chat_completion_or_embedding( + messages=messages, + chat_completion=True, + chat_cache_prefix=chat_cache_prefix, + **kwargs, + ) + + def create_embedding(self, input_content: str | list[str], **kwargs: Any) -> list[Any] | Any: + input_content_list = [input_content] if isinstance(input_content, str) else input_content + resp = self._try_create_chat_completion_or_embedding( + input_content_list=input_content_list, + embedding=True, + **kwargs, + ) + if isinstance(input_content, str): + return resp[0] + return resp + + def _create_chat_completion_auto_continue(self, messages: list, **kwargs: dict) -> str: + """ + Call the chat completion function and automatically continue the conversation if the finish_reason is length. + TODO: This function only continues once, maybe need to continue more than once in the future. 
+ """ + response, finish_reason = self._create_chat_completion_inner_function(messages=messages, **kwargs) + + if finish_reason == "length": + new_message = deepcopy(messages) + new_message.append({"role": "assistant", "content": response}) + new_message.append( + { + "role": "user", + "content": "continue the former output with no overlap", + }, + ) + new_response, finish_reason = self._create_chat_completion_inner_function(messages=new_message, **kwargs) + return response + new_response + return response + + def _try_create_chat_completion_or_embedding( + self, + max_retry: int = 10, + *, + chat_completion: bool = False, + embedding: bool = False, + **kwargs: Any, + ) -> Any: + assert not (chat_completion and embedding), "chat_completion and embedding cannot be True at the same time" + max_retry = LLM_SETTINGS.max_retry if LLM_SETTINGS.max_retry is not None else max_retry + for i in range(max_retry): + try: + # import pdb; pdb.set_trace() + if embedding: + return self._create_embedding_inner_function(**kwargs) + if chat_completion: + return self._create_chat_completion_auto_continue(**kwargs) + except openai.BadRequestError as e: # noqa: PERF203 + logger.warning(e) + logger.warning(f"Retrying {i+1}th time...") + if "'messages' must contain the word 'json' in some form" in e.message: + kwargs["add_json_in_prompt"] = True + elif embedding and "maximum context length" in e.message: + kwargs["input_content_list"] = [ + content[: len(content) // 2] for content in kwargs.get("input_content_list", []) + ] + except Exception as e: # noqa: BLE001 + logger.warning(e) + logger.warning(f"Retrying {i+1}th time...") + time.sleep(self.retry_wait_seconds) + error_message = f"Failed to create chat completion after {max_retry} retries." 
+ raise RuntimeError(error_message) + + def _create_embedding_inner_function( + self, input_content_list: list[str], **kwargs: Any + ) -> list[Any]: # noqa: ARG002 + content_to_embedding_dict = {} + filtered_input_content_list = [] + if self.use_embedding_cache: + for content in input_content_list: + cache_result = self.cache.embedding_get(content) + if cache_result is not None: + content_to_embedding_dict[content] = cache_result + else: + filtered_input_content_list.append(content) + else: + filtered_input_content_list = input_content_list + + if len(filtered_input_content_list) > 0: + for sliced_filtered_input_content_list in [ + filtered_input_content_list[i : i + LLM_SETTINGS.embedding_max_str_num] + for i in range(0, len(filtered_input_content_list), LLM_SETTINGS.embedding_max_str_num) + ]: + if self.use_azure: + response = self.embedding_client.embeddings.create( + model=self.embedding_model, + input=sliced_filtered_input_content_list, + ) + else: + response = self.embedding_client.embeddings.create( + model=self.embedding_model, + input=sliced_filtered_input_content_list, + ) + for index, data in enumerate(response.data): + content_to_embedding_dict[sliced_filtered_input_content_list[index]] = data.embedding + + if self.dump_embedding_cache: + self.cache.embedding_set(content_to_embedding_dict) + return [content_to_embedding_dict[content] for content in input_content_list] + + def _build_log_messages(self, messages: list[dict]) -> str: + log_messages = "" + for m in messages: + log_messages += ( + f"\n{LogColors.MAGENTA}{LogColors.BOLD}Role:{LogColors.END}" + f"{LogColors.CYAN}{m['role']}{LogColors.END}\n" + f"{LogColors.MAGENTA}{LogColors.BOLD}Content:{LogColors.END} " + f"{LogColors.CYAN}{m['content']}{LogColors.END}\n" + ) + return log_messages + + def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 + self, + messages: list[dict], + reasoning_flag = True, + temperature: float | None = None, + max_tokens: int | None = None, + 
chat_cache_prefix: str = "", + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + *, + json_mode: bool = False, + add_json_in_prompt: bool = False, + seed: Optional[int] = None, + ) -> str: + """ + seed : Optional[int] + When retrying with cache enabled, it will keep returning the same results. + To make retries useful, we need to enable a seed. + This seed is different from `self.chat_seed` for GPT. It is for the local cache mechanism enabled by RD-Agent locally. + """ + if seed is None and LLM_SETTINGS.use_auto_chat_cache_seed_gen: + seed = LLM_CACHE_SEED_GEN.get_next_seed() + + # TODO: we can add this function back to avoid so much `self.cfg.log_llm_chat_content` + if LLM_SETTINGS.log_llm_chat_content: + logger.info(self._build_log_messages(messages), tag="llm_messages") + # TODO: fail to use loguru adaptor due to stream response + input_content_json = json.dumps(messages) + input_content_json = ( + chat_cache_prefix + input_content_json + f"" + ) # FIXME this is a hack to make sure the cache represents the round index + if self.use_chat_cache: + cache_result = self.cache.chat_get(input_content_json) + if cache_result is not None: + if LLM_SETTINGS.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{cache_result}{LogColors.END}", tag="llm_messages") + return cache_result, None + + if temperature is None: + temperature = LLM_SETTINGS.chat_temperature + if max_tokens is None: + max_tokens = LLM_SETTINGS.chat_max_tokens + if frequency_penalty is None: + frequency_penalty = LLM_SETTINGS.chat_frequency_penalty + if presence_penalty is None: + presence_penalty = LLM_SETTINGS.chat_presence_penalty + + # Use index 4 to skip the current function and intermediate calls, + # and get the locals of the caller's frame. 
+ caller_locals = inspect.stack()[4].frame.f_locals + if "self" in caller_locals: + tag = caller_locals["self"].__class__.__name__ + else: + tag = inspect.stack()[4].function + + if reasoning_flag: + model = self.reasoning_model + json_mode = None + else: + model = self.chat_model_map.get(tag, self.chat_model) + + finish_reason = None + if self.use_llama2: + response = self.generator.chat_completion( + messages, # type: ignore + max_gen_len=max_tokens, + temperature=temperature, + ) + resp = response[0]["generation"]["content"] + if LLM_SETTINGS.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages") + elif self.use_gcr_endpoint: + body = str.encode( + json.dumps( + { + "input_data": { + "input_string": messages, + "parameters": { + "temperature": self.gcr_endpoint_temperature, + "top_p": self.gcr_endpoint_top_p, + "max_new_tokens": self.gcr_endpoint_max_token, + }, + }, + }, + ), + ) + + req = urllib.request.Request(self.gcr_endpoint, body, self.headers) # noqa: S310 + response = urllib.request.urlopen(req) # noqa: S310 + resp = json.loads(response.read().decode())["output"] + if LLM_SETTINGS.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages") + else: + kwargs = dict( + model=model, + messages=messages, + max_tokens=max_tokens, + temperature=temperature, + stream=self.chat_stream, + seed=self.chat_seed, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + ) + + if json_mode: + if add_json_in_prompt: + for message in messages[::-1]: + message["content"] = message["content"] + "\nPlease respond in json format." + if message["role"] == "system": + break + kwargs["response_format"] = {"type": "json_object"} + response = self.chat_client.chat.completions.create(**kwargs) + + + if self.chat_stream: + resp = "" + # TODO: with logger.config(stream=self.chat_stream): and add a `stream_start` flag to add timestamp for first message. 
+ if LLM_SETTINGS.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{LogColors.END}", tag="llm_messages") + + for chunk in response: + content = ( + chunk.choices[0].delta.content + if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None + else "" + ) + if LLM_SETTINGS.log_llm_chat_content: + logger.info(LogColors.CYAN + content + LogColors.END, raw=True, tag="llm_messages") + resp += content + if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None: + finish_reason = chunk.choices[0].finish_reason + + if LLM_SETTINGS.log_llm_chat_content: + logger.info("\n", raw=True, tag="llm_messages") + + else: + resp = response.choices[0].message.content + finish_reason = response.choices[0].finish_reason + if LLM_SETTINGS.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages") + logger.info( + json.dumps( + { + "tag": tag, + "total_tokens": response.usage.total_tokens, + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "model": model, + } + ), + tag="llm_messages", + ) + if json_mode or reasoning_flag: + # 提取JSON部分 + json_start = resp.find('{') + json_end = resp.rfind('}') + 1 + resp = resp[json_start:json_end] + json.loads(resp) + if self.dump_chat_cache: + self.cache.chat_set(input_content_json, resp) + return resp, finish_reason + + def calculate_token_from_messages(self, messages: list[dict]) -> int: + return 0 + if self.use_llama2 or self.use_gcr_endpoint: + logger.warning("num_tokens_from_messages() is not implemented for model llama2.") + return 0 # TODO implement this function for llama2 + + if "gpt4" in self.chat_model or "gpt-4" in self.chat_model: + tokens_per_message = 3 + tokens_per_name = 1 + else: + tokens_per_message = 4 # every message follows {role/name}\n{content}\n + tokens_per_name = -1 # if there's a name, the role is omitted + num_tokens = 0 + for message in messages: + num_tokens += 
def calculate_embedding_distance_between_str_list(
    source_str_list: list[str],
    target_str_list: list[str],
) -> list[list[float]]:
    """
    Compute pairwise cosine similarities between two lists of strings.

    Despite the name, the returned values are similarities (dot products of
    L2-normalised embeddings), not distances.

    Parameters
    ----------
    source_str_list : list[str]
        Strings that index the rows of the result.
    target_str_list : list[str]
        Strings that index the columns of the result.

    Returns
    -------
    list[list[float]]
        Matrix with one row per source string and one column per target
        string. `[[]]` is returned when either input list is empty, without
        requesting any embedding.
    """
    if not (source_str_list and target_str_list):
        return [[]]

    def _unit_rows(vectors):
        # Scale every row to unit L2 norm so a dot product equals the cosine.
        matrix = np.array(vectors)
        return matrix / np.linalg.norm(matrix, axis=1, keepdims=True)

    # One backend call for both lists; the result is split back by position.
    split_at = len(source_str_list)
    all_embeddings = APIBackend().create_embedding(source_str_list + target_str_list)

    source_unit = _unit_rows(all_embeddings[:split_at])
    target_unit = _unit_rows(all_embeddings[split_at:])

    return np.dot(source_unit, target_unit.T).tolist()
def calculate_information_coefficient(
    self, concat_feature: pd.DataFrame, SOTA_feature_column_size: int, new_feature_columns_size: int
) -> pd.Series:
    """
    Correlate every SOTA feature column with every new feature column.

    Parameters
    ----------
    concat_feature : pd.DataFrame
        Frame whose first `SOTA_feature_column_size` columns are the SOTA
        features, followed by `new_feature_columns_size` new feature columns.
    SOTA_feature_column_size : int
        Number of leading SOTA columns.
    new_feature_columns_size : int
        Number of new feature columns that follow the SOTA columns.

    Returns
    -------
    pd.Series
        Float series of length `SOTA_feature_column_size * new_feature_columns_size`.
        Entry `i * new_feature_columns_size + j` holds the correlation
        (`Series.corr`) between SOTA column `i` and new column `j`.
    """
    # Explicit float dtype: an index-only Series otherwise defaults to object
    # dtype on recent pandas, which breaks numeric aggregation downstream.
    res = pd.Series(
        index=range(SOTA_feature_column_size * new_feature_columns_size),
        dtype="float64",
    )
    for sota_pos in range(SOTA_feature_column_size):
        sota_column = concat_feature.iloc[:, sota_pos]
        for new_pos in range(new_feature_columns_size):
            new_column = concat_feature.iloc[:, SOTA_feature_column_size + new_pos]
            res.loc[sota_pos * new_feature_columns_size + new_pos] = sota_column.corr(new_column)
    return res
deduplicate_new_factors(self, SOTA_feature: pd.DataFrame, new_feature: pd.DataFrame) -> pd.DataFrame: + # calculate the IC between each column of SOTA_feature and new_feature + # if the IC is larger than a threshold, remove the new_feature column + # return the new_feature + + concat_feature = pd.concat([SOTA_feature, new_feature], axis=1) + IC_max = ( + concat_feature.groupby("datetime") + .parallel_apply( + lambda x: self.calculate_information_coefficient(x, SOTA_feature.shape[1], new_feature.shape[1]) + ) + .mean() + ) + IC_max.index = pd.MultiIndex.from_product([range(SOTA_feature.shape[1]), range(new_feature.shape[1])]) + IC_max = IC_max.unstack().max(axis=0) + return new_feature.iloc[:, IC_max[IC_max < 0.99].index] + + @cache_with_pickle(CachedRunner.get_cache_key, CachedRunner.assign_cached_result) + def develop(self, exp: QlibFactorExperiment, use_local: bool = True) -> QlibFactorExperiment: + + """ + Generate the experiment by processing and combining factor data, + then passing the combined data to Docker or local environment for backtest results. 
+ """ + + if exp.based_experiments and exp.based_experiments[-1].result is None: + exp.based_experiments[-1] = self.develop(exp.based_experiments[-1], use_local=use_local) + + if exp.based_experiments: + SOTA_factor = None + if len(exp.based_experiments) > 1: + SOTA_factor = self.process_factor_data(exp.based_experiments) + + # Process the new factors data + new_factors = self.process_factor_data(exp) + if new_factors.empty: + raise FactorEmptyError("No valid factor data found to merge.") + + # Combine the SOTA factor and new factors if SOTA factor exists + if False: # SOTA_factor is not None and not SOTA_factor.empty: + new_factors = self.deduplicate_new_factors(SOTA_factor, new_factors) + if new_factors.empty: + raise FactorEmptyError("No valid factor data found to merge.") + combined_factors = pd.concat([SOTA_factor, new_factors], axis=1).dropna() + else: + combined_factors = new_factors + + if len(combined_factors.columns) >= 2: + pd.set_option('display.width', 1000) + logger.info(f"Factor correlation: \n\n{combined_factors.corr()}\n") + + # Sort and nest the combined factors under 'feature' + combined_factors = combined_factors.sort_index() + combined_factors = combined_factors.loc[:, ~combined_factors.columns.duplicated(keep="last")] + new_columns = pd.MultiIndex.from_product([["feature"], combined_factors.columns]) + combined_factors.columns = new_columns + + logger.info(f"Factor values this round: \n\n{combined_factors.tail()}\n\n") + + # Save the combined factors to the workspace + with open(exp.experiment_workspace.workspace_path / "combined_factors_df.pkl", "wb") as f: + pickle.dump(combined_factors, f) + + + # 执行回测,支持本地或Docker环境 + config_name = f"conf.yaml" if len(exp.based_experiments) == 0 else "conf_cn_combined_kdd_ver.yaml" + logger.info(f"Execute factor backtest (Use {'Local' if use_local else 'Docker container'}): {config_name}") + + result = exp.experiment_workspace.execute( + qlib_config_name=config_name, + use_local=use_local + ) + + 
def process_factor_data(self, exp_or_list: List[QlibFactorExperiment] | QlibFactorExperiment) -> pd.DataFrame:
    """
    Execute the factor implementations of one or more experiments and merge
    the usable results column-wise.

    Args:
        exp_or_list: A single experiment or a list of experiments whose
            `sub_workspace_list` entries are executed with the argument "All".

    Returns:
        pd.DataFrame: The accepted factor frames concatenated along axis 1.

    Raises:
        FactorEmptyError: If no implementation produced an accepted frame.

    A result is accepted only when it is not None, its index has a
    "datetime" level, and no two consecutive "datetime" index values are
    exactly one minute apart.
    """
    experiments = [exp_or_list] if isinstance(exp_or_list, QlibFactorExperiment) else exp_or_list
    one_minute = pd.Timedelta(minutes=1)
    accepted_frames = []

    for experiment in experiments:
        # Run every sub-implementation through the project's multiprocessing helper.
        jobs = [(workspace.execute, ("All",)) for workspace in experiment.sub_workspace_list]
        outcomes = multiprocessing_wrapper(jobs, n=RD_AGENT_SETTINGS.multi_proc_n)

        for _message, frame in outcomes:
            # Skip failed runs and frames that are not indexed by datetime.
            if frame is None or "datetime" not in frame.index.names:
                continue
            gaps = frame.index.get_level_values("datetime").to_series().diff().dropna().unique()
            if one_minute not in gaps:
                accepted_frames.append(frame)

    if not accepted_frames:
        raise FactorEmptyError("No valid factor data found to merge.")
    return pd.concat(accepted_frames, axis=1)
def process_results(current_result, sota_result):
    """
    Build a text table comparing the current result with the SOTA result.

    Args:
        current_result: Metric values of the current experiment, indexed by
            metric name (anything `pd.DataFrame` accepts, typically a Series).
        sota_result: Metric values of the SOTA experiment in the same layout.

    Returns:
        str: `DataFrame.to_string()` of the four selected metrics with the
        columns "Current Result", "SOTA Result" and a column naming which of
        the two is numerically larger.

    Raises:
        KeyError: If one of the selected metrics is missing from the inputs.
    """

    def _as_named_frame(result, column_name):
        frame = pd.DataFrame(result)
        frame.index.name = "metric"
        # The value column is labelled "0" when the result was read back from
        # CSV and 0 (int) when it is an unnamed in-memory Series; accept both.
        return frame.rename(columns={0: column_name, "0": column_name})

    current_df = _as_named_frame(current_result, "Current Result")
    sota_df = _as_named_frame(sota_result, "SOTA Result")

    # Combine the dataframes on the metric index
    combined_df = pd.concat([current_df, sota_df], axis=1)

    # Select important metrics for comparison
    important_metrics = [
        "1day.excess_return_without_cost.max_drawdown",
        "1day.excess_return_without_cost.information_ratio",
        "1day.excess_return_without_cost.annualized_return",
        "IC",
    ]

    # Copy the selection so the new column is added to an independent frame
    # rather than to a slice of `combined_df`.
    filtered_combined_df = combined_df.loc[important_metrics].copy()

    filtered_combined_df[
        "Bigger columns name (Didn't consider the direction of the metric, you should judge it by yourself that bigger is better or smaller is better)"
    ] = filtered_combined_df.apply(
        lambda row: "Current Result" if row["Current Result"] > row["SOTA Result"] else "SOTA Result", axis=1
    )

    return filtered_combined_df.to_string()
+ + Returns: + Any: The feedback generated for the given experiment and hypothesis. + """ + logger.info("Generating feedback...") + hypothesis_text = hypothesis.hypothesis + current_result = exp.result + tasks_factors = [task.get_task_information_and_implementation_result() for task in exp.sub_tasks] + sota_result = exp.based_experiments[-1].result + + # Process the results to filter important metrics + combined_result = process_results(current_result, sota_result) + + # Generate the system prompt + sys_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(rdagent_feedback_prompts["factor_feedback_generation"]["system"]) + .render(scenario=self.scen.get_scenario_all_desc()) + ) + + # Generate the user prompt + usr_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(rdagent_feedback_prompts["factor_feedback_generation"]["user"]) + .render( + hypothesis_text=hypothesis_text, + task_details=tasks_factors, + combined_result=combined_result, + ) + ) + + # Call the APIBackend to generate the response for hypothesis feedback + response = APIBackend().build_messages_and_create_chat_completion( + user_prompt=usr_prompt, + system_prompt=sys_prompt, + json_mode=True, + ) + + # Parse the JSON response to extract the feedback + response_json = json.loads(response) + + # Extract fields from JSON response + observations = response_json.get("Observations", "No observations provided") + hypothesis_evaluation = response_json.get("Feedback for Hypothesis", "No feedback provided") + new_hypothesis = response_json.get("New Hypothesis", "No new hypothesis provided") + reason = response_json.get("Reasoning", "No reasoning provided") + decision = convert2bool(response_json.get("Replace Best Result", "no")) + + return HypothesisFeedback( + observations=observations, + hypothesis_evaluation=hypothesis_evaluation, + new_hypothesis=new_hypothesis, + reason=reason, + decision=decision, + ) + + + +alphaagent_feedback_prompts = 
Prompts(file_path=Path(__file__).parent.parent / "prompts_alphaagent.yaml") +class AlphaAgentQlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): + def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback: + """ + Generate feedback for the given experiment and hypothesis. + + Args: + exp (QlibFactorExperiment): The experiment to generate feedback for. + hypothesis (QlibFactorHypothesis): The hypothesis to generate feedback for. + trace (Trace): The trace of the experiment. + + Returns: + Any: The feedback generated for the given experiment and hypothesis. + """ + logger.info("Generating feedback...") + hypothesis_text = hypothesis.hypothesis + current_result = exp.result + tasks_factors = [task.get_task_information_and_implementation_result() for task in exp.sub_tasks] + sota_result = exp.based_experiments[-1].result + + # Process the results to filter important metrics + combined_result = process_results(current_result, sota_result) + + # Generate the system prompt + sys_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_feedback_prompts["factor_feedback_generation"]["system"]) + .render(scenario=self.scen.get_scenario_all_desc()) + ) + + # Generate the user prompt + usr_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_feedback_prompts["factor_feedback_generation"]["user"]) + .render( + hypothesis_text=hypothesis_text, + task_details=tasks_factors, + combined_result=combined_result, + ) + ) + + # Call the APIBackend to generate the response for hypothesis feedback + response = APIBackend().build_messages_and_create_chat_completion( + user_prompt=usr_prompt, + system_prompt=sys_prompt, + json_mode=True, + ) + + # Parse the JSON response to extract the feedback + response_json = json.loads(response) + + # Extract fields from JSON response + observations = response_json.get("Observations", "No observations provided") + hypothesis_evaluation = 
class QlibModelHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
    """Generated feedbacks on the hypothesis from **Executed** Implementations of different tasks & their comparisons with previous performances"""

    def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
        """
        The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM).
        For example: `mlflow` of Qlib will be included.

        Args:
            exp (Experiment): The executed experiment to give feedback on.
            hypothesis (Hypothesis): The hypothesis the experiment was built from.
            trace (Trace): Supplies the scenario (`trace.scen`) and the current
                SOTA hypothesis/experiment pair.

        Returns:
            HypothesisFeedback: Fields parsed from the LLM's JSON answer; each
            missing key falls back to a placeholder string ("false" for the decision).

        Raises:
            json.JSONDecodeError: If the LLM response is not valid JSON.
        """

        logger.info("Generating feedback...")

        # The previous code read an undefined global `feedback_prompts` and
        # raised NameError on every call. Use the prompt set this module loads
        # from prompts_rdagent.yaml.
        # NOTE(review): assumes that file contains "model_feedback_generation" - confirm.
        model_prompts = rdagent_feedback_prompts["model_feedback_generation"]

        # Define the system prompt for hypothesis feedback
        system_prompt = model_prompts["system"]

        # Define the user prompt for hypothesis feedback
        context = trace.scen
        SOTA_hypothesis, SOTA_experiment = trace.get_sota_hypothesis_and_experiment()

        # The SOTA-derived fields are only filled in when a SOTA hypothesis exists.
        user_prompt = (
            Environment(undefined=StrictUndefined)
            .from_string(model_prompts["user"])
            .render(
                context=context,
                last_hypothesis=SOTA_hypothesis,
                last_task=SOTA_experiment.sub_tasks[0].get_task_information() if SOTA_hypothesis else None,
                last_code=SOTA_experiment.sub_workspace_list[0].code_dict.get("model.py") if SOTA_hypothesis else None,
                last_result=SOTA_experiment.result if SOTA_hypothesis else None,
                hypothesis=hypothesis,
                exp=exp,
            )
        )

        # Call the APIBackend to generate the response for hypothesis feedback
        response_hypothesis = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            json_mode=True,
        )

        # Parse the JSON response to extract the feedback
        response_json_hypothesis = json.loads(response_hypothesis)
        return HypothesisFeedback(
            observations=response_json_hypothesis.get("Observations", "No observations provided"),
            hypothesis_evaluation=response_json_hypothesis.get("Feedback for Hypothesis", "No feedback provided"),
            new_hypothesis=response_json_hypothesis.get("New Hypothesis", "No new hypothesis provided"),
            reason=response_json_hypothesis.get("Reasoning", "No reasoning provided"),
            decision=convert2bool(response_json_hypothesis.get("Decision", "false")),
        )
class QlibModelRunner(CachedRunner[QlibModelExperiment]):
    """
    Runner for Qlib model experiments (Docker run).

    Everything lives in one folder:
    - config.yaml
    - Pytorch `model.py`
    - results in `mlflow`

    https://github.com/microsoft/qlib/blob/main/qlib/contrib/model/pytorch_nn.py
    - pt_model_uri:  hard-code `model.py:Net` in the config
    - let LLM modify model.py
    """

    @cache_with_pickle(CachedRunner.get_cache_key, CachedRunner.assign_cached_result)
    def develop(self, exp: QlibModelExperiment) -> QlibModelExperiment:
        """
        Inject the generated `model.py` into the experiment workspace, run
        `conf.yaml`, and store the outcome on `exp.result`.

        Raises:
            ModelEmptyError: If the first sub-workspace has no "model.py" code.
        """
        model_workspace = exp.sub_workspace_list[0]
        if model_workspace.code_dict.get("model.py") is None:
            raise ModelEmptyError("model.py is empty")

        # Replace the template's model.py with the generated implementation.
        exp.experiment_workspace.inject_code(**{"model.py": model_workspace.code_dict["model.py"]})

        # The dataset-related variables depend on the model type; any other
        # model type runs with PYTHONPATH only.
        run_env = {"PYTHONPATH": "./"}
        model_type = exp.sub_tasks[0].model_type
        if model_type == "TimeSeries":
            run_env.update({"dataset_cls": "TSDatasetH", "step_len": 20, "num_timesteps": 20})
        elif model_type == "Tabular":
            run_env.update({"dataset_cls": "DatasetH"})

        exp.result = exp.experiment_workspace.execute(qlib_config_name="conf.yaml", run_env=run_env)
        return exp
please choose the proper tag from https://hub.docker.com/r/pytorch/pytorch/tags + +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list +RUN apt-get clean && apt-get update && apt-get install -y \ + curl \ + vim \ + git \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +RUN git clone https://www.ghproxy.cn/https://github.com/microsoft/qlib.git + +WORKDIR /workspace/qlib + +RUN git reset c9ed050ef034fe6519c14b59f3d207abcb693282 --hard + +RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple +RUN python -m pip install --upgrade cython +RUN python -m pip install -e . + +RUN pip install catboost +RUN pip install xgboost +RUN pip install scipy==1.11.4 +RUN pip install joblib==1.4.2 diff --git a/alphaagent/scenarios/qlib/experiment/factor_data_template/README.md b/alphaagent/scenarios/qlib/experiment/factor_data_template/README.md new file mode 100755 index 00000000..0abc563e --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/factor_data_template/README.md @@ -0,0 +1,24 @@ +# How to read files. +For example, if you want to read `filename.h5` +```Python +import pandas as pd +df = pd.read_hdf("filename.h5", key="data") +``` +NOTE: **key is always "data" for all hdf5 files **. + +# Here is a short description about the data + +| Filename | Description | +| -------------- | -----------------------------------------------------------------| +| "daily_pv.h5" | Adjusted daily price and volume data. | + + +# For different data, We have some basic knowledge for them + +## Daily data variables +$open: open price of the stock on that day. +$close: close price of the stock on that day. +$high: high price of the stock on that day. +$low: low price of the stock on that day. +$volume: volume of the stock on that day. +$return: daily return of the stock on that day. 
# Dump daily price/volume data from the local Qlib store into the HDF5 files
# used as factor source data: one full file and one 100-instrument debug file.
import qlib

qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")
# qlib.init(provider_uri="~/.qlib/qlib_data/us_data")
from qlib.data import D

instruments = D.instruments()
fields = ["$open", "$close", "$high", "$low", "$volume"]  # , "$amount", "$turn", "$pettm", "$pbmrq"

# After swaplevel() the date-like level comes first, so `.loc["2015-01-01":]`
# keeps every row from 2015-01-01 on.
data = D.features(instruments, fields, freq="day").swaplevel().sort_index().loc["2015-01-01":].sort_index()

# Compute the daily return per instrument.
# Group by the "instrument" level: level 0 is the date level here, and grouping
# by it would compare different stocks within one day instead of consecutive
# days of the same stock.
data["$return"] = data.groupby(level="instrument")["$close"].pct_change().fillna(0)

print(data)

data.to_hdf("./daily_pv_all.h5", key="data")

# Debug subset: the first 100 instruments that occur in the full dataset
# (no start-date filter is applied to this subset).
data = (
    (
        D.features(instruments, fields, freq="day")
        .swaplevel()
        .sort_index()
    )
    .swaplevel()
    .loc[data.reset_index()["instrument"].unique()[:100]]
    .swaplevel()
    .sort_index()
)

# Compute the daily return per instrument (same grouping as above).
data["$return"] = data.groupby(level="instrument")["$close"].pct_change().fillna(0)
print(data)
data.to_hdf("./daily_pv_debug.h5", key="data")
get_data_folder_intro +from alphaagent.scenarios.qlib.experiment.workspace import QlibFBWorkspace + +rdagent_prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts_rdagent.yaml") + + +class QlibFactorExperiment(FactorExperiment[FactorTask, QlibFBWorkspace, FactorFBWorkspace]): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.experiment_workspace = QlibFBWorkspace(template_folder_path=Path(__file__).parent / "factor_template") + + +class QlibFactorScenario(Scenario): + def __init__(self) -> None: + super().__init__() + self._background = deepcopy(rdagent_prompt_dict["qlib_factor_background"]) + self._source_data = deepcopy(get_data_folder_intro()) + self._output_format = deepcopy(rdagent_prompt_dict["qlib_factor_output_format"]) + self._interface = deepcopy(rdagent_prompt_dict["qlib_factor_interface"]) + self._strategy = deepcopy(rdagent_prompt_dict["qlib_factor_strategy"]) + self._simulator = deepcopy(rdagent_prompt_dict["qlib_factor_simulator"]) + self._rich_style_description = deepcopy(rdagent_prompt_dict["qlib_factor_rich_style_description"]) + self._experiment_setting = deepcopy(rdagent_prompt_dict["qlib_factor_experiment_setting"]) + + @property + def background(self) -> str: + return self._background + + def get_source_data_desc(self, task: Task | None = None) -> str: + return self._source_data + + @property + def output_format(self) -> str: + return self._output_format + + @property + def interface(self) -> str: + return self._interface + + @property + def simulator(self) -> str: + return self._simulator + + @property + def rich_style_description(self) -> str: + return self._rich_style_description + + @property + def experiment_setting(self) -> str: + return self._experiment_setting + + def get_scenario_all_desc( + self, task: Task | None = None, filtered_tag: str | None = None, simple_background: bool | None = None + ) -> str: + """A static scenario describer""" + if simple_background: + return 
f"""Background of the scenario: +{self.background}""" + return f"""Background of the scenario: +{self.background} +The source data you can use: +{self.get_source_data_desc(task)} +The interface you should follow to write the runnable code: +{self.interface} +The output of your code should be in the format: +{self.output_format} +The simulator user can use to test your factor: +{self.simulator} +""" + + + +alphaagent_prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts_alphaagent.yaml") +class QlibAlphaAgentScenario(Scenario): + def __init__(self, use_local: bool = True) -> None: + super().__init__() + self._background = deepcopy(alphaagent_prompt_dict["qlib_factor_background"]) + self._source_data = deepcopy(get_data_folder_intro(use_local=use_local)) + self._output_format = deepcopy(alphaagent_prompt_dict["qlib_factor_output_format"]) + self._interface = deepcopy(alphaagent_prompt_dict["qlib_factor_interface"]) + self._strategy = deepcopy(alphaagent_prompt_dict["qlib_factor_strategy"]) + self._simulator = deepcopy(alphaagent_prompt_dict["qlib_factor_simulator"]) + self._rich_style_description = deepcopy(alphaagent_prompt_dict["qlib_factor_rich_style_description"]) + self._experiment_setting = deepcopy(alphaagent_prompt_dict["qlib_factor_experiment_setting"]) + + @property + def background(self) -> str: + return self._background + + def get_source_data_desc(self, task: Task | None = None) -> str: + return self._source_data + + @property + def output_format(self) -> str: + return self._output_format + + @property + def interface(self) -> str: + return self._interface + + @property + def simulator(self) -> str: + return self._simulator + + @property + def rich_style_description(self) -> str: + return self._rich_style_description + + @property + def experiment_setting(self) -> str: + return self._experiment_setting + + def get_scenario_all_desc( + self, task: Task | None = None, filtered_tag: str | None = None, simple_background: bool | None = None + ) -> 
str: + """A static scenario describer""" + if simple_background: + return f"""Background of the scenario: +{self.background}""" + return f"""Background of the scenario: +{self.background} +The source data you can use: +{self.get_source_data_desc(task)} +The interface you should follow to write the runnable code: +{self.interface} +The simulator user can use to test your factor: +{self.simulator} +""" \ No newline at end of file diff --git a/alphaagent/scenarios/qlib/experiment/factor_from_report_experiment.py b/alphaagent/scenarios/qlib/experiment/factor_from_report_experiment.py new file mode 100755 index 00000000..f2e2358e --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/factor_from_report_experiment.py @@ -0,0 +1,24 @@ +from copy import deepcopy +from pathlib import Path + +from alphaagent.components.coder.factor_coder.factor import ( + FactorExperiment, + FactorFBWorkspace, + FactorTask, +) +from alphaagent.core.prompts import Prompts +from alphaagent.core.scenario import Scenario +from alphaagent.scenarios.qlib.experiment.factor_experiment import QlibFactorScenario +from alphaagent.scenarios.qlib.experiment.workspace import QlibFBWorkspace + +prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts_rdagent.yaml") + + +class QlibFactorFromReportScenario(QlibFactorScenario): + def __init__(self) -> None: + super().__init__() + self._rich_style_description = deepcopy(prompt_dict["qlib_factor_from_report_rich_style_description"]) + + @property + def rich_style_description(self) -> str: + return self._rich_style_description diff --git a/alphaagent/scenarios/qlib/experiment/factor_template/conf.yaml b/alphaagent/scenarios/qlib/experiment/factor_template/conf.yaml new file mode 100755 index 00000000..57611446 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/factor_template/conf.yaml @@ -0,0 +1,147 @@ +# qlib_init: +# provider_uri: "~/.qlib/qlib_data/us_data" +# region: us + +# market: &market SP500 +# benchmark: &benchmark SPX + +qlib_init: + 
provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn + +market: &market csi500 +benchmark: &benchmark SH000905 + +data_handler_config: &data_handler_config + start_time: 2019-01-01 + end_time: 2025-03-31 + instruments: *market + data_loader: + class: qlib.contrib.data.loader.QlibDataLoader + kwargs: + config: + feature: ["($close-$open)/$open", "$volume/Mean($volume, 20)", "($high-$low)/Ref($close, 1)", "$close/Ref($close, 1)-1"] # "$high", "$low", "$close", "$volume" + label: + - ["Ref($close, -2)/Ref($close, -1) - 1"] + - ["LABEL0"] + + learn_processors: + - class: Fillna + kwargs: + fields_group: feature + - class: DropnaLabel + - class: CSZScoreNorm + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: label + infer_processors: + - class: Fillna + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: label + +port_analysis_config: &port_analysis_config + executor: + class: SimulatorExecutor + module_path: qlib.backtest.executor + kwargs: + time_per_step: day + generate_portfolio_metrics: True + verbose: False + indicator_config: + show_indicator: True + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + signal: + topk: 100 + n_drop: 5 + backtest: + start_time: 2024-01-01 + end_time: 2025-03-31 + account: 10000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 + # open_cost: 0.0 + # close_cost: 0.0005 + # min_cost: 0 + + +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.1 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 4 + num_leaves: 210 + num_threads: 20 + # model: + # class: LGBModel + # module_path: qlib.contrib.model.gbdt + # kwargs: + # loss: mse + # colsample_bytree: 0.8879 + # 
learning_rate: 0.1 + # subsample: 0.8789 + # lambda_l1: 205.6999 + # lambda_l2: 580.9768 + # max_depth: 8 + # num_leaves: 210 + # num_threads: 20 + # model: + # class: XGBModel + # module_path: qlib.contrib.model.xgboost + # kwargs: + # eval_metric: rmse + # colsample_bytree: 0.8879 + # eta: 0.0421 + # max_depth: 8 + # n_estimators: 647 + # subsample: 0.8789 + # nthread: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: DataHandlerLP + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2019-01-01, 2023-06-01] + valid: [2023-06-01, 2023-12-31] + test: [2024-01-01, 2025-03-31] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: + dataset: + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/alphaagent/scenarios/qlib/experiment/factor_template/conf_cn_combined_kdd_ver.yaml b/alphaagent/scenarios/qlib/experiment/factor_template/conf_cn_combined_kdd_ver.yaml new file mode 100755 index 00000000..0eea6fa7 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/factor_template/conf_cn_combined_kdd_ver.yaml @@ -0,0 +1,115 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn + +market: &market csi500 +benchmark: &benchmark SH000905 + +data_handler_config: &data_handler_config + start_time: 2015-01-01 + end_time: 2025-03-31 + instruments: *market + data_loader: + class: NestedDataLoader + kwargs: + dataloader_l: + - class: qlib.contrib.data.loader.QlibDataLoader + kwargs: + config: + feature: ["($close - $open) / $open", "$close / Ref($close, 1) - 1", "$volume / Mean($volume, 20)", "($high - $low) / Ref($close, 1)"] + # ["($close-$open)/$open", "$volume/Mean($volume, 20)", "($high-$low)/Ref($close, 1)", "$close/Ref($close, 1)-1"] + # "($close - 
$open) / $open", "$close / Ref($close, 1) - 1", "$volume / Mean($volume, 20)", "($high - $low) / Ref($close, 1)" + label: + - ["Ref($close, -2)/Ref($close, -1) - 1"] + - ["LABEL0"] + - class: qlib.data.dataset.loader.StaticDataLoader + kwargs: + config: "combined_factors_df.pkl" + + learn_processors: + - class: Fillna + kwargs: + fields_group: feature + - class: DropnaLabel + - class: CSZScoreNorm + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: label + infer_processors: + - class: Fillna + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: label + +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + signal: + topk: 50 + n_drop: 5 + backtest: + start_time: 2021-01-01 + end_time: 2024-12-30 + # start_time: 2020-01-01 + # end_time: 2024-12-30 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: open + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.1 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 4 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: DataHandlerLP + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + # train: [2015-01-01, 2019-12-31] + # valid: [2020-01-01, 2020-12-31] + # test: [2021-01-01, 2025-03-31] + train: [2015-01-01, 2019-12-31] + valid: [2020-01-01, 2020-12-31] + test: [2021-01-01, 2024-12-30] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: + dataset: + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + 
- class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/alphaagent/scenarios/qlib/experiment/factor_template/conf_us_combined_kdd_ver.yaml b/alphaagent/scenarios/qlib/experiment/factor_template/conf_us_combined_kdd_ver.yaml new file mode 100755 index 00000000..3f575f45 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/factor_template/conf_us_combined_kdd_ver.yaml @@ -0,0 +1,120 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/us_data" + region: us + +market: &market SP500 +benchmark: &benchmark SPX + +data_handler_config: &data_handler_config + start_time: 2015-01-01 + end_time: 2024-12-30 + instruments: *market + data_loader: + class: NestedDataLoader + kwargs: + dataloader_l: + - class: qlib.contrib.data.loader.QlibDataLoader + kwargs: + config: + feature: ["($close-$open)/$open", "$volume/Mean($volume, 20)", "($high-$low)/Ref($close, 1)", "$close/Ref($close, 1)-1"] # "$high", "$low", "$close", "$volume" + label: + - ["Ref($close, -2)/Ref($close, -1) - 1"] + - ["LABEL0"] + - class: qlib.data.dataset.loader.StaticDataLoader + kwargs: + config: "combined_factors_df.pkl" + + learn_processors: + - class: Fillna + kwargs: + fields_group: feature + - class: DropnaLabel + - class: CSZScoreNorm + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: label + infer_processors: + - class: Fillna + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: feature + - class: CSZScoreNorm + kwargs: + fields_group: label + +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + signal: + topk: 50 # 50 + n_drop: 5 + backtest: + start_time: 2021-01-01 + end_time: 2024-12-30 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: open + open_cost: 0.0 + close_cost: 0.0005 + min_cost: 0 + +task: + model: + class: LGBModel + module_path: 
qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.1 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 3 + num_leaves: 210 + num_threads: 20 + # model: + # class: XGBModel + # module_path: qlib.contrib.model.xgboost + # kwargs: + # eval_metric: rmse + # colsample_bytree: 0.8879 + # eta: 0.0421 + # max_depth: 8 + # n_estimators: 647 + # subsample: 0.8789 + # nthread: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: DataHandlerLP + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2015-01-01, 2019-12-31] + valid: [2020-01-01, 2020-12-31] + test: [2021-01-01, 2024-12-30] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: + dataset: + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/alphaagent/scenarios/qlib/experiment/factor_template/read_exp_res.py b/alphaagent/scenarios/qlib/experiment/factor_template/read_exp_res.py new file mode 100755 index 00000000..928cc645 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/factor_template/read_exp_res.py @@ -0,0 +1,48 @@ +import pickle +from pathlib import Path + +import pandas as pd +import qlib +from mlflow.entities import ViewType +from mlflow.tracking import MlflowClient + +qlib.init() + +from qlib.workflow import R + +# here is the documents of the https://qlib.readthedocs.io/en/latest/component/recorder.html + +# TODO: list all the recorder and metrics + +# Assuming you have already listed the experiments +experiments = R.list_experiments() + +# Iterate through each experiment to find the latest recorder +experiment_name = None +latest_recorder = None +for experiment in experiments: + recorders = 
R.list_recorders(experiment_name=experiment) + for recorder_id in recorders: + if recorder_id is not None: + experiment_name = experiment + recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=experiment) + end_time = recorder.info["end_time"] + if latest_recorder is None or end_time > latest_recorder.info["end_time"]: + latest_recorder = recorder + +# Check if the latest recorder is found +if latest_recorder is None: + print("No recorders found") +else: + print(f"Latest recorder: {latest_recorder}") + + # Load the specified file from the latest recorder + metrics = pd.Series(latest_recorder.list_metrics()) + + output_path = Path(__file__).resolve().parent / "qlib_res.csv" + metrics.to_csv(output_path) + + print(f"Output has been saved to {output_path}") + + ret_data_frame = latest_recorder.load_object("portfolio_analysis/report_normal_1day.pkl") + ret_data_frame.to_pickle("ret.pkl") diff --git a/alphaagent/scenarios/qlib/experiment/model_experiment.py b/alphaagent/scenarios/qlib/experiment/model_experiment.py new file mode 100755 index 00000000..5868c67d --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/model_experiment.py @@ -0,0 +1,72 @@ +from copy import deepcopy +from pathlib import Path + +from alphaagent.components.coder.model_coder.model import ( + ModelExperiment, + ModelFBWorkspace, + ModelTask, +) +from alphaagent.core.experiment import Task +from alphaagent.core.prompts import Prompts +from alphaagent.core.scenario import Scenario +from alphaagent.scenarios.qlib.experiment.workspace import QlibFBWorkspace + +rdagent_prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts_rdagent.yaml") + + +class QlibModelExperiment(ModelExperiment[ModelTask, QlibFBWorkspace, ModelFBWorkspace]): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.experiment_workspace = QlibFBWorkspace(template_folder_path=Path(__file__).parent / "model_template") + + +class QlibModelScenario(Scenario): + def 
__init__(self) -> None: + super().__init__() + self._background = deepcopy(rdagent_prompt_dict["qlib_model_background"]) + self._output_format = deepcopy(rdagent_prompt_dict["qlib_model_output_format"]) + self._interface = deepcopy(rdagent_prompt_dict["qlib_model_interface"]) + self._simulator = deepcopy(rdagent_prompt_dict["qlib_model_simulator"]) + self._rich_style_description = deepcopy(rdagent_prompt_dict["qlib_model_rich_style_description"]) + self._experiment_setting = deepcopy(rdagent_prompt_dict["qlib_model_experiment_setting"]) + + @property + def background(self) -> str: + return self._background + + @property + def source_data(self) -> str: + raise NotImplementedError("source_data of QlibModelScenario is not implemented") + + @property + def output_format(self) -> str: + return self._output_format + + @property + def interface(self) -> str: + return self._interface + + @property + def simulator(self) -> str: + return self._simulator + + @property + def rich_style_description(self) -> str: + return self._rich_style_description + + @property + def experiment_setting(self) -> str: + return self._experiment_setting + + def get_scenario_all_desc( + self, task: Task | None = None, filtered_tag: str | None = None, simple_background: bool | None = None + ) -> str: + return f"""Background of the scenario: +{self.background} +The interface you should follow to write the runnable code: +{self.interface} +The output of your code should be in the format: +{self.output_format} +The simulator user can use to test your model: +{self.simulator} +""" diff --git a/alphaagent/scenarios/qlib/experiment/model_template/README.md b/alphaagent/scenarios/qlib/experiment/model_template/README.md new file mode 100755 index 00000000..fb7c22c9 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/model_template/README.md @@ -0,0 +1,3 @@ +## This folder is a template to be copied from for each model implementation & running process. 
+ +Components: Dummy model.py, versatile conf.yaml, and a result reader. diff --git a/alphaagent/scenarios/qlib/experiment/model_template/conf.yaml b/alphaagent/scenarios/qlib/experiment/model_template/conf.yaml new file mode 100755 index 00000000..661f4f1f --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/model_template/conf.yaml @@ -0,0 +1,109 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: + - class: FilterCol + kwargs: + fields_group: feature + col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", + "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", + "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW" + ] + - class: RobustZScoreNorm + kwargs: + fields_group: feature + clip_outlier: true + - class: Fillna + kwargs: + fields_group: feature + learn_processors: + - class: DropnaLabel + - class: CSRankNorm + kwargs: + fields_group: label + label: ["Ref($close, -2) / Ref($close, -1) - 1"] + +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + signal: + topk: 50 + n_drop: 5 + backtest: + start_time: 2017-01-01 + end_time: 2020-08-01 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: GeneralPTNN + module_path: qlib.contrib.model.pytorch_general_nn + kwargs: + n_epochs: 100 + lr: 1e-3 + early_stop: 10 + batch_size: 2000 + metric: loss + loss: mse + n_jobs: 20 + GPU: 2 + # loss: mse + # lr: 0.002 + # optimizer: adam + # batch_size: 8192 + # GPU: 0 + weight_decay: 0.0001 + # pt_model_uri: "qlib.contrib.model.pytorch_nn.Net" + # pt_model_uri: 
"env_tpl.model.Net" + # pt_model_uri: "./model.py:Net" + pt_model_uri: "model.model_cls" + pt_model_kwargs: { + "num_features": 20, + {% if num_timesteps %}num_timesteps: {{ num_timesteps }}{% endif %} + } + # input_dim: 20 + # How should I use jinja to put step len here conditionally + dataset: + class: {{ dataset_cls | default("DatasetH") }} + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + {% if step_len %}step_len: {{ step_len }}{% endif %} + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: + dataset: + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/alphaagent/scenarios/qlib/experiment/model_template/read_exp_res.py b/alphaagent/scenarios/qlib/experiment/model_template/read_exp_res.py new file mode 100755 index 00000000..928cc645 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/model_template/read_exp_res.py @@ -0,0 +1,48 @@ +import pickle +from pathlib import Path + +import pandas as pd +import qlib +from mlflow.entities import ViewType +from mlflow.tracking import MlflowClient + +qlib.init() + +from qlib.workflow import R + +# here is the documents of the https://qlib.readthedocs.io/en/latest/component/recorder.html + +# TODO: list all the recorder and metrics + +# Assuming you have already listed the experiments +experiments = R.list_experiments() + +# Iterate through each experiment to find the latest recorder +experiment_name = None +latest_recorder = None +for experiment in experiments: + recorders = R.list_recorders(experiment_name=experiment) + for recorder_id in recorders: + if recorder_id is not None: + 
experiment_name = experiment + recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=experiment) + end_time = recorder.info["end_time"] + if latest_recorder is None or end_time > latest_recorder.info["end_time"]: + latest_recorder = recorder + +# Check if the latest recorder is found +if latest_recorder is None: + print("No recorders found") +else: + print(f"Latest recorder: {latest_recorder}") + + # Load the specified file from the latest recorder + metrics = pd.Series(latest_recorder.list_metrics()) + + output_path = Path(__file__).resolve().parent / "qlib_res.csv" + metrics.to_csv(output_path) + + print(f"Output has been saved to {output_path}") + + ret_data_frame = latest_recorder.load_object("portfolio_analysis/report_normal_1day.pkl") + ret_data_frame.to_pickle("ret.pkl") diff --git a/alphaagent/scenarios/qlib/experiment/prompts_alphaagent.yaml b/alphaagent/scenarios/qlib/experiment/prompts_alphaagent.yaml new file mode 100755 index 00000000..991f1062 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/prompts_alphaagent.yaml @@ -0,0 +1,239 @@ +qlib_factor_background: |- + The factor is a characteristic or variable used in quant investment that can help explain the returns and risks of a portfolio or a single asset. Factors are used by investors to identify and exploit sources of excess returns, and they are central to many quantitative investment strategies. + Each number in the factor represents a physical value for an instrument on a day. + User will train a model to predict the next several days return based on the factor values of the previous days. + The factor is defined in the following parts: + 1. Name: The name of the factor. + 2. Description: The description of the factor. + 3. Formulation: The formulation of the factor. + 4. Expression: The executable expression of the factor. + 5. Variables: The variables or functions used in the formulation of the factor.
+ The factor might not provide all the parts of the information above since some might not be applicable. + Please specifically give all the hyperparameters in the factors like the window size, look back period, and so on. One factor should statically define one output with static source data. For example, last 10 days momentum and last 20 days momentum should be two different factors. + +qlib_factor_interface: |- + Your factor expression should follow the interface to better interact with the user's system. + Your expression must contain at least one variable (e.g., `$open`). Other parts contain arithmetic operators (`+, -, *, /`), logical operators (`&&, ||`), functions (`DELAY(), EXP()`), and conditional statements (`A?B:C`). + User will write your expression into a python template and execute the file directly with "python {your_file_name}.py". The factor values will be saved into a HDF5(H5) file named "result.h5" in the same directory as your python file. The result file is a HDF5(H5) file containing a pandas dataframe. The index of the dataframe is the "datetime" and "instrument", and the single column name is the factor name, and the value is the factor value. The result file should be saved in the same directory as your python file. + +qlib_factor_strategy: |- + Ensure that for every step of data processing, the data format (including indexes) is clearly explained through comments. + Each transformation or calculation should be accompanied by a detailed description of how the data is structured, especially focusing on key aspects like whether the data has multi-level indexing, how to access specific columns or index levels, and any operations that affect the data shape (e.g., `reset_index()`, `groupby()`, `merge()`). + This step-by-step explanation will ensure clarity and accuracy in data handling. For example: + 1. **Start with multi-level index**: + ```python + # The initial DataFrame has a multi-level index with 'datetime' and 'instrument'.
+ # To access the 'datetime' index, use df.index.get_level_values('datetime'). + datetime_values = df.index.get_level_values('datetime') + ``` + + 2. **Reset the index if necessary**: + ```python + # Resetting the index to move 'datetime' and 'instrument' from the index to columns. + # This operation flattens the multi-index structure. + df = df.reset_index() + ``` + + 3. **Perform groupby operations**: + ```python + # Grouping by 'datetime' and 'instrument' to aggregate the data. + # After groupby, the result will maintain 'datetime' and 'instrument' as a multi-level index. + df_grouped = df.groupby(['datetime', 'instrument']).sum() + ``` + + 4. **Ensure consistent datetime formats**: + ```python + # Before merging, ensure that the 'datetime' column in both DataFrames is of the same format. + # Convert to datetime format if necessary. + df['datetime'] = pd.to_datetime(df['datetime']) + other_df['datetime'] = pd.to_datetime(other_df['datetime']) + ``` + + 5. **Merge operations**: + ```python + # When merging DataFrames, ensure you are merging on both 'datetime' and 'instrument'. + # If these are part of the index, reset the index before merging. + merged_df = pd.merge(df, other_df, on=['datetime', 'instrument'], how='inner') + ``` + +qlib_factor_output_format: |- + Your output should be a pandas dataframe similar to the following example information: + + MultiIndex: 40914 entries, (Timestamp('2020-01-02 00:00:00'), 'SH600000') to (Timestamp('2021-12-31 00:00:00'), 'SZ300059') + Data columns (total 1 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 your factor name 40914 non-null float64 + dtypes: float64(1) + memory usage: + Notice: The non-null count is OK to be different to the total number of entries since some instruments may not have the factor value on some days. 
+ One possible format of `result.h5` may be like following: + datetime instrument + 2020-01-02 SZ000001 -0.001796 + SZ000166 0.005780 + SZ000686 0.004228 + SZ000712 0.001298 + SZ000728 0.005330 + ... + 2021-12-31 SZ000750 0.000000 + SZ000776 0.002459 + +qlib_factor_simulator: |- + The factors will be sent into Qlib to train a model to predict the next several days return based on the factor values of the previous days. + Qlib is an AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms. including supervised learning, market dynamics modeling, and RL. + User will use Qlib to automatically do the following things: + 1. generate a new factor table based on the factor values. + 2. train a model like LightGBM, CatBoost, LSTM or simple PyTorch model to predict the next several days return based on the factor values. + 3. build a portfolio based on the predicted return based on a strategy. + 4. evaluate the portfolio's performance including the return, sharpe ratio, max drawdown, and so on. + +qlib_factor_rich_style_description : |- + ### R&D Agent-Qlib: Automated Quantitative Trading & Iterative Factors Evolution Demo + + #### [Overview](#_summary) + + The demo showcases the iterative process of hypothesis generation, knowledge construction, and decision-making. It highlights how financial factors evolve through continuous feedback and refinement. + + #### [Automated R&D](#_rdloops) + + - **[R (Research)](#_research)** + - Iterative development of ideas and hypotheses. + - Continuous learning and knowledge construction. + + - **[D (Development)](#_development)** + - Progressive implementation and code generation of factors. + - Automated testing and validation of financial factors. 
+ + #### [Objective](#_summary) + + To demonstrate the dynamic evolution of financial factors through the Qlib platform, emphasizing how each iteration enhances the accuracy and reliability of the resulting financial factors. + +qlib_factor_from_report_rich_style_description : |- + ### R&D Agent-Qlib: Automated Quantitative Trading & Factor Extraction from Financial Reports Demo + + #### [Overview](#_summary) + + This demo showcases the process of extracting factors from financial research reports, implementing these factors, and analyzing their performance through Qlib backtest, continually expanding and refining the factor library. + + #### [Automated R&D](#_rdloops) + + - **[R (Research)](#_research)** + - Iterative development of ideas and hypotheses from financial reports. + - Continuous learning and knowledge construction. + + - **[D (Development)](#_development)** + - Progressive factor extraction and code generation. + - Automated implementation and testing of financial factors. + + #### [Objective](#_summary) + + + + + + + + + + + + + + +
💡 Innovation Tool to quickly extract and test factors from research reports.
Efficiency Rapid identification of valuable factors from numerous reports.
🗃️ Outputs Expand and refine the factor library to support further research.
+ +qlib_factor_experiment_setting: |- + | Dataset 📊 | Model 🤖 | Factors 🌟 | Data Split 🧮 | + |---------|----------|---------------|-------------------------------------------------| + | CSI300 | LGBModel | Alpha158 Plus | Train: 2012-01-01 to 2018-12-31
Valid: 2019-01-01 to 2020-12-31
Test  : 2021-01-01 to 2024-12-01 | + + +qlib_model_background: |- + The model is a machine learning or deep learning structure used in quantitative investment to predict the returns and risks of a portfolio or a single asset. Models are employed by investors to generate forecasts based on historical data and identified factors, which are central to many quantitative investment strategies. + Each model takes the factors as input and predicts the future returns. Usually, the bigger the model is, the better the performance would be. + The model is defined in the following parts: + 1. Name: The name of the model. + 2. Description: The description of the model. + 3. Architecture: The detailed architecture of the model, such as neural network layers or tree structures. + 4. Hyperparameters: The hyperparameters used in the model, such as learning rate, number of epochs, etc. + 5. ModelType: The type of the model, "Tabular" for tabular model and "TimeSeries" for time series model. + The model should provide clear and detailed documentation of its architecture and hyperparameters. One model should statically define one output with a fixed architecture and hyperparameters. For example, a model with two GRU layers and a model with three GRU layers should be considered two different models. + +qlib_model_interface: |- + Your python code should follow the interface to better interact with the user's system. + Your code should contain several parts: + 1. The import part: import the necessary libraries. + 2. A class which is a sub-class of torch.nn.Module. This class should have an init function and a forward function which inputs a tensor and outputs a tensor. + 3. Set a variable called "model_cls" to the class you defined. + + The user will save your code into a python file called "model.py".
Then the user imports model_cls in file "model.py" after setting the cwd into the directory: + ```python + from model import model_cls + ``` + So your python code should follow the pattern: + ```python + class XXXModel(torch.nn.Module): + ... + model_cls = XXXModel + ``` + + The model has two types, "Tabular" for tabular model and "TimeSeries" for time series model. The input shape to a tabular model is (batch_size, num_features) and the input shape to a time series model is (batch_size, num_features, num_timesteps). The output shape of the model should be (batch_size, 1). + The "batch_size" is a dynamic value which is determined by the input of forward function. + The "num_features" and "num_timesteps" are static which will be provided to the model through init function. + User will initialize the tabular model with the following code: + ```python + model = model_cls(num_features=num_features) + ``` + User will initialize the time series model with the following code: + ```python + model = model_cls(num_features=num_features, num_timesteps=num_timesteps) + ``` + No other parameters will be passed to the model so give other parameters a default value or just make them static. + + When dealing with TimeSeries model, remember to permute the input tensor since the input tensor is in the shape of (batch_size, num_features, num_timesteps) and a normal time series model is expecting the input tensor in the shape of (batch_size, num_timesteps, num_features). + + Don't write any try-except block in your python code. The user will catch the exception message and provide the feedback to you. Also, don't write main function in your python code. The user will call the forward method in the model_cls to get the output tensor. + + Please notice that your model should only use current features as input. The user will provide the input tensor to the model's forward function. + + +qlib_model_output_format: |- + Your output should be a tensor with shape (batch_size, 1). 
+ The output tensor should be saved in a file named "output.pth" in the same directory as your python file. + The user will evaluate the shape of the output tensor so the tensor read from "output.pth" should be 8 numbers. + +qlib_model_simulator: |- + The models will be sent into Qlib to train and evaluate their performance in predicting future returns. Hypothesis is improved upon checking the feedback on the results. + Qlib is an AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms, including supervised learning, market dynamics modeling, and reinforcement learning (RL). + User will use Qlib to automatically perform the following tasks: + 1. Generate a baseline factor table. + 2. Train the model defined in your class Net to predict the next several days' returns based on the factor values. + 3. Build a portfolio based on the predicted returns using a specific strategy. + 4. Evaluate the portfolio's performance, including metrics such as return, Sharpe ratio, max drawdown, and others. + 5. Iterate on growing the hypothesis to enable model improvements based on performance evaluations and feedback. + +qlib_model_rich_style_description: |- + ### Qlib Model Evolving Automatic R&D Demo + + #### [Overview](#_summary) + + The demo showcases the iterative process of hypothesis generation, knowledge construction, and decision-making in model construction in quantitative finance. It highlights how models evolve through continuous feedback and refinement. + + #### [Automated R&D](#_rdloops) + + - **[R (Research)](#_research)** + - Iteration of ideas and hypotheses. + - Continuous learning and knowledge construction. + + - **[D (Development)](#_development)** + - Evolving code generation and model refinement. 
+ - Automated implementation and testing of models. + + #### [Objective](#_summary) + + To demonstrate the dynamic evolution of models through the Qlib platform, emphasizing how each iteration enhances the accuracy and reliability of the resulting models. + +qlib_model_experiment_setting: |- + | Dataset 📊 | Model 🤖 | Factors 🌟 | Data Split 🧮 | + |---------|----------|---------------|-------------------------------------------------| + | CSI300 | RDAgent-dev | 20 factors (Alpha158) | Train: 2008-01-01 to 2014-12-31
Valid: 2015-01-01 to 2016-12-31
Test  : 2017-01-01 to 2020-08-01 | \ No newline at end of file diff --git a/alphaagent/scenarios/qlib/experiment/prompts_rdagent.yaml b/alphaagent/scenarios/qlib/experiment/prompts_rdagent.yaml new file mode 100755 index 00000000..80762906 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/prompts_rdagent.yaml @@ -0,0 +1,238 @@ +qlib_factor_background: |- + The factor is a characteristic or variable used in quant investment that can help explain the returns and risks of a portfolio or a single asset. Factors are used by investors to identify and exploit sources of excess returns, and they are central to many quantitative investment strategies. + Each number in the factor represents a physical value for an instrument on a day. + User will train a model to predict the next several days return based on the factor values of the previous days. + The factor is defined in the following parts: + 1. Name: The name of the factor. + 2. Description: The description of the factor. + 3. Formulation: The formulation of the factor. + 4. Variables: The variables or functions used in the formulation of the factor. + The factor might not provide all the parts of the information above since some might not be applicable. + Please specifically give all the hyperparameters of the factor, like the window size, look back period, and so on. One factor should statically define one output with a static source data. For example, last 10 days momentum and last 20 days momentum should be two different factors. + +qlib_factor_interface: |- + Your python code should follow the interface to better interact with the user's system. + Your python code should contain the following parts: the import part, the function part, and the main part. You should write a main function named "calculate_{function_name}" and call this function in the 'if __name__ == "__main__"' part. Don't write any try-except block in your python code. The user will catch the exception message and provide the feedback to you. 
+ User will write your python code into a python file and execute the file directly with "python {your_file_name}.py". You should calculate the factor values and save the result into an HDF5(H5) file named "result.h5" in the same directory as your python file. The result file is an HDF5(H5) file containing a pandas dataframe. The index of the dataframe is the "datetime" and "instrument", and the single column name is the factor name, and the value is the factor value. The result file should be saved in the same directory as your python file. + +qlib_factor_strategy: |- + Ensure that for every step of data processing, the data format (including indexes) is clearly explained through comments. + Each transformation or calculation should be accompanied by a detailed description of how the data is structured, especially focusing on key aspects like whether the data has multi-level indexing, how to access specific columns or index levels, and any operations that affect the data shape (e.g., `reset_index()`, `groupby()`, `merge()`). + This step-by-step explanation will ensure clarity and accuracy in data handling. For example: + 1. **Start with multi-level index**: + ```python + # The initial DataFrame has a multi-level index with 'datetime' and 'instrument'. + # To access the 'datetime' index, use df.index.get_level_values('datetime'). + datetime_values = df.index.get_level_values('datetime') + ``` + + 2. **Reset the index if necessary**: + ```python + # Resetting the index to move 'datetime' and 'instrument' from the index to columns. + # This operation flattens the multi-index structure. + df = df.reset_index() + ``` + + 3. **Perform groupby operations**: + ```python + # Grouping by 'datetime' and 'instrument' to aggregate the data. + # After groupby, the result will maintain 'datetime' and 'instrument' as a multi-level index. + df_grouped = df.groupby(['datetime', 'instrument']).sum() + ``` + + 4. 
**Ensure consistent datetime formats**: + ```python + # Before merging, ensure that the 'datetime' column in both DataFrames is of the same format. + # Convert to datetime format if necessary. + df['datetime'] = pd.to_datetime(df['datetime']) + other_df['datetime'] = pd.to_datetime(other_df['datetime']) + ``` + + 5. **Merge operations**: + ```python + # When merging DataFrames, ensure you are merging on both 'datetime' and 'instrument'. + # If these are part of the index, reset the index before merging. + merged_df = pd.merge(df, other_df, on=['datetime', 'instrument'], how='inner') + ``` + +qlib_factor_output_format: |- + Your output should be a pandas dataframe similar to the following example information: + + MultiIndex: 40914 entries, (Timestamp('2020-01-02 00:00:00'), 'SH600000') to (Timestamp('2021-12-31 00:00:00'), 'SZ300059') + Data columns (total 1 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 your factor name 40914 non-null float64 + dtypes: float64(1) + memory usage: + Notice: The non-null count is OK to be different from the total number of entries since some instruments may not have the factor value on some days. + One possible format of `result.h5` may look like the following: + datetime instrument + 2020-01-02 SZ000001 -0.001796 + SZ000166 0.005780 + SZ000686 0.004228 + SZ000712 0.001298 + SZ000728 0.005330 + ... + 2021-12-31 SZ000750 0.000000 + SZ000776 0.002459 + +qlib_factor_simulator: |- + The factors will be sent into Qlib to train a model to predict the next several days return based on the factor values of the previous days. + Qlib is an AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms, including supervised learning, market dynamics modeling, and RL. 
+ User will use Qlib to automatically do the following things: + 1. generate a new factor table based on the factor values. + 2. train a model like LightGBM, CatBoost, LSTM or simple PyTorch model to predict the next several days return based on the factor values. + 3. build a portfolio based on the predicted return based on a strategy. + 4. evaluate the portfolio's performance including the return, sharpe ratio, max drawdown, and so on. + +qlib_factor_rich_style_description : |- + ### R&D Agent-Qlib: Automated Quantitative Trading & Iterative Factors Evolution Demo + + #### [Overview](#_summary) + + The demo showcases the iterative process of hypothesis generation, knowledge construction, and decision-making. It highlights how financial factors evolve through continuous feedback and refinement. + + #### [Automated R&D](#_rdloops) + + - **[R (Research)](#_research)** + - Iterative development of ideas and hypotheses. + - Continuous learning and knowledge construction. + + - **[D (Development)](#_development)** + - Progressive implementation and code generation of factors. + - Automated testing and validation of financial factors. + + #### [Objective](#_summary) + + To demonstrate the dynamic evolution of financial factors through the Qlib platform, emphasizing how each iteration enhances the accuracy and reliability of the resulting financial factors. + +qlib_factor_from_report_rich_style_description : |- + ### R&D Agent-Qlib: Automated Quantitative Trading & Factor Extraction from Financial Reports Demo + + #### [Overview](#_summary) + + This demo showcases the process of extracting factors from financial research reports, implementing these factors, and analyzing their performance through Qlib backtest, continually expanding and refining the factor library. + + #### [Automated R&D](#_rdloops) + + - **[R (Research)](#_research)** + - Iterative development of ideas and hypotheses from financial reports. + - Continuous learning and knowledge construction. 
+ + - **[D (Development)](#_development)** + - Progressive factor extraction and code generation. + - Automated implementation and testing of financial factors. + + #### [Objective](#_summary) + + + + + + + + + + + + + + +
💡 Innovation Tool to quickly extract and test factors from research reports.
Efficiency Rapid identification of valuable factors from numerous reports.
🗃️ Outputs Expand and refine the factor library to support further research.
+ +qlib_factor_experiment_setting: |- + | Dataset 📊 | Model 🤖 | Factors 🌟 | Data Split 🧮 | + |---------|----------|---------------|-------------------------------------------------| + | CSI300 | LGBModel | Alpha158 Plus | Train: 2012-01-01 to 2018-12-31
Valid: 2019-01-01 to 2020-12-31
Test  : 2021-01-01 to 2024-12-01 | + + +qlib_model_background: |- + The model is a machine learning or deep learning structure used in quantitative investment to predict the returns and risks of a portfolio or a single asset. Models are employed by investors to generate forecasts based on historical data and identified factors, which are central to many quantitative investment strategies. + Each model takes the factors as input and predicts the future returns. Usually, the bigger the model is, the better the performance would be. + The model is defined in the following parts: + 1. Name: The name of the model. + 2. Description: The description of the model. + 3. Architecture: The detailed architecture of the model, such as neural network layers or tree structures. + 4. Hyperparameters: The hyperparameters used in the model, such as learning rate, number of epochs, etc. + 5. ModelType: The type of the model, "Tabular" for tabular model and "TimeSeries" for time series model. + The model should provide clear and detailed documentation of its architecture and hyperparameters. One model should statically define one output with a fixed architecture and hyperparameters. For example, a model with two GRU layers and a model with three GRU layers should be considered two different models. + +qlib_model_interface: |- + Your python code should follow the interface to better interact with the user's system. + Your code should contain several parts: + 1. The import part: import the necessary libraries. + 2. A class which is a sub-class of torch.nn.Module. This class should have an init function and a forward function which inputs a tensor and outputs a tensor. + 3. Set a variable called "model_cls" to the class you defined. + + The user will save your code into a python file called "model.py". 
Then the user imports model_cls in file "model.py" after setting the cwd into the directory: + ```python + from model import model_cls + ``` + So your python code should follow the pattern: + ```python + class XXXModel(torch.nn.Module): + ... + model_cls = XXXModel + ``` + + The model has two types, "Tabular" for tabular model and "TimeSeries" for time series model. The input shape to a tabular model is (batch_size, num_features) and the input shape to a time series model is (batch_size, num_features, num_timesteps). The output shape of the model should be (batch_size, 1). + The "batch_size" is a dynamic value which is determined by the input of forward function. + The "num_features" and "num_timesteps" are static which will be provided to the model through init function. + User will initialize the tabular model with the following code: + ```python + model = model_cls(num_features=num_features) + ``` + User will initialize the time series model with the following code: + ```python + model = model_cls(num_features=num_features, num_timesteps=num_timesteps) + ``` + No other parameters will be passed to the model so give other parameters a default value or just make them static. + + When dealing with TimeSeries model, remember to permute the input tensor since the input tensor is in the shape of (batch_size, num_features, num_timesteps) and a normal time series model is expecting the input tensor in the shape of (batch_size, num_timesteps, num_features). + + Don't write any try-except block in your python code. The user will catch the exception message and provide the feedback to you. Also, don't write main function in your python code. The user will call the forward method in the model_cls to get the output tensor. + + Please notice that your model should only use current features as input. The user will provide the input tensor to the model's forward function. + + +qlib_model_output_format: |- + Your output should be a tensor with shape (batch_size, 1). 
+ The output tensor should be saved in a file named "output.pth" in the same directory as your python file. + The user will evaluate the shape of the output tensor so the tensor read from "output.pth" should be 8 numbers. + +qlib_model_simulator: |- + The models will be sent into Qlib to train and evaluate their performance in predicting future returns. Hypothesis is improved upon checking the feedback on the results. + Qlib is an AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms, including supervised learning, market dynamics modeling, and reinforcement learning (RL). + User will use Qlib to automatically perform the following tasks: + 1. Generate a baseline factor table. + 2. Train the model defined in your class Net to predict the next several days' returns based on the factor values. + 3. Build a portfolio based on the predicted returns using a specific strategy. + 4. Evaluate the portfolio's performance, including metrics such as return, Sharpe ratio, max drawdown, and others. + 5. Iterate on growing the hypothesis to enable model improvements based on performance evaluations and feedback. + +qlib_model_rich_style_description: |- + ### Qlib Model Evolving Automatic R&D Demo + + #### [Overview](#_summary) + + The demo showcases the iterative process of hypothesis generation, knowledge construction, and decision-making in model construction in quantitative finance. It highlights how models evolve through continuous feedback and refinement. + + #### [Automated R&D](#_rdloops) + + - **[R (Research)](#_research)** + - Iteration of ideas and hypotheses. + - Continuous learning and knowledge construction. + + - **[D (Development)](#_development)** + - Evolving code generation and model refinement. 
+ - Automated implementation and testing of models. + + #### [Objective](#_summary) + + To demonstrate the dynamic evolution of models through the Qlib platform, emphasizing how each iteration enhances the accuracy and reliability of the resulting models. + +qlib_model_experiment_setting: |- + | Dataset 📊 | Model 🤖 | Factors 🌟 | Data Split 🧮 | + |---------|----------|---------------|-------------------------------------------------| + | CSI300 | RDAgent-dev | 20 factors (Alpha158) | Train: 2008-01-01 to 2014-12-31
Valid: 2015-01-01 to 2016-12-31
Test  : 2017-01-01 to 2020-08-01 | \ No newline at end of file diff --git a/alphaagent/scenarios/qlib/experiment/utils.py b/alphaagent/scenarios/qlib/experiment/utils.py new file mode 100755 index 00000000..128f9ef4 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/utils.py @@ -0,0 +1,171 @@ +import io +import re +import shutil +from pathlib import Path + +import pandas as pd + +# render it with jinja +from jinja2 import Environment, StrictUndefined + +from alphaagent.components.coder.factor_coder.config import FACTOR_COSTEER_SETTINGS +from alphaagent.utils.env import QTDockerEnv +from alphaagent.log import logger + + +def generate_data_folder_from_qlib(use_local: bool = True): + template_path = Path(__file__).parent / "factor_data_template" + qtde = QTDockerEnv(is_local=use_local) + qtde.prepare() + + # 运行数据生成脚本 + logger.info(f"在{'本地' if use_local else 'Docker容器'}中生成因子数据") + execute_log = qtde.run( + local_path=str(template_path), + entry=f"python generate.py", + ) + + # 检查文件是否生成 + daily_pv_all = Path(__file__).parent / "factor_data_template" / "daily_pv_all.h5" + daily_pv_debug = Path(__file__).parent / "factor_data_template" / "daily_pv_debug.h5" + + assert daily_pv_all.exists(), "daily_pv_all.h5 is not generated." + assert daily_pv_debug.exists(), "daily_pv_debug.h5 is not generated." 
+ + # 创建数据目录并复制文件 + logger.info(f"复制生成的数据文件到工作目录") + Path(FACTOR_COSTEER_SETTINGS.data_folder).mkdir(parents=True, exist_ok=True) + shutil.copy( + daily_pv_all, + Path(FACTOR_COSTEER_SETTINGS.data_folder) / "daily_pv.h5", + ) + shutil.copy( + Path(__file__).parent / "factor_data_template" / "README.md", + Path(FACTOR_COSTEER_SETTINGS.data_folder) / "README.md", + ) + + Path(FACTOR_COSTEER_SETTINGS.data_folder_debug).mkdir(parents=True, exist_ok=True) + shutil.copy( + daily_pv_debug, + Path(FACTOR_COSTEER_SETTINGS.data_folder_debug) / "daily_pv.h5", + ) + shutil.copy( + Path(__file__).parent / "factor_data_template" / "README.md", + Path(FACTOR_COSTEER_SETTINGS.data_folder_debug) / "README.md", + ) + + logger.info(f"数据准备完成") + + + +def get_file_desc(p: Path, variable_list=[]) -> str: + """ + Get the description of a file based on its type. + + Parameters + ---------- + p : Path + The path of the file. + + Returns + ------- + str + The description of the file. + """ + p = Path(p) + + JJ_TPL = Environment(undefined=StrictUndefined).from_string( + """ +{{file_name}} +```{{type_desc}} +{{content}} +``` +""" + ) + + if p.name.endswith(".h5"): + df = pd.read_hdf(p) + # get df.head() as string with full width + pd.set_option("display.max_columns", None) # or 1000 + pd.set_option("display.max_rows", None) # or 1000 + pd.set_option("display.max_colwidth", None) # or 199 + + if isinstance(df.index, pd.MultiIndex): + df_info = f"MultiIndex names:, {df.index.names})\n" + else: + df_info = f"Index name: {df.index.name}\n" + columns = df.dtypes.to_dict() + filtered_columns = [f"{i, j}" for i, j in columns.items() if i in variable_list] + if filtered_columns: + df_info += "Related Data columns: \n" + df_info += ",".join(filtered_columns) + else: + df_info += "Data columns: \n" + df_info += ",".join(columns) + df_info += "\n" + if "REPORT_PERIOD" in df.columns: + one_instrument = df.index.get_level_values("instrument")[0] + df_on_one_instrument = df.loc[pd.IndexSlice[:, 
one_instrument], ["REPORT_PERIOD"]] + df_info += f""" +A snapshot of one instrument, from which you can tell the distribution of the data: +{df_on_one_instrument.head(5)} +""" + return JJ_TPL.render( + file_name=p.name, + type_desc="h5 info", + content=df_info, + ) + elif p.name.endswith(".md"): + with open(p) as f: + content = f.read() + return JJ_TPL.render( + file_name=p.name, + type_desc="markdown", + content=content, + ) + else: + raise NotImplementedError( + f"file type {p.name} is not supported. Please implement its description function.", + ) + + +def get_data_folder_intro( + fname_reg: str = ".*", + flags=0, + variable_mapping=None, + use_local: bool = True, +) -> str: + """ + Directly get the info of the data folder. + It is for preparing prompting message. + + Parameters + ---------- + fname_reg : str + a regular expression to filter the file name. + + flags: str + flags for re.match + + Returns + ------- + str + The description of the data folder. + """ + + if ( + not Path(FACTOR_COSTEER_SETTINGS.data_folder).exists() + or not Path(FACTOR_COSTEER_SETTINGS.data_folder_debug).exists() + ): + # FIXME: (xiao) I think this is writing in a hard-coded way. + # get data folder intro does not imply that we are generating the data folder. 
+ generate_data_folder_from_qlib(use_local=use_local) + content_l = [] + + for p in Path(FACTOR_COSTEER_SETTINGS.data_folder_debug).iterdir(): + if re.match(fname_reg, p.name, flags) is not None: + if variable_mapping: + content_l.append(get_file_desc(p, variable_mapping.get(p.stem, []))) + else: + content_l.append(get_file_desc(p)) + return "\n----------------- file splitter -------------\n".join(content_l) diff --git a/alphaagent/scenarios/qlib/experiment/workspace.py b/alphaagent/scenarios/qlib/experiment/workspace.py new file mode 100755 index 00000000..653bb882 --- /dev/null +++ b/alphaagent/scenarios/qlib/experiment/workspace.py @@ -0,0 +1,53 @@ +from pathlib import Path +from typing import Any + +import pandas as pd + +from alphaagent.core.experiment import FBWorkspace +from alphaagent.log import logger +from alphaagent.utils.env import QTDockerEnv + + +class QlibFBWorkspace(FBWorkspace): + def __init__(self, template_folder_path: Path, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + self.inject_code_from_folder(template_folder_path) + + def execute( + self, + qlib_config_name: str = "conf.yaml", + run_env: dict = {}, + use_local: bool = True, + *args, + **kwargs + ) -> str: + # 使用本地环境或Docker环境 + qtde = QTDockerEnv(is_local=use_local) + qtde.prepare() + + # 运行Qlib回测 + logger.info(f"Execute {'Local' if use_local else 'Docker container'} Backtest: qrun {qlib_config_name}") + execute_log = qtde.run( + local_path=str(self.workspace_path), + entry=f"qrun {qlib_config_name}", + env=run_env, + ) + + # 处理结果 + logger.info(f"Read {'Local' if use_local else 'Docker container'} Backtest Result") + execute_log = qtde.run( + local_path=str(self.workspace_path), + entry="python read_exp_res.py", + env=run_env, + ) + + # 加载结果 + ret_df = pd.read_pickle(self.workspace_path / "ret.pkl") + logger.log_object(ret_df, tag="Quantitative Backtesting Chart") + + csv_path = self.workspace_path / "qlib_res.csv" + if not csv_path.exists(): + logger.error(f"File 
{csv_path} does not exist.") + return None + + return pd.read_csv(csv_path, index_col=0).iloc[:, 0] diff --git a/alphaagent/scenarios/qlib/factor_experiment_loader/json_loader.py b/alphaagent/scenarios/qlib/factor_experiment_loader/json_loader.py new file mode 100755 index 00000000..d26d469f --- /dev/null +++ b/alphaagent/scenarios/qlib/factor_experiment_loader/json_loader.py @@ -0,0 +1,63 @@ +import json +from pathlib import Path + +from alphaagent.components.benchmark.eval_method import TestCase, TestCases +from alphaagent.components.coder.factor_coder.factor import ( + FactorExperiment, + FactorFBWorkspace, + FactorTask, +) +from alphaagent.components.loader.experiment_loader import FactorExperimentLoader +from alphaagent.core.experiment import Experiment, Loader +from alphaagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment + + +class FactorExperimentLoaderFromDict(FactorExperimentLoader): + def load(self, factor_dict: dict) -> list: + """Load data from a dict.""" + task_l = [] + for factor_name, factor_data in factor_dict.items(): + task = FactorTask( + factor_name=factor_name, + factor_description=factor_data["description"], + factor_formulation=factor_data["formulation"], + variables=factor_data["variables"], + ) + task_l.append(task) + exp = QlibFactorExperiment(sub_tasks=task_l) + return exp + + +class FactorExperimentLoaderFromJsonFile(FactorExperimentLoader): + def load(self, json_file_path: Path) -> list: + with open(json_file_path, "r") as file: + factor_dict = json.load(file) + return FactorExperimentLoaderFromDict().load(factor_dict) + + +class FactorExperimentLoaderFromJsonString(FactorExperimentLoader): + def load(self, json_string: str) -> list: + factor_dict = json.loads(json_string) + return FactorExperimentLoaderFromDict().load(factor_dict) + + +# TODO loader only supports generic of task or experiment, testcase might cause CI error here +# class FactorTestCaseLoaderFromJsonFile(Loader[TestCases]): +class 
FactorTestCaseLoaderFromJsonFile: + def load(self, json_file_path: Path) -> TestCases: + with open(json_file_path, "r") as file: + factor_dict = json.load(file) + test_cases = TestCases() + for factor_name, factor_data in factor_dict.items(): + task = FactorTask( + factor_name=factor_name, + factor_description=factor_data["description"], + factor_formulation=factor_data["formulation"], + variables=factor_data["variables"], + ) + gt = FactorFBWorkspace(task, raise_exception=False) + code = {"factor.py": factor_data["gt_code"]} + gt.inject_code(**code) + test_cases.test_case_l.append(TestCase(task, gt)) + + return test_cases diff --git a/alphaagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py b/alphaagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py new file mode 100755 index 00000000..488bed87 --- /dev/null +++ b/alphaagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py @@ -0,0 +1,599 @@ +from __future__ import annotations + +import json +import multiprocessing as mp +import re +from pathlib import Path +from typing import Mapping + +import numpy as np +import pandas as pd +from jinja2 import Environment, StrictUndefined +from sklearn.cluster import KMeans +from sklearn.metrics.pairwise import cosine_similarity +from sklearn.preprocessing import normalize +from tqdm.auto import tqdm + +from alphaagent.components.document_reader.document_reader import ( + load_and_process_pdfs_by_langchain, +) +from alphaagent.components.loader.experiment_loader import FactorExperimentLoader +from alphaagent.core.conf import RD_AGENT_SETTINGS +from alphaagent.core.prompts import Prompts +from alphaagent.core.utils import multiprocessing_wrapper +from alphaagent.log import logger +from alphaagent.oai.llm_conf import LLM_SETTINGS +from alphaagent.oai.llm_utils import APIBackend +from alphaagent.scenarios.qlib.factor_experiment_loader.json_loader import ( + FactorExperimentLoaderFromDict, +) + +document_process_prompts = Prompts(file_path=Path(__file__).parent / 
"prompts.yaml") + + +def classify_report_from_dict( + report_dict: Mapping[str, str], + vote_time: int = 1, + substrings: tuple[str] = (), +) -> dict[str, dict[str, str]]: + """ + Parameters: + - report_dict (Dict[str, str]): + A dictionary where the key is the path of the report (ending with .pdf), + and the value is either the report content as a string. + - input_max_token (int): Specifying the maximum number of input tokens. + - vote_time (int): An integer specifying how many times to vote. + - substrings (list(str)): List of hardcode substrings. + + Returns: + - Dict[str, Dict[str, str]]: A dictionary where each key is the path of the report, + with a single key 'class' and its value being the classification result (0 or 1). + + """ + # if len(substrings) == 0: + # substrings = ( + # "金融工程", + # "金工", + # "回测", + # "因子", + # "机器学习", + # "深度学习", + # "量化", + # ) + + res_dict = {} + classify_prompt = document_process_prompts["classify_system"] + + for key, value in tqdm(report_dict.items()): + if not key.endswith(".pdf"): + continue + file_name = key + + if isinstance(value, str): + content = value + else: + logger.warning(f"Input format does not meet the requirements: {file_name}") + res_dict[file_name] = {"class": 0} + continue + + # pre-filter document with key words is not necessary, skip this check for now + # if ( + # not any(substring in content for substring in substrings) and False + # ): + # res_dict[file_name] = {"class": 0} + # else: + while ( + APIBackend().build_messages_and_calculate_token( + user_prompt=content, + system_prompt=classify_prompt, + ) + > LLM_SETTINGS.chat_token_limit + ): + content = content[: -(LLM_SETTINGS.chat_token_limit // 100)] + + vote_list = [] + for _ in range(vote_time): + user_prompt = content + system_prompt = classify_prompt + res = APIBackend().build_messages_and_create_chat_completion( + user_prompt=user_prompt, + system_prompt=system_prompt, + json_mode=True, + ) + try: + res = json.loads(res) + 
vote_list.append(int(res["class"])) + except json.JSONDecodeError: + logger.warning(f"Return value could not be parsed: {file_name}") + res_dict[file_name] = {"class": 0} + count_0 = vote_list.count(0) + count_1 = vote_list.count(1) + if max(count_0, count_1) > int(vote_time / 2): + break + + result = 1 if count_1 > count_0 else 0 + res_dict[file_name] = {"class": result} + + return res_dict + + +def __extract_factors_name_and_desc_from_content( + content: str, +) -> dict[str, dict[str, str]]: + session = APIBackend().build_chat_session( + session_system_prompt=document_process_prompts["extract_factors_system"], + ) + + extracted_factor_dict = {} + current_user_prompt = content + + for _ in range(10): + extract_result_resp = session.build_chat_completion( + user_prompt=current_user_prompt, + json_mode=True, + ) + ret_dict = json.loads(extract_result_resp) + factors = ret_dict["factors"] + if len(factors) == 0: + break + for factor_name, factor_description in factors.items(): + extracted_factor_dict[factor_name] = factor_description + current_user_prompt = document_process_prompts["extract_factors_follow_user"] + + return extracted_factor_dict + + +def __extract_factors_formulation_from_content( + content: str, + factor_dict: dict[str, str], +) -> dict[str, dict[str, str]]: + factor_dict_df = pd.DataFrame( + factor_dict.items(), + columns=["factor_name", "factor_description"], + ) + + system_prompt = document_process_prompts["extract_factor_formulation_system"] + current_user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string( + document_process_prompts["extract_factor_formulation_user"], + ) + .render(report_content=content, factor_dict=factor_dict_df.to_string()) + ) + + session = APIBackend().build_chat_session(session_system_prompt=system_prompt) + factor_to_formulation = {} + + for _ in range(10): + extract_result_resp = session.build_chat_completion( + user_prompt=current_user_prompt, + json_mode=True, + ) + ret_dict = 
json.loads(extract_result_resp) + for name, formulation_and_description in ret_dict.items(): + if name in factor_dict: + factor_to_formulation[name] = formulation_and_description + if len(factor_to_formulation) != len(factor_dict): + remain_df = factor_dict_df[~factor_dict_df["factor_name"].isin(factor_to_formulation)] + current_user_prompt = ( + "Some factors are missing. Please check the following" + " factors and their descriptions and continue extraction.\n" + "==========================Remaining factors" + "==========================\n" + remain_df.to_string() + ) + else: + break + + return factor_to_formulation + + +def __extract_factor_and_formulation_from_one_report( + content: str, +) -> dict[str, dict[str, str]]: + final_factor_dict_to_one_report = {} + factor_dict = __extract_factors_name_and_desc_from_content(content) + if len(factor_dict) != 0: + factor_to_formulation = __extract_factors_formulation_from_content( + content, + factor_dict, + ) + for factor_name in factor_dict: + if ( + factor_name not in factor_to_formulation + or "formulation" not in factor_to_formulation[factor_name] + or "variables" not in factor_to_formulation[factor_name] + ): + continue + + final_factor_dict_to_one_report.setdefault(factor_name, {}) + final_factor_dict_to_one_report[factor_name]["description"] = factor_dict[factor_name] + + # use code to correct _ in formulation + formulation = factor_to_formulation[factor_name]["formulation"] + if factor_name in formulation: + target_factor_name = factor_name.replace("_", r"\_") + formulation = formulation.replace(factor_name, target_factor_name) + for variable in factor_to_formulation[factor_name]["variables"]: + if variable in formulation: + target_variable = variable.replace("_", r"\_") + formulation = formulation.replace(variable, target_variable) + + final_factor_dict_to_one_report[factor_name]["formulation"] = formulation + final_factor_dict_to_one_report[factor_name]["variables"] = 
factor_to_formulation[factor_name]["variables"] + + return final_factor_dict_to_one_report + + +def extract_factors_from_report_dict( + report_dict: dict[str, str], + useful_no_dict: dict[str, dict[str, str]], + n_proc: int = 11, +) -> dict[str, dict[str, dict[str, str]]]: + useful_report_dict = {} + for key, value in useful_no_dict.items(): + if isinstance(value, dict): + if int(value.get("class")) == 1: + useful_report_dict[key] = report_dict[key] + else: + logger.warning(f"Invalid input format: {key}") + + file_name_list = list(useful_report_dict.keys()) + + final_report_factor_dict = {} + factor_dict_list = multiprocessing_wrapper( + [ + (__extract_factor_and_formulation_from_one_report, (useful_report_dict[file_name],)) + for file_name in file_name_list + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + for index, file_name in enumerate(file_name_list): + final_report_factor_dict[file_name] = factor_dict_list[index] + logger.info(f"Factor extraction completed for {len(final_report_factor_dict)} reports") + + return final_report_factor_dict + + +def merge_file_to_factor_dict_to_factor_dict( + file_to_factor_dict: dict[str, dict], +) -> dict: + factor_dict = {} + for file_name in file_to_factor_dict: + for factor_name in file_to_factor_dict[file_name]: + factor_dict.setdefault(factor_name, []) + factor_dict[factor_name].append(file_to_factor_dict[file_name][factor_name]) + + factor_dict_simple_deduplication = {} + for factor_name in factor_dict: + if len(factor_dict[factor_name]) > 1: + factor_dict_simple_deduplication[factor_name] = max( + factor_dict[factor_name], + key=lambda x: len(x["formulation"]), + ) + else: + factor_dict_simple_deduplication[factor_name] = factor_dict[factor_name][0] + return factor_dict_simple_deduplication + + +def __check_factor_dict_relevance( + factor_df_string: str, +) -> dict[str, dict[str, str]]: + extract_result_resp = APIBackend().build_messages_and_create_chat_completion( + 
system_prompt=document_process_prompts["factor_relevance_system"], + user_prompt=factor_df_string, + json_mode=True, + ) + return json.loads(extract_result_resp) + + +def check_factor_relevance( + factor_dict: dict[str, dict[str, str]], +) -> tuple[dict[str, dict[str, str]], dict[str, dict[str, str]]]: + factor_relevance_dict = {} + + factor_df = pd.DataFrame(factor_dict).T + factor_df.index.names = ["factor_name"] + + while factor_df.shape[0] > 0: + result_list = multiprocessing_wrapper( + [ + (__check_factor_dict_relevance, (factor_df.iloc[i : i + 50, :].to_string(),)) + for i in range(0, factor_df.shape[0], 50) + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + + for result in result_list: + for factor_name, relevance in result.items(): + factor_relevance_dict[factor_name] = relevance + + factor_df = factor_df[~factor_df.index.isin(factor_relevance_dict)] + + filtered_factor_dict = { + factor_name: factor_dict[factor_name] + for factor_name in factor_dict + if factor_relevance_dict[factor_name]["relevance"] + } + + return factor_relevance_dict, filtered_factor_dict + + +def __check_factor_dict_viability_simulate_json_mode( + factor_df_string: str, +) -> dict[str, dict[str, str]]: + extract_result_resp = APIBackend().build_messages_and_create_chat_completion( + system_prompt=document_process_prompts["factor_viability_system"], + user_prompt=factor_df_string, + json_mode=True, + ) + return json.loads(extract_result_resp) + + +def check_factor_viability( + factor_dict: dict[str, dict[str, str]], +) -> tuple[dict[str, dict[str, str]], dict[str, dict[str, str]]]: + factor_viability_dict = {} + + factor_df = pd.DataFrame(factor_dict).T + factor_df.index.names = ["factor_name"] + + while factor_df.shape[0] > 0: + result_list = multiprocessing_wrapper( + [ + (__check_factor_dict_viability_simulate_json_mode, (factor_df.iloc[i : i + 50, :].to_string(),)) + for i in range(0, factor_df.shape[0], 50) + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + + for result in result_list: + 
for factor_name, viability in result.items(): + factor_viability_dict[factor_name] = viability + + factor_df = factor_df[~factor_df.index.isin(factor_viability_dict)] + + filtered_factor_dict = { + factor_name: factor_dict[factor_name] + for factor_name in factor_dict + if factor_viability_dict[factor_name]["viability"] + } + + return factor_viability_dict, filtered_factor_dict + + +def __check_factor_duplication_simulate_json_mode( + factor_df: pd.DataFrame, +) -> list[list[str]]: + current_user_prompt = factor_df.to_string() + + working_list = [factor_df] + final_list = [] + + while len(working_list) > 0: + current_df = working_list.pop(0) + if ( + APIBackend().build_messages_and_calculate_token( + user_prompt=current_df.to_string(), system_prompt=document_process_prompts["factor_duplicate_system"] + ) + > LLM_SETTINGS.chat_token_limit + ): + working_list.append(current_df.iloc[: current_df.shape[0] // 2, :]) + working_list.append(current_df.iloc[current_df.shape[0] // 2 :, :]) + else: + final_list.append(current_df) + + generated_duplicated_groups = [] + for current_df in final_list: + current_factor_to_string = current_df.to_string() + session = APIBackend().build_chat_session( + session_system_prompt=document_process_prompts["factor_duplicate_system"], + ) + for _ in range(10): + extract_result_resp = session.build_chat_completion( + user_prompt=current_factor_to_string, + json_mode=True, + ) + ret_dict = json.loads(extract_result_resp) + if len(ret_dict) == 0: + return generated_duplicated_groups + else: + generated_duplicated_groups.extend(ret_dict) + current_factor_to_string = """Continue to extract duplicated groups. 
If no more duplicated group found please respond empty dict.""" + return generated_duplicated_groups + + +def __kmeans_embeddings(embeddings: np.ndarray, k: int = 20) -> list[list[str]]: + x_normalized = normalize(embeddings) + + np.random.seed(42) + + kmeans = KMeans( + n_clusters=k, + init="random", + max_iter=100, + n_init=10, + random_state=42, + ) + + # KMeans algorithm uses Euclidean distance, and we need to customize a function to find the most similar cluster center + def find_closest_cluster_cosine_similarity( + data: np.ndarray, + centroids: np.ndarray, + ) -> np.ndarray: + similarity = cosine_similarity(data, centroids) + return np.argmax(similarity, axis=1) + + # Initializes the cluster center + rng = np.random.default_rng(seed=42) + centroids = rng.choice(x_normalized, size=k, replace=False) + + # Iterate until convergence or the maximum number of iterations is reached + for _ in range(kmeans.max_iter): + # Assign the sample to the nearest cluster center + closest_clusters = find_closest_cluster_cosine_similarity( + x_normalized, + centroids, + ) + + # update the cluster center + new_centroids = np.array( + [x_normalized[closest_clusters == i].mean(axis=0) for i in range(k)], + ) + new_centroids = normalize(new_centroids) # 归一化新的簇中心 + + # Check whether the cluster center has changed + if np.allclose(centroids, new_centroids): + break + + centroids = new_centroids + + clusters = find_closest_cluster_cosine_similarity(x_normalized, centroids) + cluster_to_index = {} + for index, cluster in enumerate(clusters): + cluster_to_index.setdefault(cluster, []).append(index) + return sorted( + cluster_to_index.values(), + key=lambda x: len(x), + reverse=True, + ) + + +def __deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[list[str]]: + if len(factor_dict) == 0: + return [] + factor_df = pd.DataFrame(factor_dict).T + factor_df.index.names = ["factor_name"] + + factor_names = sorted(factor_dict) + + factor_name_to_full_str = {} + for 
factor_name in factor_dict: + description = factor_dict[factor_name]["description"] + formulation = factor_dict[factor_name]["formulation"] + variables = factor_dict[factor_name]["variables"] + factor_name_to_full_str[ + factor_name + ] = f"""Factor name: {factor_name} +Factor description: {description} +Factor formulation: {formulation} +Factor variables: {variables} +""" + + full_str_list = [factor_name_to_full_str[factor_name] for factor_name in factor_names] + embeddings = APIBackend.create_embedding(full_str_list) + + target_k = None + if len(full_str_list) < RD_AGENT_SETTINGS.max_input_duplicate_factor_group: + kmeans_index_group = [list(range(len(full_str_list)))] + target_k = 1 + else: + for k in range( + len(full_str_list) // RD_AGENT_SETTINGS.max_input_duplicate_factor_group, + RD_AGENT_SETTINGS.max_kmeans_group_number, + ): + kmeans_index_group = __kmeans_embeddings(embeddings=embeddings, k=k) + if len(kmeans_index_group[0]) < RD_AGENT_SETTINGS.max_input_duplicate_factor_group: + target_k = k + logger.info(f"K-means group number: {k}") + break + factor_name_groups = [[factor_names[index] for index in index_group] for index_group in kmeans_index_group] + + duplication_names_list = [] + + result_list = multiprocessing_wrapper( + [ + (__check_factor_duplication_simulate_json_mode, (factor_df.loc[factor_name_group, :],)) + for factor_name_group in factor_name_groups + ], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + + duplication_names_list = [] + + for deduplication_factor_names_list in result_list: + filter_factor_names = [ + factor_name for factor_name in set(deduplication_factor_names_list) if factor_name in factor_dict + ] + if len(filter_factor_names) > 1: + duplication_names_list.append(filter_factor_names) + + return duplication_names_list + + +def deduplicate_factors_by_llm( # noqa: C901, PLR0912 + factor_dict: dict[str, dict[str, str]], + factor_viability_dict: dict[str, dict[str, str]] | None = None, +) -> list[list[str]]: + 
final_duplication_names_list = [] + current_round_factor_dict = factor_dict + + # handle multi-round deduplication + for _ in range(10): + duplication_names_list = __deduplicate_factor_dict(current_round_factor_dict) + + new_round_names = [] + for duplication_names in duplication_names_list: + if len(duplication_names) < RD_AGENT_SETTINGS.max_output_duplicate_factor_group: + final_duplication_names_list.append(duplication_names) + else: + new_round_names.extend(duplication_names) + + if len(new_round_names) != 0: + current_round_factor_dict = {factor_name: factor_dict[factor_name] for factor_name in new_round_names} + else: + break + + # sort the final list of duplicates by their length, largest first + final_duplication_names_list = sorted(final_duplication_names_list, key=lambda x: len(x), reverse=True) + + to_replace_dict = {} # to map duplicates to the target factor names + for duplication_names in duplication_names_list: + if factor_viability_dict is not None: + # check viability of each factor in the duplicates group + viability_list = [factor_viability_dict[name]["viability"] for name in duplication_names] + if True not in viability_list: + continue + target_factor_name = duplication_names[viability_list.index(True)] + else: + target_factor_name = duplication_names[0] + for duplication_factor_name in duplication_names: + if duplication_factor_name == target_factor_name: + continue + to_replace_dict[duplication_factor_name] = target_factor_name + + llm_deduplicated_factor_dict = {} + added_lower_name_set = set() + for factor_name in factor_dict: + # only add factors that haven't been replaced and are not duplicates + if factor_name not in to_replace_dict and factor_name.lower() not in added_lower_name_set: + if factor_viability_dict is not None and not factor_viability_dict[factor_name]["viability"]: + continue + added_lower_name_set.add(factor_name.lower()) + llm_deduplicated_factor_dict[factor_name] = factor_dict[factor_name] + + return 
llm_deduplicated_factor_dict, final_duplication_names_list + + +class FactorExperimentLoaderFromPDFfiles(FactorExperimentLoader): + def load(self, file_or_folder_path: str) -> dict: + with logger.tag("docs"): + docs_dict = load_and_process_pdfs_by_langchain(file_or_folder_path) + logger.log_object(docs_dict) + + selected_report_dict = classify_report_from_dict(report_dict=docs_dict, vote_time=1) + + with logger.tag("file_to_factor_result"): + file_to_factor_result = extract_factors_from_report_dict(docs_dict, selected_report_dict) + logger.log_object(file_to_factor_result) + + with logger.tag("factor_dict"): + factor_dict = merge_file_to_factor_dict_to_factor_dict(file_to_factor_result) + logger.log_object(factor_dict) + + with logger.tag("filtered_factor_dict"): + factor_viability, filtered_factor_dict = check_factor_viability(factor_dict) + logger.log_object(filtered_factor_dict) + + # factor_dict, duplication_names_list = deduplicate_factors_by_llm(factor_dict, factor_viability) + + return FactorExperimentLoaderFromDict().load(filtered_factor_dict) diff --git a/alphaagent/scenarios/qlib/factor_experiment_loader/prompts.yaml b/alphaagent/scenarios/qlib/factor_experiment_loader/prompts.yaml new file mode 100755 index 00000000..2c49a14b --- /dev/null +++ b/alphaagent/scenarios/qlib/factor_experiment_loader/prompts.yaml @@ -0,0 +1,227 @@ +extract_factors_system: |- + 用户会提供一篇金融工程研报,其中包括了量化因子和模型研究,请按照要求抽取以下信息: + 1. 概述这篇研报的主要研究思路; + 2. 抽取出所有的因子,并概述因子的计算过程,请注意有些因子可能存在于表格中,请不要遗漏,因子的名称请使用英文,不能包含空格,可用下划线连接,研报中可能不含有因子,若没有请返回空字典; + 3. 抽取研报里面的所有模型,并概述模型的计算过程,可以分步骤描述模型搭建或计算的过程,研报中可能不含有模型,若没有请返回空字典; + + user will treat your factor name as key to store the factor, don't put any interaction message in the content. Just response the output without any interaction and explanation. + All names should be in English. + Respond with your analysis in JSON format. 
The JSON schema should include: + { + "summary": "The summary of this report", + "factors": { + "Name of factor 1": "Description to factor 1", + "Name of factor 2": "Description to factor 2" + }, + "models": { + "Name of model 1": "Description to model 1", + "Name of model 2": "Description to model 2" + } + } + +extract_factors_follow_user: |- + Please continue extracting the factors. Please ignore factors appeared in former messages. If no factor is found, please return an empty dict. + Notice: You should not miss any factor in the report! Some factors might appear several times in the report. You can repeat them to avoid missing other factors. + Respond with your analysis in JSON format. The JSON schema should include: + { + "factors": { + "Name of factor 1": "Description to factor 1", + "Name of factor 2": "Description to factor 2" + } + } + +extract_factor_formulation_system: |- + I have a financial engineering research report and a list of factors extracted from it. I need assistance in extracting specific information based on the report and the provided list of factors. The tasks are as follows: + + 1. For each factor, I need its calculation formula in LaTeX format. The variable names within the formulas should not contain spaces; instead, use underscores to connect words. Ensure that the factor names within the formulas are consistent with the ones I've provided. + 2. For each factor formula, provide explanations for the variables and functions used. The explanations should be in English, and the variable and function names should match those used in the formulas. + + Here are the sources of data I have: + + 1. Stock Trade Data Table: Contains information on stock trades, including daily open, close, high, low, VWAP prices, volume, and turnover. + 2. Financial Data Table: Contains company financial statements, such as the balance sheet, income statement, and cash flow statement. + 3. 
Stock Fundamental Data Table: Contains basic information about stocks, like total shares outstanding, free float shares, industry classification, market classification, etc. + 4. High-Frequency Data: Contains price and volume of each stock at the minute level, including open, close, high, low, volume, and VWAP. + + Please expand the formulation to use the source data I have provided. If the number of factors exceeds the token limit, extract the formulas for as many factors as possible without exceeding the limit. Ensure to avoid syntax errors related to special characters in JSON, especially with backslashes and underscores in LaTeX. + + Provide your analysis in JSON format, using the following schema: + { + "factor name 1": { + "formulation": "latex formulation of factor 1", + "variables": { + "variable or function name 1": "description of variable or function 1", + "variable or function name 2": "description of variable or function 2" + } + }, + "factor name 2": { + "formulation": "latex formulation of factor 2", + "variables": { + "variable or function name 1": "description of variable or function 1", + "variable or function name 2": "description of variable or function 2" + } + } + } + + +extract_factor_formulation_user: |- + ===========================Report content:============================= + {{ report_content }} + ===========================Factor list in dataframe============================= + {{ factor_dict }} + +classify_system_chinese: |- + 你是一个研报分类助手。用户会输入一篇金融研报。请按照要求回答: + 因子指能够解释资产收益率或价格等的变量;而模型则指机器学习或深度学习模型,利用因子等变量来预测价格或收益率变化。 + + 请你对研报进行分类,考虑两个条件: + 1. 是金工量化领域中选股(需与择时,选基等严格区分开)方面的研报; + 2. 涉及了因子或模型的构成,或者是测试了它们的表现。 + 如果研报同时满足上述两个条件,请输出1;若没有,请输出0。 + + 请使用json进行回答。json key为:class + +classify_system: |- + Your job is classify whether the user input document is a quantitative investment research report. The user will input a document and you should classify it based on the following conditions: + 1. 
The document is about finance rather than other fields like biology, physics, chemistry, etc. + 2. The document is a research report on stock selection (which needs to be strictly separated from market timing and fund selection) in the field of quantitative financial engineering. + 3. The document involves the composition of factors or models, or tests their performance. + + If the document meets all the above conditions, please return 1; otherwise, please return 0. + + Please respond with your decision in JSON format. Just respond the output json string without any interaction and explanation. + The JSON schema should include: + { + "class": 1 + } + +factor_viability_system: |- + User has designed several factors in quant investment. Please help the user to check the viability of these factors. + These factors are used to build a daily frequency strategy in China A-share market. + + User will provide a pandas dataframe like table containing following information: + 1. The name of the factor; + 2. The simple description of the factor; + 3. The formulation of the factor in latex format; + 4. The description to the variables and functions in the formulation of the factor. + + User has several source data: + 1. The Stock Trade Data Table containing information about stock trades, such as daily open, close, high, low, vwap prices, volume, and turnover; + 2. The Financial Data Table containing company financial statements such as the balance sheet, income statement, and cash flow statement; + 3. The Stock Fundamental Data Table containing basic information about stocks, like total shares outstanding, free float shares, industry classification, market classification, etc; + 4. The high frequency data containing price and volume of each stock containing open close high low volume vwap in each minute; + 5. The Consensus Expectations Factor containing the consensus expectations of the analysts about the future performance of the company.
+ + + A viable factor should satisfy the following conditions: + 1. The factor should be able to be calculated in daily frequency; + 2. The factor should be able to be calculated based on each stock; + 3. The factor should be able to be calculated based on the source data provided by user. + + You should give decision to each factor provided by the user. You should reject the factor based on very solid reason. + Please return true to the viable factor and false to the non-viable factor. + + Notice, you can just return part of the factors due to token limit. Your factor name should be the same as the user's factor name. + + Please respond with your decision in JSON format. Just respond the output json string without any interaction and explanation. + The JSON schema should include: + { + "Name to factor 1": + { + "viability": true, + "reason": "The reason to the viability of this factor" + }, + "Name to factor 2": + { + "viability": false, + "reason": "The reason to the non-viability of this factor" + }, + "Name to factor 3": + { + "viability": true, + "reason": "The reason to the viability of this factor" + } + } + +factor_relevance_system: |- + User has designed several factors in quant investment. Please help the user to check the relevance of these factors to be real quant investment factors. + These factors are used to build a daily frequency strategy in China A-share market. + + User will provide a pandas dataframe like table containing following information: + 1. The name of the factor; + 2. The simple description of the factor; + 3. The formulation of the factor in latex format; + 4. The description to the variables and functions in the formulation of the factor. + + A relevant factor should satisfy the following conditions: + 1. The factor should be able to be calculated in daily frequency; + 2. The factor should be able to be calculated based on each stock; + 3.
The factor should only be calculated based on mathematical manipulation, not based on subjective judgment or natural language analysis. + + You should give decision to each factor provided by the user. You should reject the factor based on very solid reason. + Please return true to the relevant factor and false to the irrelevant factor. + + Notice, you can just return part of the factors due to token limit. Your factor name should be the same as the user's factor name. + + Please respond with your decision in JSON format. Just respond the output json string without any interaction and explanation. + The JSON schema should include: + { + "Name to factor 1": + { + "relevance": true, + "reason": "The reason to the relevance of this factor" + }, + "Name to factor 2": + { + "relevance": false, + "reason": "The reason to the non-relevance of this factor" + }, + "Name to factor 3": + { + "relevance": true, + "reason": "The reason to the relevance of this factor" + } + } + + +factor_duplicate_system: |- + User has designed several factors in quant investment. Please help the user to deduplicate these factors. + These factors are used to build a daily frequency strategy in China A-share market. + + User will provide a pandas dataframe like table containing following information: + 1. The name of the factor; + 2. The simple description of the factor; + 3. The formulation of the factor in latex format; + 4. The description to the variables and functions in the formulation of the factor. + + User wants to find whether there are duplicated groups. The factors in a duplicate group should satisfy the following conditions: + 1. They might differ in the name, description, formulation, or the description to the variables and functions in the formulation, some upper or lower case difference is included; + 2. They should be talking about exactly the same factor; + 3. If horizon information like 1 day, 5 days, 10 days, etc is provided, the horizon information should be the same.
+ + To make your response valid, we have some very important constraint for you to follow! Listed here: + 1. You should be very confident to put duplicated factors into a group; + 2. A group should contain at least two factors; + 3. To a factor which has no duplication, don't put them into your response; + 4. To avoid merging too many similar factor, don't put more than ten factors into a group! + You should always follow the above constraints to make your response valid. + + Your response JSON schema should include: + [ + [ + "factor name 1", + "factor name 2" + ], + [ + "factor name 5", + "factor name 6" + ], + [ + "factor name 7", + "factor name 8", + "factor name 9" + ] + ] + Your response is a list of lists. Each list represents a duplicate group containing all the factor names in this group. + The factor names in the list should be unique and the factor names should be the same as the user's factor name. + To avoid reaching token limit, don't respond more than fifty groups in one response. You should respond the output json string without any interaction and explanation. \ No newline at end of file diff --git a/alphaagent/scenarios/qlib/prompts_alphaagent.yaml b/alphaagent/scenarios/qlib/prompts_alphaagent.yaml new file mode 100755 index 00000000..5e0a6411 --- /dev/null +++ b/alphaagent/scenarios/qlib/prompts_alphaagent.yaml @@ -0,0 +1,411 @@ +potential_direction_transformation: |- + It's the first round, the user provided a potential direction: "{{ potential_direction }}". Referring to it, you need to transform it into a hypothesis in formal language that is clear and actionable for factor generation. Consider the following aspects while formulating the hypothesis: + 1. **Clarity**: Ensure the hypothesis is specific and unambiguous. + 2. **Actionability**: The hypothesis should suggest a clear path for experimentation or investigation. + 3. **Relevance**: Ensure the hypothesis is directly related to the potential direction provided by the user. 
+ + +hypothesis_and_feedback: |- + {% for hypothesis, experiment, feedback in trace.hist[-10:] %} + Hypothesis {{ loop.index }}: {{ hypothesis }} + Corresponding Code (that leads to the difference in performance): {{experiment.sub_workspace_list[0].code_dict.get("model.py")}} + Observation on the result with the hypothesis: {{ feedback.observations }} + Feedback on the original hypothesis: {{ feedback.hypothesis_evaluation }} + New Feedback for Context (For you to agree or improve upon): {{ feedback.new_hypothesis }} + Reasoning for new hypothesis: {{ feedback.reason }} + Did changing to this hypothesis work? (focus on the change): {{ feedback.decision }} + {% endfor %} + +hypothesis_output_format: |- + The output should follow JSON format. Do not add any other text in your response. The schema is as follows: + { + "hypothesis": "A SINGLE LINE OF TEXT. The new hypothesis generated based on the information provided.", + "concise_knowledge": "A SINGLE LINE OF TEXT. Transferable knowledge based on theoretical principles. Use conditional grammar. eg. 'If...., ..; When..., .; and etc' Make sure that you state things clearly without ambiguity. Eg. avoid saying 'previous hypothesis', because one wouldn't know what that is.", + "concise_observation": "A SINGLE LINE OF TEXT. It focuses on the observation of the given scenario, data characteristics, or previous experiences (failures & successes).", + "concise_justification": "A SINGLE LINE OF TEXT. Justify the hypothesis based on theoretical principles or initial assumptions.", + "concise_specification": "A SINGLE LINE OF TEXT. Define the scope, conditions, constraints of the hypothesis. Specify the expected relationships, variables, and thresholds, ensuring testability and relevance to the observed data." + } + + +factor_hypothesis_specification: |- + 1. **Data-Driven Hypothesis Formation:** + - Ground hypotheses within the scope of available data for seamless testing.
+ - Align hypotheses with the temporal, cross-sectional, and distributional properties of the data. + - Avoid overfitting by focusing on robust, economically intuitive, and innovative relationships. + + 2. **Justification of the Hypothesis:** + - Use observed market patterns to creatively infer underlying economic or behavioral drivers. + - Build on empirical evidence while exploring innovative connections or untested relationships. + - Propose actionable insights that challenge conventional assumptions, yet remain testable. + - Emphasize the factor's potential to uncover unique, predictive market behaviors. + + 3. **Continuous Optimization and Exploration:** + - Refine the first hypothesis iteratively by testing across different variants. + - Incorporate feedback from empirical results to enhance the factor's predictive power. + + +function_lib_description: |- + Only the following operations are allowed in expressions: + ### **Cross-sectional Functions** + - **RANK(A)**: Ranking of each element in the cross-sectional dimension of A. + - **ZSCORE(A)**: Z-score of each element in the cross-sectional dimension of A. + - **MEAN(A)**: Mean value of each element in the cross-sectional dimension of A. + - **STD(A)**: Standard deviation in the cross-sectional dimension of A. + - **SKEW(A)**: Skewness in the cross-sectional dimension of A. + - **KURT(A)**: Kurtosis in the cross-sectional dimension of A. + - **MAX(A)**: Maximum value in the cross-sectional dimension of A. + - **MIN(A)**: Minimum value in the cross-sectional dimension of A. + - **MEDIAN(A)**: Median value in the cross-sectional dimension of A + + ### **Time-Series Functions** + - **DELTA(A, n)**: Change in value of A over n periods. + - **DELAY(A, n)**: Value of A delayed by n periods. + - **TS_MEAN(A, n)**: Mean value of sequence A over the past n days. + - **TS_SUM(A, n)**: Sum of sequence A over the past n days. + - **TS_RANK(A, n)**: Time-series rank of the last value of A in the past n days. 
+ - **TS_ZSCORE(A, n)**: Z-score for each sequence in A over the past n days. + - **TS_MEDIAN(A, n)**: Median value of sequence A over the past n days. + - **TS_PCTCHANGE(A, p)**: Percentage change in the value of sequence A over p periods. + - **TS_MIN(A, n)**: Minimum value of A in the past n days. + - **TS_MAX(A, n)**: Maximum value of A in the past n days. + - **TS_ARGMAX(A, n)**: The index (relative to the current time) of the maximum value of A over the past n days. + - **TS_ARGMIN(A, n)**: The index (relative to the current time) of the minimum value of A over the past n days. + - **TS_QUANTILE(A, p, q)**: Rolling quantile of sequence A over the past p periods, where q is the quantile value between 0 and 1. + - **TS_STD(A, n)**: Standard deviation of sequence A over the past n days. + - **TS_VAR(A, p)**: Rolling variance of sequence A over the past p periods. + - **TS_CORR(A, B, n)**: Correlation coefficient between sequences A and B over the past n days. + - **TS_COVARIANCE(A, B, n)**: Covariance between sequences A and B over the past n days. + - **TS_MAD(A, n)**: Rolling Median Absolute Deviation of sequence A over the past n days. + - **PERCENTILE(A, q, p)**: Quantile of sequence A, where q is the quantile value between 0 and 1. If p is provided, it calculates the rolling quantile over the past p periods. + - **HIGHDAY(A, n)**: Number of days since the highest value of A in the past n days. + - **LOWDAY(A, n)**: Number of days since the lowest value of A in the past n days. + - **SUMAC(A, n)**: Cumulative sum of A over the past n days. + + ### **Moving Averages and Smoothing Functions** + - **SMA(A, n, m)**: Simple moving average of A over n periods with modifier m. + - **WMA(A, n)**: Weighted moving average of A over n periods, with weights decreasing from 0.9 to 0.9^(n). + - **EMA(A, n)**: Exponential moving average of A over n periods, where the decay factor is 2/(n+1). 
+ - **DECAYLINEAR(A, d)**: Linearly weighted moving average of A over d periods, with weights increasing from 1 to d. + + ### **Mathematical Operations** + - **PROD(A, n)**: Product of values in A over the past n days. Use `*` for general multiplication. + - **LOG(A)**: Natural logarithm of each element in A. + - **SQRT(A)**: Square root of each element in A. + - **POW(A, n)**: Raise each element in A to the power of n. + - **SIGN(A)**: Sign of each element in A, one of 1, 0, or -1. + - **EXP(A)**: Exponential of each element in A. + - **ABS(A)**: Absolute value of A. + - **MAX(A, B)**: Maximum value between A and B. + - **MIN(A, B)**: Minimum value between A and B. + - **INV(A)**: Reciprocal (1/x) of each element in sequence A. + - **FLOOR(A)**: Floor of each element in sequence A. + + ### **Conditional and Logical Functions** + - **COUNT(C, n)**: Count of samples satisfying condition C in the past n periods. Here, C is a logical expression, e.g., `$close > $open`. + - **SUMIF(A, n, C)**: Sum of A over the past n periods if condition C is met. Here, C is a logical expression. + - **FILTER(A, C)**: Filtering multi-column sequence A based on condition C. Here, C is presented in a logical expression form, with the same size as A. + - **(C1)&&(C2)**: Logical operation "and". Both C1 and C2 are logical expressions, such as A > B. + - **(C1)||(C2)**: Logical operation "or". Both C1 and C2 are logical expressions, such as A > B. + - **(C1)?(A):(B)**: Logical operation "If condition C1 holds, then A, otherwise B". C1 is a logical expression, such as A > B. + + ### **Regression and Residual Functions** + - **SEQUENCE(n)**: A single-column sequence of length n, ranging from 1 to integer n. `SEQUENCE()` should always be nested in `REGBETA()` or `REGRESI()` as argument B. + - **REGBETA(A, B, n)**: Regression coefficient of A on B using the past n samples, where A MUST be a multi-column sequence and B a single-column or multi-column sequence. 
+ - **REGRESI(A, B, n)**: Residual of regression of A on B using the past n samples, where A MUST be a multi-column sequence and B a single-column or multi-column sequence. + + ### **Technical Indicators** + - **RSI(A, n)**: Relative Strength Index of sequence A over n periods. Measures momentum by comparing the magnitude of recent gains to recent losses. + - **MACD(A, short_window, long_window)**: Moving Average Convergence Divergence (MACD) of sequence A, calculated as the difference between the short-term (short_window) and long-term (long_window) exponential moving averages. + - **BB_MIDDLE(A, n)**: Middle Bollinger Band, calculated as the n-period simple moving average of sequence A. + - **BB_UPPER(A, n)**: Upper Bollinger Band, calculated as middle band plus two standard deviations of sequence A over n periods. + - **BB_LOWER(A, n)**: Lower Bollinger Band, calculated as middle band minus two standard deviations of sequence A over n periods. + + + Note that: + - Only the variables provided in data (e.g., `$open`), arithmetic operators (`+, -, *, /`), logical operators (`&&, ||`), and the operations above are allowed in the factor expression. + - Make sure your factor expression contain at least one variables within the dataframe columns (e.g. $open), combined with registered operations above. Do NOT use any undeclared variable (e.g. 'n', 'w_1') and undefined symbols (e.g., '=') in the expression. + - Pay attention to the distinction between operations with the TS prefix (e.g., `TS_STD()`) and those without (e.g., `STD()`). + + +factor_experiment_output_format: |- + Do NOT use any undeclared variables. The factor expression should be strictly based on the function library (e.g. `RANK(.)`) and the variables provided in data (e.g., `$open`). + The output should follow JSON format without other content. 
The schema is as follows: + { + "factor name 1": { + "description": "description of factor 1", + "variables": { + "variable or function name 1": "description of variable or function 1", + "variable or function name 2": "description of variable or function 2" + }, + "formulation": "A LaTeX formula of factor 1", + "expression": "An expression of factor 1, based on the functions and variables mentioned" + }, + "factor name 2": { + "description": "description of factor 2", + "variables": { + "variable or function name 1": "description of variable or function 1", + "variable or function name 2": "description of variable or function 2" + }, + "formulation": "A LaTeX formula of factor 2", + "expression": "An expression of factor 2, based on the functions and variables mentioned" + } + # Don't add ellipsis (...) or any filler text that might cause JSON parsing errors here! + } + + Here is an example: + { + "Normalized_Intraday_Range_Factor_10D": { + "description": "This factor integrates candlestick movement patterns with market volatility to enhance predictive accuracy for short-term price movements. The factor computes the normalized difference between the candlestick body size and the standard deviation of closing prices over a 10-day period.", + "variables": { + "$close": "Close price of the stock on that day.", + "$open": "Open price of the stock on that day.", + "ABS(A)": "Absolute value of A.", + "TS_STD(A, n)": "Standard deviation of sequence A over the past n days."
+ }, + "formulation": "NIR_\\text{10D} = \\frac{\\text{ABS}(\\text{close} - \\text{open})}{\\text{TS_STD}(\\text{close}, 10)}", + "expression": "ABS($close - $open) / (TS_STD($close, 10) + 1e-8)" + }, + "Volume_Range_Correlation_Factor_20D": { + "description": "This factor measures the correlation between the candlestick range (high - low) and the trading volume over a 20-day period, aiming to capture the relationship between price range and market participation.", + "variables": { + "$high": "High price of the stock on that day.", + "$low": "Low price of the stock on that day.", + "$volume": "Volume of the stock on that day.", + "TS_CORR(A, B, n)": "Correlation coefficient between sequences A and B over the past n days." + }, + "formulation": "VRC_\\text{20D} = \\text{TS_CORR}(\\text{high} - \\text{low}, \\text{volume}, 20)", + "expression": "TS_CORR($high - $low, $volume, 20)" + } + } + +factor_feedback_generation: + system: |- + Please understand the following operation logic and then make your feedback that is suitable for the scenario: + + {{ scenario }} + + You will receive a hypothesis, multiple tasks with their factors, their results, and the SOTA result. + Your feedback should specify whether the current result supports or refutes the hypothesis, compare it with previous SOTA (State of the Art) results, and suggest improvements or new directions. + Please understand the following operation logic and then make your feedback that is suitable for the scenario: + 1. Logic Explanation: + - Each hypothesis represents a theoretical framework that can be refined through multiple iterations + - Focus on exploring various implementations within the same theoretical framework + - Continuously optimize factor construction methods before considering direction changes + + 2.
Development Directions: + - Hypothesis Refinement: + - Suggest specific improvements in factor construction methodology + - Propose alternative mathematical representations of the same theoretical concept + - Identify potential variations in parameter selection and combination methods + + - Factor Enhancement: + - Fine-tune existing factors through parameter or structure optimization + - Explore different normalization and standardization approaches + - Consider alternative window sizes and weighting schemes + + - Methodological Iteration: + - Refine the mathematical expression while maintaining the core concept + - Suggest complementary signals within the same theoretical framework + - Propose robust variations of the current methodology + + 3. Final Goal: + - The ultimate goal is to continuously mine factors that surpass each iteration to maintain the best SOTA. + + When analyzing results: + 1. **Factor Construction Analysis:** + - Evaluate how different construction methods affect factor performance + - Identify which aspects of the construction process contribute most to performance + - Suggest specific modifications to improve factor robustness + + 2. **Parameter Sensitivity:** + - Analyze the impact of different parameter choices + - Recommend parameter ranges for further exploration + - Identify critical components in the factor construction process + + Focus on Continuous Refinement: + - Exhaust all possible variations within the current theoretical framework + - Document the effectiveness of different implementation approaches + + Please provide detailed and constructive feedback for future exploration. + Respond in JSON format. 
Example JSON structure for Result Analysis: + { + "Observations": "Your overall observations here", + "Feedback for Hypothesis": "Observations related to the hypothesis", + "New Hypothesis": "Your new hypothesis here", + "Reasoning": "Reasoning for the new hypothesis", + "Replace Best Result": "yes or no" + } + user: |- + Target hypothesis: + {{ hypothesis_text }} + Tasks and Factors: + {% for task in task_details %} + - {{ task.factor_name }}: {{ task.factor_description }} + - Factor Formulation: {{ task.factor_formulation }} + - Variables: {{ task.variables }} + - Factor Implementation: {{ task.factor_implementation }} + {% if task.factor_implementation == "False" %} + **Note: This factor was not implemented in the current experiment. Only the hypothesis for implemented factors can be verified.** + {% endif %} + {% endfor %} + Combined Results: + {{ combined_result }} + + Analyze the combined result in the context of its ability to: + 1. Support or refute the hypothesis. + 2. Show improvement or deterioration compared to the SOTA experiment. + + Evaluation Metrics Explanations: + Below are the financial meanings of each metric, which should be used to judge the results: + + - 1day.excess_return_without_cost.max_drawdown: Measures the maximum loss from a peak to a trough without considering transaction costs. (the smaller the better) + - 1day.excess_return_without_cost.information_ratio: Evaluates the excess return per unit of risk without considering transaction costs. (the bigger the better) + - 1day.excess_return_without_cost.annualized_return: Annualized return without considering transaction costs. (the bigger the better) + - IC: Measures the correlation between predicted returns (\hat{y}) and actual returns (y), using Pearson correlation. (the bigger the better) + + When judging the results: + 1. 
**Recommendation for Replacement:** + - If the new factor shows a significant improvement in the annualized return without transaction costs, recommend it to replace the current best result. + - If the annualized return and any other single metric are better than SOTA, recommend the replacement. + - Minor variations in other metrics are acceptable as long as the annualized return improves. + + Note: Only factors with 'Factor Implementation' as True are implemented and tested in this experiment. If 'Factor Implementation' is False, the hypothesis for that factor cannot be verified in this run. + + +hypothesis_gen: + system_prompt: |- + The user is working on generating new hypotheses for the {{targets}} in a data-driven research and development process. + The {{targets}} are used in the following scenario: + {{scenario}} + The user has already proposed several hypotheses and conducted evaluations on them. This information will be provided to you. + Your task is to check whether a hypothesis has already been generated. If one exists, follow it or generate an improved version. + {% if hypothesis_specification %} + To assist you in formulating new hypotheses, the user has provided some additional information: + {{hypothesis_specification}}. + **Important:** If the hypothesis_specification outlines the next steps you need to follow, ensure you adhere to those instructions. + {% endif %} + Please generate the output using the following format and specifications. Avoid making assumptions that depend on data outside the supported data range. + {{ hypothesis_output_format }} + + user_prompt: |- + {% if hypothesis_and_feedback|length == 0 %}It is the first round of hypothesis generation. The user has no hypothesis on this scenario yet. You are encouraged to propose an innovative hypothesis that diverges significantly from existing perspectives. 
+ {% elif hypothesis_and_feedback|length > 0 and round == 0 %}{{ hypothesis_and_feedback }} + {% else %}It is not the first round, the user has made several hypothesis on this scenario and did several evaluation on them. + The former hypothesis and the corresponding feedbacks are as follows (focus on the last one & the new hypothesis that it provides and reasoning to see if you agree): + {{ hypothesis_and_feedback }} + {% endif %} + {% if RAG %} + To assist you in generating new {{targets}}, we have provided the following information: {{RAG}}. + **Note:** The provided RAG is for reference only. + You must carefully assess whether the RAG aligns with the {{targets}}. + If it does not, it should not be used. Exercise caution and make your own judgment. + {% endif %} + Also generate the relevant keys for the reasoning and the distilled knowledge that follows. For those keys, in particular for knowledge, explain in the context of the specific scenario to build up domain knowledge in the specific field rather than general knowledge. + +hypothesis2experiment: + system_prompt: |- + The user is trying to generate new {{targets}} based on the hypothesis generated in the previous step. + The {{targets}} are used in certain scenario, the scenario is as follows: + {{ scenario }} + + The user will use the {{targets}} generated to do some experiments. The user will provide this information to you: + 1. The target hypothesis you are targeting to generate {{targets}} for. + 2. The hypothesis generated in the previous steps and their corresponding feedbacks. + 3. Former proposed {{targets}} on similar hypothesis. + 4. Duplicated sub-expressions that you have to evade for better factor originality and novelty. + 5. Some additional information to help you generate new {{targets}}. + + + 1. **2-3 Factors per Generation:** + - Ensure each generation produces 2-3 factors. + - Balance simplicity and innovation to build a robust factor library. + - Note that each factor is independent. 
Please do NOT reference other factors within the factor expression. + + + 2.**Key Considerations in Factor Construction:** + - **Data Preprocessing and Standardization:** + - Avoid using raw prices and volumes directly due to scale differences + - Use relative changes or standardized data (e.g., RANK(), ZSCORE()) + - Convert prices to returns, e.g. `(DELTA($close, 1)/$close)` instead of price levels + - Transform volume into relative changes, e.g. `(DELTA($volume, 1)/$volume)` + + - **Time Series Processing:** + - Consider appropriate sample periods for indicators requiring historical data + - Choose suitable window sizes for moving averages SMA(), EMA(), WMA() + + - **Normalization and Stability:** + - Add small constants (e.g., 1e-8) to denominators to prevent division by zero + - Use TS_ZSCORE() for factor value standardization + - Consider SIGN() to reduce impact of extreme values + - Apply MAX(MIN(x, upper), lower) for value truncation + + - **Cross-sectional Treatment:** + - Apply RANK() or ZSCORE() for cross-sectional comparability + - Use FILTER() for outlier handling + - Ensure sufficient window length for correlation calculations + + - **Robustness Considerations:** + - Validate factor stability across multiple time windows + - Consider TS_MEDIAN() over TS_MEAN() to reduce outlier impact + - Apply moving averages to smooth high-frequency variations + + - **Flexibility Considerations:** + - Allow for a range of values or flexibility when defining factors, rather than imposing strict equality constraints. + - For example, in expression `(TS_MIN($low, 10) == DELAY(TS_MIN($low, 10), 1))`, `==` is too restrictive. + - Instead, use a range-based approach like: `(TS_MIN($low, 10) < DELAY(TS_MIN($low, 10), 1) + 1/10 * TS_STD($low, 20)) && (TS_MIN($low, 10) > DELAY(TS_MIN($low, 10), 1) - 1/10 * TS_STD($low, 20))`. 
+ + - **Handling Duplicated Sub-expressions:** + - When given specific duplicated sub-expressions to avoid, ensure new factor expressions use alternative calculations + - Replace duplicated patterns with semantically similar but structurally different expressions + - For example, if `ABS($close - $open)` is flagged as duplicated: + - Consider using `($high - $low)` for price range + - Use `SIGN($close - $open) * ($close - $open)` for directional magnitude + - Explore other price difference combinations like `($high - $low) / ($open + $close)` + - Maintain factor interpretability while avoiding structural repetition + - Focus on unique combinations of operators and variables to ensure originality + + Please generate the output following the format below: + {{ experiment_output_format }} + + Strictly adhere to the syntax requirements of factor expressions; do not use undeclared variables (e.g., n) or functions. + + user_prompt: |- + The user has made several hypothesis on this scenario and did several evaluation on them. + The target hypothesis you are targeting to generate {{targets}} for is as follows: + {{ target_hypothesis }} + + The former hypothesis and the corresponding feedbacks are as follows: + {{ hypothesis_and_feedback }} + + When constructing factor expressions, you are restricted to utilizing only the following daily-level variable: + - $open: open price of the stock on that day. + - $close: close price of the stock on that day. + - $high: high price of the stock on that day. + - $low: low price of the stock on that day. + - $volume: volume of the stock on that day. + - $return: daily return of the stock on that day. 
+ + Allowed operators and functions in factor expressions are: + {{function_lib_description}} + + + {% if expression_duplication %} + **Alert: Duplication Detected in Previous Factor Expressions** + {{ expression_duplication }} + + Recommendations: + - Avoid the duplicated sub-expressions above + - Generate novel factor by uniquely combining data variables and operations + - Experiment with a mix of mathematical operations (e.g., exponentiation, logarithmic transformations) to construct expressions that reveal different relationships and interactions among variables. + - Replace raw variables with transformed variants to enhance expressiveness, such as using `$open`, `$close/TS_MEAN($close, 10)`, or `($open + $close) / 2` instead of `$close` to normalize or adjust for trends. + {% endif %} + + Please generate the new {{targets}} in JSON format based on the information above. + + + +expression_duplication: |- + - Proposed Expression: {{ prev_expression }} + - Duplicated Sub-expression Size: {{ duplicated_subtree_size }} + - Duplicated Sub-expression: {{ duplicated_subtree }} + diff --git a/alphaagent/scenarios/qlib/prompts_rdagent.yaml b/alphaagent/scenarios/qlib/prompts_rdagent.yaml new file mode 100755 index 00000000..e0c4b4e2 --- /dev/null +++ b/alphaagent/scenarios/qlib/prompts_rdagent.yaml @@ -0,0 +1,266 @@ +hypothesis_and_feedback: |- + {% for hypothesis, experiment, feedback in trace.hist[-10:] %} + Hypothesis {{ loop.index }}: {{ hypothesis }} + Corresponding Code (that leads to the difference in performance): {{experiment.sub_workspace_list[0].code_dict.get("model.py")}} + Observation on the result with the hypothesis: {{ feedback.observations }} + Feedback on the original hypothesis: {{ feedback.hypothesis_evaluation }} + New Feedback for Context (For you to agree or improve upon): {{ feedback.new_hypothesis }} + Reasoning for new hypothesis: {{ feedback.reason }} + Did changing to this hypothesis work? 
(focus on the change): {{ feedback.decision }} + {% endfor %} + +hypothesis_output_format: |- + The output should follow JSON format, without any other content. The schema is as follows: + { + "hypothesis": "The new hypothesis generated based on the information provided.", + "reason": "The reason why you generate this hypothesis. It should be comprehensive and logical. It should cover the other keys below and extend them.", + "concise_reason": "Two-line summary. First line focuses on a concise justification for the change. Second line generalizes a knowledge statement.", + "concise_observation": "One line summary. It focuses on the observation of the given scenario, data characteristics, or previous experiences (failures & successes).", + "concise_justification": "One line summary. Justify the hypothesis based on theoretical principles or initial assumptions.", + "concise_knowledge": "One line summary. Transferable knowledge based on theoretical principles. Use conditional grammar, e.g. 'If...., ..; When..., .; and etc'. Make sure that you state things clearly without ambiguity. E.g. avoid saying 'previous hypothesis', because one wouldn't know what that is." + } + + + +model_hypothesis_specification: |- + Additional Specifications: + + Hypotheses should grow and evolve based on the previous hypothesis. If there is no previous hypothesis, start with something simple. Gradually Build Up Upon previous hypothesis & feedbacks. In each round, hypothesis is different. Pay attention to your previous hypothesis. + + Ensure that the hypothesis focuses on the architecture of a PyTorch model. Each hypothesis should address specific architectural choices such as the type of layers, activation functions, regularization techniques, and the overall structure of the model. Avoid hypotheses related to input features or optimization processes. + + Remember: if there is no hypothesis, start with something simple like MLP. + + Usually, a larger model works better than a smaller one.
+ + Logic for generating a new hypothesis: If the previous hypothesis works, try to inherit from it and grow deeper. If the previous hypothesis doesn't work, try to make changes in the current level. + + Sample hypothesis evolution loop: (This is the entire loop, see what stage you are at. We want hypothesis to continue growing.) Levels include **Model Type**, **Layer Configuration**, **Activation Functions**, **Regularization Techniques** + + 1st Round Hypothesis: The model should be a CNN. + + 2nd Round Hypothesis (If first round worked: CNN is the model type level, which means that we should extend to the next level, like layer configuration): The model should be a CNN. The CNN should have 5 convolutional layers. (Reasoning: As CNN worked, we now specify the layers specification to grow the hypothesis deeper.) + + 3rd Round Hypothesis (If second round didn't work): The model should be a CNN. The CNN should have 3 convolutional layers. (Reasoning: As 5-layer structure didn't work in the 2nd round hypothesis, try something else within the layer configuration level.) + + 4th Round Hypothesis (If third round worked): The model should be a CNN. The CNN should have 3 convolutional layers. Use Leaky ReLU activation for all layers. (As last round worked, now proceed to the next level: activation functions) + + 5th Round Hypothesis (If fourth round worked): The model should be a CNN. The CNN should have 3 convolutional layers. Use Leaky ReLU activation for all layers. Use dropout regularization with a rate of 0.5. (Similar Reasoning & Continuing to Grow to the dropout setup) + + 6th Round Hypothesis (If fifth round didn't work): The model should be a CNN. The CNN should have 3 convolutional layers. Use Leaky ReLU activation for all layers. Use dropout regularization with a rate of 0.3. (Reasoning: As regularisation rate of 0.5 didn't work, we only change the regularisation rate and keep the other elements that worked. This means making changes in the current level.)
+ +factor_hypothesis_specification: |- + 1. **Type of Factor and Financial Trends:** + - Define the type of factor introduced. + - Explain the financial trends or market behaviors indicated by this factor. + - Omit unnecessary or redundant details. + + 2. **Simple and Effective Factors First:** + - Start with factors that are simple and likely effective. + - Concisely explain why these factors are expected to work. + - Avoid complex or combined factors initially. + + 3. **Gradual Complexity Increase:** + - Introduce more complex factors as more experimental results are gathered. + - Discuss potential advantages and complexities. + - Combine factors only after simpler ones are tested and validated. + + 4. **New Directions and Optimizations:** + - If a new direction is needed, explain why based on financial principles, economic theories, or market behaviors. + - Suggest only one new direction at a time for clarity. + - If a previous hypothesis did not surpass SOTA but seems optimizable, you may continue in the same direction. + - Highlight that factors surpassing SOTA are included in the library to avoid re-implementation. + + 5. **1-2 Factors per Generation:** + - Ensure each generation produces 1-2 factors. + - Balance simplicity and complexity to build a robust factor library. + +factor_experiment_output_format: |- + The output should follow JSON format, without any other content. 
The schema is as follows: + { + "factor name 1": { + "description": "description of factor 1", + "formulation": "latex formulation of factor 1", + "variables": { + "variable or function name 1": "description of variable or function 1", + "variable or function name 2": "description of variable or function 2" + } + }, + "factor name 2": { + "description": "description of factor 2", + "formulation": "latex formulation of factor 2", + "variables": { + "variable or function name 1": "description of variable or function 1", + "variable or function name 2": "description of variable or function 2" + } + } + # Don't add ellipsis (...) or any filler text that might cause JSON parsing errors here! + } + +model_experiment_output_format: |- + So far please only design one model to test the hypothesis! + The output should follow JSON format. The schema is as follows: + { + "model_name 1 (The name of the model)": { + "description": "A detailed description of the model", + "formulation": "A LaTeX formula representing the model's formulation", + "architecture": "A detailed description of the model's architecture, e.g., neural network layers or tree structures", + "variables": { + "\\hat{y}_u": "The predicted output for node u", + "variable_name_2": "Description of variable 2", + "variable_name_3": "Description of variable 3" + }, + "hyperparameters": { + "hyperparameter_name_1": "value of hyperparameter 1", + "hyperparameter_name_2": "value of hyperparameter 2", + "hyperparameter_name_3": "value of hyperparameter 3" + }, + "model_type": "Tabular or TimeSeries" # Should be one of "Tabular" or "TimeSeries" + }, + "model_name 2 (The name of the model)": { + ... + } + } + Usually a larger model works better than a smaller one. Hence, the parameters should be larger. + +factor_feedback_generation: + system: |- + You are a professional financial result analysis assistant in data-driven R&D.
+ The task is described in the following scenario: + + {{ scenario }} + + You will receive a hypothesis, multiple tasks with their factors, their results, and the SOTA result. + Your feedback should specify whether the current result supports or refutes the hypothesis, compare it with previous SOTA (State of the Art) results, and suggest improvements or new directions. + + Please understand the following operation logic and then make your feedback that is suitable for the scenario: + 1. Logic Explanation: + - If the previous hypothesis factor surpasses the SOTA, include this factor in the SOTA factor library. + - New experiments will generate new factors, which will be combined with the factors in the SOTA library. + - These combined factors will be backtested and compared against the current SOTA to continuously iterate. + 2. Development Directions: + - New Direction: + - Propose a new factor direction for exploration and development. + - Optimization of Existing Direction: + - If the previous experiment's factor replaced the SOTA, suggest further improvements to that factor. + - Clearly specify the differences in name and improvements compared to the previous factor. + - Continued Research: + - If the previous experiment's factor did not replace the SOTA, suggest ways to optimize and develop factors in this direction. + 3. Final Goal: + - The ultimate goal is to continuously accumulate factors that surpass each iteration to maintain the best SOTA. + + When judging the results: + 1. **Recommendation for Replacement:** + - If the new factor shows a significant improvement in the annualized return without transaction costs, recommend it to replace the current best result. + - If the annualized return and any other single metric are better than SOTA, recommend the replacement. + - Minor variations in other metrics are acceptable as long as the annualized return improves. 
+ + Consider Changing Direction for Significant Gaps with SOTA: + - If the new results significantly differ from the SOTA, consider exploring a new direction. + - Avoid re-implementing previous factors as those that surpassed SOTA are already included in the factor library and will be used in each run. + + Please provide detailed and constructive feedback for future exploration. + Respond in JSON format. Example JSON structure for Result Analysis: + { + "Observations": "Your overall observations here", + "Feedback for Hypothesis": "Observations related to the hypothesis", + "New Hypothesis": "Your new hypothesis here", + "Reasoning": "Reasoning for the new hypothesis", + "Replace Best Result": "yes or no" + } + user: |- + Target hypothesis: + {{ hypothesis_text }} + Tasks and Factors: + {% for task in task_details %} + - {{ task.factor_name }}: {{ task.factor_description }} + - Factor Formulation: {{ task.factor_formulation }} + - Variables: {{ task.variables }} + - Factor Implementation: {{ task.factor_implementation }} + {% if task.factor_implementation == "False" %} + **Note: This factor was not implemented in the current experiment. Only the hypothesis for implemented factors can be verified.** + {% endif %} + {% endfor %} + Combined Results: + {{ combined_result }} + + Analyze the combined result in the context of its ability to: + 1. Support or refute the hypothesis. + 2. Show improvement or deterioration compared to the SOTA experiment. + + Evaluation Metrics Explanations: + Below are the financial meanings of each metric, which should be used to judge the results: + + - 1day.excess_return_without_cost.max_drawdown: Measures the maximum loss from a peak to a trough without considering transaction costs. (the smaller the better) + - 1day.excess_return_without_cost.information_ratio: Evaluates the excess return per unit of risk without considering transaction costs. 
(the bigger the better) + - 1day.excess_return_without_cost.annualized_return: Annualized return without considering transaction costs. (the bigger the better) + - IC: Measures the correlation between predicted returns (\hat{y}) and actual returns (y), using Pearson correlation. (the bigger the better) + + When judging the results: + 1. **Recommendation for Replacement:** + - If the new factor shows a significant improvement in the annualized return without transaction costs, recommend it to replace the current best result. + - If the annualized return and any other single metric are better than SOTA, recommend the replacement. + - Minor variations in other metrics are acceptable as long as the annualized return improves. + + Consider Changing Direction for Significant Gaps with SOTA: + - If the new results significantly differ from the SOTA, consider exploring a new direction. + - Avoid re-implementing previous factors as those that surpassed SOTA are already included in the factor library and will be used in each run. + + Note: Only factors with 'Factor Implementation' as True are implemented and tested in this experiment. If 'Factor Implementation' is False, the hypothesis for that factor cannot be verified in this run. + +model_feedback_generation: + system: |- + You are a professional result analysis assistant. You will receive a result and a hypothesis. + Your task is to provide feedback on how well the result supports or refutes the hypothesis by judging from the observation of performance increase or decrease. + Please provide detailed and constructive feedback. Note that as hypothesis evolve, a general trend should be that the model grows larger. 
+ Example JSON Structure for Result Analysis: + { + "Observations": "Your overall observations here", + "Feedback for Hypothesis": "Observations related to the hypothesis", + "New Hypothesis": "Put your new hypothesis here.", + "Reasoning": "Provide reasoning for the hypothesis here.", + "Decision": true or false + } + + Focus on the changes in hypothesis and justify why the hypothesis evolves like this. Also, increase complexity as the hypothesis evolves (give more layers, more neurons, etc.) + + Logic for generating a new hypothesis: If the previous hypothesis works, try to inherit from it and grow deeper. If the previous hypothesis doesn't work, try to make changes in the current level. + + Sample hypothesis evolution loop: (This is the entire loop, see what stage you are at. We want hypothesis to continue growing.) Levels include **Model Type**, **Layer Configuration**, **Activation Functions**, **Regularization Techniques** + + 1st Round Hypothesis: The model should be a CNN. + + 2nd Round Hypothesis (If first round worked: CNN is the model type level, which means that we should extend to the next level, like layer configuration): The model should be a CNN. The CNN should have 5 convolutional layers. (Reasoning: As CNN worked, we now specify the layers specification to grow the hypothesis deeper.) + + 3rd Round Hypothesis (If second round didn't work): The model should be a CNN. The CNN should have 3 convolutional layers. (Reasoning: As 5-layer structure didn't work in the 2nd round hypothesis, try something else within the layer configuration level.) + + 4th Round Hypothesis (If third round worked): The model should be a CNN. The CNN should have 3 convolutional layers. Use Leaky ReLU activation for all layers. (As last round worked, now proceed to the next level: activation functions) + + 5th Round Hypothesis (If fourth round worked): The model should be a CNN. The CNN should have 3 convolutional layers. Use Leaky ReLU activation for all layers.
Use dropout regularization with a rate of 0.5. (Similar Reasoning & Continuing to Grow to the dropout setup) + + 6th Round Hypothesis (If fifth round didn't work): The model should be a CNN. The CNN should have 3 convolutional layers. Use Leaky ReLU activation for all layers. Use dropout regularization with a rate of 0.3. (Reasoning: As regularisation rate of 0.5 didn't work, we only change the regularisation rate and keep the other elements that worked. This means making changes in the current level.) + + + user: |- + We are in an experiment of finding hypotheses and validating or rejecting them so that in the end we have a powerful model generated. + Here is the context: {{context}}. + + {% if last_hypothesis %} + Last Round Information: + Hypothesis: {{last_hypothesis.hypothesis}} + Task: {{last_task}} + Code Implemented: {{last_code}} + Result: {{last_result}} + {% else %} + This is the first round. No previous information available. As long as the performance is not too negative (e.g., ICIR is greater than 0), treat it as successful. Do not set the threshold too high. + {% endif %} + + Now let's come to this round. You will receive the result and you will evaluate if the performance increases or decreases. + Hypothesis: {{hypothesis.hypothesis}} + Experiment Setup: {{exp.sub_tasks[0]}} + Code Implemented: {{exp.sub_workspace_list[0].code_dict.get("model.py")}} + Relevant Reasoning: {{hypothesis.reason}} + Result: {{exp.result}} + + Compare and observe. Which result has a better return and lower risk? If the performance increases, the hypothesis should be considered positive (working). + Hence, with the hypotheses, relevant reasoning, and results in mind (comparison), provide detailed and constructive feedback and suggest a new hypothesis.
diff --git a/alphaagent/scenarios/qlib/proposal/factor_proposal.py b/alphaagent/scenarios/qlib/proposal/factor_proposal.py new file mode 100755 index 00000000..9655d02d --- /dev/null +++ b/alphaagent/scenarios/qlib/proposal/factor_proposal.py @@ -0,0 +1,490 @@ +import json +from pathlib import Path +from typing import List, Tuple + +from jinja2 import Environment, StrictUndefined + +from alphaagent.components.coder.factor_coder.factor import FactorExperiment, FactorTask +from alphaagent.components.proposal import FactorHypothesis2Experiment, FactorHypothesisGen +from alphaagent.core.prompts import Prompts +from alphaagent.core.proposal import Hypothesis, Scenario, Trace +from alphaagent.core.experiment import Experiment +from alphaagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment +from alphaagent.oai.llm_utils import APIBackend +import os +import pandas as pd +from alphaagent.log import logger +from alphaagent.scenarios.qlib.regulator.factor_regulator import FactorRegulator + +QlibFactorHypothesis = Hypothesis +alphaagent_prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts_alphaagent.yaml") + +class AlphaAgentHypothesis(Hypothesis): + """ + AlphaAgentHypothesis extends the Hypothesis class to include a potential_direction, + which represents the initial idea or starting point for the hypothesis. 
+ """ + + def __init__( + self, + hypothesis: str, + concise_observation: str, + concise_justification: str, + concise_knowledge: str, + concise_specification: str + ) -> None: + super().__init__( + hypothesis, + "", + "", + concise_observation, + concise_justification, + concise_knowledge, + ) + self.concise_specification = concise_specification + + def __str__(self) -> str: + return f"""Hypothesis: {self.hypothesis} + Concise Observation: {self.concise_observation} + Concise Justification: {self.concise_justification} + Concise Knowledge: {self.concise_knowledge} + concise Specification: {self.concise_specification} + """ + +rdagent_prompt_dict = Prompts(file_path=Path(__file__).parent.parent / "prompts_rdagent.yaml") + +class QlibFactorHypothesisGen(FactorHypothesisGen): + def __init__(self, scen: Scenario) -> Tuple[dict, bool]: + super().__init__(scen) + + def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: + hypothesis_and_feedback = ( + ( + Environment(undefined=StrictUndefined) + .from_string(rdagent_prompt_dict["hypothesis_and_feedback"]) + .render(trace=trace) + ) + if len(trace.hist) > 0 + else "No previous hypothesis and feedback available since it's the first round." 
+ ) + context_dict = { + "hypothesis_and_feedback": hypothesis_and_feedback, + "RAG": None, + "hypothesis_output_format": rdagent_prompt_dict["hypothesis_output_format"], + "hypothesis_specification": rdagent_prompt_dict["factor_hypothesis_specification"], + } + return context_dict, True + + def convert_response(self, response: str) -> Hypothesis: + response_dict = json.loads(response) + hypothesis = QlibFactorHypothesis( + hypothesis=response_dict["hypothesis"], + reason=response_dict["reason"], + concise_reason=response_dict["concise_reason"], + concise_observation=response_dict["concise_observation"], + concise_justification=response_dict["concise_justification"], + concise_knowledge=response_dict["concise_knowledge"], + ) + return hypothesis + + +class QlibFactorHypothesis2Experiment(FactorHypothesis2Experiment): + def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict | bool]: + scenario = trace.scen.get_scenario_all_desc() + experiment_output_format = rdagent_prompt_dict["factor_experiment_output_format"] + + hypothesis_and_feedback = ( + ( + Environment(undefined=StrictUndefined) + .from_string(rdagent_prompt_dict["hypothesis_and_feedback"]) + .render(trace=trace) + ) + if len(trace.hist) > 0 + else "No previous hypothesis and feedback available since it's the first round." 
+ ) + + experiment_list: List[FactorExperiment] = [t[1] for t in trace.hist] + + factor_list = [] + for experiment in experiment_list: + factor_list.extend(experiment.sub_tasks) + + return { + "target_hypothesis": str(hypothesis), + "scenario": scenario, + "hypothesis_and_feedback": hypothesis_and_feedback, + "experiment_output_format": experiment_output_format, + "target_list": factor_list, + "RAG": None, + }, True + + def convert_response(self, response: str, trace: Trace) -> FactorExperiment: + response_dict = json.loads(response) + tasks = [] + + for factor_name in response_dict: + description = response_dict[factor_name]["description"] + formulation = response_dict[factor_name]["formulation"] + # expression = response_dict[factor_name]["expression"] + variables = response_dict[factor_name]["variables"] + tasks.append( + FactorTask( + factor_name=factor_name, + factor_description=description, + factor_formulation=formulation, + # factor_expression=expression, + variables=variables, + ) + ) + + exp = QlibFactorExperiment(tasks) + exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in trace.hist if t[2]] + + unique_tasks = [] + + for task in tasks: + duplicate = False + for based_exp in exp.based_experiments: + for sub_task in based_exp.sub_tasks: + if task.factor_name == sub_task.factor_name: + duplicate = True + break + if duplicate: + break + if not duplicate: + unique_tasks.append(task) + + exp.tasks = unique_tasks + return exp + + + +alphaagent_prompt_dict = Prompts(file_path=Path(__file__).parent.parent / "prompts_alphaagent.yaml") + +# prompt_dict不能作为属性,因为后续整个类的实例要被转为pickle,而prompt_dict不能转 +class AlphaAgentHypothesisGen(FactorHypothesisGen): + def __init__(self, scen: Scenario, potential_direction: str=None) -> Tuple[dict, bool]: + super().__init__(scen) + self.potential_direction = potential_direction + + def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: + + if len(trace.hist) > 0: + hypothesis_and_feedback = ( + 
Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["hypothesis_and_feedback"]) + .render(trace=trace) + ) + + elif self.potential_direction is not None: + hypothesis_and_feedback = ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["potential_direction_transformation"]) + .render(potential_direction=self.potential_direction) + ) # + else: + hypothesis_and_feedback = "No previous hypothesis and feedback available since it's the first round. You are encouraged to propose an innovative hypothesis that diverges significantly from existing perspectives." + + context_dict = { + "hypothesis_and_feedback": hypothesis_and_feedback, + "RAG": None, + "hypothesis_output_format": alphaagent_prompt_dict["hypothesis_output_format"], + "hypothesis_specification": alphaagent_prompt_dict["factor_hypothesis_specification"], + } + return context_dict, True + + def convert_response(self, response: str) -> AlphaAgentHypothesis: + response_dict = json.loads(response) + hypothesis = AlphaAgentHypothesis( + hypothesis=response_dict["hypothesis"], + concise_observation=response_dict["concise_observation"], + concise_knowledge=response_dict["concise_knowledge"], + concise_justification=response_dict["concise_justification"], + concise_specification=response_dict["concise_specification"], + ) + return hypothesis + + def gen(self, trace: Trace) -> AlphaAgentHypothesis: + context_dict, json_flag = self.prepare_context(trace) + system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["hypothesis_gen"]["system_prompt"]) + .render( + targets=self.targets, + scenario=self.scen.get_scenario_all_desc(filtered_tag="hypothesis_and_experiment"), + hypothesis_output_format=context_dict["hypothesis_output_format"], + hypothesis_specification=context_dict["hypothesis_specification"], + ) + ) + user_prompt = ( + Environment(undefined=StrictUndefined) + 
.from_string(alphaagent_prompt_dict["hypothesis_gen"]["user_prompt"]) + .render( + targets=self.targets, + hypothesis_and_feedback=context_dict["hypothesis_and_feedback"], + RAG=context_dict["RAG"], + round=len(trace.hist) + ) + ) + + resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt, json_mode=json_flag) + + hypothesis = self.convert_response(resp) + + return hypothesis + + + +class EmptyHypothesisGen(FactorHypothesisGen): + def __init__(self, scen: Scenario) -> Tuple[dict, bool]: + super().__init__(scen) + + def convert_response(self, *args, **kwargs) -> AlphaAgentHypothesis: + return super().convert_response(*args, **kwargs) + + def prepare_context(self, *args, **kwargs) -> Tuple[dict | bool]: + return super().prepare_context(*args, **kwargs) + + def gen(self, trace: Trace) -> AlphaAgentHypothesis: + + hypothesis = AlphaAgentHypothesis( + hypothesis="", + concise_observation="", + concise_justification="", + concise_knowledge="", + concise_specification="" + ) + + return hypothesis + + + + +class AlphaAgentHypothesis2FactorExpression(FactorHypothesis2Experiment): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.factor_regulator = FactorRegulator() + + def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict | bool]: + scenario = trace.scen.get_scenario_all_desc() + experiment_output_format = alphaagent_prompt_dict["factor_experiment_output_format"] + function_lib_description = alphaagent_prompt_dict['function_lib_description'] + hypothesis_and_feedback = ( + ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["hypothesis_and_feedback"]) + .render(trace=trace) + ) + if len(trace.hist) > 0 + else "No previous hypothesis and feedback available since it's the first round." 
+ ) + + experiment_list: List[FactorExperiment] = [t[1] for t in trace.hist] + + factor_list = [] + for experiment in experiment_list: + factor_list.extend(experiment.sub_tasks) + + return { + "target_hypothesis": str(hypothesis), + "scenario": scenario, + "hypothesis_and_feedback": hypothesis_and_feedback, + "function_lib_description": function_lib_description, + "experiment_output_format": experiment_output_format, + "target_list": factor_list, + "RAG": None, + }, True + + def convert(self, hypothesis: Hypothesis, trace: Trace) -> Experiment: + context, json_flag = self.prepare_context(hypothesis, trace) + system_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["hypothesis2experiment"]["system_prompt"]) + .render( + targets=self.targets, + scenario=trace.scen.background, # get_scenario_all_desc(filtered_tag="hypothesis_and_experiment"), + experiment_output_format=context["experiment_output_format"], + ) + ) + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["hypothesis2experiment"]["user_prompt"]) + .render( + targets=self.targets, + target_hypothesis=context["target_hypothesis"], + hypothesis_and_feedback=context["hypothesis_and_feedback"], + function_lib_description=context["function_lib_description"], + target_list=context["target_list"], + RAG=context["RAG"], + expression_duplication=None + ) + ) + + # Detect duplicated sub-expressions + flag = False + expression_duplication_prompt = None + while True: + if flag: + break + + resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt, json_mode=json_flag) + response_dict = json.loads(resp) + proposed_names = [] + proposed_exprs = [] + + for i, factor_name in enumerate(response_dict): + expr = response_dict[factor_name]["expression"] + + # Check if expression is parsable + if not self.factor_regulator.is_parsable(expr): + logger.info(f"Failed to parse expr: {expr}, retrying...") + break + + 
success, eval_dict = self.factor_regulator.evaluate(expr) + if not success: + break + + # If expression has problems, regenerate with feedback + if not self.factor_regulator.is_expression_acceptable(eval_dict): + if expression_duplication_prompt is not None: + expression_duplication_prompt = '\n\n'.join([expression_duplication_prompt, + (Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["expression_duplication"]) + .render( + prev_expression=expr, + duplicated_subtree_size=eval_dict['duplicated_subtree_size'], + duplicated_subtree=eval_dict['duplicated_subtree'] + ) + )]) + else: + expression_duplication_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["expression_duplication"]) + .render( + prev_expression=expr, + duplicated_subtree_size=eval_dict['duplicated_subtree_size'], + duplicated_subtree=eval_dict['duplicated_subtree'] + ) + ) + + user_prompt = ( + Environment(undefined=StrictUndefined) + .from_string(alphaagent_prompt_dict["hypothesis2experiment"]["user_prompt"]) + .render( + targets=self.targets, + target_hypothesis=context["target_hypothesis"], + hypothesis_and_feedback=context["hypothesis_and_feedback"], + function_lib_description=context["function_lib_description"], + target_list=context["target_list"], + RAG=context["RAG"], + expression_duplication=expression_duplication_prompt + ) + ) + break + else: + proposed_names.append(factor_name) + proposed_exprs.append(expr) + if i == len(response_dict) - 1: + flag = True + else: + continue + + + # Add valid factors to the factor regulator + self.factor_regulator.add_factor(proposed_names, proposed_exprs) + + + return self.convert_response(resp, trace) + + + def convert_response(self, response: str, trace: Trace) -> FactorExperiment: + response_dict = json.loads(response) + tasks = [] + + for factor_name in response_dict: + description = response_dict[factor_name]["description"] + formulation = response_dict[factor_name]["formulation"] + 
expression = response_dict[factor_name]["expression"] + variables = response_dict[factor_name]["variables"] + tasks.append( + FactorTask( + factor_name=factor_name, + factor_description=description, + factor_formulation=formulation, + factor_expression=expression, + variables=variables, + ) + ) + + exp = QlibFactorExperiment(tasks) + exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in trace.hist if t[2]] + + unique_tasks = [] + + for task in tasks: + duplicate = False + for based_exp in exp.based_experiments: + for sub_task in based_exp.sub_tasks: + if task.factor_name == sub_task.factor_name: + duplicate = True + break + if duplicate: + break + if not duplicate: + unique_tasks.append(task) + + exp.tasks = unique_tasks + return exp + + + +class BacktestHypothesis2FactorExpression(FactorHypothesis2Experiment): + def __init__(self, factor_path, *args, **kwargs): + super().__init__(*args, **kwargs) + self.factor_path = factor_path + + def convert_response(self, *args, **kwargs) -> FactorExperiment: + return super().convert_response(*args, **kwargs) + + def prepare_context(self, *args, **kwargs) -> Tuple[dict | bool]: + return super().prepare_context(*args, **kwargs) + + def convert(self, hypothesis: Hypothesis, trace: Trace) -> FactorExperiment: + if os.path.exists(self.factor_path): + tasks = [] + factor_df = pd.read_csv(self.factor_path, usecols=["factor_name", "factor_expression"], index_col=None) + for index, row in factor_df.iterrows(): + tasks.append( + FactorTask( + factor_name=row["factor_name"], + factor_description="", + factor_formulation="", + factor_expression=row["factor_expression"], + variables="", + ) + ) + + exp = QlibFactorExperiment(tasks) + exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in trace.hist if t[2]] + + unique_tasks = [] + + for task in tasks: + duplicate = False + for based_exp in exp.based_experiments: + for sub_task in based_exp.sub_tasks: + if task.factor_name == 
sub_task.factor_name: + duplicate = True + break + if duplicate: + break + if not duplicate: + unique_tasks.append(task) + + exp.tasks = unique_tasks + return exp + + else: + raise ValueError(f"File {self.factor_csv_path} does not exist. ") + + \ No newline at end of file diff --git a/alphaagent/scenarios/qlib/proposal/model_proposal.py b/alphaagent/scenarios/qlib/proposal/model_proposal.py new file mode 100755 index 00000000..3f4b3b4b --- /dev/null +++ b/alphaagent/scenarios/qlib/proposal/model_proposal.py @@ -0,0 +1,106 @@ +import json +from pathlib import Path +from typing import List, Tuple + +from jinja2 import Environment, StrictUndefined + +from alphaagent.components.coder.model_coder.model import ModelExperiment, ModelTask +from alphaagent.components.proposal import ModelHypothesis2Experiment, ModelHypothesisGen +from alphaagent.core.prompts import Prompts +from alphaagent.core.proposal import Hypothesis, Scenario, Trace +from alphaagent.scenarios.qlib.experiment.model_experiment import QlibModelExperiment + +prompt_dict = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml") + +QlibModelHypothesis = Hypothesis + + +class QlibModelHypothesisGen(ModelHypothesisGen): + def __init__(self, scen: Scenario) -> Tuple[dict, bool]: + super().__init__(scen) + + def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: + hypothesis_and_feedback = ( + ( + Environment(undefined=StrictUndefined) + .from_string(prompt_dict["hypothesis_and_feedback"]) + .render(trace=trace) + ) + if len(trace.hist) > 0 + else "No previous hypothesis and feedback available since it's the first round." + ) + context_dict = { + "hypothesis_and_feedback": hypothesis_and_feedback, + "RAG": "In Quantitative Finance, market data could be time-series, and GRU model/LSTM model are suitable for them. 
Do not generate GNN model as for now.", + "hypothesis_output_format": prompt_dict["hypothesis_output_format"], + "hypothesis_specification": prompt_dict["model_hypothesis_specification"], + } + return context_dict, True + + def convert_response(self, response: str) -> Hypothesis: + response_dict = json.loads(response) + hypothesis = QlibModelHypothesis( + hypothesis=response_dict["hypothesis"], + reason=response_dict["reason"], + concise_reason=response_dict["concise_reason"], + concise_observation=response_dict["concise_observation"], + concise_justification=response_dict["concise_justification"], + concise_knowledge=response_dict["concise_knowledge"], + ) + return hypothesis + + +class QlibModelHypothesis2Experiment(ModelHypothesis2Experiment): + def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, bool]: + scenario = trace.scen.get_scenario_all_desc() + experiment_output_format = prompt_dict["model_experiment_output_format"] + + hypothesis_and_feedback = ( + ( + Environment(undefined=StrictUndefined) + .from_string(prompt_dict["hypothesis_and_feedback"]) + .render(trace=trace) + ) + if len(trace.hist) > 0 + else "No previous hypothesis and feedback available since it's the first round." 
+ ) + + experiment_list: List[ModelExperiment] = [t[1] for t in trace.hist] + + model_list = [] + for experiment in experiment_list: + model_list.extend(experiment.sub_tasks) + + return { + "target_hypothesis": str(hypothesis), + "scenario": scenario, + "hypothesis_and_feedback": hypothesis_and_feedback, + "experiment_output_format": experiment_output_format, + "target_list": model_list, + "RAG": None, + }, True + + def convert_response(self, response: str, trace: Trace) -> ModelExperiment: + response_dict = json.loads(response) + tasks = [] + for model_name in response_dict: + description = response_dict[model_name]["description"] + formulation = response_dict[model_name]["formulation"] + architecture = response_dict[model_name]["architecture"] + variables = response_dict[model_name]["variables"] + hyperparameters = response_dict[model_name]["hyperparameters"] + model_type = response_dict[model_name]["model_type"] + tasks.append( + ModelTask( + name=model_name, + description=description, + formulation=formulation, + architecture=architecture, + variables=variables, + hyperparameters=hyperparameters, + model_type=model_type, + ) + ) + exp = QlibModelExperiment(tasks) + exp.based_experiments = [t[1] for t in trace.hist if t[2]] + return exp diff --git a/alphaagent/scenarios/qlib/proposal/prompts_alphaagent.yaml b/alphaagent/scenarios/qlib/proposal/prompts_alphaagent.yaml new file mode 100755 index 00000000..f98bde6b --- /dev/null +++ b/alphaagent/scenarios/qlib/proposal/prompts_alphaagent.yaml @@ -0,0 +1,48 @@ +hypothesis_gen: + system_prompt: |- + The user is working on generating new hypotheses for the {{targets}} in a data-driven research and development process. + The {{targets}} are used in the following scenario: + {{scenario}} + The user has already proposed several hypotheses and conducted evaluations on them. This information will be provided to you. Your task is to check whether a similar hypothesis has already been generated. 
+ If one exists and you agree with it, feel free to use it. If you disagree, please generate an improved version. + {% if hypothesis_specification %} + To assist you in formulating new hypotheses, the user has provided some additional information: {{hypothesis_specification}}. + **Important:** If the hypothesis_specification outlines the next steps you need to follow, ensure you adhere to those instructions. + {% endif %} + Please generate the output using the following format and specifications: + {{ hypothesis_output_format }} + + user_prompt: |- + {% if hypothesis_and_feedback|length == 0 %}It is the first round of hypothesis generation. The user has no hypothesis on this scenario yet. + {% else %}It is not the first round; the user has made several hypotheses on this scenario and conducted several evaluations on them. + The former hypotheses and the corresponding feedback are as follows (focus on the last one & the new hypothesis that it provides and reasoning to see if you agree): + {{ hypothesis_and_feedback }} + {% endif %} + {% if RAG %} + To assist you in generating new {{targets}}, we have provided the following information: {{RAG}}. + **Note:** The provided RAG is for reference only. + You must carefully assess whether the RAG aligns with the {{targets}}. + If it does not, it should not be used. Exercise caution and make your own judgment. + {% endif %} + Also generate the relevant keys for the reasoning and the distilled knowledge that follows. For those keys, in particular for knowledge, explain in the context of the specific scenario to build up domain knowledge in the specific field rather than general knowledge. + +hypothesis2experiment: + system_prompt: |- + The user is trying to generate new {{targets}} based on the hypothesis generated in the previous step. + The {{targets}} are used in a certain scenario, which is as follows: + {{ scenario }} + The user will use the {{targets}} generated to do some experiments. 
The user will provide this information to you: + 1. The target hypothesis you are targeting to generate {{targets}} for. + 2. The hypothesis generated in the previous steps and their corresponding feedbacks. + 3. Former proposed {{targets}} on similar hypothesis. + 4. Some additional information to help you generate new {{targets}}. + Please generate the output following the format below: + {{ experiment_output_format }} + + user_prompt: |- + The user has made several hypothesis on this scenario and did several evaluation on them. + The target hypothesis you are targeting to generate {{targets}} for is as follows: + {{ target_hypothesis }} + The former hypothesis and the corresponding feedbacks are as follows: + {{ hypothesis_and_feedback }} + Please generate the new {{targets}} based on the information above. diff --git a/alphaagent/scenarios/qlib/regulator/factor_regulator.py b/alphaagent/scenarios/qlib/regulator/factor_regulator.py new file mode 100755 index 00000000..6e7a3034 --- /dev/null +++ b/alphaagent/scenarios/qlib/regulator/factor_regulator.py @@ -0,0 +1,185 @@ +import pandas as pd +import numpy as np +from typing import Tuple, List, Dict, Any, Optional +from alphaagent.core.evaluation import Evaluator +from alphaagent.log import logger +from alphaagent.core.scenario import Scenario +from alphaagent.components.coder.factor_coder.factor_ast import match_alphazoo, count_free_args, count_unique_vars, count_all_nodes +from alphaagent.components.coder.factor_coder.expr_parser import parse_expression + +class FactorRegulator(Evaluator): + """ + FactorRegulator class to evaluate expressions for duplication and manage the factor zoo database. + This class provides functionality to detect duplicated subtrees in factor expressions + and ensure new factors maintain appropriate originality. + """ + + def __init__(self, factor_zoo_path: str = None, duplication_threshold: int = 8): + """ + Initialize the FactorRegulator with a reference to the factor zoo. 
+ + Args: + factor_zoo_path (str): Path to the CSV file containing the factor zoo database. + duplication_threshold (int): Threshold for duplication detection. + """ + super().__init__(None) + self.factor_zoo_path = factor_zoo_path + if factor_zoo_path: + self.alphazoo = pd.read_csv(factor_zoo_path, index_col=None) + else: + self.alphazoo = pd.DataFrame() + self.duplication_threshold = duplication_threshold + self.new_factors = [] + + + + def is_parsable(self, expression: str) -> bool: + """ + Checks if an expression can be successfully parsed. + + Args: + expression (str): The factor expression to check. + + Returns: + bool: True if the expression can be parsed, False otherwise. + """ + try: + parse_expression(expression) + return True + except Exception as e: + logger.error(f"Failed to parse expression: {expression}. Error: {str(e)}") + return False + + def evaluate(self, expression: str) -> Tuple[int, str, Optional[str]]: + """ + Evaluates an expression for duplication with existing factors in the factor zoo. + + Args: + expression (str): The factor expression to evaluate. 
+ + Returns: + Tuple containing: + - duplicated_subtree_size (int): Size of the duplicated subtree + - duplicated_subtree (str): The duplicated subtree expression + - matched_alpha (str or None): Name of the matched alpha if available + """ + try: + # Check for duplication + duplicated_subtree_size, duplicated_subtree, matched_alpha = match_alphazoo( + expression, self.alphazoo + ) + + num_free_args = count_free_args(expression) + num_unique_vars = count_unique_vars(expression) + num_all_nodes = count_all_nodes(expression) + + logger.info(f""" + Evaluated expr: {expression} + Duplicated Size: {duplicated_subtree_size} + Duplicated Subtree: {duplicated_subtree} + # Free Args: {num_free_args} + # Unique Vars: {num_unique_vars} + """) + + eval_dict = { + "expr": expression, + "duplicated_subtree_size": duplicated_subtree_size, + "duplicated_subtree": duplicated_subtree, + "matched_alpha": matched_alpha, + "num_free_args": num_free_args, + "num_unique_vars": num_unique_vars, + "num_all_nodes": num_all_nodes + } + + return True, eval_dict + + except Exception as e: + logger.error(f"Failed to evaluate expression: {expression}. Error: {str(e)}") + return False, None + + + def is_expression_acceptable(self, eval_dict) -> bool: + """ + Determines if an expression is acceptable based on the duplication threshold, + and the ratio of num_free_args and num_unique_vars to the total number of nodes in the expression. + + Args: + eval_dict (dict): Dictionary containing evaluation results of the expression. + + Returns: + bool: True if the expression is acceptable, False otherwise. 
+ """ + # Condition 1: Check if the duplicated subtree size is within the threshold + cond1 = eval_dict['duplicated_subtree_size'] <= self.duplication_threshold + + # Get the number of free arguments, unique variables, and total nodes + num_free_args = eval_dict['num_free_args'] + num_unique_vars = eval_dict['num_unique_vars'] + num_all_nodes = eval_dict['num_all_nodes'] + + # Avoid division by zero and invalid ratios + if num_all_nodes == 0: + logger.warning(f"Expression has no nodes: {eval_dict['expr']}") + return False + + # Calculate ratios + free_args_ratio = float(num_free_args) / float(num_all_nodes) + unique_vars_ratio = float(num_unique_vars) / float(num_all_nodes) + + # Ensure ratios are within valid range (0 <= ratio < 1) + if free_args_ratio >= 1 or unique_vars_ratio >= 1: + logger.warning(f"Invalid ratio detected: free_args_ratio={free_args_ratio}, unique_vars_ratio={unique_vars_ratio}") + return False + + # Condition 2: Ensure the ratio of num_free_args to total nodes is not too high using -log(1 - ratio) + # -log(1 - x) increases as x increases, so we set a threshold (e.g., -log(1 - 0.5) ≈ 0.693) + # This ensures the ratio is not too high (e.g., x < 0.5) + cond2 = -np.log(1 - free_args_ratio) < 0.693 # Threshold for x < 0.5 + + # Condition 3: Ensure the ratio of num_unique_vars to total nodes is not too high using -log(1 - ratio) + cond3 = -np.log(1 - unique_vars_ratio) < 0.693 # Threshold for x < 0.5 + + # The expression is acceptable if all conditions are met + return cond1 and cond2 and cond3 + + + def add_factor(self, factor_name: str, factor_expression: str) -> bool: + """ + Adds a new factor to the in-memory factor zoo if it passes the duplication check. + + Args: + factor_name (str): Name of the new factor. + factor_expression (str): Expression of the new factor. + + Returns: + bool: True if the factor was added, False otherwise. 
+ """ + new_factor = pd.DataFrame({ + 'factor_name': factor_name, + 'factor_expression': factor_expression + }) + + self.alphazoo = pd.concat([self.alphazoo, new_factor]) + self.new_factors.append((factor_name, factor_expression)) + logger.info(f"Added new factor: {factor_name} with expression: {factor_expression}") + + def save_factor_zoo(self, output_path: Optional[str] = None) -> None: + """ + Saves the updated factor zoo to a CSV file. + + Args: + output_path (str, optional): Path to save the updated factor zoo. + If None, updates the original file. + """ + save_path = output_path if output_path else self.factor_zoo_path + self.alphazoo.to_csv(save_path, index=False) + logger.info(f"Saved updated factor zoo to {save_path}") + + def get_new_factors(self) -> List[Tuple[str, str]]: + """ + Returns the list of new factors added during this session. + + Returns: + List[Tuple[str, str]]: List of (factor_name, factor_expression) tuples. + """ + return self.new_factors \ No newline at end of file diff --git a/alphaagent/utils/__init__.py b/alphaagent/utils/__init__.py new file mode 100755 index 00000000..41a096ac --- /dev/null +++ b/alphaagent/utils/__init__.py @@ -0,0 +1,55 @@ +""" +This is some common utils functions. +it is not binding to the scenarios or framework (So it is not placed in rdagent.core.utils) +""" + +# TODO: merge the common utils in `rdagent.core.utils` into this folder +# TODO: split the utils in this module into different modules in the future. 
+ +import importlib +import re +import sys +from types import ModuleType +from typing import Union + + +def get_module_by_module_path(module_path: Union[str, ModuleType]): + """Load module from path like a/b/c/d.py or a.b.c.d + + :param module_path: + :return: + :raises: ModuleNotFoundError + """ + if module_path is None: + raise ModuleNotFoundError("None is passed in as parameters as module_path") + + if isinstance(module_path, ModuleType): + module = module_path + else: + if module_path.endswith(".py"): + module_name = re.sub("^[^a-zA-Z_]+", "", re.sub("[^0-9a-zA-Z_]", "", module_path[:-3].replace("/", "_"))) + module_spec = importlib.util.spec_from_file_location(module_name, module_path) + module = importlib.util.module_from_spec(module_spec) + sys.modules[module_name] = module + module_spec.loader.exec_module(module) + else: + module = importlib.import_module(module_path) + return module + + +def convert2bool(value: Union[str, bool]) -> bool: + """ + Motivation: the return value of LLM is not stable. Try to convert the value into bool + """ + # TODO: if we have more similar functions, we can build a library to converting unstable LLM response to stable results. + if isinstance(value, str): + v = value.lower().strip() + if v in ["true", "yes", "ok"]: + return True + if v in ["false", "no"]: + return False + raise ValueError(f"Can not convert {value} to bool") + elif isinstance(value, bool): + return value + else: + raise ValueError(f"Unknown value type {value} to bool") diff --git a/alphaagent/utils/agent/__init__.py b/alphaagent/utils/agent/__init__.py new file mode 100755 index 00000000..e69de29b diff --git a/alphaagent/utils/agent/ret.py b/alphaagent/utils/agent/ret.py new file mode 100755 index 00000000..f1343007 --- /dev/null +++ b/alphaagent/utils/agent/ret.py @@ -0,0 +1,34 @@ +""" +The output of a agent is very important. + +We think this part can be shared. 
class AgentOut:
    """Base description of an agent's output format.

    Subclasses provide ``get_spec`` (the text describing the expected output)
    and ``extract_output`` (the parser that pulls the payload out of a raw
    response string).
    """

    @abstractclassmethod
    def get_spec(cls, **context: Any) -> str:
        raise NotImplementedError(f"Please implement the `get_spec` method")

    @classmethod
    def extract_output(cls, resp: str) -> Any:
        # The base class has no parser. Raising the response string itself (as
        # the code used to do) is not a valid raise target and surfaced as a
        # confusing TypeError.
        raise NotImplementedError("Please implement the `extract_output` method")


class PythonAgentOut(AgentOut):
    """Output format where the answer is a fenced ``Python`` code block."""

    @classmethod
    def get_spec(cls):
        # Renders the `PythonAgentOut` entry of the `tpl` template file that `T`
        # resolves from the ".tpl:PythonAgentOut" uri.
        return T(".tpl:PythonAgentOut").r()

    @classmethod
    def extract_output(cls, resp: str):
        """Return the code inside a ```` ```Python ```` / ```` ```python ```` fence, or None when there is none."""
        match = re.search(r".*```[Pp]ython\n(.*)\n```.*", resp, re.DOTALL)
        if match is None:
            return None
        return match.group(1)
+ """ + + def __init__(self, uri: str): + """ + here are some uri usages + case 1) "a.b.c:x.y.z" + It will load DIRNAME/a/b/c.yaml as `yaml` and load yaml[x][y][z] + case 2) ".c:x.y.z" + It will load c.yaml in caller's (who call `T(uri)`) directory as `yaml` and load yaml[x][y][z] + + the loaded content will be saved in `self.template` + """ + # Inspect the calling stack to get the caller's directory + stack = inspect.stack() + caller_frame = stack[1] + caller_module = inspect.getmodule(caller_frame[0]) + caller_dir = Path(caller_module.__file__).parent + + # Parse the URI + path_part, yaml_path = uri.split(":") + yaml_keys = yaml_path.split(".") + + if path_part.startswith("."): + yaml_file_path = caller_dir / f"{path_part[1:].replace('.', '/')}.yaml" + else: + yaml_file_path = (PROJ_PATH / path_part.replace(".", "/")).with_suffix(".yaml") + + # Load the YAML file + with open(yaml_file_path, "r") as file: + yaml_content = yaml.safe_load(file) + + # Traverse the YAML content to get the desired template + for key in yaml_keys: + yaml_content = yaml_content[key] + + self.template = yaml_content + + def r(self, **context: Any): + """ + Render the template with the given context. 
+ """ + return Environment(undefined=StrictUndefined).from_string(self.template).render(**context) + + +T = RDAT # shortcuts diff --git a/alphaagent/utils/agent/tpl.yaml b/alphaagent/utils/agent/tpl.yaml new file mode 100755 index 00000000..2b41013a --- /dev/null +++ b/alphaagent/utils/agent/tpl.yaml @@ -0,0 +1,6 @@ +PythonAgentOut: |- + The return code should be like + ```Python + + ``` + diff --git a/alphaagent/utils/env.py b/alphaagent/utils/env.py new file mode 100755 index 00000000..2ae7a511 --- /dev/null +++ b/alphaagent/utils/env.py @@ -0,0 +1,525 @@ +""" +The motiviation of the utils is for environment management + +Tries to create uniform environment for the agent to run; +- All the code and data is expected included in one folder +""" + +# TODO: move the scenario specific docker env into other folders. + +import json +import os +import pickle +import subprocess +import uuid +from abc import abstractmethod +from pathlib import Path +from typing import Generic, Optional, TypeVar + +import docker +import docker.models +import docker.models.containers +from pydantic import BaseModel +from rich import print +from rich.console import Console +from rich.progress import Progress, SpinnerColumn, TextColumn +from rich.rule import Rule +from rich.table import Table + +from alphaagent.core.conf import ExtendedBaseSettings, ExtendedSettingsConfigDict +from alphaagent.log import logger + +ASpecificBaseModel = TypeVar("ASpecificBaseModel", bound=BaseModel) + + +class Env(Generic[ASpecificBaseModel]): + """ + We use BaseModel as the setting due to the features it provides + - It provides base typing and checking features. + - loading and dumping the information will be easier: for example, we can use package like `pydantic-yaml` + """ + + conf: ASpecificBaseModel # different env have different conf. 
class LocalConf(BaseModel):
    """Settings for ``LocalEnv``."""

    py_bin: str  # path joined in front of the executable (first word) of every entry
    default_entry: str  # command line used when `run` is called without an entry


class LocalEnv(Env[LocalConf]):
    """
    Sometimes local environment may be more convenient for testing
    """

    def prepare(self):
        """Download the qlib CN data set unless ``~/.qlib/qlib_data/cn_data`` already exists."""
        data_dir = Path("~/.qlib/qlib_data/cn_data").expanduser().resolve()
        if data_dir.exists():
            print("Data already exists. Download skipped.")
            return
        self.run(
            entry="python -m qlib.run.get_data qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn",
        )

    def run(self, entry: str | None = None, local_path: Optional[str] = None, env: dict | None = None) -> str:
        """Run ``entry`` as a local subprocess and return its stdout.

        Parameters
        ----------
        entry : str | None
            Whitespace-separated command line; ``conf.default_entry`` is used when None.
            Its first word is resolved relative to ``conf.py_bin``.
        local_path : str | None
            Working directory of the subprocess; the current directory when empty.
        env : dict | None
            Extra environment variables layered on top of ``os.environ``.

        Raises
        ------
        RuntimeError
            When the command exits with a non-zero return code (message carries stderr).
        """
        if env is None:
            env = {}

        if entry is None:
            entry = self.conf.default_entry

        # Split first and only join the executable with `py_bin`. Pushing the
        # whole command line through `Path` normalises the arguments as well
        # (e.g. "//" inside a URL collapses to "/"), and splitting the joined
        # string on " " breaks as soon as `py_bin` contains a space or the entry
        # contains repeated spaces.
        executable, *arguments = entry.split() or [""]
        command = [str(Path(self.conf.py_bin).joinpath(executable)), *arguments]

        cwd = Path(local_path).resolve() if local_path else None
        result = subprocess.run(command, cwd=cwd, env={**os.environ, **env}, capture_output=True, text=True)

        if result.returncode != 0:
            raise RuntimeError(f"Error while running the command: {result.stderr}")

        return result.stdout
print(output) + + if result.stderr: + print(f"[bold red]错误输出:[/bold red] {result.stderr}") + + print(Rule("[bold green]本地执行结束[/bold green]", style="dark_orange")) + + if result.returncode != 0: + logger.error(f"命令执行失败: {result.stderr}") + + return output + + +## Docker Environment ----- + + +class DockerConf(ExtendedBaseSettings): + build_from_dockerfile: bool = False + dockerfile_folder_path: Optional[Path] = ( + None # the path to the dockerfile optional path provided when build_from_dockerfile is False + ) + image: str # the image you want to build + mount_path: str # the path in the docker image to mount the folder + default_entry: str # the entry point of the image + + extra_volumes: dict | None = {} + # Sometime, we need maintain some extra data for the workspace. + # And the extra data may be shared and the downloading can be time consuming. + # So we just want to download it once. + network: str | None = "host" # the network mode for the docker, none + shm_size: str | None = None + enable_gpu: bool = True # because we will automatically disable GPU if not available. So we enable it by default. 
+ mem_limit: str | None = "48g" # Add memory limit attribute + + running_timeout_period: int = 3600 # 1 hour + + +class QlibDockerConf(DockerConf): + model_config = ExtendedSettingsConfigDict(env_prefix="QLIB_DOCKER_") + + build_from_dockerfile: bool = True + dockerfile_folder_path: Path = Path(__file__).parent.parent / "scenarios" / "qlib" / "docker" + image: str = "local_qlib:latest" + mount_path: str = "/workspace/qlib_workspace/" + default_entry: str = "qrun conf.yaml" + extra_volumes: dict = {Path("~/.qlib/").expanduser().resolve(): "/root/.qlib/"} + shm_size: str | None = "16g" + enable_gpu: bool = True + + +class DMDockerConf(DockerConf): + model_config = ExtendedSettingsConfigDict(env_prefix="DM_DOCKER_") + + build_from_dockerfile: bool = True + dockerfile_folder_path: Path = Path(__file__).parent.parent / "scenarios" / "data_mining" / "docker" + image: str = "local_dm:latest" + mount_path: str = "/workspace/dm_workspace/" + default_entry: str = "python train.py" + extra_volumes: dict = { + Path("~/.rdagent/.data/physionet.org/files/mimic-eicu-fiddle-feature/1.0.0/FIDDLE_mimic3/") + .expanduser() + .resolve(): "/root/.data/" + } + shm_size: str | None = "16g" + + +class KGDockerConf(DockerConf): + model_config = ExtendedSettingsConfigDict(env_prefix="KG_DOCKER_") + + build_from_dockerfile: bool = True + dockerfile_folder_path: Path = Path(__file__).parent.parent / "scenarios" / "kaggle" / "docker" / "kaggle_docker" + image: str = "local_kg:latest" + # image: str = "gcr.io/kaggle-gpu-images/python:latest" + mount_path: str = "/workspace/kg_workspace/" + default_entry: str = "python train.py" + # extra_volumes: dict = { + # # TODO connect to the place where the data is stored + # Path("git_ignore_folder/data").resolve(): "/root/.data/" + # } + + running_timeout_period: int = 600 + mem_limit: str | None = ( + "48g" # Add memory limit attribute # new-york-city-taxi-fare-prediction may need more memory + ) + + +class MLEBDockerConf(DockerConf): + model_config = 
class DockerEnv(Env[DockerConf]):
    """Environment that runs an entry command inside a Docker container described by ``self.conf``."""

    # TODO: Save the output into a specific file

    def prepare(self):
        """
        Build the image from its Dockerfile when configured, then pull it if it
        still does not exist locally.
        """
        client = docker.from_env()
        dockerfile_dir = self.conf.dockerfile_folder_path
        if self.conf.build_from_dockerfile and dockerfile_dir is not None and dockerfile_dir.exists():
            logger.info(f"Building the image from dockerfile: {dockerfile_dir}")
            resp_stream = client.api.build(path=str(dockerfile_dir), tag=self.conf.image, network_mode=self.conf.network)
            if isinstance(resp_stream, str):
                # A plain string cannot be iterated as a stream of byte chunks; just log it.
                logger.info(resp_stream)
            else:
                self._report_build_progress(resp_stream)
            logger.info(f"Finished building the image from dockerfile: {dockerfile_dir}")
        try:
            client.images.get(self.conf.image)
        except docker.errors.ImageNotFound:
            self._pull_image(client)
        except docker.errors.APIError as e:
            raise RuntimeError(f"Error while pulling the image: {e}")

    @staticmethod
    def _report_build_progress(resp_stream) -> None:
        """Show the build stream on a spinner and raise ``BuildError`` on the first error record."""
        with Progress(SpinnerColumn(), TextColumn("{task.description}")) as p:
            task = p.add_task("[cyan]Building image...")
            for part in resp_stream:
                # One chunk may carry several JSON records separated by CRLF.
                for line in part.decode("utf-8").split("\r\n"):
                    if not line.strip():
                        continue
                    status_dict = json.loads(line)
                    if "error" in status_dict:
                        p.update(task, description=f"[red]error: {status_dict['error']}")
                        raise docker.errors.BuildError(status_dict["error"], "")
                    if "stream" in status_dict:
                        p.update(task, description=status_dict["stream"])

    def _pull_image(self, client) -> None:
        """Pull ``self.conf.image`` while reporting per-layer progress."""
        image_pull = client.api.pull(self.conf.image, stream=True, decode=True)
        current_status = ""
        layer_set = set()
        completed_layers = 0
        with Progress(TextColumn("{task.description}"), TextColumn("{task.fields[progress]}")) as sp:
            main_task = sp.add_task("[cyan]Pulling image...", progress="")
            status_task = sp.add_task("[bright_magenta]layer status", progress="")
            for line in image_pull:
                if "error" in line:
                    sp.update(status_task, description=f"[red]error", progress=line["error"])
                    raise docker.errors.APIError(line["error"])

                # NOTE(review): records without an "id" (presumably the final
                # digest / status summary of a pull) must not abort the pull
                # with a KeyError, so read both keys defensively.
                layer_id = line.get("id")
                status = line.get("status", "")
                p_text = line.get("progress", None)

                if layer_id is not None:
                    layer_set.add(layer_id)

                if p_text:
                    current_status = p_text

                if status == "Pull complete" or status == "Already exists":
                    completed_layers += 1

                sp.update(main_task, progress=f"[green]{completed_layers}[white]/{len(layer_set)} layers completed")
                if layer_id is None:
                    sp.update(status_task, description=f"[bright_magenta]{status}", progress=current_status)
                else:
                    sp.update(
                        status_task,
                        description=f"[bright_magenta]layer {layer_id} [yellow]{status}",
                        progress=current_status,
                    )

    def _gpu_kwargs(self, client):
        """get gpu kwargs based on its availability"""
        if not self.conf.enable_gpu:
            return {}
        gpu_kwargs = {
            "device_requests": [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])],
        }
        try:
            # Probe with a throw-away container. `remove=True` keeps the probe
            # from piling up one stopped container per run.
            client.containers.run(self.conf.image, "nvidia-smi", remove=True, **gpu_kwargs)
            logger.info("GPU Devices are available.")
        except (docker.errors.APIError, docker.errors.ContainerError):
            # A failing probe (API error or non-zero exit of `nvidia-smi`) means
            # "no usable GPU": fall back to running without device requests.
            return {}
        return gpu_kwargs

    def __run(
        self,
        entry: str | None = None,
        local_path: str | None = None,
        env: dict | None = None,
        running_extra_volume: dict | None = None,
    ) -> str:
        """Start the container, stream its logs to the console and return them as one string."""
        if env is None:
            env = {}
        client = docker.from_env()

        # host path -> {"bind": container path, "mode": "rw"}
        volumes = {}
        if local_path is not None:
            local_path = os.path.abspath(local_path)
            volumes[local_path] = {"bind": self.conf.mount_path, "mode": "rw"}
        for extra in (self.conf.extra_volumes, running_extra_volume):
            if extra is not None:
                for lp, rp in extra.items():
                    volumes[lp] = {"bind": rp, "mode": "rw"}

        log_output = ""
        container = None

        try:
            container = client.containers.run(
                image=self.conf.image,
                command=entry,
                volumes=volumes,
                environment=env,
                detach=True,
                working_dir=self.conf.mount_path,
                # auto_remove=True,  # remove too fast might cause the logs not to be get
                network=self.conf.network,
                shm_size=self.conf.shm_size,
                mem_limit=self.conf.mem_limit,  # Set memory limit
                **self._gpu_kwargs(client),
            )
            logs = container.logs(stream=True)
            print(Rule("[bold green]Docker Logs Begin[/bold green]", style="dark_orange"))
            table = Table(title="Run Info", show_header=False)
            table.add_column("Key", style="bold cyan")
            table.add_column("Value", style="bold magenta")
            table.add_row("Image", self.conf.image)
            table.add_row("Container ID", container.id)
            table.add_row("Container Name", container.name)
            table.add_row("Entry", entry)
            table.add_row("Env", "\n".join(f"{k}:{v}" for k, v in env.items()))
            table.add_row("Volumns", "\n".join(f"{k}:{v}" for k, v in volumes.items()))
            print(table)
            for log in logs:
                # Container output is not guaranteed to be valid UTF-8.
                decoded_log = log.strip().decode(errors="replace")
                Console().print(decoded_log, markup=False)
                log_output += decoded_log + "\n"
            print(Rule("[bold green]Docker Logs End[/bold green]", style="dark_orange"))
            container.wait()
            return log_output
        except docker.errors.ContainerError as e:
            raise RuntimeError(f"Error while running the container: {e}")
        except docker.errors.ImageNotFound:
            raise RuntimeError("Docker image not found.")
        except docker.errors.APIError as e:
            raise RuntimeError(f"Error while running the container: {e}")
        finally:
            # Always clean up, also when streaming the logs was interrupted.
            if container is not None:
                try:
                    container.remove(force=True)
                except docker.errors.APIError as e:
                    logger.warning(f"Failed to remove container {container.id}: {e}")

    def run(
        self,
        entry: str | None = None,
        local_path: str | None = None,
        env: dict | None = None,
        running_extra_volume: dict | None = None,
    ):
        """Run ``entry`` (default: ``conf.default_entry``) in the container, wrapped in ``timeout``.

        Returns the collected container log output.
        """
        if entry is None:
            entry = self.conf.default_entry
        entry_add_timeout = f"timeout {self.conf.running_timeout_period} {entry}"
        return self.__run(entry_add_timeout, local_path, env, running_extra_volume)

    def dump_python_code_run_and_get_results(
        self,
        code: str,
        dump_file_names: list[str],
        local_path: str | None = None,
        env: dict | None = None,
        running_extra_volume: dict | None = None,
        code_dump_file_py_name: Optional[str] = None,
    ):
        """
        Dump the code into the local path and run the code.

        Returns ``(log_output, results)`` where ``results`` holds the unpickled
        content of every file in ``dump_file_names`` (each file is deleted after
        loading), or ``(log_output, None)`` as soon as one of them is missing.
        ``local_path`` must be given: the script and the result files live there.
        """
        script_name = f"{uuid.uuid4()}.py" if code_dump_file_py_name is None else f"{code_dump_file_py_name}.py"
        script_path = os.path.join(local_path, script_name)
        with open(script_path, "w") as f:
            f.write(code)
        try:
            log_output = self.run(f"python {script_name}", local_path, env, running_extra_volume=running_extra_volume)
        finally:
            # Remove the temporary script even when the run raised.
            os.remove(script_path)

        results = []
        for name in dump_file_names:
            dump_path = os.path.join(local_path, f"{name}")
            if not os.path.exists(dump_path):
                return log_output, None
            # NOTE: pickle executes code on load; these files are expected to be
            # written by the script that was just run in the container.
            with open(dump_path, "rb") as f:
                results.append(pickle.load(f))
            os.remove(dump_path)
        return log_output, results
prepare(self, username: str, password: str): + """ + Download image & data if it doesn't exist + """ + super().prepare() + data_path = next(iter(self.conf.extra_volumes.keys())) + if not (Path(data_path)).exists(): + logger.info("We are downloading!") + cmd = "wget -r -N -c -np --user={} --password={} -P ~/.rdagent/.data/ https://physionet.org/files/mimic-eicu-fiddle-feature/1.0.0/".format( + username, password + ) + os.system(cmd) + else: + logger.info("Data already exists. Download skipped.") + + +class KGDockerEnv(DockerEnv): + """Kaggle Competition Docker""" + + def __init__(self, competition: str = None, conf: DockerConf = KGDockerConf()): + super().__init__(conf) + + +class MLEBDockerEnv(DockerEnv): + """MLEBench Docker""" + + def __init__(self, conf: DockerConf = MLEBDockerConf()): + super().__init__(conf) diff --git a/alphaagent/utils/repo/README.md b/alphaagent/utils/repo/README.md new file mode 100755 index 00000000..c88503dd --- /dev/null +++ b/alphaagent/utils/repo/README.md @@ -0,0 +1,112 @@ +# RepoAnalyzer + +RepoAnalyzer is a Python utility for analyzing and summarizing the contents of a Python repository. It provides a high-level overview of the repository structure, including a tree-like representation of the directory structure and details about files, classes, and functions. 

## Features

- Generate a tree-like structure of the repository
- Summarize an entire repository
- Adjust verbosity levels for summaries
- Extract content from specific files
- Analyze Python files for classes and functions


## Usage

### Basic Usage

```python
from alphaagent.utils.repo.repo_utils import RepoAnalyzer

# Initialize the RepoAnalyzer with the path to your repository
repo_analyzer = RepoAnalyzer("/path/to/your/repo")

# Generate a summary of the repository
summary = repo_analyzer.summarize_repo()
print(summary)

# Extract content from specific files
highlighted_content = repo_analyzer.highlight(["file1.py", "file2.py"])
print(highlighted_content)
```

### Adjusting Verbosity Levels

You can adjust the verbosity of the summary using the following parameters:

- `verbose_level`: Controls whether the methods inside classes are summarized
  - 0–1: Default (file info, class summaries, top-level function summaries)
  - 2+: Detailed (additionally summarizes every method within each class)
- `doc_str_level`: Controls the inclusion of docstrings (0 omits them; any value above 0 adds the first sentence of each docstring)
- `sign_level`: Controls the inclusion of function signatures (0 omits them; any value above 0 adds them)

Example:

```python
detailed_summary = repo_analyzer.summarize_repo(verbose_level=2, doc_str_level=1, sign_level=1)
print(detailed_summary)
```

## Example Output

### Repository Summary

```
Workspace Summary for my_project
========================================

Workspace Structure:
my_project/
├── main.py
├── utils/
│   ├── helper.py
│   └── config.py
├── models/
│   ├── model_a.py
│   └── model_b.py

This workspace contains 5 Python files.

File 1 of 5:
File: main.py
----------------------------------------
This file contains 1 class.
This file contains 2 top-level functions.

Class: MainApp
  Description: Main application class for the project.
  This class has 3 methods.

Function: setup_logging
  Signature: setup_logging(log_level)
  Purpose: Configure the logging for the application.
class RepoAnalyzer:
    """Produce plain-text summaries of the Python files found under a repository path."""

    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path)
        self.summaries = {}

    def _walk_python_files(self):
        """Yield ``(root, python_file_names)`` for every directory, in a stable sorted order."""
        for root, dirs, files in os.walk(self.repo_path):
            # Sorting `dirs` in place fixes the order in which os.walk descends,
            # so the summary does not depend on the file system's listing order.
            dirs.sort()
            yield root, sorted(file for file in files if file.endswith(".py"))

    def summarize_repo(self, verbose_level: int = 1, doc_str_level: int = 1, sign_level: int = 1) -> str:
        """
        Generate a natural language summary of the entire repository workspace.

        :param verbose_level: values above 1 also summarise the methods inside classes
        :param doc_str_level: values above 0 include the first sentence of docstrings
        :param sign_level: values above 0 include function signatures
        :return: A string containing the workspace summary
        """
        tree_structure = self._generate_tree_structure()
        file_summaries = [
            self._summarize_file(Path(root) / file, verbose_level, doc_str_level, sign_level)
            for root, files in self._walk_python_files()
            for file in files
        ]

        total_files = len(file_summaries)
        workspace_summary = f"Workspace Summary for {self.repo_path.name}\n"
        workspace_summary += f"{'=' * 40}\n\n"
        workspace_summary += "Workspace Structure:\n"
        workspace_summary += tree_structure
        workspace_summary += (
            f"\nThis workspace contains {total_files} Python file{'s' if total_files != 1 else ''}.\n\n"
        )

        for i, summary in enumerate(file_summaries, 1):
            workspace_summary += f"File {i} of {total_files}:\n{summary}\n"

        workspace_summary += f"\nEnd of Workspace Summary for {self.repo_path.name}"
        return workspace_summary

    def _generate_tree_structure(self) -> str:
        """
        Generate a tree-like structure of the repository.

        Every directory is listed; within a directory only ``.py`` files are listed.
        """
        tree = []
        for root, files in self._walk_python_files():
            # Depth below the repository root, taken from the path components
            # instead of string replacement on the path text.
            level = len(Path(root).relative_to(self.repo_path).parts)
            indent = "│ " * (level - 1) + "├── " if level > 0 else ""
            tree.append(f"{indent}{os.path.basename(root)}/")

            subindent = "│ " * level + "├── "
            tree.extend(f"{subindent}{file}" for file in files)

        return "\n".join(tree)

    def _summarize_file(self, file_path: Path, verbose_level: int, doc_str_level: int, sign_level: int) -> str:
        """Summarise the top-level classes and functions of one Python file."""
        summary = f"File: {file_path.relative_to(self.repo_path)}\n"
        summary += f"{'-' * 40}\n"

        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()

        try:
            tree = ast.parse(content)
        except (SyntaxError, ValueError) as e:
            # One broken file must not abort the summary of the whole workspace.
            return summary + f"Could not parse this file: {e}\n"

        function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
        classes = [node for node in ast.iter_child_nodes(tree) if isinstance(node, ast.ClassDef)]
        functions = [node for node in ast.iter_child_nodes(tree) if isinstance(node, function_types)]

        if classes:
            summary += f"This file contains {len(classes)} class{'es' if len(classes) > 1 else ''}.\n"
        if functions:
            summary += f"This file contains {len(functions)} top-level function{'s' if len(functions) > 1 else ''}.\n"

        for node in classes:
            summary += self._summarize_class(node, verbose_level, doc_str_level, sign_level)
        for node in functions:
            summary += self._summarize_function(node, verbose_level, doc_str_level, sign_level)

        return summary

    def _summarize_class(self, node: ast.ClassDef, verbose_level: int, doc_str_level: int, sign_level: int) -> str:
        """Summarise one class: name, first docstring sentence, method count and (verbose) its methods."""
        summary = f"\nClass: {node.name}\n"
        docstring = ast.get_docstring(node)
        if doc_str_level > 0 and docstring:
            summary += f" Description: {docstring.split('.')[0]}.\n"

        methods = [n for n in node.body if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))]
        if methods:
            summary += f" This class has {len(methods)} method{'s' if len(methods) > 1 else ''}.\n"

        if verbose_level > 1:
            for method in methods:
                summary += self._summarize_function(method, verbose_level, doc_str_level, sign_level, indent=" ")
        return summary

    @staticmethod
    def _format_signature(node: ast.FunctionDef) -> str:
        """Render ``name(params) -> returns`` without default values."""

        def fmt(arg: ast.arg) -> str:
            return f"{arg.arg}: {ast.unparse(arg.annotation)}" if arg.annotation else arg.arg

        spec = node.args
        params = [fmt(arg) for arg in spec.posonlyargs]
        if spec.posonlyargs:
            params.append("/")
        params.extend(fmt(arg) for arg in spec.args)
        if spec.vararg:
            params.append(f"*{spec.vararg.arg}")
        elif spec.kwonlyargs:
            # A bare "*" marks the start of keyword-only parameters.
            params.append("*")
        params.extend(fmt(arg) for arg in spec.kwonlyargs)
        if spec.kwarg:
            params.append(f"**{spec.kwarg.arg}")

        returns = f" -> {ast.unparse(node.returns)}" if node.returns else ""
        return f"{node.name}({', '.join(params)}){returns}"

    def _summarize_function(
        self, node: ast.FunctionDef, verbose_level: int, doc_str_level: int, sign_level: int, indent: str = ""
    ) -> str:
        """Summarise one function: name, optional signature and first docstring sentence."""
        summary = f"{indent}Function: {node.name}\n"
        if sign_level > 0:
            summary += f"{indent} Signature: {self._format_signature(node)}\n"

        doc = ast.get_docstring(node)
        if doc_str_level > 0 and doc:
            summary += f"{indent} Purpose: {doc.split('.')[0]}.\n"
        return summary

    def highlight(self, file_names: Union[str, List[str]]) -> Dict[str, str]:
        """
        Extract content from specified file(s) within the repo.

        :param file_names: A single file name or a list of file names to highlight
        :return: Dictionary of file names and their content; a missing file maps to
            a "File not found: <name>" message instead of its content
        """
        if isinstance(file_names, str):
            file_names = [file_names]

        highlighted_content = {}
        for file_name in file_names:
            file_path = self.repo_path / file_name
            if file_path.exists() and file_path.is_file():
                with open(file_path, "r", encoding="utf-8") as f:
                    highlighted_content[file_name] = f.read()
            else:
                highlighted_content[file_name] = f"File not found: {file_name}"

        return highlighted_content


if __name__ == "__main__":
    analyzer = RepoAnalyzer(repo_path="features")
    summary = analyzer.summarize_repo(verbose_level=2, doc_str_level=2, sign_level=2)
    print(summary)
    highlighted_files = analyzer.highlight(
        file_names=["utils/repo/repo_utils.py", "components/benchmark/eval_method.py"]
    )
    print("\nHighlighted Files:")
    for file_name, content in highlighted_files.items():
        print(f"\n{file_name}\n{'=' * len(file_name)}\n{content}")
+ + Returns: + List[Callable]: A list of steps combined from all base classes. + """ + # import pdb; pdb.set_trace() + steps = [] + for base in bases: + for step in LoopMeta._get_steps(base.__bases__) + getattr(base, "steps", []): + if step not in steps: + steps.append(step) + return steps + + def __new__(cls, clsname, bases, attrs): + """ + Create a new class with combined steps from base classes and current class. + + Args: + clsname (str): Name of the new class. + bases (tuple): Base classes. + attrs (dict): Attributes of the new class. + + Returns: + LoopMeta: A new instance of LoopMeta. + """ + steps = LoopMeta._get_steps(bases) # all the base classes of parents + for name, attr in attrs.items(): + if not name.startswith("__") and isinstance(attr, Callable): + if name not in steps: + # NOTE: if we override the step in the subclass + # Then it is not the new step. So we skip it. + steps.append(name) + attrs["steps"] = steps + return super().__new__(cls, clsname, bases, attrs) + + +@dataclass +class LoopTrace: + start: datetime.datetime # the start time of the trace + end: datetime.datetime # the end time of the trace + # TODO: more information about the trace + + +class LoopBase: + steps: list[Callable] # a list of steps to work on + loop_trace: dict[int, list[LoopTrace]] + + skip_loop_error: tuple[Exception] = field( + default_factory=tuple + ) # you can define a list of error that will skip current loop + + def __init__(self): + self.loop_idx = 0 # current loop index + self.step_idx = 0 # the index of next step to be run + self.loop_prev_out = {} # the step results of current loop + self.loop_trace = defaultdict(list[LoopTrace]) # the key is the number of loop + self.session_folder = logger.log_trace_path / "__session__" + + def run(self, step_n: int | None = None, stop_event: threading.Event = None): + """ + + Parameters + ---------- + step_n : int | None + How many steps to run; + `None` indicates to run forever until error or KeyboardInterrupt + """ + with 
tqdm(total=len(self.steps), desc="Workflow Progress", unit="step") as pbar: + while True: + if step_n is not None: + if step_n <= 0: + break + step_n -= 1 + + li, si = self.loop_idx, self.step_idx + + start = datetime.datetime.now(datetime.timezone.utc) + + name = self.steps[si] + func = getattr(self, name) + try: + self.loop_prev_out[name] = func(self.loop_prev_out) + + # TODO: Fix the error logger.exception(f"Skip loop {li} due to {e}") + except self.skip_loop_error as e: + logger.warning(f"Skip loop {li} due to {e}") + self.loop_idx += 1 + self.step_idx = 0 + continue + except CoderError as e: + logger.warning(f"Traceback loop {li} due to {e}") + self.step_idx = 0 + continue + + end = datetime.datetime.now(datetime.timezone.utc) + + self.loop_trace[li].append(LoopTrace(start, end)) + + # Update tqdm progress bar + pbar.set_postfix(loop_index=li, step_index=si, step_name=name) + pbar.update(1) + + # index increase and save session + self.step_idx = (self.step_idx + 1) % len(self.steps) + if self.step_idx == 0: # reset to step 0 in next round + self.loop_idx += 1 + self.loop_prev_out = {} + pbar.reset() # reset the progress bar for the next loop + self.dump(self.session_folder / f"{li}" / f"{si}_{name}") # save a snapshot after the session + + if stop_event is not None and stop_event.is_set(): + # break + raise Exception("Mining stopped by user") + + + def dump(self, path: str | Path): + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("wb") as f: + pickle.dump(self, f) + + @classmethod + def load(cls, path: str | Path): + path = Path(path) + with path.open("rb") as f: + session = pickle.load(f) + logger.set_trace_path(session.session_folder.parent) + + max_loop = max(session.loop_trace.keys()) + logger.storage.truncate(time=session.loop_trace[max_loop][-1].end) + return session diff --git a/artifacts/factorzoo/stock_1d/expressions/amihud_log_30d_winsor.dsl b/artifacts/factorzoo/stock_1d/expressions/amihud_log_30d_winsor.dsl 
deleted file mode 100644 index d2365403..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/amihud_log_30d_winsor.dsl +++ /dev/null @@ -1,5 +0,0 @@ -amihud = DIVIDE(ABS($ret), ADD($amount, 1e-12)) -amihud_win = CS_WINSORIZE(amihud, 0.01, 0.99) -amihud_log = LOG(ADD(amihud_win, 1e-12)) -amihud_smooth = TS_MEAN(amihud_log, 30) -CS_ZSCORE(amihud_smooth) diff --git a/artifacts/factorzoo/stock_1d/expressions/bps_leverage_to_price_ind_neutral.dsl b/artifacts/factorzoo/stock_1d/expressions/bps_leverage_to_price_ind_neutral.dsl deleted file mode 100644 index ee915ff8..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/bps_leverage_to_price_ind_neutral.dsl +++ /dev/null @@ -1,4 +0,0 @@ -bps_lev = MULTIPLY($funda_bps, $funda_debt_to_assets) -bps_lev_ratio = DIVIDE(bps_lev, $adj_close) -bps_lev_w = CS_WINSORIZE(bps_lev_ratio, 0.02, 0.98) -CS_NEUTRALIZE(CS_ZSCORE(bps_lev_w), $industry_sw_l1) diff --git a/artifacts/factorzoo/stock_1d/expressions/crowd_eff_fluency_vol18.dsl b/artifacts/factorzoo/stock_1d/expressions/crowd_eff_fluency_vol18.dsl deleted file mode 100644 index 695ff9f1..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/crowd_eff_fluency_vol18.dsl +++ /dev/null @@ -1,7 +0,0 @@ -w = 18 -price = $vwap * $adjfactor -ret = $ret -eff = TS_EFFICIENCY_RATIO(price, w) -crowd = CROWD_CONTRAST(eff, ret, w) -vol_w = TS_MEAN(ADD($amount/$float_cap, 1), w) -MULTIPLY(crowd, vol_w) diff --git a/artifacts/factorzoo/stock_1d/expressions/gap_streak_weighted_rank.dsl b/artifacts/factorzoo/stock_1d/expressions/gap_streak_weighted_rank.dsl deleted file mode 100644 index f5aba72b..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/gap_streak_weighted_rank.dsl +++ /dev/null @@ -1,12 +0,0 @@ -prev_close = DELAY($adj_close, 1) -gap = DIVIDE(SUBTRACT($adj_open, prev_close), prev_close) -gap_up = IF_THEN_ELSE(GT(gap, 0), gap, 0) -gap_dn = IF_THEN_ELSE(LT(gap, 0), ABS(gap), 0) -up_streak = TS_STREAK(GT(gap, 0)) -dn_streak = TS_STREAK(LT(gap, 0)) -up_weighted = 
MULTIPLY(CAST(up_streak, 'float'), gap_up) -dn_weighted = MULTIPLY(CAST(dn_streak, 'float'), gap_dn) -streak_net = SUBTRACT(up_weighted, dn_weighted) -streak_rank = RANK(streak_net) -streak_rank_ema3 = EMA(streak_rank, 3) -CS_NEUTRALIZE(streak_rank_ema3, CS_BUCKET(LOG($float_cap), 3)) diff --git a/artifacts/factorzoo/stock_1d/expressions/hl_div_amt_smooth20.dsl b/artifacts/factorzoo/stock_1d/expressions/hl_div_amt_smooth20.dsl deleted file mode 100644 index 9e8c8511..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/hl_div_amt_smooth20.dsl +++ /dev/null @@ -1,6 +0,0 @@ -avg_hl = TS_MEAN(SUBTRACT($adj_high, $adj_low), 20) -avg_close = TS_MEAN($adj_close, 20) -avg_hl_pct = DIVIDE(avg_hl, avg_close) -avg_amt = TS_MEAN($amount, 20) -raw = DIVIDE(avg_hl_pct, ADD(avg_amt, 1)) -LOG(ADD(raw, 1e-12)) diff --git a/artifacts/factorzoo/stock_1d/expressions/idio_qspread_win_20.dsl b/artifacts/factorzoo/stock_1d/expressions/idio_qspread_win_20.dsl deleted file mode 100644 index 2d699464..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/idio_qspread_win_20.dsl +++ /dev/null @@ -1,7 +0,0 @@ -ret = $ret -idio_ret = CS_DEMEAN(ret) -ret_win = CS_WINSORIZE(idio_ret, 0.01, 0.99) -q90 = TS_QUANTILE(ret_win, 20, 0.9) -q10 = TS_QUANTILE(ret_win, 20, 0.1) -raw_spread = SUBTRACT(q90, q10) -CS_NEUTRALIZE(raw_spread, CS_BUCKET(LOG($float_cap), 3)) diff --git a/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_gap.dsl b/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_gap.dsl deleted file mode 100644 index 65255518..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_gap.dsl +++ /dev/null @@ -1,5 +0,0 @@ -prev_close = DELAY($adj_close, 1) -overnight_gap = DIVIDE(SUBTRACT($adj_open, prev_close), prev_close) -gap_w = CS_WINSORIZE(overnight_gap, 0.01, 0.99) -gap_ma3 = TS_MEAN(gap_w, 3) -CS_NEUTRALIZE(gap_ma3, CS_BUCKET(LOG($float_cap), 3)) diff --git a/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_rel_strength_ema10.dsl 
b/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_rel_strength_ema10.dsl deleted file mode 100644 index 5a5423da..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_rel_strength_ema10.dsl +++ /dev/null @@ -1,8 +0,0 @@ -intraday_ret = DIVIDE(SUBTRACT($adj_close, $adj_open), $adj_open) -prev_close = DELAY($adj_close, 1) -overnight_ret = DIVIDE(SUBTRACT($adj_open, prev_close), prev_close) -rel_spread = DIVIDE(SUBTRACT(intraday_ret, overnight_ret), ADD(ABS(intraday_ret), ABS(overnight_ret))) -rel_spread_w = CS_WINSORIZE(rel_spread, 0.02, 0.98) -rel_spread_ema = EMA(rel_spread_w, 10) -neutralized = CS_NEUTRALIZE(rel_spread_ema, CS_BUCKET(LOG($float_cap), 10)) -CS_ZSCORE(neutralized) diff --git a/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_spread_ema10_neutral.dsl b/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_spread_ema10_neutral.dsl deleted file mode 100644 index d7bd9537..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/intraday_overnight_spread_ema10_neutral.dsl +++ /dev/null @@ -1,6 +0,0 @@ -intraday_ret = DIVIDE(SUBTRACT($adj_close, $adj_open), $adj_open) -overnight_ret = DIVIDE(SUBTRACT($adj_open, DELAY($adj_close, 1)), DELAY($adj_close, 1)) -spread = SUBTRACT(intraday_ret, overnight_ret) -spread_ema = EMA(CS_WINSORIZE(spread, 0.02, 0.98), 10) -neutralized = CS_NEUTRALIZE(spread_ema, CS_BUCKET(LOG($float_cap), 10)) -CS_ZSCORE(neutralized) diff --git a/artifacts/factorzoo/stock_1d/expressions/netprofit_yoy_lowvol_turnover_gaussian.dsl b/artifacts/factorzoo/stock_1d/expressions/netprofit_yoy_lowvol_turnover_gaussian.dsl deleted file mode 100644 index 5cabcbe2..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/netprofit_yoy_lowvol_turnover_gaussian.dsl +++ /dev/null @@ -1,9 +0,0 @@ -np_z = CS_ZSCORE(CS_WINSORIZE($funda_netprofit_yoy, 0.01, 0.99)) -vol_z = CS_ZSCORE(TS_STD($ret, 20)) -amt = TS_MEAN($amount, 20) -turnover_z = CS_ZSCORE(DIVIDE(amt, $float_cap)) -np_center = 
EXP(MULTIPLY(np_z, MULTIPLY(np_z, -0.5))) -vol_center = EXP(MULTIPLY(vol_z, MULTIPLY(vol_z, -0.5))) -to_center = EXP(MULTIPLY(turnover_z, MULTIPLY(turnover_z, -0.5))) -score = ADD(ADD(np_center, vol_center), to_center) -CS_NEUTRALIZE(CS_WINSORIZE(score, 0.01, 0.99), CS_BUCKET(LOG($float_cap), 10)) diff --git a/artifacts/factorzoo/stock_1d/expressions/roe_lowvol_center_smooth.dsl b/artifacts/factorzoo/stock_1d/expressions/roe_lowvol_center_smooth.dsl deleted file mode 100644 index 9ee9c6d1..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/roe_lowvol_center_smooth.dsl +++ /dev/null @@ -1,7 +0,0 @@ -roe_r = RANK(CS_WINSORIZE($funda_roe, 0.01, 0.99)) -vol = TS_STD($ret, 20) -vol_r = RANK(DIVIDE(1, ADD(vol, 0.001))) -roe_center = MULTIPLY(roe_r, SUBTRACT(1, roe_r)) -vol_center = MULTIPLY(vol_r, SUBTRACT(1, vol_r)) -score = ADD(roe_center, vol_center) -CS_NEUTRALIZE(CS_WINSORIZE(score, 0.01, 0.99), CS_BUCKET(LOG($float_cap), 10)) diff --git a/artifacts/factorzoo/stock_1d/expressions/roe_yoy_low_asset_equity_quality.dsl b/artifacts/factorzoo/stock_1d/expressions/roe_yoy_low_asset_equity_quality.dsl deleted file mode 100644 index 542a16d4..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/roe_yoy_low_asset_equity_quality.dsl +++ /dev/null @@ -1,15 +0,0 @@ -lev = DIVIDE($funda_fs_total_assets, $funda_fs_total_equity) -lev_win = CS_WINSORIZE(lev, 0.01, 0.99) -lev_z = CS_ZSCORE(lev_win) -lev_ind = CS_NEUTRALIZE(lev_z, $industry_sw_l1) -lev_size = CS_NEUTRALIZE(lev_ind, CS_BUCKET(LOG($float_cap), 10)) -roe_win = CS_WINSORIZE($funda_roe, 0.01, 0.99) -roe_z = CS_ZSCORE(roe_win) -roe_ind = CS_NEUTRALIZE(roe_z, $industry_sw_l1) -roe_size = CS_NEUTRALIZE(roe_ind, CS_BUCKET(LOG($float_cap), 10)) -yoy_win = CS_WINSORIZE($funda_netprofit_yoy, 0.01, 0.99) -yoy_z = CS_ZSCORE(yoy_win) -yoy_ind = CS_NEUTRALIZE(yoy_z, $industry_sw_l1) -yoy_size = CS_NEUTRALIZE(yoy_ind, CS_BUCKET(LOG($float_cap), 10)) -quality = RANK(roe_size) + RANK(yoy_size) - RANK(lev_size) -quality diff 
--git a/artifacts/factorzoo/stock_1d/expressions/vpin_amount_60d_2m_min2.dsl b/artifacts/factorzoo/stock_1d/expressions/vpin_amount_60d_2m_min2.dsl deleted file mode 100644 index def531d9..00000000 --- a/artifacts/factorzoo/stock_1d/expressions/vpin_amount_60d_2m_min2.dsl +++ /dev/null @@ -1,2 +0,0 @@ -vpin = VOLUME_CLOCK_VPIN($adj_close, $amount, 60, 2000000, 'tick', 2, 20) -CS_ZSCORE(vpin) diff --git a/artifacts/factorzoo/stock_1d/mining_delivered_registry.json b/artifacts/factorzoo/stock_1d/mining_delivered_registry.json deleted file mode 100644 index 5a149e7f..00000000 --- a/artifacts/factorzoo/stock_1d/mining_delivered_registry.json +++ /dev/null @@ -1,930 +0,0 @@ -{ - "intraday_overnight_spread_ema10_neutral": { - "name": "intraday_overnight_spread_ema10_neutral", - "comment": "日内隔夜收益价差动量:分别计算日内的 adj_close/adj_open 收益与隔夜 adj_open/delay(adj_close,1) 收益,取日内收益减隔夜收益作为 spread,经截面 2%/98% 缩尾后用 EMA(span=10) 平滑,最后按流通市值对数 10 组中性化并截面 z-score。因子为负 IC:近期持续“日内强于隔夜”的股票倾向于次日开盘反向收益,反映日内情绪透支与隔夜信息不足的均值回归。", - "expression_file": "artifacts/factorzoo/stock_1d/expressions/intraday_overnight_spread_ema10_neutral.dsl", - "ingest_config": { - "train_start": "2019-01-01", - "ingest_start": "2019-01-01", - "ingest_end": "2024-12-31", - "label_col": "label_1d_open_to_open" - }, - "ingested_at": "2026-07-01T14:35:47.200713+00:00", - "ingest_metrics": { - "coverage": 0.999848806979274, - "ic": -0.032209384410660825, - "icir": -0.3059823926357548, - "rank_ic": -0.04119767180642285, - "n_days": 1456, - "cs_pearson_autocorr": 0.8549643093744185, - "decile_mean_label": [ - { - "decile": 1, - "mean_label": 0.001123 - }, - { - "decile": 2, - "mean_label": 0.000871 - }, - { - "decile": 3, - "mean_label": 0.000889 - }, - { - "decile": 4, - "mean_label": 0.000912 - }, - { - "decile": 5, - "mean_label": 0.000827 - }, - { - "decile": 6, - "mean_label": 0.00094 - }, - { - "decile": 7, - "mean_label": 0.000894 - }, - { - "decile": 8, - "mean_label": 0.000791 - }, - { - "decile": 9, - "mean_label": 
0.000512 - }, - { - "decile": 10, - "mean_label": -0.001527 - } - ], - "mls_fmb": { - "mean_rho": -0.08058608058608058, - "mean_ls": -0.0026450714977555093, - "ir_ls": -0.23224180262796465, - "ir_ls_annual": -3.6867243227216484, - "mls": 0.2970986633695101, - "nw_t_rho": -4.875394206691405, - "nw_t_ls": -9.42288491294723, - "nw_se_rho": 0.0165291414744427, - "nw_se_ls": 0.0002807071849217991, - "n_days_rho": 1456, - "n_days_ls": 1456, - "nw_lags": 7, - "n_deciles": 10, - "n_deciles_requested": 10, - "min_stocks": 30, - "min_stocks_requested": 30, - "annualization_factor": 252.0, - "note": "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;IR_LS 为日频 LS 均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - }, - "label_col": "label_1d_open_to_open", - "skew": 0.7308265541211697, - "kurt": 4.005278555633155, - "eval_start": "2019-01-01", - "eval_end": "2024-12-31", - "finite_ratio": 0.7183476074732533 - }, - "ingest_status": "stored", - "similarity": { - "col_idx": 5, - "n_factors": 6, - "kind": "cross_sectional_pearson_mean", - "max_abs_corr": 0.4851318426316107, - "top_neighbors": [ - { - "factor_id": "intraday_overnight_gap", - "name": "intraday_overnight_gap", - "cs_corr": -0.4851318426316107, - "expr": "prev_close = DELAY($adj_close, 1)\novernight_gap = DIVIDE(SUBTRACT($adj_open, prev_close), prev_close)\ngap_w = CS_WINSORIZE(overnight_gap, 0.01, 0.99)\ngap_ma3 = TS_MEAN(gap_w, 3)\nCS_NEUTRALIZE(gap_ma3, CS_BUCKET(LOG($float_cap), 3))" - }, - { - "factor_id": "gap_streak_weighted_rank", - "name": "gap_streak_weighted_rank", - "cs_corr": -0.4691724829506931, - "expr": "prev_close = DELAY($adj_close, 1)\ngap = DIVIDE(SUBTRACT($adj_open, prev_close), prev_close)\ngap_up = IF_THEN_ELSE(GT(gap, 0), gap, 0)\ngap_dn = IF_THEN_ELSE(LT(gap, 0), ABS(gap), 0)\nup_streak = TS_STREAK(GT(gap, 0))\ndn_streak = TS_STREAK(LT(gap, 0))\nup_weighted = MULTIPLY(CAST(up_streak, 'float'), gap_up)\ndn_weighted = MULTIPLY(CAST(dn_streak, 'float'), gap_dn)\nstreak_net = 
SUBTRACT(up_weighted, dn_weighted)\nstreak_rank = RANK(streak_net)\nstreak_rank_ema3 = EMA(streak_rank, 3)\nCS_NEUTRALIZE(streak_rank_ema3, CS_BUCKET(LOG($float_cap), 3))" - }, - { - "factor_id": "idio_qspread_win_20", - "name": "idio_qspread_win_20", - "cs_corr": 0.2877535108467391, - "expr": "ret = $ret\nidio_ret = CS_DEMEAN(ret)\nret_win = CS_WINSORIZE(idio_ret, 0.01, 0.99)\nq90 = TS_QUANTILE(ret_win, 20, 0.9)\nq10 = TS_QUANTILE(ret_win, 20, 0.1)\nraw_spread = SUBTRACT(q90, q10)\nCS_NEUTRALIZE(raw_spread, CS_BUCKET(LOG($float_cap), 3))" - } - ] - }, - "source": "submit" - }, - "intraday_overnight_rel_strength_ema10": { - "name": "intraday_overnight_rel_strength_ema10", - "comment": "日内隔夜相对强度:计算日内收益率与隔夜收益率之差,并除以两者绝对值之和进行标准化,得到相对强度。经截面 2%/98% 缩尾后 EMA(span=10) 平滑,再按流通市值对数 10 组中性化并截面 z-score。因子为负 IC:日内相对隔夜表现越强,次日开盘越倾向于反向收益,反映短期信息拥挤后的均值回归。", - "expression_file": "artifacts/factorzoo/stock_1d/expressions/intraday_overnight_rel_strength_ema10.dsl", - "ingest_config": { - "train_start": "2019-01-01", - "ingest_start": "2019-01-01", - "ingest_end": "2024-12-31", - "label_col": "label_1d_open_to_open" - }, - "ingested_at": "2026-07-01T14:41:26.057533+00:00", - "ingest_metrics": { - "coverage": 0.999848806979274, - "ic": -0.01943329775653871, - "icir": -0.24318489507472868, - "rank_ic": -0.03024059030506239, - "n_days": 1456, - "cs_pearson_autocorr": 0.8458426127457558, - "decile_mean_label": [ - { - "decile": 1, - "mean_label": 0.001041 - }, - { - "decile": 2, - "mean_label": 0.000879 - }, - { - "decile": 3, - "mean_label": 0.000942 - }, - { - "decile": 4, - "mean_label": 0.000903 - }, - { - "decile": 5, - "mean_label": 0.000845 - }, - { - "decile": 6, - "mean_label": 0.000823 - }, - { - "decile": 7, - "mean_label": 0.000861 - }, - { - "decile": 8, - "mean_label": 0.000554 - }, - { - "decile": 9, - "mean_label": 0.000222 - }, - { - "decile": 10, - "mean_label": -0.000839 - } - ], - "mls_fmb": { - "mean_rho": -0.10285547785547787, - "mean_ls": -0.0019830469484266246, - 
"ir_ls": -0.23138603547056552, - "ir_ls_annual": -3.6731394402497197, - "mls": 0.37780251235668744, - "nw_t_rho": -5.801506074437481, - "nw_t_ls": -8.879541253817344, - "nw_se_rho": 0.017729099398633454, - "nw_se_ls": 0.0002233276350367882, - "n_days_rho": 1456, - "n_days_ls": 1456, - "nw_lags": 7, - "n_deciles": 10, - "n_deciles_requested": 10, - "min_stocks": 30, - "min_stocks_requested": 30, - "annualization_factor": 252.0, - "note": "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;IR_LS 为日频 LS 均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - }, - "label_col": "label_1d_open_to_open", - "skew": 0.01464175099611569, - "kurt": -0.010009687984218196, - "eval_start": "2019-01-01", - "eval_end": "2024-12-31", - "finite_ratio": 0.7183474457663267 - }, - "ingest_status": "stored", - "similarity": { - "col_idx": 6, - "n_factors": 7, - "kind": "cross_sectional_pearson_mean", - "max_abs_corr": 0.7637510871752855, - "top_neighbors": [ - { - "factor_id": "intraday_overnight_spread_ema10_neutral", - "name": "intraday_overnight_spread_ema10_neutral", - "cs_corr": 0.7637510871752855, - "expr": "intraday_ret = DIVIDE(SUBTRACT($adj_close, $adj_open), $adj_open)\novernight_ret = DIVIDE(SUBTRACT($adj_open, DELAY($adj_close, 1)), DELAY($adj_close, 1))\nspread = SUBTRACT(intraday_ret, overnight_ret)\nspread_ema = EMA(CS_WINSORIZE(spread, 0.02, 0.98), 10)\nneutralized = CS_NEUTRALIZE(spread_ema, CS_BUCKET(LOG($float_cap), 10))\nCS_ZSCORE(neutralized)" - }, - { - "factor_id": "gap_streak_weighted_rank", - "name": "gap_streak_weighted_rank", - "cs_corr": -0.513191543357923, - "expr": "prev_close = DELAY($adj_close, 1)\ngap = DIVIDE(SUBTRACT($adj_open, prev_close), prev_close)\ngap_up = IF_THEN_ELSE(GT(gap, 0), gap, 0)\ngap_dn = IF_THEN_ELSE(LT(gap, 0), ABS(gap), 0)\nup_streak = TS_STREAK(GT(gap, 0))\ndn_streak = TS_STREAK(LT(gap, 0))\nup_weighted = MULTIPLY(CAST(up_streak, 'float'), gap_up)\ndn_weighted = MULTIPLY(CAST(dn_streak, 'float'), gap_dn)\nstreak_net = 
SUBTRACT(up_weighted, dn_weighted)\nstreak_rank = RANK(streak_net)\nstreak_rank_ema3 = EMA(streak_rank, 3)\nCS_NEUTRALIZE(streak_rank_ema3, CS_BUCKET(LOG($float_cap), 3))" - }, - { - "factor_id": "intraday_overnight_gap", - "name": "intraday_overnight_gap", - "cs_corr": -0.415126285025848, - "expr": "prev_close = DELAY($adj_close, 1)\novernight_gap = DIVIDE(SUBTRACT($adj_open, prev_close), prev_close)\ngap_w = CS_WINSORIZE(overnight_gap, 0.01, 0.99)\ngap_ma3 = TS_MEAN(gap_w, 3)\nCS_NEUTRALIZE(gap_ma3, CS_BUCKET(LOG($float_cap), 3))" - } - ] - }, - "source": "submit" - }, - "vpin_amount_60d_2m_min2": { - "name": "vpin_amount_60d_2m_min2", - "comment": "VPIN (Volume-Synchronized Probability of Informed Trading) using dollar turnover ($amount) instead of share volume. VPIN measures the imbalance between buy- and sell-initiated volume on volume buckets, normalized by short-term volatility. Higher VPIN indicates greater informed trading pressure, which in this A-share daily panel predicts lower next-day open-to-open returns (negative IC). Price input is $adj_close, bucket size is $2M turnover, min_buckets=2, classification='tick', and a 60-day rolling window. The final factor is cross-sectionally z-scored. 
Stronger and more robust than shorter-window variants, with monotonic decile spread (D1 > D10).", - "expression_file": "artifacts/factorzoo/stock_1d/expressions/vpin_amount_60d_2m_min2.dsl", - "ingest_config": { - "train_start": "2019-01-01", - "ingest_start": "2019-01-01", - "ingest_end": "2024-12-31", - "label_col": "label_1d_open_to_open" - }, - "ingested_at": "2026-07-01T16:45:08.554634+00:00", - "ingest_metrics": { - "coverage": 0.998132354880939, - "ic": -0.012011769516465647, - "icir": -0.14305345122290872, - "rank_ic": -0.01922051528666379, - "n_days": 1456, - "cs_pearson_autocorr": 0.9994099417882606, - "decile_mean_label": [ - { - "decile": 1, - "mean_label": 0.001099 - }, - { - "decile": 2, - "mean_label": 0.000746 - }, - { - "decile": 3, - "mean_label": 0.000836 - }, - { - "decile": 4, - "mean_label": 0.000695 - }, - { - "decile": 5, - "mean_label": 0.000678 - }, - { - "decile": 6, - "mean_label": 0.000574 - }, - { - "decile": 7, - "mean_label": 0.000601 - }, - { - "decile": 8, - "mean_label": 0.000527 - }, - { - "decile": 9, - "mean_label": 0.000299 - }, - { - "decile": 10, - "mean_label": -0.000106 - } - ], - "mls_fmb": { - "mean_rho": -0.09703629703629703, - "mean_ls": -0.0011031321258723283, - "ir_ls": -0.1309794352446955, - "ir_ls_annual": -2.0792340751269167, - "mls": 0.20176117532200583, - "nw_t_rho": -5.951534026282151, - "nw_t_ls": -4.640871529474645, - "nw_se_rho": 0.016304417753100604, - "nw_se_ls": 0.00023769934566518906, - "n_days_rho": 1456, - "n_days_ls": 1456, - "nw_lags": 7, - "n_deciles": 10, - "n_deciles_requested": 10, - "min_stocks": 30, - "min_stocks_requested": 30, - "annualization_factor": 252.0, - "note": "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;IR_LS 为日频 LS 均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - }, - "label_col": "label_1d_open_to_open", - "skew": 1.6411296566810207, - "kurt": 4.562078084967922, - "eval_start": "2019-01-01", - "eval_end": "2024-12-31", - "finite_ratio": 0.7173505225640185 - }, - 
"ingest_status": "stored", - "similarity": { - "col_idx": 7, - "n_factors": 8, - "kind": "cross_sectional_pearson_mean", - "max_abs_corr": 0.5946685966464244, - "top_neighbors": [ - { - "factor_id": "hl_div_amt_smooth20", - "name": "hl_div_amt_smooth20", - "cs_corr": -0.5946685966464244, - "expr": "avg_hl = TS_MEAN(SUBTRACT($adj_high, $adj_low), 20)\navg_close = TS_MEAN($adj_close, 20)\navg_hl_pct = DIVIDE(avg_hl, avg_close)\navg_amt = TS_MEAN($amount, 20)\nraw = DIVIDE(avg_hl_pct, ADD(avg_amt, 1))\nLOG(ADD(raw, 1e-12))" - }, - { - "factor_id": "idio_qspread_win_20", - "name": "idio_qspread_win_20", - "cs_corr": 0.15154913926092134, - "expr": "ret = $ret\nidio_ret = CS_DEMEAN(ret)\nret_win = CS_WINSORIZE(idio_ret, 0.01, 0.99)\nq90 = TS_QUANTILE(ret_win, 20, 0.9)\nq10 = TS_QUANTILE(ret_win, 20, 0.1)\nraw_spread = SUBTRACT(q90, q10)\nCS_NEUTRALIZE(raw_spread, CS_BUCKET(LOG($float_cap), 3))" - }, - { - "factor_id": "gap_streak_weighted_rank", - "name": "gap_streak_weighted_rank", - "cs_corr": -0.07785233222250788, - "expr": "prev_close = DELAY($adj_close, 1)\ngap = DIVIDE(SUBTRACT($adj_open, prev_close), prev_close)\ngap_up = IF_THEN_ELSE(GT(gap, 0), gap, 0)\ngap_dn = IF_THEN_ELSE(LT(gap, 0), ABS(gap), 0)\nup_streak = TS_STREAK(GT(gap, 0))\ndn_streak = TS_STREAK(LT(gap, 0))\nup_weighted = MULTIPLY(CAST(up_streak, 'float'), gap_up)\ndn_weighted = MULTIPLY(CAST(dn_streak, 'float'), gap_dn)\nstreak_net = SUBTRACT(up_weighted, dn_weighted)\nstreak_rank = RANK(streak_net)\nstreak_rank_ema3 = EMA(streak_rank, 3)\nCS_NEUTRALIZE(streak_rank_ema3, CS_BUCKET(LOG($float_cap), 3))" - } - ] - }, - "source": "submit" - }, - "amihud_log_30d_winsor": { - "name": "amihud_log_30d_winsor", - "comment": "Amihud illiquidity proxy: daily absolute return divided by dollar turnover, log-transformed, winsorized at 1%/99%, and smoothed over 20 days (raw window in implementation uses 30-day lookback to match effective smoothing). 
Higher values indicate less liquid/higher price impact stocks; lower values indicate more liquid stocks. In this A-share daily panel, more liquid stocks (low Amihud) tend to earn higher next-day open-to-open returns (positive IC). Cross-sectionally z-scored. Robust across train and val with strong monotonic decile spread and high coverage.", - "expression_file": "artifacts/factorzoo/stock_1d/expressions/amihud_log_30d_winsor.dsl", - "ingest_config": { - "train_start": "2019-01-01", - "ingest_start": "2019-01-01", - "ingest_end": "2024-12-31", - "label_col": "label_1d_open_to_open" - }, - "ingested_at": "2026-07-01T16:59:17.535106+00:00", - "ingest_metrics": { - "coverage": 0.999851927285518, - "ic": 0.014780469764999792, - "icir": 0.1689962227102664, - "rank_ic": 0.015616857945947332, - "n_days": 1456, - "cs_pearson_autocorr": 0.9921186278047275, - "decile_mean_label": [ - { - "decile": 1, - "mean_label": -2.1e-05 - }, - { - "decile": 2, - "mean_label": 0.000219 - }, - { - "decile": 3, - "mean_label": 0.000327 - }, - { - "decile": 4, - "mean_label": 0.000503 - }, - { - "decile": 5, - "mean_label": 0.000615 - }, - { - "decile": 6, - "mean_label": 0.000697 - }, - { - "decile": 7, - "mean_label": 0.000778 - }, - { - "decile": 8, - "mean_label": 0.000952 - }, - { - "decile": 9, - "mean_label": 0.000946 - }, - { - "decile": 10, - "mean_label": 0.00121 - } - ], - "mls_fmb": { - "mean_rho": 0.10247252747252747, - "mean_ls": 0.0012257134058327073, - "ir_ls": 0.13028410006199537, - "ir_ls_annual": 2.0681959712993674, - "mls": 0.21193326848754507, - "nw_t_rho": 5.447300180862563, - "nw_t_ls": 4.466216276901322, - "nw_se_rho": 0.01881161751146625, - "nw_se_ls": 0.00027444112193400364, - "n_days_rho": 1456, - "n_days_ls": 1456, - "nw_lags": 7, - "n_deciles": 10, - "n_deciles_requested": 10, - "min_stocks": 30, - "min_stocks_requested": 30, - "annualization_factor": 252.0, - "note": "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;IR_LS 为日频 LS 
均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - }, - "label_col": "label_1d_open_to_open", - "skew": -0.23124618709744238, - "kurt": 0.298366005763965, - "eval_start": "2019-01-01", - "eval_end": "2024-12-31", - "finite_ratio": 0.7183493862494456 - }, - "ingest_status": "stored", - "similarity": { - "col_idx": 8, - "n_factors": 9, - "kind": "cross_sectional_pearson_mean", - "max_abs_corr": 0.7887964311609298, - "top_neighbors": [ - { - "factor_id": "hl_div_amt_smooth20", - "name": "hl_div_amt_smooth20", - "cs_corr": 0.7887964311609298, - "expr": "avg_hl = TS_MEAN(SUBTRACT($adj_high, $adj_low), 20)\navg_close = TS_MEAN($adj_close, 20)\navg_hl_pct = DIVIDE(avg_hl, avg_close)\navg_amt = TS_MEAN($amount, 20)\nraw = DIVIDE(avg_hl_pct, ADD(avg_amt, 1))\nLOG(ADD(raw, 1e-12))" - }, - { - "factor_id": "vpin_amount_60d_2m_min2", - "name": "vpin_amount_60d_2m_min2", - "cs_corr": -0.5175624522476836, - "expr": "vpin = VOLUME_CLOCK_VPIN($adj_close, $amount, 60, 2000000, 'tick', 2, 20)\nCS_ZSCORE(vpin)" - }, - { - "factor_id": "idio_qspread_win_20", - "name": "idio_qspread_win_20", - "cs_corr": -0.07675178612131603, - "expr": "ret = $ret\nidio_ret = CS_DEMEAN(ret)\nret_win = CS_WINSORIZE(idio_ret, 0.01, 0.99)\nq90 = TS_QUANTILE(ret_win, 20, 0.9)\nq10 = TS_QUANTILE(ret_win, 20, 0.1)\nraw_spread = SUBTRACT(q90, q10)\nCS_NEUTRALIZE(raw_spread, CS_BUCKET(LOG($float_cap), 3))" - } - ] - }, - "source": "submit" - }, - "roe_lowvol_center_smooth": { - "name": "roe_lowvol_center_smooth", - "comment": "以ROE和日内收益波动率为核心的基本面质量中心因子。首先对ROE截面温莎化并转秩,再用抛物线变换 rank*(1-rank) 使中等ROE(既非极端低质量也非极端高估)得分最高;对20日收益波动率取倒数并同样做秩与抛物线中心变换,偏好中等低波动。两中心得分相加后,经截面温莎化与市值十分组中性化,最终形成“质量适中、波动适中”的中心化选股信号。适用于10日持有期的基本面低频alpha。IC方向为正。", - "expression_file": "artifacts/factorzoo/stock_1d/expressions/roe_lowvol_center_smooth.dsl", - "ingest_config": { - "train_start": "2018-01-01", - "ingest_start": "2018-01-01", - "ingest_end": "2023-12-31", - "label_col": "label_10d_close_to_close" - }, - "ingested_at": 
"2026-07-02T05:12:28.763258+00:00", - "ingest_metrics": { - "coverage": 0.9996466718013501, - "ic": 0.023358469415137095, - "icir": 0.4561095480646409, - "rank_ic": 0.0320924738178898, - "n_days": 1457, - "cs_pearson_autocorr": 0.9745005136816224, - "decile_mean_label": [ - { - "decile": 1, - "mean_label": -0.002281 - }, - { - "decile": 2, - "mean_label": 0.000837 - }, - { - "decile": 3, - "mean_label": 0.001311 - }, - { - "decile": 4, - "mean_label": 0.002368 - }, - { - "decile": 5, - "mean_label": 0.003329 - }, - { - "decile": 6, - "mean_label": 0.004069 - }, - { - "decile": 7, - "mean_label": 0.004626 - }, - { - "decile": 8, - "mean_label": 0.00507 - }, - { - "decile": 9, - "mean_label": 0.004598 - }, - { - "decile": 10, - "mean_label": 0.004272 - } - ], - "mls_fmb": { - "mean_rho": 0.21987479461741646, - "mean_ls": 0.006320498176603308, - "ir_ls": 0.4131678353907721, - "ir_ls_annual": 6.558836053049126, - "mls": 1.4421227300934831, - "nw_t_rho": 7.19809948234407, - "nw_t_ls": 6.768979698575461, - "nw_se_rho": 0.030546228925668304, - "nw_se_ls": 0.0009337445904784533, - "n_days_rho": 1457, - "n_days_ls": 1457, - "nw_lags": 7, - "n_deciles": 10, - "n_deciles_requested": 10, - "min_stocks": 30, - "min_stocks_requested": 30, - "annualization_factor": 252.0, - "note": "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;IR_LS 为日频 LS 均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - }, - "label_col": "label_10d_close_to_close", - "skew": -0.4013955341884441, - "kurt": -0.47072499007458735, - "eval_start": "2018-01-01", - "eval_end": "2023-12-31", - "finite_ratio": 0.7980088055889795 - }, - "ingest_status": "stored", - "similarity": { - "col_idx": 9, - "n_factors": 10, - "kind": "cross_sectional_pearson_mean", - "max_abs_corr": 0.30566881137110025, - "top_neighbors": [ - { - "factor_id": "idio_qspread_win_20", - "name": "idio_qspread_win_20", - "cs_corr": -0.30566881137110025, - "expr": "ret = $ret\nidio_ret = CS_DEMEAN(ret)\nret_win = CS_WINSORIZE(idio_ret, 
0.01, 0.99)\nq90 = TS_QUANTILE(ret_win, 20, 0.9)\nq10 = TS_QUANTILE(ret_win, 20, 0.1)\nraw_spread = SUBTRACT(q90, q10)\nCS_NEUTRALIZE(raw_spread, CS_BUCKET(LOG($float_cap), 3))" - }, - { - "factor_id": "intraday_overnight_spread_ema10_neutral", - "name": "intraday_overnight_spread_ema10_neutral", - "cs_corr": -0.09314836383426792, - "expr": "intraday_ret = DIVIDE(SUBTRACT($adj_close, $adj_open), $adj_open)\novernight_ret = DIVIDE(SUBTRACT($adj_open, DELAY($adj_close, 1)), DELAY($adj_close, 1))\nspread = SUBTRACT(intraday_ret, overnight_ret)\nspread_ema = EMA(CS_WINSORIZE(spread, 0.02, 0.98), 10)\nneutralized = CS_NEUTRALIZE(spread_ema, CS_BUCKET(LOG($float_cap), 10))\nCS_ZSCORE(neutralized)" - }, - { - "factor_id": "crowd_eff_fluency_vol18", - "name": "crowd_eff_fluency_vol18", - "cs_corr": -0.08653398255641621, - "expr": "w = 18\nprice = $vwap * $adjfactor\nret = $ret\neff = TS_EFFICIENCY_RATIO(price, w)\ncrowd = CROWD_CONTRAST(eff, ret, w)\nvol_w = TS_MEAN(ADD($amount/$float_cap, 1), w)\nMULTIPLY(crowd, vol_w)" - } - ] - }, - "source": "submit" - }, - "netprofit_yoy_lowvol_turnover_gaussian": { - "name": "netprofit_yoy_lowvol_turnover_gaussian", - "comment": "高斯中心化的净利润同比增长、低波动与低换手三维复合因子。对净利润同比增速($funda_netprofit_yoy)、20日收益波动率和20日换手率(amount/float_cap)分别截面温莎化并z-score,再用exp(-0.5*z^2)压缩极端高/低值,使处于中等水平的盈利增长、波动率和换手率得分最高;三项相加后经温莎化与市值十分组中性化。正IC,适用于10日持有期的基本面低频alpha。", - "expression_file": "artifacts/factorzoo/stock_1d/expressions/netprofit_yoy_lowvol_turnover_gaussian.dsl", - "ingest_config": { - "train_start": "2018-01-01", - "ingest_start": "2018-01-01", - "ingest_end": "2023-12-31", - "label_col": "label_10d_close_to_close" - }, - "ingested_at": "2026-07-02T05:30:16.439028+00:00", - "ingest_metrics": { - "coverage": 0.9917628341099101, - "ic": 0.03713709612897651, - "icir": 0.47485256771247863, - "rank_ic": 0.04153618822591869, - "n_days": 1446, - "cs_pearson_autocorr": 0.9841478018389902, - "decile_mean_label": [ - { - "decile": 1, - "mean_label": -0.00483 - }, - { - 
"decile": 2, - "mean_label": 0.00168 - }, - { - "decile": 3, - "mean_label": 0.002393 - }, - { - "decile": 4, - "mean_label": 0.002722 - }, - { - "decile": 5, - "mean_label": 0.002708 - }, - { - "decile": 6, - "mean_label": 0.00283 - }, - { - "decile": 7, - "mean_label": 0.003288 - }, - { - "decile": 8, - "mean_label": 0.003705 - }, - { - "decile": 9, - "mean_label": 0.00587 - }, - { - "decile": 10, - "mean_label": 0.006957 - } - ], - "mls_fmb": { - "mean_rho": 0.1965547592103609, - "mean_ls": 0.01145101422834596, - "ir_ls": 0.5170494275926762, - "ir_ls_annual": 8.207905205631116, - "mls": 1.6133028313142916, - "nw_t_rho": 7.490461172515917, - "nw_t_ls": 8.219526352486271, - "nw_se_rho": 0.026240675264636814, - "nw_se_ls": 0.0013931476994270134, - "n_days_rho": 1446, - "n_days_ls": 1446, - "nw_lags": 7, - "n_deciles": 10, - "n_deciles_requested": 10, - "min_stocks": 30, - "min_stocks_requested": 30, - "annualization_factor": 252.0, - "note": "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;IR_LS 为日频 LS 均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - }, - "label_col": "label_10d_close_to_close", - "skew": -1.4138127447215347, - "kurt": 1.7887258964462647, - "eval_start": "2018-01-01", - "eval_end": "2023-12-31", - "finite_ratio": 0.7936726343529872 - }, - "ingest_status": "stored", - "similarity": { - "col_idx": 10, - "n_factors": 11, - "kind": "cross_sectional_pearson_mean", - "max_abs_corr": 0.5818793284595696, - "top_neighbors": [ - { - "factor_id": "roe_lowvol_center_smooth", - "name": "roe_lowvol_center_smooth", - "cs_corr": 0.5818793284595696, - "expr": "roe_r = RANK(CS_WINSORIZE($funda_roe, 0.01, 0.99))\nvol = TS_STD($ret, 20)\nvol_r = RANK(DIVIDE(1, ADD(vol, 0.001)))\nroe_center = MULTIPLY(roe_r, SUBTRACT(1, roe_r))\nvol_center = MULTIPLY(vol_r, SUBTRACT(1, vol_r))\nscore = ADD(roe_center, vol_center)\nCS_NEUTRALIZE(CS_WINSORIZE(score, 0.01, 0.99), CS_BUCKET(LOG($float_cap), 10))" - }, - { - "factor_id": "idio_qspread_win_20", - "name": 
"idio_qspread_win_20", - "cs_corr": -0.4800749855396444, - "expr": "ret = $ret\nidio_ret = CS_DEMEAN(ret)\nret_win = CS_WINSORIZE(idio_ret, 0.01, 0.99)\nq90 = TS_QUANTILE(ret_win, 20, 0.9)\nq10 = TS_QUANTILE(ret_win, 20, 0.1)\nraw_spread = SUBTRACT(q90, q10)\nCS_NEUTRALIZE(raw_spread, CS_BUCKET(LOG($float_cap), 3))" - }, - { - "factor_id": "hl_div_amt_smooth20", - "name": "hl_div_amt_smooth20", - "cs_corr": 0.2019463433173388, - "expr": "avg_hl = TS_MEAN(SUBTRACT($adj_high, $adj_low), 20)\navg_close = TS_MEAN($adj_close, 20)\navg_hl_pct = DIVIDE(avg_hl, avg_close)\navg_amt = TS_MEAN($amount, 20)\nraw = DIVIDE(avg_hl_pct, ADD(avg_amt, 1))\nLOG(ADD(raw, 1e-12))" - } - ] - }, - "source": "submit" - }, - "roe_yoy_low_asset_equity_quality": { - "name": "roe_yoy_low_asset_equity_quality", - "comment": "基本面质量+债务杠杆复合因子:在行业中性化与市值中性化后,将 ROE 与归母净利同比增速(YoY)的截面排序相加,再减去总资产/净资产杠杆(Assets-to-Equity)的截面排序,形成“高盈利、高成长、低杠杆”的综合质量信号。因子的经济学含义在于:在剔除行业与规模差异后,财务稳健且盈利质量高的公司具有长期风险收益优势。因子对 10 日 close-to-close 收益呈现正向预测能力。", - "expression_file": "artifacts/factorzoo/stock_1d/expressions/roe_yoy_low_asset_equity_quality.dsl", - "ingest_config": { - "train_start": "2018-01-01", - "ingest_start": "2018-01-01", - "ingest_end": "2023-12-31", - "label_col": "label_10d_close_to_close" - }, - "ingested_at": "2026-07-03T04:29:14.775143+00:00", - "ingest_metrics": { - "coverage": 0.9533515577331216, - "ic": 0.016170167574704828, - "icir": 0.25245061365309496, - "rank_ic": 0.02176258712460612, - "n_days": 1457, - "cs_pearson_autocorr": 0.9914477818506634, - "decile_mean_label": [ - { - "decile": 1, - "mean_label": 0.000306 - }, - { - "decile": 2, - "mean_label": 0.001957 - }, - { - "decile": 3, - "mean_label": 0.00207 - }, - { - "decile": 4, - "mean_label": 0.001893 - }, - { - "decile": 5, - "mean_label": 0.002523 - }, - { - "decile": 6, - "mean_label": 0.003115 - }, - { - "decile": 7, - "mean_label": 0.00348 - }, - { - "decile": 8, - "mean_label": 0.003186 - }, - { - "decile": 9, - "mean_label": 0.005094 - 
}, - { - "decile": 10, - "mean_label": 0.004286 - } - ], - "mls_fmb": { - "mean_rho": 0.16769617936398995, - "mean_ls": 0.0036728822742138232, - "ir_ls": 0.1922524695543718, - "ir_ls_annual": 3.051913340273307, - "mls": 0.5117942069138263, - "nw_t_rho": 4.579884122743361, - "nw_t_ls": 2.9547326542828127, - "nw_se_rho": 0.03661581273011326, - "nw_se_ls": 0.0012430506255413905, - "n_days_rho": 1457, - "n_days_ls": 1457, - "nw_lags": 7, - "n_deciles": 10, - "n_deciles_requested": 10, - "min_stocks": 30, - "min_stocks_requested": 30, - "annualization_factor": 252.0, - "note": "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;IR_LS 为日频 LS 均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - }, - "label_col": "label_10d_close_to_close", - "skew": -0.13423616763432822, - "kurt": -0.5483165218763326, - "eval_start": "2018-01-01", - "eval_end": "2023-12-31", - "finite_ratio": 0.7687426008974411 - }, - "ingest_status": "stored", - "similarity": { - "col_idx": 11, - "n_factors": 12, - "kind": "cross_sectional_pearson_mean", - "max_abs_corr": 0.13036764777550702, - "top_neighbors": [ - { - "factor_id": "vpin_amount_60d_2m_min2", - "name": "vpin_amount_60d_2m_min2", - "cs_corr": -0.13036764777550702, - "expr": "vpin = VOLUME_CLOCK_VPIN($adj_close, $amount, 60, 2000000, 'tick', 2, 20)\nCS_ZSCORE(vpin)" - }, - { - "factor_id": "amihud_log_30d_winsor", - "name": "amihud_log_30d_winsor", - "cs_corr": 0.1175341812859241, - "expr": "amihud = DIVIDE(ABS($ret), ADD($amount, 1e-12))\namihud_win = CS_WINSORIZE(amihud, 0.01, 0.99)\namihud_log = LOG(ADD(amihud_win, 1e-12))\namihud_smooth = TS_MEAN(amihud_log, 30)\nCS_ZSCORE(amihud_smooth)" - }, - { - "factor_id": "roe_lowvol_center_smooth", - "name": "roe_lowvol_center_smooth", - "cs_corr": 0.11072949798069724, - "expr": "roe_r = RANK(CS_WINSORIZE($funda_roe, 0.01, 0.99))\nvol = TS_STD($ret, 20)\nvol_r = RANK(DIVIDE(1, ADD(vol, 0.001)))\nroe_center = MULTIPLY(roe_r, SUBTRACT(1, roe_r))\nvol_center = MULTIPLY(vol_r, SUBTRACT(1, 
vol_r))\nscore = ADD(roe_center, vol_center)\nCS_NEUTRALIZE(CS_WINSORIZE(score, 0.01, 0.99), CS_BUCKET(LOG($float_cap), 10))" - } - ] - }, - "source": "submit" - }, - "bps_leverage_to_price_ind_neutral": { - "name": "bps_leverage_to_price_ind_neutral", - "comment": "杠杆账面价值比价格因子:BPS × 资产负债率 / 收盘价,然后行业中性化。核心逻辑:高账面价值+高杠杆+低股价的公司具有深度价值特征,杠杆放大了B/P的价值信号。正向IC(高因子值→高未来收益),train IC=0.0065,val IC=0.0197,跨周期同号稳定。MLS-FMB优异(val nw_t_rho=5.52, nw_t_ls=5.42)。适合10日持有期的基本面价值策略。", - "expression_file": "artifacts/factorzoo/stock_1d/expressions/bps_leverage_to_price_ind_neutral.dsl", - "ingest_config": { - "train_start": "2018-01-01", - "ingest_start": "2018-01-01", - "ingest_end": "2023-12-31", - "label_col": "label_10d_close_to_close" - }, - "ingested_at": "2026-07-03T04:49:49.188761+00:00", - "ingest_metrics": { - "coverage": 0.9536731128714433, - "ic": 0.013074003014404326, - "icir": 0.3039648178984557, - "rank_ic": 0.019123944070519046, - "n_days": 1457, - "cs_pearson_autocorr": 0.9993658281319892, - "decile_mean_label": [ - { - "decile": 1, - "mean_label": 0.00014 - }, - { - "decile": 2, - "mean_label": 0.001029 - }, - { - "decile": 3, - "mean_label": 0.001963 - }, - { - "decile": 4, - "mean_label": 0.002278 - }, - { - "decile": 5, - "mean_label": 0.002785 - }, - { - "decile": 6, - "mean_label": 0.003311 - }, - { - "decile": 7, - "mean_label": 0.003788 - }, - { - "decile": 8, - "mean_label": 0.003717 - }, - { - "decile": 9, - "mean_label": 0.004732 - }, - { - "decile": 10, - "mean_label": 0.004252 - } - ], - "mls_fmb": { - "mean_rho": 0.14258854849108796, - "mean_ls": 0.003928594223889572, - "ir_ls": 0.39949444315244076, - "ir_ls_annual": 6.341777680401533, - "mls": 0.9042648743016333, - "nw_t_rho": 6.73988168490874, - "nw_t_ls": 6.276363864637335, - "nw_se_rho": 0.021155942367706213, - "nw_se_ls": 0.0006259347463942129, - "n_days_rho": 1457, - "n_days_ls": 1457, - "nw_lags": 7, - "n_deciles": 10, - "n_deciles_requested": 10, - "min_stocks": 30, - "min_stocks_requested": 30, - 
"annualization_factor": 252.0, - "note": "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;IR_LS 为日频 LS 均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - }, - "label_col": "label_10d_close_to_close", - "skew": 1.8382125027225293, - "kurt": 4.130628981307595, - "eval_start": "2018-01-01", - "eval_end": "2023-12-31", - "finite_ratio": 0.7689313128807491 - }, - "ingest_status": "stored", - "similarity": { - "col_idx": 12, - "n_factors": 13, - "kind": "cross_sectional_pearson_mean", - "max_abs_corr": 0.15670959413703273, - "top_neighbors": [ - { - "factor_id": "roe_yoy_low_asset_equity_quality", - "name": "roe_yoy_low_asset_equity_quality", - "cs_corr": -0.15670959413703273, - "expr": "lev = DIVIDE($funda_fs_total_assets, $funda_fs_total_equity)\nlev_win = CS_WINSORIZE(lev, 0.01, 0.99)\nlev_z = CS_ZSCORE(lev_win)\nlev_ind = CS_NEUTRALIZE(lev_z, $industry_sw_l1)\nlev_size = CS_NEUTRALIZE(lev_ind, CS_BUCKET(LOG($float_cap), 10))\nroe_win = CS_WINSORIZE($funda_roe, 0.01, 0.99)\nroe_z = CS_ZSCORE(roe_win)\nroe_ind = CS_NEUTRALIZE(roe_z, $industry_sw_l1)\nroe_size = CS_NEUTRALIZE(roe_ind, CS_BUCKET(LOG($float_cap), 10))\nyoy_win = CS_WINSORIZE($funda_netprofit_yoy, 0.01, 0.99)\nyoy_z = CS_ZSCORE(yoy_win)\nyoy_ind = CS_NEUTRALIZE(yoy_z, $industry_sw_l1)\nyoy_size = CS_NEUTRALIZE(yoy_ind, CS_BUCKET(LOG($float_cap), 10))\nquality = RANK(roe_size) + RANK(yoy_size) - RANK(lev_size)\nquality" - }, - { - "factor_id": "hl_div_amt_smooth20", - "name": "hl_div_amt_smooth20", - "cs_corr": 0.12936790974601187, - "expr": "avg_hl = TS_MEAN(SUBTRACT($adj_high, $adj_low), 20)\navg_close = TS_MEAN($adj_close, 20)\navg_hl_pct = DIVIDE(avg_hl, avg_close)\navg_amt = TS_MEAN($amount, 20)\nraw = DIVIDE(avg_hl_pct, ADD(avg_amt, 1))\nLOG(ADD(raw, 1e-12))" - }, - { - "factor_id": "vpin_amount_60d_2m_min2", - "name": "vpin_amount_60d_2m_min2", - "cs_corr": -0.11474117953100038, - "expr": "vpin = VOLUME_CLOCK_VPIN($adj_close, $amount, 60, 2000000, 'tick', 2, 20)\nCS_ZSCORE(vpin)" - } - ] 
- }, - "source": "submit" - } -} diff --git a/artifacts/factorzoo/stock_1d/mls_fmb_percentiles.json b/artifacts/factorzoo/stock_1d/mls_fmb_percentiles.json deleted file mode 100644 index 09257d7b..00000000 --- a/artifacts/factorzoo/stock_1d/mls_fmb_percentiles.json +++ /dev/null @@ -1,420 +0,0 @@ -{ - "n_factors": 20, - "train_range": [ - "2019-01-01", - "2021-12-31" - ], - "val_range": [ - "2022-01-01", - "2024-12-31" - ], - "label_col": "label_1d_open_to_open", - "abs_p25": { - "train": { - "mean_rho": 0.04774595267745952, - "mean_ls": 0.0008468296428362961, - "ir_ls_annual": 1.8990883863716144, - "mls": 0.08877995200883332, - "nw_t_rho": 2.3518814456699113, - "nw_t_ls": 3.279952500816756 - }, - "val": { - "mean_rho": 0.05738375490441605, - "mean_ls": 0.0011464187133623353, - "ir_ls_annual": 2.0918232523847458, - "mls": 0.12485072772692243, - "nw_t_rho": 2.76597243110037, - "nw_t_ls": 3.6794631425072293 - }, - "both": { - "mean_rho": 0.052839889898646825, - "mean_ls": 0.000984086686808388, - "ir_ls_annual": 1.9650063816562007, - "mls": 0.11641178698475951, - "nw_t_rho": 2.6712148087376297, - "nw_t_ls": 3.3720555861971677 - } - }, - "factors": [ - { - "factor_id": "chip_dual_crowd_mr_60d", - "train": { - "mean_rho": 0.11234537152345372, - "mean_ls": 0.0012908516446748523, - "ir_ls_annual": 4.14936446642789, - "mls": 0.46616189256705864, - "nw_t_rho": 6.810662272258692, - "nw_t_ls": 7.399175241956816 - }, - "val": { - "mean_rho": 0.12690541781450873, - "mean_ls": 0.0012728882930405322, - "ir_ls_annual": 4.267069043026693, - "mls": 0.5415141797486583, - "nw_t_rho": 6.908425873303515, - "nw_t_ls": 6.682583670183972 - } - }, - { - "factor_id": "chip_ma120_x_crowd_mr_winsor", - "train": { - "mean_rho": -0.07784142797841428, - "mean_ls": -0.0011683168398560258, - "ir_ls_annual": -3.8218264022079587, - "mls": 0.297496424633473, - "nw_t_rho": -4.4030202798108755, - "nw_t_ls": -6.468417556229962 - }, - "val": { - "mean_rho": -0.1367059019951582, - "mean_ls": 
-0.0015063792415269314, - "ir_ls_annual": -5.151072512055394, - "mls": 0.704182014002998, - "nw_t_rho": -8.891883930720706, - "nw_t_ls": -8.945772928347038 - } - }, - { - "factor_id": "chip_mass_asym_rank_neut", - "train": { - "mean_rho": 0.10482357824823578, - "mean_ls": 0.0022148034214525903, - "ir_ls_annual": 4.153952867272946, - "mls": 0.43543220342206906, - "nw_t_rho": 4.450608369101177, - "nw_t_ls": 6.954374209158402 - }, - "val": { - "mean_rho": 0.1370231237999833, - "mean_ls": 0.0017972257712276284, - "ir_ls_annual": 3.087315724134455, - "mls": 0.4230336446777105, - "nw_t_rho": 5.822889962599371, - "nw_t_ls": 5.130875084440782 - } - }, - { - "factor_id": "chip_peak_eff_rank", - "train": { - "mean_rho": -0.09949356579493565, - "mean_ls": -0.0009980742123204208, - "ir_ls_annual": -2.531270233706081, - "mls": 0.2518451015419981, - "nw_t_rho": -5.063492746505141, - "nw_t_ls": -4.874261845131654 - }, - "val": { - "mean_rho": -0.06815260038400534, - "mean_ls": -0.001097620133927494, - "ir_ls_annual": -1.7368945445119817, - "mls": 0.11837387980128405, - "nw_t_rho": -2.825379087523432, - "nw_t_ls": -3.1613558355406033 - } - }, - { - "factor_id": "chip_tsz_winsor_retvol", - "train": { - "mean_rho": 0.07822332918223328, - "mean_ls": 0.0009421241102722897, - "ir_ls_annual": 2.2059303959912158, - "mls": 0.17255521951871508, - "nw_t_rho": 3.7740793640779566, - "nw_t_ls": 4.17221074927605 - }, - "val": { - "mean_rho": 0.046698388847149185, - "mean_ls": 0.0011034837187301008, - "ir_ls_annual": 2.3667948994333057, - "mls": 0.11052550853518586, - "nw_t_rho": 2.1573939258333352, - "nw_t_ls": 3.7401288326426427 - } - }, - { - "factor_id": "crowd_eff_fluency_vol18", - "train": { - "mean_rho": -0.050294728102947286, - "mean_ls": -0.0010327169460470794, - "ir_ls_annual": -1.8712712117610626, - "mls": 0.09411507680239534, - "nw_t_rho": -2.394128881288319, - "nw_t_ls": -3.382241971146388 - }, - "val": { - "mean_rho": -0.05750062609566741, - "mean_ls": -0.0014147162725483104, - 
"ir_ls_annual": -2.242758906040025, - "mls": 0.12896004127893557, - "nw_t_rho": -2.853481584274011, - "nw_t_ls": -4.737835501664051 - } - }, - { - "factor_id": "cs_mom60_w_amt_filter", - "train": { - "mean_rho": 0.0265670402656704, - "mean_ls": 0.002210772473573461, - "ir_ls_annual": 2.5482918048974463, - "mls": 0.06770057098938835, - "nw_t_rho": 0.9790386819456726, - "nw_t_ls": 4.810448731102717 - }, - "val": { - "mean_rho": 0.10979213623841724, - "mean_ls": 0.002300715687837468, - "ir_ls_annual": 2.7924920982807757, - "mls": 0.3065936728991466, - "nw_t_rho": 4.450450607653586, - "nw_t_ls": 5.098163045671851 - } - }, - { - "factor_id": "fluency_crowd_eff15", - "train": { - "mean_rho": -0.04443337484433376, - "mean_ls": -0.0008484633063599244, - "ir_ls_annual": -1.6188486157057893, - "mls": 0.07193090735788615, - "nw_t_rho": -2.2251391388146877, - "nw_t_ls": -2.9701786364214766 - }, - "val": { - "mean_rho": -0.05927038984890225, - "mean_ls": -0.0013479366954258349, - "ir_ls_annual": -2.1151458263618537, - "mls": 0.12536551771574556, - "nw_t_rho": -2.710879130195466, - "nw_t_ls": -3.991060772489292 - } - }, - { - "factor_id": "gap_size_amt_neut", - "train": { - "mean_rho": 0.07681195516811955, - "mean_ls": 0.0015502024596403536, - "ir_ls_annual": 3.5059175761853165, - "mls": 0.26929638368506886, - "nw_t_rho": 3.478369045111733, - "nw_t_ls": 6.163298664733297 - }, - "val": { - "mean_rho": 0.10864012021036815, - "mean_ls": 0.0015046285436305137, - "ir_ls_annual": 3.815792565723828, - "mls": 0.41454816303806574, - "nw_t_rho": 5.283390645157991, - "nw_t_ls": 6.654331728669472 - } - }, - { - "factor_id": "hl_div_amt_smooth10", - "train": { - "mean_rho": 0.0821585720215857, - "mean_ls": 0.0013396348192688129, - "ir_ls_annual": 1.9838882495721124, - "mls": 0.162993425635248, - "nw_t_rho": 2.8313358860751334, - "nw_t_ls": 3.020697568567264 - }, - "val": { - "mean_rho": 0.18298689373069538, - "mean_ls": 0.001692986765606134, - "ir_ls_annual": 2.021855530453423, - "mls": 
0.36997306308989925, - "nw_t_rho": 6.140540111237702, - "nw_t_ls": 3.2239616241529743 - } - }, - { - "factor_id": "idio_qspread_8w", - "train": { - "mean_rho": -0.07878787878787878, - "mean_ls": -0.002012697008571066, - "ir_ls_annual": -2.526507216554589, - "mls": 0.19905814433460398, - "nw_t_rho": -3.085759844849322, - "nw_t_ls": -4.822079216436656 - }, - "val": { - "mean_rho": -0.10503380916604056, - "mean_ls": -0.002396141103866554, - "ir_ls_annual": -3.377070048628057, - "mls": 0.35470653102795063, - "nw_t_rho": -3.924454009341901, - "nw_t_ls": -6.207376151325046 - } - }, - { - "factor_id": "idio_qspread_win_20", - "train": { - "mean_rho": -0.04860107928601078, - "mean_ls": -0.001712438352587104, - "ir_ls_annual": -2.050345015522653, - "mls": 0.09964898066309347, - "nw_t_rho": -1.9079911411915313, - "nw_t_ls": -3.8845392002038843 - }, - "val": { - "mean_rho": -0.07908840470823943, - "mean_ls": -0.0020458041502714208, - "ir_ls_annual": -2.71476091108723, - "mls": 0.21470610962217565, - "nw_t_rho": -2.956093960798172, - "nw_t_ls": -4.9103399786021855 - } - }, - { - "factor_id": "idio_tail_asym_20", - "train": { - "mean_rho": -0.045180572851805725, - "mean_ls": -0.000576668053727702, - "ir_ls_annual": -1.6107493339416272, - "mls": 0.07277457762814724, - "nw_t_rho": -2.5522218443641225, - "nw_t_ls": -2.657166502153954 - }, - "val": { - "mean_rho": -0.08244427748559979, - "mean_ls": -0.001185261308331864, - "ir_ls_annual": -2.8651770643060583, - "mls": 0.23621745293502489, - "nw_t_rho": -4.56809731423552, - "nw_t_ls": -5.11874459719205 - } - }, - { - "factor_id": "mass_asym120_ac20_zscore", - "train": { - "mean_rho": -0.053150684931506834, - "mean_ls": -0.0011731650085845034, - "ir_ls_annual": -2.3496348913382046, - "mls": 0.12488470381359221, - "nw_t_rho": -2.8994285204751122, - "nw_t_ls": -4.160867874434569 - }, - "val": { - "mean_rho": -0.04524584689873946, - "mean_ls": -0.0013726967508340974, - "ir_ls_annual": -2.725252508510087, - "mls": 0.12330635776045305, - 
"nw_t_rho": -2.514617477039493, - "nw_t_ls": -4.950674403343847 - } - }, - { - "factor_id": "massasym_z_crowd_meanratio_z", - "train": { - "mean_rho": -0.06899128268991282, - "mean_ls": -0.0006764436430811836, - "ir_ls_annual": -2.099179047446997, - "mls": 0.1448250550791577, - "nw_t_rho": -4.283221982711893, - "nw_t_ls": -3.894234356667728 - }, - "val": { - "mean_rho": -0.06865347691793973, - "mean_ls": -0.0009126184418348923, - "ir_ls_annual": -2.7611541718511283, - "mls": 0.18956283420405443, - "nw_t_rho": -3.8665306604588165, - "nw_t_ls": -5.094923502186385 - } - }, - { - "factor_id": "mi_eff_fluency_20d", - "train": { - "mean_rho": -0.1230552096305521, - "mean_ls": -0.0010744618185923832, - "ir_ls_annual": -2.4059114818944103, - "mls": 0.2960599417570689, - "nw_t_rho": -5.467950807497874, - "nw_t_ls": -4.373049587035879 - }, - "val": { - "mean_rho": -0.10388179313799147, - "mean_ls": -0.0011517042234207595, - "ir_ls_annual": -1.662953362568796, - "mls": 0.17275057720849898, - "nw_t_rho": -4.164592948228115, - "nw_t_ls": -2.7411593715373375 - } - }, - { - "factor_id": "near_extreme_rev_min10", - "train": { - "mean_rho": -0.07058530510585305, - "mean_ls": -0.0007341457855033345, - "ir_ls_annual": -1.908360777908465, - "mls": 0.13470222776071208, - "nw_t_rho": -3.9112120232581833, - "nw_t_ls": -3.3414964313495084 - }, - "val": { - "mean_rho": -0.051907504800066784, - "mean_ls": -0.0011305621831870628, - "ir_ls_annual": -1.8748222974987665, - "mls": 0.09731734740668946, - "nw_t_rho": -2.767416990573626, - "nw_t_ls": -3.49746607210099 - } - }, - { - "factor_id": "peak_sharp_voladj", - "train": { - "mean_rho": -0.014113740141137402, - "mean_ls": 0.00035095776900630606, - "ir_ls_annual": 0.784225772903938, - "mls": -0.011068358770748814, - "nw_t_rho": -0.6529243037651761, - "nw_t_ls": 1.5183769818618222 - }, - "val": { - "mean_rho": 0.05703314133066198, - "mean_ls": 0.0005145835429512881, - "ir_ls_annual": 1.1064276177815473, - "mls": 0.06310304269708264, - 
"nw_t_rho": 2.2229790385707884, - "nw_t_ls": 1.796492541462532 - } - }, - { - "factor_id": "shadow_corr_diff_30", - "train": { - "mean_rho": -0.09849730178497303, - "mean_ls": -0.0009102244802868553, - "ir_ls_annual": -2.5169734014631384, - "mls": 0.24791508870866483, - "nw_t_rho": -4.402301356030357, - "nw_t_ls": -4.047685760211998 - }, - "val": { - "mean_rho": -0.12101177059854747, - "mean_ls": -0.0016510946271231064, - "ir_ls_annual": -2.8114509051871726, - "mls": 0.34021865198758877, - "nw_t_rho": -5.004358078570023, - "nw_t_ls": -4.639095962712524 - } - }, - { - "factor_id": "trend_fluency_crowd_vol_12d", - "train": { - "mean_rho": -0.033607305936073056, - "mean_ls": -0.0008419286522654111, - "ir_ls_annual": -1.6218428139507295, - "mls": 0.05450576762866378, - "nw_t_rho": -1.680924419761737, - "nw_t_ls": -3.095320709218498 - }, - "val": { - "mean_rho": -0.05369396443776609, - "mean_ls": -0.0015028462536098432, - "ir_ls_annual": -2.5279797294758346, - "mls": 0.13573725369386902, - "nw_t_rho": -2.7616387526806023, - "nw_t_ls": -5.330633625944736 - } - } - ] -} \ No newline at end of file diff --git a/configs/data.yaml b/configs/data.yaml deleted file mode 100644 index e0b6b770..00000000 --- a/configs/data.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Panel 数据配置 -panel: - path: artifacts/panel/panel_1d.parquet - -# 默认股票池:中证1000 成分并集 -universe: - default: zz1000 - -# Tushare 拉数限速(秒/请求) -tushare: - sleep_sec: 0.35 - batch_size: 40 diff --git a/configs/factors/registry.example.json b/configs/factors/registry.example.json deleted file mode 100644 index 429be088..00000000 --- a/configs/factors/registry.example.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "ma_dev": { - "name": "3日均线偏离", - "expression_file": "examples/factors/ma_dev.dsl" - }, - "ret_factor": { - "name": "日收益", - "expression": "$ret" - } -} diff --git a/constraints/3.10.txt b/constraints/3.10.txt new file mode 100644 index 00000000..6e5730ff --- /dev/null +++ b/constraints/3.10.txt @@ -0,0 +1,7 @@ 
+azure-identity==1.25.3 +dill==0.4.1 +pillow==12.2.0 +psutil==7.2.2 +rich==15.0.0 +scipy==1.15.3 +tqdm==4.67.3 diff --git a/constraints/3.11.txt b/constraints/3.11.txt new file mode 100644 index 00000000..6e5730ff --- /dev/null +++ b/constraints/3.11.txt @@ -0,0 +1,7 @@ +azure-identity==1.25.3 +dill==0.4.1 +pillow==12.2.0 +psutil==7.2.2 +rich==15.0.0 +scipy==1.15.3 +tqdm==4.67.3 diff --git a/docs/Makefile b/docs/Makefile new file mode 100755 index 00000000..ed880990 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/excess_return.png b/docs/_static/excess_return.png new file mode 100644 index 00000000..595d3c21 Binary files /dev/null and b/docs/_static/excess_return.png differ diff --git a/docs/_static/logo.png b/docs/_static/logo.png new file mode 100755 index 00000000..7ea907b1 Binary files /dev/null and b/docs/_static/logo.png differ diff --git a/docs/_static/workflow.png b/docs/_static/workflow.png new file mode 100755 index 00000000..f212c236 Binary files /dev/null and b/docs/_static/workflow.png differ diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100755 index 00000000..6922c2b3 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,151 @@ + +## [v0.1.0](https://github.com/RndmVariableQ/AlphaAgent/releases/tag/v0.1.0) - 2025-03-30 + +[Compare with first commit](https://github.com/RndmVariableQ/AlphaAgent/compare/1004756b7c7500b97fcb4e0d6aea486568c0bb9b...v0.1.0) + +### Added + +- Add files via upload ([f9273b5](https://github.com/RndmVariableQ/AlphaAgent/commit/f9273b5217be6caf603040eda3f25a076f1c277a) by shatianming5). +- add alphaagent/log dir ([b593438](https://github.com/RndmVariableQ/AlphaAgent/commit/b5934385f8b2e4fc2001f40003aeca1d3bea856c) by RndmVariableQ). +- add kaggle tpl (#482) ([3ddba41](https://github.com/RndmVariableQ/AlphaAgent/commit/3ddba41bfb58a29845764f791a9c184743c53b49) by XianBW). +- add docs for scen catalog (#458) ([58cf397](https://github.com/RndmVariableQ/AlphaAgent/commit/58cf3978785659f2803022cb9f245418f1392c98) by Haoran Pan). +- add inf evaluator to factor costeer and some minor improvement (#435) ([b82e597](https://github.com/RndmVariableQ/AlphaAgent/commit/b82e5972c7a324561809bb52407d165df17f6179) by Xu Yang). +- add a button to control feature selection (#342) ([eb1e3d3](https://github.com/RndmVariableQ/AlphaAgent/commit/eb1e3d3e7dc9df77a020942f27c28546a79e226e) by WinstonLiyt). 
+- add relevance check to quant factors (#210) ([6a30909](https://github.com/RndmVariableQ/AlphaAgent/commit/6a30909ff70c99d639a2b4d41bc9c3588679f501) by Xu Yang). +- add git push env (#163) ([193be44](https://github.com/RndmVariableQ/AlphaAgent/commit/193be4448a41b430ab3e4df0ea7d111e346d4673) by Linlang). +- Add some of the badges (#149) ([11db482](https://github.com/RndmVariableQ/AlphaAgent/commit/11db48234f85eeb20fab5640f1305173877c4c99) by Linlang). +- add CELA disclaimer to README.md (#141) ([3335a21](https://github.com/RndmVariableQ/AlphaAgent/commit/3335a2138990566208552a35a4abe30727371ecb) by Xu Yang). +- Added three new keys on hypothesis reasoning (#138) ([c7f24a6](https://github.com/RndmVariableQ/AlphaAgent/commit/c7f24a6bcfb7cc3954636c9db2495331b03e5885) by Xisen Wang). +- add_logger_to_factor_and_fix_a_small_bug_in_factor_runner (#85) ([fb22caf](https://github.com/RndmVariableQ/AlphaAgent/commit/fb22cafb275ce28d33b9785cd8a0254b4e58ec04) by Xu Yang). +- add a comment for GPU support ([e044988](https://github.com/RndmVariableQ/AlphaAgent/commit/e044988fb85d5904b76f435c7f06997aa85947e6) by Xu Yang). +- Add template for reading experiment (#48) ([44e5610](https://github.com/RndmVariableQ/AlphaAgent/commit/44e5610a961ac85418023b2cdcb2761c45d1c7c9) by Linlang). Co-authored-by: Young +- add CI fix tool to app (#10) ([cbd4de8](https://github.com/RndmVariableQ/AlphaAgent/commit/cbd4de8946d0645b29a1f7af3a8d1732aeb7c818) by XianBW). +- add code security check CI and dependbot (#3) ([4e71f4f](https://github.com/RndmVariableQ/AlphaAgent/commit/4e71f4ff81eb2176a11cc882a469534cef92338c) by Xu Yang). Co-authored-by: xuyang1 + +### Fixed + +- fix frontend backtest table ([cbe62c2](https://github.com/RndmVariableQ/AlphaAgent/commit/cbe62c2d0a5edc7eac455ff9b19e1aeea81cde75) by Mak). 
+- fix bug in model evaluator final feedback ignoring shape feedback (#501) ([ae1742f](https://github.com/RndmVariableQ/AlphaAgent/commit/ae1742f5fb3198c3ccce9eee49ed936b076d997c) by Haoran Pan). +- fix: patching weird azure deployment (#494) ([89c50ae](https://github.com/RndmVariableQ/AlphaAgent/commit/89c50aee2ec8bfd1cb23767ddf7dcdd023daac8b) by you-n-g). +- fix a bug (#470) ([d3a924a](https://github.com/RndmVariableQ/AlphaAgent/commit/d3a924a0b25dfb51252a4f85882fcf6f7e7b4ed7) by Xu Yang). +- fix: unzip kaggle data (#464) ([3a9fc8e](https://github.com/RndmVariableQ/AlphaAgent/commit/3a9fc8e73337d3757267b6f4482499499a1b6792) by Linlang). Co-authored-by: Xu Yang +- fix a bug in kaggle runner cache and kaggle costeer execute template (#467) ([83b3f78](https://github.com/RndmVariableQ/AlphaAgent/commit/83b3f78f59a07c0279a9aeec42e58ba7686e5347) by WinstonLiyt). +- fix: fix a bug in kaggle conf (#459) ([b4ed32b](https://github.com/RndmVariableQ/AlphaAgent/commit/b4ed32b17ef07d8557450063765585a48d5fcd32) by WinstonLiyt). +- fix: templates bug (#456) ([434a868](https://github.com/RndmVariableQ/AlphaAgent/commit/434a8687eeda77e27b4938fb19694c15858ee446) by Haoran Pan). +- fix KGFactorRunner cache system bug (#451) ([adcf599](https://github.com/RndmVariableQ/AlphaAgent/commit/adcf59937f6a9ee3b15947d3f97b09276c1da419) by XianBW). +- fix display bug in webapp (#440) ([c3fa245](https://github.com/RndmVariableQ/AlphaAgent/commit/c3fa245464329ee1889df4a12fd2a8813a32479e) by XianBW). +- fix problem of reading log file tags on Windows (#437) ([24aec11](https://github.com/RndmVariableQ/AlphaAgent/commit/24aec11b38dcafd943aa685b62ddd484c71a16a1) by XianBW). Co-authored-by: Bowen Xian (Shanghai Wicresoft Co Ltd) +- fix: fix cache result logic (#430) ([5e34263](https://github.com/RndmVariableQ/AlphaAgent/commit/5e342637dcc862679fd0642c6ba9ef048c984845) by Xu Yang). 
+- fix: fix a small bug in cache using module name and function name as unique folder name (#429) ([4f8134a](https://github.com/RndmVariableQ/AlphaAgent/commit/4f8134a697d952f7ac824d7ebeec64bbc4545ab3) by Xu Yang). +- fix: fix command injection (#421) ([52f30a6](https://github.com/RndmVariableQ/AlphaAgent/commit/52f30a6184af1295be15e855a80b84bc424fc75d) by Linlang). +- fix: fix json load error (#386) ([bba55fb](https://github.com/RndmVariableQ/AlphaAgent/commit/bba55fb48fe105f4847c1b9c476eedc80835f523) by Linlang). +- fix: fix a bug in competition metric evaluation (#407) ([94c47d6](https://github.com/RndmVariableQ/AlphaAgent/commit/94c47d6fd5c3e38fc786a83e6d0d05e8d04498f3) by WinstonLiyt). +- fix: refine the ucb algorithm (#406) ([14f7d97](https://github.com/RndmVariableQ/AlphaAgent/commit/14f7d976e03c92d6e727524e0cdad8a03b585016) by WinstonLiyt). +- fix: fix some bugs in rag (#399) ([194215c](https://github.com/RndmVariableQ/AlphaAgent/commit/194215c4559aee5b6ece18d65c95fb30968e2db6) by WinstonLiyt). +- fix bug in feature selection (#398) ([706b7df](https://github.com/RndmVariableQ/AlphaAgent/commit/706b7df0508609b50de38ce9704c168d2d875d8e) by XianBW). +- fix: fix a bug in mini case (#389) ([e75bb57](https://github.com/RndmVariableQ/AlphaAgent/commit/e75bb5746f63933b750406bbd34ee63c5ba76b9f) by WinstonLiyt). +- fix: fix a bug in scenario.py (#388) ([999a1eb](https://github.com/RndmVariableQ/AlphaAgent/commit/999a1eb0eff9088e1b02419db741db4acf8d9ff7) by Haoran Pan). +- fix: debug dsagent (#387) ([8fe9511](https://github.com/RndmVariableQ/AlphaAgent/commit/8fe9511e606ba148c66f384add6ab94857079541) by cyncyw). +- fix: cache (#383) ([f2a6e75](https://github.com/RndmVariableQ/AlphaAgent/commit/f2a6e75b36ca96f7733b9c2a7154ac67bd2d7c6f) by you-n-g). +- fix: rag save file (#385) ([1cb01dd](https://github.com/RndmVariableQ/AlphaAgent/commit/1cb01dd6fe595f2f5fb86487601326611dd1a57a) by cyncyw). 
+- fix: partial bug in bench (#368) ([af9808f](https://github.com/RndmVariableQ/AlphaAgent/commit/af9808f98736a2df07e121c2f6d7bfeb7b7d3581) by you-n-g). Co-authored-by: Tim +- fix: fix some bugs in knowledge base (#378) ([fa6ff8e](https://github.com/RndmVariableQ/AlphaAgent/commit/fa6ff8e591cf1847df77d73116649c5623161573) by WinstonLiyt). +- fix two template (#376) ([c1d94b5](https://github.com/RndmVariableQ/AlphaAgent/commit/c1d94b54ab428d66db900d4fe5b453c1c36595e5) by WinstonLiyt). +- fix a bug in code costeer (#373) ([c41686f](https://github.com/RndmVariableQ/AlphaAgent/commit/c41686f3c7cdd0f286979559afd7fa48ea324bba) by WinstonLiyt). +- fix: fix some errors in scenario.py, proposal.py and runner.py and several complex competition scenarios(#365) ([2e383b1](https://github.com/RndmVariableQ/AlphaAgent/commit/2e383b175d8448a67cb470f4e3ae8977d8ec6b5b) by WinstonLiyt). +- fix undefined error (#363) ([ff5e269](https://github.com/RndmVariableQ/AlphaAgent/commit/ff5e269354df341507349bbaedbbcaa8fa853326) by Tim). +- fix: fix a typo (#362) ([9fafabd](https://github.com/RndmVariableQ/AlphaAgent/commit/9fafabdf321b818bdd2211a2324d50cd0ebe1c1f) by WinstonLiyt). +- fix: refine some codes (#353) ([866c2e6](https://github.com/RndmVariableQ/AlphaAgent/commit/866c2e63ffa3876a3d16ad37f96da41d0558b714) by WinstonLiyt). +- fix: preprocess output format & some mistake in spelling (#358) ([b8b2cd6](https://github.com/RndmVariableQ/AlphaAgent/commit/b8b2cd6ccd3b27aa73de847e50899a8a53b71b8f) by Haoran Pan). +- fix: Optiver fixes (#357) ([b054017](https://github.com/RndmVariableQ/AlphaAgent/commit/b054017463af0d1784407030f2477d212118f341) by Way2Learn). +- fix: revert model and make SOTA model available to COSTEER (#351) ([3b7437b](https://github.com/RndmVariableQ/AlphaAgent/commit/3b7437b87e685188259779cd85a78a0b592de9de) by Xu Yang). +- fix submission exception (#349) ([c18cc6a](https://github.com/RndmVariableQ/AlphaAgent/commit/c18cc6a5439a5200717411233d5bf8633af875bd) by XianBW). 
+- fix: Update prompts.yaml to constrain only one model type (#341) ([5b5dfee](https://github.com/RndmVariableQ/AlphaAgent/commit/5b5dfeefbc7eb9dcbd9923544005c5d281262c03) by Way2Learn). Co-authored-by: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com>, Co-authored-by: WinstonLiye <1957922024@qq.com> +- fix: a bug of developer& edit s4e8 template (#338) ([f12ce72](https://github.com/RndmVariableQ/AlphaAgent/commit/f12ce726e7de96d478a232a3c27f92439820f8b4) by Haoran Pan). Co-authored-by: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com> +- fix a bug in model tuning feedback (#339) ([c86afad](https://github.com/RndmVariableQ/AlphaAgent/commit/c86afadec5e420498c02d24cea74ab0db5e3d730) by WinstonLiyt). +- fix a bug in the definition of source data shape (#335) ([d902220](https://github.com/RndmVariableQ/AlphaAgent/commit/d90222038744e4240ea2acced8175273832bd7da) by WinstonLiyt). +- fix: stop using markup in docker env print (#336) ([3009889](https://github.com/RndmVariableQ/AlphaAgent/commit/3009889b5e2605b5427c76f3084e0e58026bb5ae) by Xu Yang). +- fix: template for kaggle foreset & s4e9 (#334) ([2393a41](https://github.com/RndmVariableQ/AlphaAgent/commit/2393a41e7237615ced2c3fdd5c49308236b9f276) by Haoran Pan). * forest-cover-type-prediction: cross validation +- fix a bug in feature selection prompt (#333) ([4cdbf0e](https://github.com/RndmVariableQ/AlphaAgent/commit/4cdbf0efe362e71d344e71ee0648fff0e50f5fcc) by WinstonLiyt). +- fix: fix a bug in the format of the model input (#327) ([8f0574e](https://github.com/RndmVariableQ/AlphaAgent/commit/8f0574eaaadb245b8c38e09ad4821306996d926f) by WinstonLiyt). +- fix sf-crime nn model (#318) ([bed4842](https://github.com/RndmVariableQ/AlphaAgent/commit/bed4842338cfaab98793e1ef9266101e2333b6d5) by Haoran Pan). +- fix: fix a bug in model tuning feedback (#316) ([8aa088d](https://github.com/RndmVariableQ/AlphaAgent/commit/8aa088da2dc7525a3970c01d01987246f47d6238) by WinstonLiyt). 
+- fix: raise error in demo when no Metric in a Loop (#313) ([e46a78e](https://github.com/RndmVariableQ/AlphaAgent/commit/e46a78eb69271cb19978aab2f3b976c2870ca082) by XianBW). +- fix: Use fixed file name in model costeer & fixing cache (#311) ([1f910a5](https://github.com/RndmVariableQ/AlphaAgent/commit/1f910a5248bc576895ed66c2f7b2c3e046a2bc28) by Way2Learn). +- fix: bug of saving preprocess cache files (#310) ([5fb0608](https://github.com/RndmVariableQ/AlphaAgent/commit/5fb0608f39f113cc9807fb1f381284a0bd4da318) by XianBW). +- fix a small bug in model runner which might cause error when model is the first try (#309) ([dab2cff](https://github.com/RndmVariableQ/AlphaAgent/commit/dab2cfffbaf2be18994a5d509cc1cf73561f8376) by Xu Yang). +- fix: change css tag of kaggle competition info crawler (#306) ([1e3d38b](https://github.com/RndmVariableQ/AlphaAgent/commit/1e3d38bf1ca3654f3a90ff392ecba1dbb4e80224) by XianBW). +- fix: update code to fix a small bug in model cache md5 hash (#303) ([b00e4dc](https://github.com/RndmVariableQ/AlphaAgent/commit/b00e4dc2eff5b16029a2a12a6589eadac5cfd148) by Xu Yang). +- fix: refactor Bench (#302) ([78a87f6](https://github.com/RndmVariableQ/AlphaAgent/commit/78a87f624780ff67c0fa995ae4692678a120f99c) by you-n-g). +- fix: kaggle data mount problem (#297) ([795df31](https://github.com/RndmVariableQ/AlphaAgent/commit/795df311e3f93cd2f3fb51ba5698adaf10f6bd62) by XianBW). +- fix: test kaggle method (#296) ([91a6196](https://github.com/RndmVariableQ/AlphaAgent/commit/91a619618be1d7db660ea2b413a78dfaba9417a1) by XianBW). +- fix: fix some bugs in feedback.py and refine the prompt (#292) ([d834052](https://github.com/RndmVariableQ/AlphaAgent/commit/d8340527f133dcc649d599d90d6402eddd37859e) by WinstonLiyt). +- fix: refine the prompt (#286) ([77966c4](https://github.com/RndmVariableQ/AlphaAgent/commit/77966c4f5e9f492c437c5b4b78d89c0f875ef0d8) by WinstonLiyt). 
+- fix ensemble bug in train.py (#283) ([da3779e](https://github.com/RndmVariableQ/AlphaAgent/commit/da3779ecabeeb09e06c329461afa7a08c1a347e5) by Haoran Pan). +- fix: Update runner.py to fix a small bug (#282) ([8aef3ab](https://github.com/RndmVariableQ/AlphaAgent/commit/8aef3abcecd6002bd4bfeedcbe2c786d8bbfe2be) by Xu Yang). +- fix: improve_execution_time_in_kaggle_loop (#279) ([4c8f998](https://github.com/RndmVariableQ/AlphaAgent/commit/4c8f998c76f1e983a5687d2c65d3251750f2a9a0) by Xu Yang). +- fix: support seed and fix absolute path (#278) ([26352e1](https://github.com/RndmVariableQ/AlphaAgent/commit/26352e13121cad5be95c0de78bb9f5dda4330614) by you-n-g). +- fix: fix some bugs in the entire loop (#274) ([8a564ec](https://github.com/RndmVariableQ/AlphaAgent/commit/8a564ece1d87b27ee98b76db317935e802468965) by WinstonLiyt). +- fix a small bug in kaggle scenario caused by the second RAG update (#273) ([140fdcc](https://github.com/RndmVariableQ/AlphaAgent/commit/140fdccba37640fb78331774c056fe567d4afda1) by Xu Yang). +- fix: update new feature engineering code format (#272) ([7850b80](https://github.com/RndmVariableQ/AlphaAgent/commit/7850b8006a7c89d22629b345b4f361b0f35bc60d) by Xu Yang). +- fix: actively raised errors aer also considered as negative feedback. (#268) ([46ec908](https://github.com/RndmVariableQ/AlphaAgent/commit/46ec908e3594ac5e4cdc4057268e2f8800f5ed1f) by you-n-g). +- fix: eval_method cannot catch run factor error (#260) ([2aaab31](https://github.com/RndmVariableQ/AlphaAgent/commit/2aaab317ccb7a0121063bcd85fc36c21c7b8a391) by Tim). +- fix rdagent factor bug when FactorTask init (#261) ([4523b93](https://github.com/RndmVariableQ/AlphaAgent/commit/4523b93d9f16e109a10a163a544ba47433167f5c) by XianBW). +- fix: fix_dotenv_error (#257) ([923063c](https://github.com/RndmVariableQ/AlphaAgent/commit/923063c1fd957c4ed42e97272c72b5e9545451dc) by Linlang). 
Co-authored-by: you-n-g +- fix: default model value in config (#256) ([c097585](https://github.com/RndmVariableQ/AlphaAgent/commit/c097585f631f401c2c0966f6ad4c17286924f011) by you-n-g). +- fix: readme (#248) ([8cede22](https://github.com/RndmVariableQ/AlphaAgent/commit/8cede2209922876490148459e1134da828e1fda0) by Suhan Cui). +- fix: package dependency. (#234) ([46be295](https://github.com/RndmVariableQ/AlphaAgent/commit/46be2952952af534fd8d98a656c704c688d7cbdd) by you-n-g). +- fix: remove useless line (#177) ([64e9a8e](https://github.com/RndmVariableQ/AlphaAgent/commit/64e9a8e39a2072a962111db18f5b9565df5b0176) by Tim). +- fix: fix some bugs in llm calling (#217) ([7b010f8](https://github.com/RndmVariableQ/AlphaAgent/commit/7b010f8b5940aba65a58f1d78192aa80bcd0e654) by WinstonLiyt). +- fix build docs error (#204) ([77226d1](https://github.com/RndmVariableQ/AlphaAgent/commit/77226d1934b36246e1262e3b2edf0dbbdecdc23b) by Linlang). +- fix_docs_warning (#195) ([4a93329](https://github.com/RndmVariableQ/AlphaAgent/commit/4a9332961c18aa62d2de28300ce60f2f25dbae57) by Linlang). +- fix: update command line in readme.md (#192) ([9c45d24](https://github.com/RndmVariableQ/AlphaAgent/commit/9c45d24a192da02f7d9765cb001097da1bc36c61) by Xu Yang). +- fix: fix quick start problem (#191) ([44f61bf](https://github.com/RndmVariableQ/AlphaAgent/commit/44f61bfa1058a8efb59ca48b7f1417765aeea33e) by Xu Yang). +- fix: Fix a fail href in readme (#189) ([1b89218](https://github.com/RndmVariableQ/AlphaAgent/commit/1b89218f6bc697494f4a1b8a42ad18963002714f) by Xu Yang). +- fix: fix release CI (#165) ([85d6a5e](https://github.com/RndmVariableQ/AlphaAgent/commit/85d6a5ed91113fda34ae079b23c89aa24acd2cb2) by Linlang). Co-authored-by: you-n-g +- fix: Add framework handling for task coding failure. (#176) ([5e14fa5](https://github.com/RndmVariableQ/AlphaAgent/commit/5e14fa54a9dd30a94aebe2643b8c9a3b85517a11) by WinstonLiyt). +- fix: Fixed some bugs introduced during refactoring. 
(#167) ([f8f1445](https://github.com/RndmVariableQ/AlphaAgent/commit/f8f1445283fb89aefeb2918243c35a219a51a56c) by WinstonLiyt). Co-authored-by: Xu Yang +- fix: first round app folder cleaning (#166) ([6a5a750](https://github.com/RndmVariableQ/AlphaAgent/commit/6a5a75021912927deb5e8e4c7ad3ec4b51bfc788) by Xu Yang). +- fix: fix release CI error (#160) ([1c9f8ef](https://github.com/RndmVariableQ/AlphaAgent/commit/1c9f8ef287961731944acc9008496b4dddeddca7) by Linlang). +- fix: fix several bugs in data mining scenario (#147) ([b233380](https://github.com/RndmVariableQ/AlphaAgent/commit/b233380e2c66fb030db39424f0f040c86e37f5c4) by Suhan Cui). +- fix: optimize some prompts in factor loop. (#158) ([c2c1330](https://github.com/RndmVariableQ/AlphaAgent/commit/c2c13300b9ad315a663ec2d0eada414e56c6f54f) by WinstonLiyt). +- fix: fix some small bugs in report-factor loop (#152) ([a79f9f9](https://github.com/RndmVariableQ/AlphaAgent/commit/a79f9f93406aff6305a76e6a6abd3852642e4c62) by WinstonLiyt). Co-authored-by: Young , Co-authored-by: you-n-g , Co-authored-by: Taozhi Wang , Co-authored-by: Suhan Cui <51844791+SH-Src@users.noreply.github.com> +- fix: fix_release_ci_error (#150) ([4f82e99](https://github.com/RndmVariableQ/AlphaAgent/commit/4f82e9960a2638af9d831581185ddd3bac5711fc) by Linlang). +- fix: Comprehensive update to factor extraction. (#143) ([b5ea040](https://github.com/RndmVariableQ/AlphaAgent/commit/b5ea04019fd5fa15c0f8b9a7e4f18f490f7057d4) by WinstonLiyt). Co-authored-by: Young , Co-authored-by: you-n-g , Co-authored-by: Taozhi Wang , Co-authored-by: Suhan Cui <51844791+SH-Src@users.noreply.github.com> +- Fix a small bug in model value evaluation (#142) ([c14eafb](https://github.com/RndmVariableQ/AlphaAgent/commit/c14eafbc41773ed6f8d34dc1f9fdc0bcb15cec8b) by Xu Yang). +- fix: fix pickle problem (#140) ([7ee4258](https://github.com/RndmVariableQ/AlphaAgent/commit/7ee42587b60d94417f34332cee395cf210dc8a0e) by you-n-g). +- Fix a small bug in the hypothesis prompts. 
([859cbbf](https://github.com/RndmVariableQ/AlphaAgent/commit/859cbbfe1433ba529e5c833b96ecc385b09371a3) by WinstonLiyt). +- Fix some minor bugs caused by version changes. (#128) ([fde1042](https://github.com/RndmVariableQ/AlphaAgent/commit/fde10423de5b95da77968ea1e6dc83008288fd1f) by WinstonLiyt). +- Fix loop bug (#122) ([aacab8e](https://github.com/RndmVariableQ/AlphaAgent/commit/aacab8e8dbaacb98776636c606f228a62470d1b8) by you-n-g). +- Fix two small bugs. (#121) ([245cb71](https://github.com/RndmVariableQ/AlphaAgent/commit/245cb710a116b4f5aeed2c4fb102fea9821a42ab) by WinstonLiyt). +- fix record (#113) ([df68078](https://github.com/RndmVariableQ/AlphaAgent/commit/df680789c528a11533291cbb8a1c8b8f73e02327) by you-n-g). +- fix bugs (#111) ([9ad421b](https://github.com/RndmVariableQ/AlphaAgent/commit/9ad421b34192f479ccf7c04bac166c8e3018c0d8) by Suhan Cui). +- fix new demo bugs ([144252a](https://github.com/RndmVariableQ/AlphaAgent/commit/144252a310416b446c5c199d58890da0066777ee) by bowen xian). +- fix mypy error (#91) ([63f019c](https://github.com/RndmVariableQ/AlphaAgent/commit/63f019c27ac933b4d9b9b6f405dc5bfaccc888ba) by Linlang). Co-authored-by: Ubuntu , Co-authored-by: Young , Co-authored-by: you-n-g +- Fix import bug ([e88ec32](https://github.com/RndmVariableQ/AlphaAgent/commit/e88ec32d85126274073585e4fe73298a3c9e648d) by Young). +- Fix ruff error1 (#81) ([1608ec9](https://github.com/RndmVariableQ/AlphaAgent/commit/1608ec9fee2fde5588a76b25fdf2dde5cf8b200c) by Linlang). +- fix_some_errors_when_debug_factor (#84) ([8117656](https://github.com/RndmVariableQ/AlphaAgent/commit/81176569070f4667c0db14ab96fc160a3dca4754) by WinstonLiyt). +- Fix Logger context bug. (#83) ([c47240e](https://github.com/RndmVariableQ/AlphaAgent/commit/c47240ebb1e52829d2ef2ba3e7b330c3ef9b9fc8) by you-n-g). +- Fix two minor bugs related to factor extraction. ([dbc0337](https://github.com/RndmVariableQ/AlphaAgent/commit/dbc0337f206d6b62d534ad53433d5ec45df1234d) by WinstonLiyt). 
+- fix a small bug in evaluators.py ([d273056](https://github.com/RndmVariableQ/AlphaAgent/commit/d2730568ef74bacae33a345837463e176478cf26) by WinstonLiyt). +- Fix a bug. ([e133916](https://github.com/RndmVariableQ/AlphaAgent/commit/e133916a11852eaffc91188e6cf0e427b4137215) by WinstonLiyt). +- fix a small bug ([bade19a](https://github.com/RndmVariableQ/AlphaAgent/commit/bade19a087dbafe5434828bc957dfef105c4053d) by Xu Yang). +- Fix a bug when proposing model ideas using factor as key words (#63) ([3190d43](https://github.com/RndmVariableQ/AlphaAgent/commit/3190d43812abf478ce1443af8098bc5fbf7e205e) by Xu Yang). +- fix based_experiments bug ([53126d1](https://github.com/RndmVariableQ/AlphaAgent/commit/53126d1d7adbdd8068a9b51ed03b68dd7511e330) by WinstonLiyt). +- fix pytest localenv error (#62) ([095cb99](https://github.com/RndmVariableQ/AlphaAgent/commit/095cb99d9ac6e96b28fa57da7892c721501eeb55) by Linlang). +- Fix missing image path in Readme (#37) ([f52ebc3](https://github.com/RndmVariableQ/AlphaAgent/commit/f52ebc326f49a80ed0d999854c60b818752ecfdf) by Xinjie Shen). +- Fix model bug and push (#35) ([da05927](https://github.com/RndmVariableQ/AlphaAgent/commit/da05927d630f1f03b505bdf05888d2e9582a53d2) by you-n-g). + +### Changed + +- change docker logs print (#326) ([87c23f4](https://github.com/RndmVariableQ/AlphaAgent/commit/87c23f494add012c835ddde0de703f06734ddcc0) by XianBW). +- change the mechanism of traceback and action choosing (#320) ([9456761](https://github.com/RndmVariableQ/AlphaAgent/commit/9456761c8d7c857def3ae5187d779e8f87f6ac41) by WinstonLiyt). + +### Removed + +- remove irrelevant files ([0bc7a34](https://github.com/RndmVariableQ/AlphaAgent/commit/0bc7a34ed9701a0149ae990b6484e7c73b347ea0) by RndmVariableQ). +- remove_package (#447) ([162d191](https://github.com/RndmVariableQ/AlphaAgent/commit/162d191f695afa6243da345b2286f6f18c2a07c5) by Linlang). 
+- remove temp debug prompt (#352) ([13c116d](https://github.com/RndmVariableQ/AlphaAgent/commit/13c116d0785ad947f9438e5721deee679b0cda60) by Xu Yang). +- Remove an absolute path. ([38ec1ea](https://github.com/RndmVariableQ/AlphaAgent/commit/38ec1ea0c0f877b928ca5e1a9dcee163fd9d2b60) by WinstonLiyt). +- remove useless print command ([4069353](https://github.com/RndmVariableQ/AlphaAgent/commit/4069353338fdc5b9a2a37adc2345f1aadc417709) by Xu Yang). +- remove the new test file ([26c0515](https://github.com/RndmVariableQ/AlphaAgent/commit/26c0515196a17c30090b270c04af5d7c2c42b1da) by Xu Yang). diff --git a/docs/conf.py b/docs/conf.py new file mode 100755 index 00000000..a1fea424 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,72 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import subprocess + +latest_tag = subprocess.check_output(["git", "describe", "--tags", "--abbrev=0"], text=True).strip() + +project = "RDAgent" +copyright = "2024, Microsoft" +author = "Microsoft" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["sphinx.ext.autodoc", "sphinxcontrib.autodoc_pydantic"] + +autodoc_member_order = "bysource" + +# The suffix of source filenames. +source_suffix = {".rst": "restructuredtext"} + +# The encoding of source files. +source_encoding = "utf-8" + +# The main toctree document. +master_doc = "index" + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. 
+version = latest_tag +release = latest_tag + +# The language for content autogenerated by Sphinx. Refer to documentation for +# a list of supported languages. +language = "en" + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ["build"] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +try: + import furo + + html_theme = "furo" + html_theme_options = { + "navigation_with_keys": True, + } +except ImportError: + html_theme = "default" + +html_logo = "_static/logo.png" +html_static_path = ["_static"] +html_favicon = "_static/favicon.ico" + +html_theme_options = { + "source_repository": "https://github.com/microsoft/RD-Agent", + "source_branch": "main", + "source_directory": "docs/", +} diff --git a/docs/data_release.md b/docs/data_release.md deleted file mode 100644 index 5cc2af52..00000000 --- a/docs/data_release.md +++ /dev/null @@ -1,108 +0,0 @@ -# Data pipeline & open-data release - -> Two-stage design: **fetch (online) → build panel (offline)**. Panel construction -> never touches Tushare. Market data is fetched into a raw hq cache, then the panel -> is built/updated offline from that cache. This mirrors the fundamentals flow. - -## Architecture - -``` -Stage 1 — fetch (online) Raw parquet caches Stage 2 — build panel (offline) -scripts/fetch_market.py -> artifacts/market/daily_hq.parquet ┐ - artifacts/index/_members... │ -scripts/fetch_fundamentals.py -> artifacts/fundamental/quarterly... │-> scripts/build_panel.py - artifacts/fundamental/disclosure... 
│ -> artifacts/panel/panel_1d.parquet -build_panel --with-industry -> artifacts/industry/sw_l1_membership ┘ - -Incremental: scripts/update_panel.py - = update_market_cache (append hq) + update_panel_from_hq (tail-merge + re-derive) -``` - -- `seekalpha/data/market_fetch.py` — all Tushare fetching + hq cache IO + `fetch_and_save_market` (full) / `update_market_cache` (incremental). -- `seekalpha/data/index_members.py` — index constituents cached as monthly snapshots under `artifacts/index/`. Used only at fetch time to decide which stocks to pull; reused on same-range re-fetch (`--refresh-members` to force re-pull). The offline panel build does not depend on it. -- `seekalpha/data/panel.py` — offline only: `build_panel` (from hq cache) and `update_panel_from_hq` (incremental). No Tushare import at call time. - -## Full build (with token) - -```bash -# 1. fetch raw market hq into artifacts/market/daily_hq.parquet -uv run python scripts/fetch_market.py --start 2015-01-01 --end 2026-06-30 --universe zz1000 -# 2. fetch quarterly fundamentals + disclosure calendar -uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 --with-statements -# 3. build the panel offline (also caches SW L1 industry on first run) -uv run python scripts/build_panel.py --with-fundamentals --with-industry -``` - -## Incremental update - -```bash -uv run python scripts/update_panel.py --universe zz1000 --with-fundamentals --with-industry -``` - -Step 1 appends new trade dates to `daily_hq.parquet`; step 2 tail-merges the new -rows into the panel and re-derives `ret`/`label_*` from the trading day before the gap. - -## Packaging for open-data distribution - -```bash -uv run python scripts/pack_data_release.py --zip -``` - -Produces `dist/alphaagent-data-/` (mirroring `artifacts/`) with the four raw -caches, plus `MANIFEST.json` (sha256) and a bilingual `README.md`. Upload the folder -or zip to your file host (Baidu Cloud, etc.). 
The factor library (`artifacts/factorzoo`) -is not packaged; rebuild it via `scripts/ingest_factors.py`. - -## Restore (consumer side, offline) - -1. Extract the package into the repo root so `artifacts/market|fundamental|industry` are populated. -2. `uv run python scripts/build_panel.py --with-fundamentals --with-industry` -3. Optionally verify file sha256 against `MANIFEST.json`. - ---- - -# 数据管线与开源数据发布(中文) - -> 两段式:**拉取(联网)→ 建 panel(离线)**。panel 构建全程不联网——行情先拉进 -> hq 缓存,再从缓存离线构建/更新 panel,与基本面流程对称。 - -## 架构 - -- `seekalpha/data/market_fetch.py`:所有 Tushare 抓取 + hq 缓存读写 + `fetch_and_save_market`(全量)/ `update_market_cache`(增量)。 -- `seekalpha/data/panel.py`:纯离线,`build_panel`(读 hq 缓存)与 `update_panel_from_hq`(增量)。 - -## 全量构建(需 token) - -```bash -# 1. 拉行情 → artifacts/market/daily_hq.parquet -uv run python scripts/fetch_market.py --start 2015-01-01 --end 2026-06-30 --universe zz1000 -# 2. 拉季频基本面 + 披露日历 -uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 --with-statements -# 3. 离线建 panel(首次会顺带缓存申万一级行业) -uv run python scripts/build_panel.py --with-fundamentals --with-industry -``` - -## 增量更新 - -```bash -uv run python scripts/update_panel.py --universe zz1000 --with-fundamentals --with-industry -``` - -步骤 1 把新交易日追加进 `daily_hq.parquet`;步骤 2 把新增行 merge 进 panel,并从缺口 -前一交易日起重算 `ret`/`label_*`。 - -## 打包发布 - -```bash -uv run python scripts/pack_data_release.py --zip -``` - -生成 `dist/alphaagent-data-<日期>/`(对齐 `artifacts/` 布局),含四个原始缓存 + -`MANIFEST.json`(sha256)+ 双语 README。把目录或 zip 上传到网盘(百度云等)即可。 -因子库(`artifacts/factorzoo`)不打包,用 `scripts/ingest_factors.py` 重建。 - -## 重构(使用方,离线) - -1. 解压到仓库根,使 `artifacts/market|fundamental|industry` 就位。 -2. `uv run python scripts/build_panel.py --with-fundamentals --with-industry` -3. 
(可选)用 `MANIFEST.json` 的 sha256 校验。 diff --git a/docs/dev_log.md b/docs/dev_log.md deleted file mode 100644 index 6915461d..00000000 --- a/docs/dev_log.md +++ /dev/null @@ -1,84 +0,0 @@ -# AlphaAgent 开发记录 - -> 截至 2026-07-02。统一 monorepo `seekalpha`:Tushare 数据源 + AlphaAgent DSL + FactorZoo,目标覆盖因子研究 → 模型 → 回测 → 实盘。 - ---- - -## 已完成 - -### 基础框架(Phase 1) - -- `seekalpha` 包骨架:`core` / `data` / `dsl` -- DSL 从 AlphaAgent-Stock 迁移,表达式求值可用 -- Panel 全量构建 + 增量更新(parquet,`--update` 自动补 gap、回填 ret/label) -- Tushare 客户端(`.env` token、重试/超时) -- 分层单测:`test_core` / `test_data` / `test_dsl` - -### 数据层 - -- ZZ1000 成分股:`index_weight` 按月并集(~2696 只,非仅当前 1000 只) -- ST:`stock_st` 日度 `is_st`;`float_cap`:按 `trade_date` 拉 `daily_basic` -- 复权:`adj_* = OHLC × adjfactor`,新增 `adj_vwap`;建议 `--batch-size 20` 避免 6000 行截断 -- 当前 Panel:~618 万行 × 20 列,2015-01 ~ 2026-06,与 `pro_bar(hfq)` 在 2 位小数内一致 - -### 季频基本面(PIT) - -- `fetch_fundamentals.py`:`fina_indicator` / `fina_indicator_vip` 拉全市场季频 → `artifacts/fundamental/` -- `fundamental.py`:披露日 T+1 生效 PIT 展开(port AlphaAgent)、披露距离特征 -- `build_panel.py`:`--with-fundamentals` / `--enrich-only` 并入 panel -- 当前字段:`funda_roe`、`funda_netprofit_yoy`、`funda_fs_ebit` 等 17 指标 + 2 披露日历列 -- 挖掘 prompt 已文档化 `$funda_*` 用法;单测 `test_fundamental_pit` / `test_fundamental_fetch` - -### 因子研究(Phase 2) - -- 评估/入库从 AlphaAgent 迁移:`eval`、`metrics`、`report`、FactorZoo、`ingest` -- CLI:`eval_factor.py`(IC 报告)、`ingest_factors.py`(`--expr-file` / registry)、`init_factorlib.py`、`realign_factorlib.py` -- 默认 label:`label_1d_open_to_open` -- 因子库:memmap 存储 + 截面查重;panel 变更后 `realign` 重算已有因子 -- T+N 窗口增量更新(T=240 天 + 新窗口,overlap 一致性已测) -- 文档:`docs/factor_metrics.md` -- 单测:`test_factor`(含 rolling probe) - -### 可选 - -- LLM 因子挖掘:`scripts/factor_mining.py` -- Panel 复权修补:`scripts/repair_panel_adjfactor.py`(rebuild 后通常不需要) - ---- - -## 规划(待办) - -### Phase 3 — 模型 + 回测 - -- [ ] `seekalpha/model/`:dataset、walk-forward 训练(Linear / LightGBM) -- [ ] `seekalpha/portfolio/`:alpha → 目标持仓 -- [ ] 
`seekalpha/backtest/`:朴素逐日回测引擎(与 live 共用 portfolio 逻辑) -- [ ] `scripts/train_model.py`、`scripts/backtest.py` -- [ ] YAML 驱动策略配置(`configs/strategies/`) -- [ ] StrategyBundle 版本化(`artifacts/bundles/`) -- [ ] `test_model` / `test_portfolio` / `test_backtest` - -### Phase 4 — 实盘 - -- [ ] `seekalpha/live/`:inference、reconciler(目标 vs 持仓 → 订单) -- [ ] `seekalpha/exec/qmt/`:精简 QMT adapter -- [ ] `seekalpha/risk/`、`seekalpha/monitor/` -- [ ] `scripts/run_live.py` -- [ ] `test_live` / `test_exec` / `test_integration` 全链路 - -### Phase 5 — 优化(可选) - -- [ ] 三大表全量 `funda_fs_*` 科目(income/balancesheet/cashflow) -- [ ] `daily_basic` 扩展 PE/PB 等日频估值列 -- [ ] 回测性能:预计算 alpha 表、向量化 PnL -- [ ] 时点成分股 mask(回测/实盘过滤当日 zz1000) -- [ ] Panel 原子写入、断点续传 build -- [ ] adjfactor merge 彻底清零(单股补拉 + ffill/bfill) - ---- - -## 已知遗留(低优先级) - -- ~3% 行 `adjfactor=1.0`(新股初期正常 + 少量 merge 遗漏) -- `vwap` 单位:amount 千元 / volume 手(与 close 差 10 倍,可用 `$adj_vwap`) -- 无「当日成分股」过滤,信号层需自行处理 diff --git a/docs/factor_metrics.md b/docs/factor_metrics.md deleted file mode 100644 index 6138d5fa..00000000 --- a/docs/factor_metrics.md +++ /dev/null @@ -1,138 +0,0 @@ -# 因子评估指标说明 - -本文档说明 `scripts/eval_factor.py --report` 输出报告中各指标的含义。计算逻辑见 [`seekalpha/factor/metrics.py`](../seekalpha/factor/metrics.py)。 - -## 如何生成报告 - -```bash -uv run python scripts/eval_factor.py --expr-file examples/factors/ma_dev.dsl --report -uv run python scripts/eval_factor.py --expr-file your.dsl --report --start-time 2024-06-01 --end-time 2026-05-31 -``` - -- DSL 在**全量 panel** 上求值(保证长窗口因子 warmup 正确)。 -- `--start-time` / `--end-time` 仅用于**指标统计区间**切片。 -- 默认标签列:`label_1d_open_to_open`(可用 `--label-col` 修改)。 - ---- - -## 报告头信息 - -| 字段 | 含义 | -|------|------| -| **评估区间** | 参与 IC / MLS 等汇总的交易日范围 | -| **标签列** | 因子与之计算相关性的收益标签(见下文) | -| **有效 IC 天数** | 截面 IC 非 NaN 的交易日数量 | - -### 常用标签列 - -| 列名 | 定义(简化) | -|------|----------------| -| `label_1d_open_to_open` | 从 **T+1 开盘** 到 **T+2 开盘** 的收益率(CLI 默认) | -| `label_1d_close_to_close` | 从 **T+1 收盘** 到 **T+2 收盘**(持有 1 
个交易日;**推荐价量因子**) | -| `label_10d_close_to_close` | 从 **T+1 收盘** 到 **T+11 收盘**(持有 10 个交易日;**推荐基本面因子**) | -| `label_20d_close_to_close` | 从 **T+1 收盘** 到 **T+21 收盘**(持有 20 个交易日) | -| `ret` | 当日相对前一日收盘的日收益率(多用于描述性统计,作 label 较少) | - -**选用建议**:评估 / 挖掘时用 `--label-col` 显式指定——基本面 → `label_10d_close_to_close`;价量 → `label_1d_close_to_close`。 - ---- - -## 截面 IC - -每个交易日,在股票截面上计算因子值与未来 label 的相关性,再对时间求均值。 - -| 指标 | 含义 | 解读提示 | -|------|------|----------| -| **IC** | 逐日截面 **Pearson** 相关的均值 | 绝对值越大,线性预测力越强;符号表示因子方向 | -| **ICIR** | `mean(IC) / std(IC)` | IC 的稳定性;\|ICIR\| 越大,信号越稳定 | -| **Rank IC** | 逐日截面 **Spearman** 秩相关的均值 | 对极端值更稳健,关注排序而非线性关系 | -| **Coverage** | 因子有限值占比 | 过低说明表达式大量 NaN,或窗口/数据字段缺失 | -| **CS lag-1 ρ** | 逐日截面 lag-1 自相关 `corr(f_t, f_{t-1})` 的均值 | 过高(如 >0.6)表示排名日度延续性强、换手可能偏低;过低则因子噪声大、换手高 | - ---- - -## MLS / FMB - -**Monotonicity + Long-Short,Fama–MacBeth 时序聚合**(非参数版)。 - -每日在截面上把股票按因子值等频分成 10 组(Q1=最低,Q10=最高),计算: - -- **ρ_t**:组号 `{1…10}` 与组内 label 均值 `{R_{1,t}…R_{10,t}}` 的 Spearman 相关,衡量**单调性**(因子越高,label 是否系统性越高)。 -- **LS_t**:`R_{Q10,t} - R_{Q1,t}`,**多空组合**当日 label 差(最高组减最低组)。 - -再对 `{ρ_t}`、`{LS_t}` 时序序列做 Newey–West 稳健检验。 - -| 指标 | 含义 | -|------|------| -| **mean ρ** | 逐日单调性 ρ_t 的均值 | -| **mean LS** | 逐日多空 LS_t 的均值 | -| **IR_LS** | `mean(LS) / std(LS)`,日频多空收益的信息比 | -| **IR_LS 年化** | `IR_LS × √252` | -| **MLS** | `mean(ρ) × IR_LS 年化`,综合单调性与多空 Sharpe 的得分 | -| **NW-t(ρ)** | mean(ρ) 的 Newey–West t 统计量(检验是否显著异于 0) | -| **NW-t(LS)** | mean(LS) 的 Newey–West t 统计量 | -| **样本天数** | 参与 ρ 序列估计的有效交易日数 | - ---- - -## 十分组 label 均值 - -全样本(评估区间内)按因子值等频分成 10 组,展示每组的 label 均值。 - -| 符号 | 含义 | -|------|------| -| **D1** | 因子值**最低**组 | -| **D10** | 因子值**最高**组 | -| **柱状图** | 相对 D1–D10 均值范围的示意,便于肉眼检查单调性 | -| **数值** | 该组 label 的样本均值 | - -理想情况下,从 D1 到 D10 label 均值应单调递增(做多因子)或递减(做空因子)。 - ---- - -## 入库相关指标(`ingest_factors.py`) - -批量入库时除上述 IC 类指标外,还会做**截面相似度查重**(与库内已有因子比较): - -| 规则 | 说明 | -|------|------| -| **max_cs_corr** | 候选因子与库内因子的最大截面 \|Pearson\| 均值;默认 ≥0.8 拒绝入库 | -| **train_start 之前 mask** 
| 入库值在 `train_start` 之前置 NaN,评估区间从 train_start 起算 | - -默认入库策略见 [`seekalpha/factor/types.py`](../seekalpha/factor/types.py) 中的 `IngestPolicy`。 - ---- - -## panel 更新后增量 realign(`realign_factorlib.py`) - -`update_panel.py` 追加新交易日后,若因子库 index **前缀不变**(仅尾部追加行),可用增量 realign: - -```bash -uv run python scripts/update_panel.py --universe zz1000 -uv run python scripts/realign_factorlib.py -``` - -| 步骤 | 说明 | -|------|------| -| **窗口** | 默认取近 **240** 个交易日 + 新增行 N 做 DSL batch 求值 | -| **校验** | update 前最后 **K** 个交易日(默认 **20**)与库内 memmap **float32 完全一致** | -| **失败** | 扩窗至 **480** 重试;仍失败则该因子 **全 panel 重算** | -| **前缀变化** | index 非 append-only 时自动 **全量 realign** | - -可选:`--overlap-verify-days 10` 调整校验天数。 - -| 试跑(不写盘) | `uv run python scripts/realign_factorlib.py --dry-run` | -| **滚动 probe**(库已对齐、模拟多窗口) | `uv run python tests/test_factor/rolling_probe_incremental_realign.py` | - ---- - -## 参考阈值(挖掘 / 入库,非硬约束) - -| 指标 | 常见参考 | -|------|----------| -| \|IC\| | > 0.005 有一定预测力 | -| \|ICIR\| | > 0.1 较稳定 | -| Coverage | > 0.9 可实盘化 | -| CS lag-1 ρ | > 0.6 排名延续性尚可(过高可能换手过低) | - -具体门槛以策略与 universe 为准,报告仅作研究参考。 diff --git a/docs/index.rst b/docs/index.rst new file mode 100755 index 00000000..0dea85c6 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,8 @@ + Welcome to RD-Agent's documentation! + ==================================== + + .. toctree:: + :maxdepth: 2 + :caption: Contents: + + changelog \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100755 index 00000000..8f56bebd --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable.
Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/operations_manual.md b/docs/operations_manual.md deleted file mode 100644 index d27705f9..00000000 --- a/docs/operations_manual.md +++ /dev/null @@ -1,633 +0,0 @@ -# AlphaAgent 操作手册 - -> 按**推荐执行顺序**整理:环境 → 拉数据 / 建 Panel(含 label)→ 因子调试 → 建因子库 → 入库 / 查重 → Panel 更新与 realign → LLM 挖因子。 -> 所有命令均在**仓库根目录**执行,前缀统一为 `uv run python scripts/...`。 - ---- - -## 0. 环境准备 - -### 0.1 安装依赖 - -```powershell -cd D:\AlphaAgent2026 -uv sync -``` - -可选能力: - -| extra | 用途 | 命令 | -|-------|------|------| -| `dev` | pytest(默认已装) | — | -| `mining` | LLM 因子挖掘 | `uv sync --extra mining` | - -### 0.2 配置 `.env` - -在项目根目录创建 `.env`: - -```env -# 拉 Panel 必填 -TUSHARE_TOKEN=your_tushare_token - -# 因子挖掘必填(仅 mining 脚本) -OPENAI_API_KEY=sk-... -OPENAI_API_BASE=https://... 
# 可选,兼容 OpenAI 协议的中转 -MODEL=gpt-5.5 # 或 deepseek-chat 等 -MAX_PARALLEL_EVAL=4 # 可选,train/val 并行评估上限(默认 1;也可用 --max-parallel-eval 覆盖) -``` - -### 0.3 关键路径(默认值) - -| 路径 | 说明 | -|------|------| -| `artifacts/panel/panel_1d.parquet` | 日频 Panel(量价 + label;可选 `funda_*` 基本面列) | -| `artifacts/fundamental/quarterly.parquet` | 季频财务指标缓存(全市场 VIP 拉取) | -| `artifacts/fundamental/disclosure_calendar.parquet` | 财报披露日历(PIT 生效日) | -| `artifacts/factorzoo/stock_1d/` | 因子库(本地 memmap;**Git 仅同步** `expressions/*.dsl` 等少量元数据) | -| `artifacts/factorzoo/stock_1d/expressions/` | 已入库因子 DSL(**进 Git**,协作用源码) | -| `examples/factors/*.dsl` | 示例 / 手写 DSL(未入库) | -| `logs/factor_mining/` | 挖掘会话日志 | - -### 0.4 默认评估口径 - -| 项 | 默认值 | -|----|--------| -| label(脚本默认) | `label_1d_open_to_open`(T+1 开盘 → T+2 开盘收益) | -| train | 2019-01-01 ~ 2021-12-31 | -| val | 2022-01-01 ~ 2024-12-31 | -| 截面查重阈值 | `\|cs_corr\| < 0.8` | - -**`--label-col` 选用建议**(评估 / 挖掘时显式指定): - -| 因子类型 | 推荐 label | -|----------|------------| -| 基本面(主要用 `funda_*`) | `label_10d_close_to_close` | -| 价量(OHLC / `ret` / `volume` / 筹码等) | `label_1d_close_to_close` | - -指标含义见 [factor_metrics.md](./factor_metrics.md)。 - ---- - -## 1. 拉数据 & 建 Panel(含 label) - -Panel 构建时会**自动**写入行情、复权 OHLC、`ret` 与两个前瞻 label,**无需单独建 label 脚本**。 - -| 列 | 含义 | -|----|------| -| `label_1d_open_to_open` | `(adj_open[t+2] - adj_open[t+1]) / adj_open[t+1]` | -| `label_{N}d_close_to_close` | `(adj_close[t+N+1] - adj_close[t+1]) / adj_close[t+1]`,如 `label_10d_close_to_close` = T+1 close → T+11 close | -| `ret` | 按 instrument 的 adj_close 日收益 | -| `adj_*` | 后复权 OHLC;`adj_vwap = vwap × adjfactor` | - -### 1.1 全量构建(ZZ1000 成分并集,推荐) - -两段式:**先拉行情落盘 hq 缓存(联网),再离线建 panel(不联网)**。 - -```powershell -# 1. 拉行情 → artifacts/market/daily_hq.parquet -uv run python scripts/fetch_market.py --start 2015-01-01 --end 2026-06-30 --universe zz1000 -# 2. 
从 hq 缓存离线构建 panel -uv run python scripts/build_panel.py -``` - -输出默认:`artifacts/panel/panel_1d.parquet`。 - -常用参数: - -```powershell -# 降低 Tushare 限流风险(大批量建议 batch-size 20~40) -uv run python scripts/fetch_market.py --start 2024-01-01 --end 2024-12-31 --batch-size 20 --sleep 0.35 - -# 全市场按日拉取(慢) -uv run python scripts/fetch_market.py --start 2024-01-01 --end 2024-01-31 --universe none - -# 从 hq 缓存切片构建、指定输出 -uv run python scripts/build_panel.py --start 2024-01-01 --end 2024-12-31 --out artifacts/panel/panel_1d.parquet -``` - -### 1.2 增量更新 - -一条命令:增量拉取新交易日 → 追加 hq 缓存 → panel 尾部 merge + 回填 `ret` / label: - -```powershell -uv run python scripts/update_panel.py --universe zz1000 --with-fundamentals --with-industry -``` - -指定若干交易日: - -```powershell -uv run python scripts/update_panel.py --dates 2026-06-27 2026-06-30 --universe zz1000 -``` - -已有 panel 补算新 label 列(schema 升级后一次性执行): - -```powershell -uv run python -c "from seekalpha.core.paths import PANEL_PATH; from seekalpha.data.panel import load_panel, save_panel, backfill_panel_derived_columns; p = backfill_panel_derived_columns(load_panel(PANEL_PATH)); save_panel(p, PANEL_PATH); print('ok', [c for c in p.columns if c.startswith('label_')])" -``` - -评估时按因子类型指定 `--label-col`: - -```powershell -# 基本面 -uv run python scripts/eval_factor.py --expr-file your.dsl --report --label-col label_10d_close_to_close -# 价量 -uv run python scripts/eval_factor.py --expr-file your.dsl --report --label-col label_1d_close_to_close -``` - -### 1.4 季频基本面(PIT 展开) - -季频 `fina_indicator` 单独缓存,Panel enrich 时按**披露日 T+1 交易日**严格 PIT 展开为日频 `funda_*` 列(与 AlphaAgent 语义一致)。 - -**推荐流程(全市场缓存 + zz1000 panel):** - -```powershell -# 1. 拉季频(VIP 每期 1 次请求,全 A 股落盘;每期 merge 后立即写盘) -uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 - -# 积分不足时逐股慢拉(须指定 universe) -uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 --universe zz1000 --no-vip - -# 2a. 
从 hq 缓存离线建 panel 时一并 enrich -uv run python scripts/build_panel.py --with-fundamentals - -# 2b. 已有 panel,仅补基本面列(不重建量价) -uv run python scripts/build_panel.py --enrich-only - -# 2c. 增量更新行情后顺带 refresh 基本面 -uv run python scripts/update_panel.py --universe zz1000 --with-fundamentals -``` - -| 参数 | 说明 | -|------|------| -| `--with-fundamentals` | 构建/更新后 PIT 并入 `funda_*` | -| `--enrich-only` | 只读已有 panel + 本地 fundamental 缓存 enrich | -| `--no-disclosure-distance` | 不写入 `funda_days_since_disclose` 等披露距离列 | - -**Panel 内基本面列(当前,`fina_indicator`):** -`funda_roe`、`funda_roa`、`funda_debt_to_assets`、`funda_eps`、`funda_bps`、`funda_netprofit_yoy`、`funda_or_yoy`、`funda_grossprofit_margin`、`funda_fs_working_capital`、`funda_fs_ebit` 等;披露特征 `funda_days_since_disclose`、`funda_days_since_quarter_start`。 -挖掘 agent 系统提示词已包含字段说明。 - -**label 选用**:基本面因子 `--label-col label_10d_close_to_close`;价量因子 `--label-col label_1d_close_to_close`。 - -**三大表(可选)**:`fetch_fundamentals.py --with-statements` 额外拉 income/balancesheet/cashflow,并入 `funda_fs_*` 列(利润表/现金流为年初至今累计,列名带 `_ytd`;资产负债表为时点值)。 - -**行业分类(可选)**:`build_panel.py --with-industry` 并入申万一级行业码 `industry_sw_l1`(严格 PIT,缓存于 `artifacts/industry/`)。DSL 里 `CS_NEUTRALIZE($factor, $industry_sw_l1)` 做行业中性。详见 `docs/panel_fundamental_fields.md` §3.4。 - -### 1.5 Panel 复权修补(可选) - -全量 rebuild 后通常不需要;发现 adjfactor 断层时可用 `market_fetch.repair_panel_adjfactor`(联网单股重拉 adj_factor 并重算): - -```powershell -uv run python -c "from seekalpha.core.paths import PANEL_PATH; from seekalpha.data.panel import load_panel, save_panel; from seekalpha.data.market_fetch import repair_panel_adjfactor; p, stats = repair_panel_adjfactor(load_panel(PANEL_PATH)); print(stats); save_panel(p, PANEL_PATH)" -``` - ---- - -## 2. 
因子表达式调试(不入库) - -在入库或挖掘前,先用 `eval_factor.py` 验证 DSL 能否跑通、看 IC 报告。 - -### 2.1 只看求值结果(coverage + 样本) - -```powershell -uv run python scripts/eval_factor.py --expr-file examples/factors/ma20_dev.dsl -``` - -PowerShell 内联表达式请用**单引号**,避免 `$` 被 shell 吃掉: - -```powershell -uv run python scripts/eval_factor.py --expr 'SUBTRACT($adj_close, TS_MEAN($adj_close, 20))' -``` - -### 2.2 IC / ICIR / RANKIC / MLS 报告 - -```powershell -uv run python scripts/eval_factor.py --expr-file examples/factors/ma_dev.dsl --report -``` - -指定 label 与评估区间(DSL 仍在**全量 panel** 上求值,`--start-time` 只切 metrics): - -```powershell -uv run python scripts/eval_factor.py --expr-file examples/factors/ma_dev.dsl --report ` - --label-col label_1d_open_to_open ` - --start-time 2019-01-01 --end-time 2021-12-31 -``` - -JSON 输出(便于脚本解析): - -```powershell -uv run python scripts/eval_factor.py --expr-file examples/factors/ma_dev.dsl --report --json -``` - ---- - -## 3. 初始化因子库(factorzoo) - -**前提**:已有与目标研究一致的 Panel 文件。 - -```powershell -uv run python scripts/init_factorlib.py -``` - -默认绑定: - -- Panel:`artifacts/panel/panel_1d.parquet` -- 因子库:`artifacts/factorzoo/stock_1d/` - -自定义: - -```powershell -uv run python scripts/init_factorlib.py ` - --panel artifacts/panel/panel_1d.parquet ` - --output artifacts/factorzoo/stock_1d ` - --n-sample-rows 200000 ` - --max-factors 2048 -``` - -成功后会生成 `manifest.json`、`index/shards.json`、相似度矩阵占位等。**同一 Panel 只需 init 一次**;Panel 行数变更后应走 [第 5 节 realign](#5-panel-更新后-realign-因子库),而不是重复 init 覆盖。 - ---- - -## 4. 
因子入库(手动) - -### 4.1 单文件入库 - -```powershell -uv run python scripts/ingest_factors.py --expr-file examples/factors/ma20_dev.dsl -``` - -指定 ID / 名称: - -```powershell -uv run python scripts/ingest_factors.py ` - --expr-file examples/factors/ma20_dev.dsl ` - --factor-id ma20_dev ` - --name "20日均线偏离" -``` - -### 4.2 按 registry 批量入库 - -编辑 `configs/factors/registry.example.json` 后: - -```powershell -uv run python scripts/ingest_factors.py --registry configs/factors/registry.example.json -``` - -只入库其中一个: - -```powershell -uv run python scripts/ingest_factors.py --registry configs/factors/registry.example.json --factor-id ma_dev -``` - -### 4.3 查重与 dry-run - -入库前**自动截面查重**:与库内已有因子逐日截面 Pearson 相关均值,`|corr| ≥ 0.8` 拒绝。 - -```powershell -# 只算指标 + 查重,不写库 -uv run python scripts/ingest_factors.py --expr-file examples/factors/ma20_dev.dsl --dry-run - -# 调整查重阈值 / top 邻居数 -uv run python scripts/ingest_factors.py --expr-file examples/factors/ma20_dev.dsl --max-cs-corr 0.75 --similar-top-k 5 -``` - -覆盖已存在 factor_id: - -```powershell -uv run python scripts/ingest_factors.py --expr-file examples/factors/ma20_dev.dsl --overwrite -``` - -### 4.4 查看因子库 - -```powershell -# 列表 -uv run python scripts/factorlib_info.py - -# 单个因子详情(含 expr) -uv run python scripts/factorlib_info.py --factor-id ma20_dev - -# JSON -uv run python scripts/factorlib_info.py --json -``` - -### 4.5 从 Git 的 `.dsl` 全量重建因子库(memmap + 查重矩阵) - -**背景**:Git **只同步** `artifacts/factorzoo/stock_1d/expressions/*.dsl`(及可选的 `mining_delivered_registry.json` 等),**不同步** memmap 数值、`similarity/` 相关矩阵、`meta/factors.parquet` 等(见根目录 `.gitignore`)。 - -因此协作者 `git clone` / `git pull` 之后,本地只有表达式源码,**还没有**可用于: - -- `submit_factor` / `ingest_factors.py` 的**截面查重**(与库内因子算 `\|cs_corr\|`) -- `factorlib_info.py` 查看已入库因子列表 -- LLM 挖掘时 `similarity.top_neighbors` 返回相似因子 - -查重依赖 **`values/*.memmap` 里的全量因子值** 和 **`similarity/pearson.f32.memmap`**,必须从 `.dsl` **重新物化入库** 才能恢复。 - -#### 两个方向 - -| 方向 | 命令 | 何时用 | -|------|------|--------| -| **导出**(zoo → `.dsl`,准备 
commit) | `sync_factor_exprs.py` | 本地挖掘/入库后,把 catalog 同步到 `expressions/` 再 `git push` | -| **导入**(`.dsl` → zoo 全量值) | `ingest_factors.py --expr-dir ...` | clone / pull 后,从 Git 里的 `.dsl` 重建 memmap + 查重矩阵 | - -#### 首次 clone / pull 后:全量重建(推荐) - -**前提**:已有与团队一致的 Panel(`artifacts/panel/panel_1d.parquet`),且行数与 `init_factorlib` 时绑定的一致。 - -```powershell -# 1. 拉代码后确认 expressions 已在 -dir artifacts\factorzoo\stock_1d\expressions\*.dsl - -# 2. 若 factorzoo 尚未初始化(无 manifest.json) -uv run python scripts/init_factorlib.py - -# 3. 从 expressions 批量物化入库(重建 values + similarity + catalog) -uv run python scripts/ingest_factors.py ` - --expr-dir artifacts/factorzoo/stock_1d/expressions ` - --overwrite - -# 4. 确认因子数与 Git 中 .dsl 数量一致 -uv run python scripts/factorlib_info.py -``` - -说明: - -- `--expr-dir`:扫描目录下全部 `*.dsl`,`factor_id` = 文件名(不含扩展名)。 -- `--overwrite`:已存在同 id 因子时用新表达式重算并覆盖(pull 后建议加上,保证与 Git 一致)。 -- 首次空库可不写 `--overwrite`;pull 更新已有因子时**建议始终加**。 -- 入库过程会逐因子 DSL 求值 → 写 memmap → 更新截面相似度矩阵,**查重能力随之恢复**。 - -#### 仅增量同步(同事新提交了少量因子) - -```powershell -git pull -# 只入库新增 .dsl(不覆盖已有) -uv run python scripts/ingest_factors.py --expr-dir artifacts/factorzoo/stock_1d/expressions - -# 若某因子表达式被修改,单独覆盖: -uv run python scripts/ingest_factors.py ` - --expr-file artifacts/factorzoo/stock_1d/expressions/new_factor.dsl ` - --overwrite -``` - -#### 提交侧:入库 / 挖掘后推送到 Git - -```powershell -# 挖掘 submit 成功或手动 ingest 后,导出 DSL(与 catalog 对齐) -uv run python scripts/sync_factor_exprs.py - -git add artifacts/factorzoo/stock_1d/expressions/*.dsl -git commit -m "sync factor expressions" -git push -``` - -#### 与「挖因子 + 查重」的关系 - -开始 LLM 挖掘**之前**,请确认本地 factorzoo 已按上一节重建完毕: - -```powershell -uv run python scripts/factorlib_info.py -# n_factors 应 > 0,且与 expressions/*.dsl 数量一致 -``` - -否则 `submit_factor` 可能: - -- 库为空 → 查重跳过,误把与已有 Git 因子重复的表达式当作新因子; -- 库过期 → `\|cs_corr\|` 与团队不一致,协作混乱。 - -**推荐顺序**:`git pull` → `ingest --expr-dir --overwrite` → 再跑 `factor_mining_agentscope.py`。 - -### 4.6 更新手改过的 `.dsl` 因子 - -手动编辑并用 
`eval_factor.py`([§2](#2-因子表达式调试不入库))验证过某个 `.dsl` 后,用 `ingest_factors.py --overwrite` 把新表达式写回 factorzoo(重算 memmap 值 + 指标 + 相似度)。 - -```powershell -# 1. 先 dry-run 看指标,确认能跑通 -uv run python scripts/ingest_factors.py ` - --expr-file artifacts/factorzoo/stock_1d/expressions/.dsl ` - --factor-id ` - --dry-run - -# 2. 确认无误后正式覆盖 -uv run python scripts/ingest_factors.py ` - --expr-file artifacts/factorzoo/stock_1d/expressions/.dsl ` - --factor-id ` - --overwrite -``` - -要点: - -- **必须加 `--overwrite`**。否则命中已存在因子会直接跳过(`skipped_reason=already_exists`),指标算完但不写库。 -- **`--factor-id` 要与 catalog 中已有 ID 完全一致**。不传时会用文件名 stem 经 slug(转小写/替换特殊字符)推导,万一对不上会当成**新因子插入**而非覆盖;稳妥起见显式传。 -- **覆盖不走查重闸门**。overwrite 分支调用 `zoo.overwrite_factor`,重新物化 + 重算指标/相似度并覆盖,**不会**因 `max_cs_corr` 过高被拦;只要 DSL 能求值即可更新。 -- **指标口径保持一致**。若在意前后可比,`--label-col` / `--train-start` / `--eval-end` 用与初次入库相同的值。 -- `ingest_factors.py` **只读** `.dsl`、不回写,手改文本会原样保留;catalog 里存的是物化值,覆盖时按当前 panel 重新求值。 -- 更新后如需同步到 Git,走 [§4.5 提交侧](#45-从-git-的-dsl-全量重建因子库memmap--查重矩阵):`sync_factor_exprs.py`(可选,DSL 已是最新)→ `git add/commit/push`。 - ---- - -## 5. Panel 更新后 realign 因子库 - -Panel 增量更新后,已有因子 memmap 需与新的 canonical index 对齐: - -```powershell -# 1. 更新 Panel(增量拉行情 + panel 增量重建) -uv run python scripts/update_panel.py --universe zz1000 - -# 2. 增量 realign(默认 T+N 窗口,warmup=240 交易日) -uv run python scripts/realign_factorlib.py -``` - -其他用法: - -```powershell -# 只校验 overlap,不写库 -uv run python scripts/realign_factorlib.py --dry-run - -# 强制全量重算 -uv run python scripts/realign_factorlib.py --full -``` - ---- - -## 6. 
LLM 因子挖掘 - -### 6.1 安装与配置 - -```powershell -uv sync --extra mining -``` - -`.env` 中配置 `OPENAI_API_KEY`、`MODEL`(及可选 `OPENAI_API_BASE`)。 - -> **挖因子前必读**:[§4.5 从 Git 的 `.dsl` 全量重建因子库](#45-从-git-的-dsl-全量重建因子库memmap--查重矩阵)。 -> `submit_factor` 的截面查重依赖本地 memmap;仅 clone 仓库而不 `ingest --expr-dir`,查重矩阵为空,无法正常协作。 - -### 6.2 打印系统提示词(检查算子清单 / 门槛) - -```powershell -uv run python -c "from seekalpha.factor.mining.prompts import build_system_prompt; print(build_system_prompt())" > logs/mining_system_prompt.md -``` - -不含算子清单的精简版: - -```powershell -uv run python -c "from seekalpha.factor.mining.prompts import build_system_prompt; print(build_system_prompt(include_operator_catalog=False))" -``` - -### 6.3 AgentScope 版(推荐,终端流式输出) - -```powershell -uv run python scripts/factor_mining_agentscope.py --panel artifacts/panel/panel_1d.parquet -``` - -仅评估、不入库(调试 prompt / 工具链): - -```powershell -uv run python scripts/factor_mining_agentscope.py --panel artifacts/panel/panel_1d.parquet --no-submit -``` - -只挖价量因子、不载入基本面列(省内存;prompt 也会隐藏 `$funda_*` 字段): - -```powershell -uv run python scripts/factor_mining_agentscope.py --panel artifacts/panel/panel_1d.parquet --no-fundamentals -``` - -> 接了三大表后 panel 有 70+ 个 `funda_fs_*` 列,全量驻内存较大;挖价量因子时加 `--no-fundamentals`,会话会丢弃所有 `funda_*` 列并从系统提示词中移除基本面字段说明。`factor_mining.py`(OpenAI 直连版)同样支持该开关。 - -### 6.4 OpenAI 直连版 - -```powershell -uv run python scripts/factor_mining.py --panel artifacts/panel/panel_1d.parquet -``` - -### 6.5 常用参数 - -```powershell -uv run python scripts/factor_mining_agentscope.py ` - --panel artifacts/panel/panel_1d.parquet ` - --seed-factor examples/factors/ma20_dev.dsl ` - --user-message "在种子因子基础上优化 IC 与月度稳健性" ` - --train-start 2019-01-01 --train-end 2021-12-31 ` - --val-start 2022-01-01 --val-end 2024-12-31 ` - --label-col label_10d_close_to_close ` - --max-cs-corr 0.8 ` - --log-dir logs/factor_mining ` - --quiet -``` - -> Panel 须已 `--with-fundamentals` 或 `--enrich-only` 写入 `funda_*` 列后,agent 方可引用 `$funda_roe` 等变量。 - -| 参数 | 说明 | 
-|------|------| -| `--no-submit` | 禁用 `submit_factor`,只跑 train/val eval | -| `--seed-factor PATH [PATH ...]` | 种子 `.dsl`,可多次指定 | -| `--user-file PATH` | 从文件读 user 消息 | -| `--factorlib PATH` | 因子库根目录(默认 `artifacts/factorzoo/stock_1d`) | -| `--ingest-overwrite` | submit 时覆盖已存在 factor_id | -| `--no-operator-catalog` | system prompt 不注入算子清单 | -| `--max-parallel-eval N` | 同时进行的 train/val 评估上限(不传则读环境变量 `MAX_PARALLEL_EVAL`,默认 1)。评估以 numpy/pandas 为主会释放 GIL,放开后可真正并行;建议与 `--max-tool-workers` 匹配 | - -### 6.6 挖掘会话里的工具链 - -| 工具 | 作用 | -|------|------| -| `eval_on_train_set` | train 区间评估(IC、ICIR、MLS-FMB、月度稳健性等) | -| `eval_on_val_set` | val 泛化抽检(须传 `expected_sign`) | -| `submit_factor` | 达标后交付入库(**默认开启**;`--no-submit` 时不可用) | - -**交付入库链路**(无 `--no-submit` 时): - -``` -submit_factor - → ingest_factor(物化 + 指标 + 截面查重) - → factorzoo memmap - → artifacts/factorzoo/stock_1d/mining_delivered_registry.json - → artifacts/factorzoo/stock_1d/expressions/{factor_id}.dsl -``` - -查重失败时 tool 返回 `similarity.top_neighbors`(含相似因子 `expr`),模型可改写后重试。 - -日志:`logs/factor_mining/run_YYYYMMDD_HHMMSS.jsonl` 及同名的 `.summary.json` / `.messages.json`。 - ---- - -## 7. 推荐端到端流程(从零开始) - -```powershell -# 0. 环境 -uv sync -uv sync --extra mining # 若要挖因子 - -# 1. 拉行情 → hq 缓存,再离线建 Panel(含 label) -uv run python scripts/fetch_market.py --start 2015-01-01 --end 2026-06-30 --universe zz1000 --batch-size 20 -uv run python scripts/build_panel.py - -# 2. 调试一个 DSL -uv run python scripts/eval_factor.py --expr-file examples/factors/ma20_dev.dsl --report - -# 3. 初始化因子库(仅首次) -uv run python scripts/init_factorlib.py - -# 4. 从 Git expressions 全量重建 memmap(挖因子 / 查重前必做,见 §4.5) -uv run python scripts/ingest_factors.py --expr-dir artifacts/factorzoo/stock_1d/expressions --overwrite -uv run python scripts/factorlib_info.py - -# 5. LLM 挖因子(正式交付去掉 --no-submit) -uv run python scripts/factor_mining_agentscope.py --panel artifacts/panel/panel_1d.parquet - -# 6. 
挖掘/入库后推 Git:sync DSL → commit → push(见 §4.5) -uv run python scripts/sync_factor_exprs.py - -# 7. 日常:Panel 增量 → realign -uv run python scripts/update_panel.py --universe zz1000 -uv run python scripts/realign_factorlib.py -``` - ---- - -## 8. 测试 - -```powershell -uv run pytest tests/ -q -``` - -因子 / 挖掘相关: - -```powershell -uv run pytest tests/test_factor/ tests/test_dsl/ -q -``` - ---- - -## 9. 常见问题 - -| 现象 | 处理 | -|------|------| -| `未找到 TUSHARE_TOKEN` | 检查根目录 `.env` | -| `panel 行数 != 库 n_rows` | Panel 与 init 时不一致;用全量 panel 或先 `realign_factorlib.py` | -| `factorlib_not_initialized` | 先 `init_factorlib.py` | -| PowerShell 里 `$adj_close` 变 `@adj_close` | 用 `--expr-file`,或单引号包裹 `--expr` | -| 挖掘无 `submit_factor` | 去掉 `--no-submit` | -| clone 后查重不生效 / n_factors=0 | 先 §4.5:`ingest --expr-dir ... --overwrite` 重建 memmap | -| 入库 `cs_corr=0.xx >= 0.8` | 查重拒绝;看返回的 `top_neighbors[].expr` 改写 | -| 入库 `delivery_check_failed` | IC/ICIR/coverage/cs_pearson_autocorr 未达门槛 | - ---- - -## 10. 脚本索引 - -| 脚本 | 用途 | -|------|------| -| `fetch_market.py` | 拉 Tushare 日频行情 → `artifacts/market/daily_hq.parquet`(hq 缓存) | -| `build_panel.py` | 从 hq 缓存**离线**建 Panel(含 label、`--with-fundamentals`、`--enrich-only`) | -| `update_panel.py` | 增量:拉新交易日 → 追加 hq 缓存 → panel 尾部重建 | -| `fetch_fundamentals.py` | 拉 Tushare 季频 `fina_indicator` → `artifacts/fundamental/` | -| `pack_data_release.py` | 打包 market/fundamental/industry 缓存为可分发数据包 | -| `eval_factor.py` | DSL 调试求值 / IC 报告 | -| `init_factorlib.py` | 初始化 factorzoo | -| `ingest_factors.py` | 手动入库 / **`--expr-dir` 从 .dsl 批量重建 memmap** | -| `factorlib_info.py` | 查看因子库 catalog | -| `realign_factorlib.py` | Panel 变更后对齐已有因子 | -| `factor_mining.py` | LLM 挖掘(OpenAI 直连) | -| `factor_mining_agentscope.py` | LLM 挖掘(AgentScope 流式) | -| `sync_factor_exprs.py` | catalog → `expressions/*.dsl`(commit 前导出) | diff --git a/docs/panel_fundamental_fields.md b/docs/panel_fundamental_fields.md deleted file mode 100644 index ee7ed4ad..00000000 --- 
a/docs/panel_fundamental_fields.md +++ /dev/null @@ -1,316 +0,0 @@ -# Panel 基本面数据字段说明 - -> 面向学习与理解:讲清 AlphaAgent 的 panel 里都有哪些基本面(财务)字段、它们从哪来、怎么进面板、以及在 DSL 表达式里怎么用。 -> -> 相关代码:`seekalpha/data/panel.py`、`seekalpha/data/fundamental.py`、`seekalpha/data/fundamental_fetch.py`、`seekalpha/core/types.py`、`seekalpha/factor/mining/prompts.py`。 -> 操作命令见 `docs/operations_manual.md` §1.3–1.5。 - ---- - -## 1. Panel 是什么 - -- **格式**:单个 Parquet 文件,默认路径 `artifacts/panel/panel_1d.parquet`(常量 `PANEL_PATH`,见 `seekalpha/core/paths.py`)。 -- **索引**:两层 `MultiIndex = (datetime, instrument)` - - `datetime`:交易日(`DatetimeIndex`) - - `instrument`:Tushare `ts_code`,如 `000001.SZ` -- **频率**:日频(1d);DSL 里可用 `$field@1w` 引用周线(W-FRI,无前视 backward 广播)。 -- **数值类型**:`float32`(`is_trade` / `not_st` 标记列除外)。 -- **默认股票池**:`zz1000`(中证 1000 成分并集)。 -- **数据源**:**全部来自 Tushare Pro API**(无 akshare / wind / 本地文件)。Token 读取自 `.env` 的 `TUSHARE_TOKEN`。 - -Panel 的列分三类:**行情/衍生/label 列**(始终存在)、**基本面财务列 `funda_*`**(仅 `--with-fundamentals` 时并入)、**披露日历特征列 `funda_days_*`**。本文档重点是后两类。 - -在 DSL 表达式中引用任意列的语法都是 **`$` + 列名**,例如 `$funda_roe`、`$adj_close`。 - ---- - -## 2. 
基本面字段总览 - -基本面字段分组如下,均来自 Tushare、经**严格 PIT** 展开/映射为日频(见 §4): - -- **§2.1 财务指标**(17 个,前缀 `funda_`)+ **§2.2 披露日历特征**(2 个)——来自 `fina_indicator`,默认拉取。 -- **§3 三大表科目**(约 70 个,前缀 `funda_fs_*`)——来自 `income`/`balancesheet`/`cashflow`,加 `--with-statements` 时并入。 -- **§3.4 行业分类**(`industry_sw_l1`)——申万一级行业离散码,加 `--with-industry` 时并入,用于**行业中性化**。 - -### 2.1 财务指标(前缀 `funda_`) - -来源:Tushare `fina_indicator`(VIP 用 `fina_indicator_vip` 拉全市场)。 -映射定义:`seekalpha/data/fundamental_fetch.py` 中的 `FINA_INDICATOR_COLUMN_MAP`。 - -| Panel 列名 | Tushare 原字段 | 含义 | 单位/量纲 | -|---|---|---|---| -| `funda_roe` | `roe` | 净资产收益率 | %(百分比) | -| `funda_roa` | `roa` | 总资产报酬率 | % | -| `funda_debt_to_assets` | `debt_to_assets` | 资产负债率 | % | -| `funda_netprofit_yoy` | `netprofit_yoy` | 归母净利润同比增长率 | % | -| `funda_or_yoy` | `or_yoy` | 营业收入同比增长率 | % | -| `funda_tr_yoy` | `tr_yoy` | 营业总收入同比增长率 | % | -| `funda_bps` | `bps` | 每股净资产 | 元/股 | -| `funda_eps` | `eps` | 基本每股收益 | 元/股 | -| `funda_grossprofit_margin` | `grossprofit_margin` | 销售毛利率 | % | -| `funda_netprofit_margin` | `netprofit_margin` | 销售净利率 | % | -| `funda_ocfps` | `ocfps` | 每股经营活动现金流净额 | 元/股 | -| `funda_profit_dedt` | `profit_dedt` | 扣非净利润(扣除非经常性损益) | 元 | -| `funda_current_ratio` | `current_ratio` | 流动比率 | 倍(无量纲) | -| `funda_quick_ratio` | `quick_ratio` | 速动比率 | 倍(无量纲) | -| `funda_fs_working_capital` | `working_capital` | 营运资本 | 元 | -| `funda_fs_ebit` | `ebit` | 息税前利润 | 元 | -| `funda_fs_rd_exp` | `rd_exp` | 研发费用(部分股票为 NaN) | 元 | - -> 命名说明:`working_capital` / `ebit` / `rd_exp` 虽用了 `funda_fs_` 前缀(财报科目命名规范),但它们**实际是从 `fina_indicator` 拉取的**,与 §3 三大表接口来源不同(§3 里不再重复这三项)。 - -### 2.2 披露日历特征(前缀 `funda_days_`) - -由 `seekalpha/data/fundamental.py` 计算(纯日历/PIT 推导,非 Tushare 原字段)。定义于 `DISCLOSURE_DISTANCE_COLUMNS`。 - -| Panel 列名 | 含义 | 单位 | -|---|---|---| -| `funda_days_since_disclose` | 距**上一期**财报披露**生效日**的交易日数(生效日 = 0);严格 PIT,披露前为 NaN | 交易日数 | -| `funda_days_since_quarter_start` | 距当前季报区间首日(1/1、4/1、7/1、10/1)的交易日数 | 交易日数 | - -> 可用 `--no-disclosure-distance` 关闭这两列的写入。 
- ---- - -## 3. 三大表科目(`funda_fs_*`,可选并入) - -三大表(利润表 / 资产负债表 / 现金流量表)来自 Tushare `income` / `balancesheet` / `cashflow`(VIP 用 `*_vip` 按期拉全市场,需 5000 积分)。 -拉取时加 `--with-statements` 即并入同一份 `quarterly.parquet`,随 `fina_indicator` 一起走**同一套严格 PIT 展开**为日频(映射见 `seekalpha/data/fundamental_fetch.py` 的 `INCOME_COLUMN_MAP` / `BALANCESHEET_COLUMN_MAP` / `CASHFLOW_COLUMN_MAP`)。 - -**口径约定(按 Tushare 原始值存储,不做单季差分):** -- **资产负债表**:**时点值**,列名无后缀(如 `funda_fs_total_assets`)。 -- **利润表 / 现金流量表**:Tushare 返回**年初至今累计值(YTD)**,列名统一带 **`_ytd`** 后缀(Q1=当季,中报/三季报/年报为累计)。 -- 期末/期初现金余额为时点值,用 `funda_fs_cash_equiv_end` / `_beg`。 -- 仅取 `report_type='1'`(合并报表);同 `(ts_code, end_date)` 保留 `ann_date` 最新一条。 -- 单位均为**元**(EPS 为元/股,`funda_fs_total_share` 为股)。 - -### 3.1 利润表(`income` → `_ytd` 累计) - -| Panel 列名 | Tushare 字段 | 含义 | -|---|---|---| -| `funda_fs_total_revenue_ytd` | `total_revenue` | 营业总收入 | -| `funda_fs_oper_revenue_ytd` | `revenue` | 营业收入 | -| `funda_fs_total_cogs_ytd` | `total_cogs` | 营业总成本 | -| `funda_fs_oper_cost_ytd` | `oper_cost` | 营业成本 | -| `funda_fs_selling_expense_ytd` | `sell_exp` | 销售费用 | -| `funda_fs_admin_expense_ytd` | `admin_exp` | 管理费用 | -| `funda_fs_finance_expense_ytd` | `fin_exp` | 财务费用 | -| `funda_fs_interest_expense_ytd` | `int_exp` | 利息支出 | -| `funda_fs_tax_surcharge_ytd` | `biz_tax_surchg` | 营业税金及附加 | -| `funda_fs_operate_profit_ytd` | `operate_profit` | 营业利润 | -| `funda_fs_total_profit_ytd` | `total_profit` | 利润总额 | -| `funda_fs_income_tax_ytd` | `income_tax` | 所得税费用 | -| `funda_fs_net_profit_ytd` | `n_income` | 净利润(含少数股东损益) | -| `funda_fs_net_profit_parent_ytd` | `n_income_attr_p` | 归母净利润 | -| `funda_fs_minority_interest_ytd` | `minority_gain` | 少数股东损益 | -| `funda_fs_comprehensive_income_ytd` | `t_compr_income` | 综合收益总额 | -| `funda_fs_comprehensive_income_parent_ytd` | `compr_inc_attr_p` | 归母综合收益 | -| `funda_fs_eps_basic_ytd` | `basic_eps` | 基本每股收益 | -| `funda_fs_eps_diluted_ytd` | `diluted_eps` | 稀释每股收益 | - -### 3.2 资产负债表(`balancesheet` → 时点值) - -| Panel 列名 | Tushare 字段 | 含义 
| -|---|---|---| -| `funda_fs_total_assets` | `total_assets` | 资产总计 | -| `funda_fs_current_assets` | `total_cur_assets` | 流动资产合计 | -| `funda_fs_noncurrent_assets` | `total_nca` | 非流动资产合计 | -| `funda_fs_total_liabilities` | `total_liab` | 负债合计 | -| `funda_fs_current_liabilities` | `total_cur_liab` | 流动负债合计 | -| `funda_fs_noncurrent_liabilities` | `total_ncl` | 非流动负债合计 | -| `funda_fs_total_equity` | `total_hldr_eqy_exc_min_int` | 股东权益(不含少数) | -| `funda_fs_total_equity_incl_mi` | `total_hldr_eqy_inc_min_int` | 股东权益(含少数) | -| `funda_fs_total_liab_equity` | `total_liab_hldr_eqy` | 负债及股东权益总计 | -| `funda_fs_minority_interest_equity` | `minority_int` | 少数股东权益 | -| `funda_fs_money_cap` | `money_cap` | 货币资金 | -| `funda_fs_notes_receivable` | `notes_receiv` | 应收票据 | -| `funda_fs_accounts_receivable` | `accounts_receiv` | 应收账款 | -| `funda_fs_inventories` | `inventories` | 存货 | -| `funda_fs_fixed_assets` | `fix_assets` | 固定资产 | -| `funda_fs_construction_in_progress` | `cip` | 在建工程 | -| `funda_fs_intangible_assets` | `intan_assets` | 无形资产 | -| `funda_fs_goodwill` | `goodwill` | 商誉 | -| `funda_fs_rd_capitalized` | `r_and_d` | 研发支出(资本化) | -| `funda_fs_short_term_borrow` | `st_borr` | 短期借款 | -| `funda_fs_long_term_borrow` | `lt_borr` | 长期借款 | -| `funda_fs_bond_payable` | `bond_payable` | 应付债券 | -| `funda_fs_notes_payable` | `notes_payable` | 应付票据 | -| `funda_fs_accounts_payable` | `acct_payable` | 应付账款 | -| `funda_fs_advance_receipts` | `adv_receipts` | 预收款项 | -| `funda_fs_taxes_payable` | `taxes_payable` | 应交税费 | -| `funda_fs_payroll_payable` | `payroll_payable` | 应付职工薪酬 | -| `funda_fs_other_payables` | `oth_payable` | 其他应付款 | -| `funda_fs_retained_earnings` | `undistr_porfit` | 未分配利润 | -| `funda_fs_surplus_reserve` | `surplus_rese` | 盈余公积 | -| `funda_fs_capital_reserve` | `cap_rese` | 资本公积 | -| `funda_fs_total_share` | `total_share` | 期末总股本(股) | -| `funda_fs_other_comprehensive_income` | `oth_comp_income` | 其他综合收益 | - -### 3.3 现金流量表(`cashflow`) - -`_ytd` 
为年初至今累计;`funda_fs_cash_equiv_end/beg` 为时点余额。 - -| Panel 列名 | Tushare 字段 | 含义 | -|---|---|---| -| `funda_fs_cash_from_sales_ytd` | `c_fr_sale_sg` | 销售商品/劳务收到的现金 | -| `funda_fs_ocf_inflow_ytd` | `c_inf_fr_operate_a` | 经营活动现金流入小计 | -| `funda_fs_cash_paid_goods_ytd` | `c_paid_goods_s` | 购买商品/劳务支付的现金 | -| `funda_fs_cash_paid_employees_ytd` | `c_paid_to_for_empl` | 支付给职工的现金 | -| `funda_fs_cash_paid_taxes_ytd` | `c_paid_for_taxes` | 支付的各项税费 | -| `funda_fs_ocf_outflow_ytd` | `st_cash_out_act` | 经营活动现金流出小计 | -| `funda_fs_ocf_net_ytd` | `n_cashflow_act` | 经营活动现金流量净额 | -| `funda_fs_capex_ytd` | `c_pay_acq_const_fiolta` | 购建固定/无形/长期资产支付现金 | -| `funda_fs_cash_paid_invest_ytd` | `c_paid_invest` | 投资支付的现金 | -| `funda_fs_icf_net_ytd` | `n_cashflow_inv_act` | 投资活动现金流量净额 | -| `funda_fs_cash_from_borrow_ytd` | `c_recp_borrow` | 取得借款收到的现金 | -| `funda_fs_cash_repay_debt_ytd` | `c_prepay_amt_borr` | 偿还债务支付的现金 | -| `funda_fs_fcf_net_ytd` | `n_cash_flows_fnc_act` | 筹资活动现金流量净额 | -| `funda_fs_free_cashflow_ytd` | `free_cashflow` | 企业自由现金流量 | -| `funda_fs_cash_net_incr_ytd` | `n_incr_cash_cash_equ` | 现金及等价物净增加额 | -| `funda_fs_depreciation_ytd` | `depr_fa_coga_dpba` | 固定资产折旧/油气折耗/生物折旧 | -| `funda_fs_amortization_intangible_ytd` | `amort_intang_assets` | 无形资产摊销 | -| `funda_fs_ocf_indirect_ytd` | `im_net_cashflow_oper_act` | 经营活动现金流量净额(间接法) | -| `funda_fs_cash_equiv_beg` | `c_cash_equ_beg_period` | 期初现金及等价物余额(时点) | -| `funda_fs_cash_equiv_end` | `c_cash_equ_end_period` | 期末现金及等价物余额(时点) | - -> 注:`funda_fs_working_capital`(营运资本)、`funda_fs_ebit`(息税前利润)、`funda_fs_rd_exp`(研发费用)由 `fina_indicator` 提供(见 §2.1),不来自三大表接口,避免重复。 -> `seekalpha/data/fundamental.py` 里另有一套中文名 `FUNDAMENTAL_STATEMENT_COLUMN_MAP`(对齐 AlphaAgent 数据源)用于读取历史中文列名,与上述英文接入路径互不冲突。 - -**DSL 使用**:与其他 `funda_*` 列一致,用 `$funda_fs_total_assets` 引用。累计值可用同比/环比构造:`TS_PCTCHANGE($funda_fs_oper_revenue_ytd, 60)` ≈ 单季环比参考;跨年注意 YTD 在 Q1 归零的阶跃。 - -### 3.4 行业分类:`industry_sw_l1`(可选并入) - -基本面要放到行业里看——行业中性化能剔除"某因子只是反映了行业 beta"的部分。加 `--with-industry` 
即并入一列申万一级行业码。 - -| Panel 列名 | 含义 | 类型 | 说明 | -|---|---|---|---| -| `industry_sw_l1` | 申万一级行业(SW2021)**离散整数码** 1..N | float32 | 严格 PIT;未归类为 NaN | - -- **数据源**:Tushare `index_classify(level='L1', src='SW2021')`(行业目录,31 个)+ `index_member`(个股成员,含 `in_date`/`out_date`)。映射与拉取见 `seekalpha/data/industry.py`。 -- **整数编码**:按行业 `index_code` 排序分配 1..N,跨次运行稳定;`sw_l1_code_map()` 可取"码→行业名"。 -- **严格 PIT**:用 `merge_asof(backward)` 按 `in_date` 把每个交易日映射到当日有效行业;晚于 `out_date` 的样本置 NaN。**无前视**。 -- **缓存**:成员表落 `artifacts/industry/sw_l1_membership.parquet`;首次自动拉取,`--refresh-industry` 强制重拉。 -- **只做分组、不做数值运算**:行业码是类别标签,做加减/排序无意义。 - -**DSL 用法(行业中性化)**:行业码本身就是离散组号,**直接**当 `CS_NEUTRALIZE` 的分组参数,**勿**再套 `CS_BUCKET`: - -```text -# 行业内去均值(行业中性) -CS_NEUTRALIZE($factor, $industry_sw_l1) - -# 行业 + 市值双重中性 -CS_NEUTRALIZE(CS_NEUTRALIZE($factor, $industry_sw_l1), CS_BUCKET(LOG($float_cap), 10)) -``` - -> 原理:DSL 变量从 `panel.columns` 自动派生,`$industry_sw_l1` 即绑定该列;`CS_NEUTRALIZE(x, group)` 按 `group` 的离散值分组做 `x − 组内均值`。因此整数行业码可直接用作分组,无需改动 DSL。 - ---- - -## 4. 从 Tushare 到日频 Panel 的链路 - -### 4.1 拉季频缓存(`scripts/fetch_fundamentals.py`) - -1. `fetch_fina_indicator_period`:VIP 每期一次拉全 A 股 `fina_indicator`。 -2. `raw_fina_to_quarterly`:整理为索引 `(report_end, instrument)` 的季频宽表 → 写 `artifacts/fundamental/quarterly.parquet`。 -3. `raw_fina_to_disclosure_events`:从 `ann_date`(公告日)提取披露日 → 写 `artifacts/fundamental/disclosure_calendar.parquet`(行 = `report_end`,列 = `instrument`,值 = 披露日期)。 -4. 
(可选,`--with-statements`)`fetch_statement_period` + `raw_statement_to_quarterly`:每期拉 `income`/`balancesheet`/`cashflow`,过滤 `report_type='1'`、同键取最新 `ann_date`,按 `(report_end, instrument)` **列向合并**进同一份 `quarterly.parquet`。 - -### 4.2 PIT 展开并入 panel(`enrich_panel_fundamentals` → `expand_quarterly_fundamentals_pit`) - -**严格 PIT(Point-In-Time,避免前视偏差)语义**: - -- 财报公告日 D **当天不可用**;从 **D 的下一个交易日**起,该期字段才生效。 -- 两期财报之间用 **ffill** 保持"最近一期已披露值"(即日频上表现为**阶跃 + 持有**)。 -- 首次披露之前为 **NaN**,这是正常现象,不是数据缺失错误。 - -因此在日频上,`funda_*` 列在整个季度内是一条水平线,只在下一份财报生效日跳变一次。 - ---- - -## 5. 在 DSL 里使用基本面字段 - -基本面因子是"慢因子",用法与价量因子不同,几条要点: - -- **窗口单位是交易日**:`TS_PCTCHANGE($funda_roe, 20)` 的 20 指 20 个交易日;约 **60 交易日 ≈ 一个季度**。 -- **市值中性**:截面组合建议加 `CS_NEUTRALIZE(..., CS_BUCKET(LOG($float_cap), 10))`。 -- **先去极值再排序**:比率类字段可先 `CS_WINSORIZE` 再 `RANK`。 -- **label 选用**:基本面/慢因子用 `--label-col label_10d_close_to_close`(或 `label_20d_...`);价量/短周期因子用 `label_1d_close_to_close`。 -- **事件窗**:可借助披露距离列,如 `TS_PCTCHANGE($xxx, $funda_days_since_disclose)`。 - -### 已入库示例 - -`artifacts/factorzoo/stock_1d/expressions/roe_lowvol_center_smooth.dsl`(引用 `$funda_roe`): - -```1:7:artifacts/factorzoo/stock_1d/expressions/roe_lowvol_center_smooth.dsl -roe_r = RANK(CS_WINSORIZE($funda_roe, 0.01, 0.99)) -vol = TS_STD($ret, 20) -vol_r = RANK(DIVIDE(1, ADD(vol, 0.001))) -roe_center = MULTIPLY(roe_r, SUBTRACT(1, roe_r)) -vol_center = MULTIPLY(vol_r, SUBTRACT(1, vol_r)) -score = ADD(roe_center, vol_center) -CS_NEUTRALIZE(CS_WINSORIZE(score, 0.01, 0.99), CS_BUCKET(LOG($float_cap), 10)) -``` - -`artifacts/factorzoo/stock_1d/expressions/netprofit_yoy_lowvol_turnover_gaussian.dsl`(引用 `$funda_netprofit_yoy`): - -```1:4:artifacts/factorzoo/stock_1d/expressions/netprofit_yoy_lowvol_turnover_gaussian.dsl -np_z = CS_ZSCORE(CS_WINSORIZE($funda_netprofit_yoy, 0.01, 0.99)) -vol_z = CS_ZSCORE(TS_STD($ret, 20)) -amt = TS_MEAN($amount, 20) -turnover_z = CS_ZSCORE(DIVIDE(amt, $float_cap)) -``` - ---- - -## 6. 
附:行情 / 衍生 / label 列(非基本面,便于对照) - -始终存在,`build_panel` 自动生成(`OUTPUT_COLUMNS`,见 `seekalpha/core/types.py`)。 - -| 字段 | 含义 | 说明 | -|---|---|---| -| `open` `high` `low` `close` | 原始 OHLC | Tushare `pro.daily` | -| `adj_open` `adj_high` `adj_low` `adj_close` | 后复权 OHLC | 因子**优先**用复权价 | -| `adjfactor` | 复权因子 | Tushare `pro.adj_factor` | -| `volume` `amount` | 成交量 / 成交额 | `amount` 为千元口径 | -| `float_cap` `tot_cap` | 流通市值 / 总市值 | 元(Tushare `circ_mv`/`total_mv` × 10000) | -| `is_trade` `not_st` | 可交易 / 非 ST 标记 | 0/1 | -| `ret` | 日收益 | adj_close 按 instrument pct_change | -| `vwap` `adj_vwap` | 量加权均价 / 其复权版 | `amount/volume`、`vwap×adjfactor` | -| `label_1d_open_to_open` | T+1 开盘 → T+2 开盘 | 脚本默认 label | -| `label_1d_close_to_close` | T+1 → T+2 收盘(1 日持有) | 适合价量因子 | -| `label_10d_close_to_close` | T+1 → T+11 收盘(10 日持有) | 适合基本面因子 | -| `label_20d_close_to_close` | T+1 → T+21 收盘(20 日持有) | 适合基本面因子 | - ---- - -## 7. 相关命令速查 - -```powershell -# 拉季频基本面缓存(VIP 全 A 股,仅 fina_indicator) -uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 - -# 同时拉三大表(income/balancesheet/cashflow,需 VIP 5000 积分) -uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 --with-statements - -# 从 hq 缓存离线建 panel 并一并 enrich 基本面(+ 行业列) -uv run python scripts/build_panel.py --with-fundamentals --with-industry - -# 已有 panel,仅补基本面列(可叠加 --with-industry 补行业列) -uv run python scripts/build_panel.py --enrich-only --with-industry - -# 增量更新行情后刷新基本面 + 行业 -uv run python scripts/update_panel.py --universe zz1000 --with-fundamentals --with-industry - -# 调试基本面因子(用 10 日 label) -uv run python scripts/eval_factor.py --expr-file your.dsl --report --label-col label_10d_close_to_close -``` - ---- - -## 8. 权威信息源 - -字段说明目前分散在三处(**无独立数据字典文件**,本文档即为补充): - -1. `seekalpha/factor/mining/prompts.py`(注入挖掘 LLM 的字段说明表,最贴近实际使用) -2. `docs/operations_manual.md` §1.4(拉数/enrich 命令与 PIT 语义) -3. 
代码内映射字典(机器可读的字段定义源头):`FINA_INDICATOR_COLUMN_MAP`、`INCOME_COLUMN_MAP`、`BALANCESHEET_COLUMN_MAP`、`CASHFLOW_COLUMN_MAP`(均在 `seekalpha/data/fundamental_fetch.py`);行业分类见 `seekalpha/data/industry.py` diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100755 index 00000000..df024d65 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,4 @@ +sphinx +sphinx_rtd_theme +furo +importlib.metadata \ No newline at end of file diff --git a/examples/factors/ma20_dev.dsl b/examples/factors/ma20_dev.dsl deleted file mode 100644 index d7d30395..00000000 --- a/examples/factors/ma20_dev.dsl +++ /dev/null @@ -1 +0,0 @@ -SUBTRACT($adj_close, TS_MEAN($adj_close, 20)) diff --git a/examples/factors/ma_dev.dsl b/examples/factors/ma_dev.dsl deleted file mode 100644 index 7c714f27..00000000 --- a/examples/factors/ma_dev.dsl +++ /dev/null @@ -1 +0,0 @@ -SUBTRACT($adj_close, TS_MEAN($adj_close, 3)) diff --git a/examples/factors/mom_vol.dsl b/examples/factors/mom_vol.dsl deleted file mode 100644 index ad51a976..00000000 --- a/examples/factors/mom_vol.dsl +++ /dev/null @@ -1,3 +0,0 @@ -mom = TS_DELTA($adj_close, 5) -vol = TS_STD($adj_close, 20) -DIVIDE(mom, ADD(vol, 1e-8)) diff --git a/examples/ma20.dsl b/examples/ma20.dsl deleted file mode 100644 index 03c62210..00000000 --- a/examples/ma20.dsl +++ /dev/null @@ -1 +0,0 @@ -TS_MEAN($adj_close, 20) diff --git a/prepare_cn_data.py b/prepare_cn_data.py new file mode 100755 index 00000000..ab00cf39 --- /dev/null +++ b/prepare_cn_data.py @@ -0,0 +1,179 @@ +import baostock as bs +import pandas as pd +from datetime import datetime, timedelta +import time +from tqdm import tqdm +from pathlib import Path +from dateutil.relativedelta import relativedelta +import numpy as np + +def get_all_stocks_in_period(start_date, end_date): + """获取指定时间段内所有出现过的股票代码""" + all_stocks = set() + start = datetime.strptime(start_date, '%Y-%m-%d') + end = datetime.strptime(end_date, '%Y-%m-%d') + current = start + while current <= end: + query_date = 
current.strftime('%Y-%m-%d') + stock_rs = bs.query_all_stock(query_date) + stock_df = stock_rs.get_data() + if not stock_df.empty: + all_stocks.update(stock_df['code'].tolist()) + current += relativedelta(years=1) + if current > end: + break + print(f"共获取到 {len(all_stocks)} 只股票") + return all_stocks + +def download_stock_data(start_date, end_date, output_dir): + """下载或更新股票数据到最新日期""" + output_path = Path(output_dir).expanduser() + output_path.mkdir(parents=True, exist_ok=True) + + lg = bs.login() + if lg.error_code != '0': + print(f'登录失败: {lg.error_msg}') + return + + try: + all_stocks = get_all_stocks_in_period(start_date, end_date) + fields = "date,code,open,high,low,close,preclose,volume,amount,turn,tradestatus,pctChg,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST" + + from concurrent.futures import ThreadPoolExecutor, as_completed + + def download_single_stock(code): + code_clean = code.replace('.', '') + output_file = output_path / f"{code_clean}.csv" + + # 确定该股票的下载起始日期 + if output_file.exists(): + existing_df = pd.read_csv(output_file) + if not existing_df.empty: + existing_df['date'] = pd.to_datetime(existing_df['date']) + last_date = existing_df['date'].max() + code_download_start_date = (last_date + timedelta(days=1)).strftime('%Y-%m-%d') + # 如果无需更新则跳过 + # print(f"股票 {code} 已下载开始日期:{code_download_start_date},结束日期:{last_date.strftime('%Y-%m-%d')}") + if code_download_start_date == end_date: + print(f"股票 {code} 无需更新") + return + else: + code_download_start_date = start_date + else: + code_download_start_date = start_date + + # 下载增量数据 + print(f"下载 {code} 数据...日期范围:{code_download_start_date} 至 {end_date}") + rs = bs.query_history_k_data_plus( + code, + fields, + start_date=code_download_start_date, + end_date=end_date, + frequency="d", + adjustflag="1" # 后复权 + ) + + if rs.error_code != '0': + print(f"获取 {code} 数据失败: {rs.error_msg}") + return + + data_list = [] + while rs.next(): + data_list.append(rs.get_row_data()) + + + + # 获取复权因子 + rs_list = [] + rs_adj = 
bs.query_adjust_factor( + code, + start_date=code_download_start_date, + end_date=end_date, + ) + + while (rs_adj.error_code == '0') & rs_adj.next(): + rs_list.append(rs_adj.get_row_data()) + + adj_df = pd.DataFrame(rs_list, columns=rs_adj.fields).set_index('dividOperateDate')['adjustFactor'] + adj_df = adj_df.rename('factor') + + + if data_list: + new_df = pd.DataFrame(data_list, columns=rs.fields).set_index('date') + new_df = pd.concat([new_df, adj_df], axis=1).ffill().fillna(1) + + new_df['code'] = new_df['code'].str.replace('.', '', regex=False) + # new_df['factor'] = np.ones(len(new_df)) + numeric_cols = new_df.columns[2:] + new_df[numeric_cols] = new_df[numeric_cols].apply(pd.to_numeric, errors='coerce') + + new_df = new_df.reset_index() + new_df = new_df.rename(columns={'index': 'date'}) + + # 合并并保存数据 + if output_file.exists(): + combined_df = pd.concat([existing_df, new_df], ignore_index=True) + combined_df = combined_df.drop_duplicates(subset=['date', 'code']) + combined_df['date'] = pd.to_datetime(combined_df['date']) + combined_df = combined_df.sort_values('date') + else: + combined_df = new_df + + + combined_df.to_csv(output_file, index=False, encoding='utf-8') + + # time.sleep(0.5) + + # # 使用线程池进行并发下载 + with ThreadPoolExecutor(max_workers=1) as executor: + futures = [executor.submit(download_single_stock, code) for code in all_stocks] + + # 使用tqdm显示进度 + for _ in tqdm(as_completed(futures), total=len(futures), desc="下载进度"): + pass + + # for code in all_stocks: + # download_single_stock(code) + + finally: + bs.logout() + + + +def download_oneday_stock_data_(date): + + #### 登陆系统 #### + lg = bs.login() + # 显示登陆返回信息 + print('login respond error_code:'+lg.error_code) + print('login respond error_msg:'+lg.error_msg) + + #### 获取某日所有证券信息 #### + rs = bs.query_all_stock(day=date) + print('query_all_stock respond error_code:'+rs.error_code) + print('query_all_stock respond error_msg:'+rs.error_msg) + + #### 打印结果集 #### + data_list = [] + while (rs.error_code == 
'0') & rs.next(): + # 获取一条记录,将记录合并在一起 + data_list.append(rs.get_row_data()) + result = pd.DataFrame(data_list, columns=rs.fields) + + #### 结果集输出到csv文件 #### + # result.to_csv("D:\\all_stock.csv", encoding="gbk", index=False) + print(result) + + #### 登出系统 #### + bs.logout() + +if __name__ == '__main__': + # 动态设置结束日期为当前日期 + START_DATE = '2014-12-31' + END_DATE = (datetime.now()).strftime('%Y-%m-%d') # '2025-01-01' - timedelta(days=7) + DATA_DIR = '~/.qlib/qlib_data/cn_data/raw_data_back_adjust' + + print("开始下载股票数据...日期范围:", START_DATE, "至", END_DATE) + download_stock_data(START_DATE, END_DATE, DATA_DIR) + # download_oneday_stock_data_((datetime.now()-timedelta(days=1)).strftime('%Y-%m-%d')) + print("下载完成!") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml old mode 100644 new mode 100755 index 85fef98f..3385c912 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,39 +1,55 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools", + "setuptools-scm", +] + [project] -name = "seekalpha" -version = "0.1.0" -description = "??????? + ????" 
-requires-python = ">=3.11" -dependencies = [ - "pandas>=2.0,<3", - "pyarrow>=14", - "numpy>=1.24,<2", - "pyparsing>=3.2", - "numba>=0.59", - "joblib>=1.4", - "tushare>=1.4", - "python-dotenv>=1.0", - "pyyaml>=6.0", +authors = [ + {email = "tangzy27@mail2.sysu.edu.cn", name = "Ziyi Tang"}, +] +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ] +description = "Alpha Factor Mining Agent" +dynamic = [ + "dependencies", + "optional-dependencies", + "version", +] +keywords = [ + "Autonomous Agents", + "Large Language Models", + "Research and Development", +] +name = "alphaagent" +readme = "README.md" +requires-python = ">=3.10" -[project.optional-dependencies] -dev = ["pytest>=8"] -model = ["lightgbm>=4.0", "scikit-learn>=1.4"] -mining = ["openai>=1.0", "agentscope>=2.0.0"] -qmt = [] +[project.scripts] +alphaagent = "alphaagent.app.cli:app" -[dependency-groups] -dev = ["pytest>=8"] +[project.urls] +homepage = "https://github.com/microsoft/RD-Agent/" +issue = "https://github.com/microsoft/RD-Agent/issues" -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" +[tool.setuptools] +packages = ["alphaagent"] -[tool.hatch.build.targets.wheel] -packages = ["seekalpha"] +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} -[tool.pytest.ini_options] -testpaths = ["tests"] +[tool.setuptools.dynamic.optional-dependencies] +docs = {file = ["requirements/docs.txt"]} +lint = {file = ["requirements/lint.txt"]} +package = {file = ["requirements/package.txt"]} +test = {file = ["requirements/test.txt"]} -[tool.uv] -index-url = "https://pypi.tuna.tsinghua.edu.cn/simple" -default-groups = ["dev"] +[tool.setuptools_scm] +local_scheme = "no-local-version" +version_scheme = "guess-next-dev" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 
00000000..97710e95 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,52 @@ +# Requirements for runtime. +pydantic-settings + +python-Levenshtein +filelock +loguru +fire +fuzzywuzzy +openai + +scikit-learn==1.7.2 +xgboost==3.2.0 +catboost==1.2.10 + +numpy==2.2.6 # we use numpy as default data format. So we have to install numpy +pandas==2.3.3 # we use pandas as default data format. So we have to install pandas +pandarallel # parallelize pandas +matplotlib +langchain +langchain-community +tiktoken +pymupdf # Extract shotsreens from pdf + +# PDF related +pypdf +azure-ai-formrecognizer + +# factor implementations +tables + +# CI Fix Tool +tree-sitter-python +tree-sitter + +python-dotenv + +# infrastructure related. +docker + +# demo related +streamlit +plotly +st-theme + +# kaggle crawler +selenium +kaggle +nbformat + +# tool +seaborn +setuptools-scm diff --git a/requirements/docs.txt b/requirements/docs.txt new file mode 100755 index 00000000..8092dd68 --- /dev/null +++ b/requirements/docs.txt @@ -0,0 +1,13 @@ +# Requirements for docs. +autodoc-pydantic +coverage +furo +git-changelog +mypy[reports] +myst-parser +pytest +Sphinx +sphinx-autobuild +sphinx-click +sphinx-togglebutton +sphinx_rtd_theme diff --git a/requirements/lint.txt b/requirements/lint.txt new file mode 100755 index 00000000..03ed7089 --- /dev/null +++ b/requirements/lint.txt @@ -0,0 +1,9 @@ +# Requirements for lint. +black +isort +mypy +ruff +toml-sort +types-PyYAML +types-psutil +types-tqdm diff --git a/requirements/package.txt b/requirements/package.txt new file mode 100755 index 00000000..2ec4324b --- /dev/null +++ b/requirements/package.txt @@ -0,0 +1,5 @@ +# Requirements for package. +build +setuptools-scm +twine +wheel diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100755 index 00000000..4b4fbc0b --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,3 @@ +# Requirements for test. 
+coverage +pytest diff --git a/scripts/build_panel.py b/scripts/build_panel.py deleted file mode 100644 index ee8cad06..00000000 --- a/scripts/build_panel.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python3 -"""从本地 hq 缓存**离线**构建 Panel(不联网)。 - -前置:先用 scripts/fetch_market.py 拉取行情写入 hq 缓存, - 用 scripts/fetch_fundamentals.py 拉取基本面缓存。 - -示例: - # 从 hq 缓存全量构建(含基本面 + 行业): - uv run python scripts/build_panel.py --with-fundamentals --with-industry - # 仅量价 panel(不 enrich): - uv run python scripts/build_panel.py - # 对已有 panel 仅补 enrich(不重建量价): - uv run python scripts/build_panel.py --enrich-only --with-industry - # 增量更新(新交易日)请用: scripts/update_panel.py -""" - -from __future__ import annotations - -import argparse -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import ( # noqa: E402 - DISCLOSURE_CALENDAR_PATH, - FUNDAMENTAL_QUARTERLY_PATH, - INDUSTRY_SW_PATH, - MARKET_HQ_PATH, - PANEL_PATH, -) -from seekalpha.data.fundamental import enrich_panel_fundamentals, list_funda_columns # noqa: E402 -from seekalpha.data.industry import enrich_panel_industry # noqa: E402 -from seekalpha.data.panel import build_panel, load_panel, save_panel # noqa: E402 - - -def main() -> None: - parser = argparse.ArgumentParser(description="离线构建 Panel(从 hq 缓存)") - parser.add_argument("--start", type=str, help="切片起始日期 YYYY-MM-DD(默认全量)") - parser.add_argument("--end", type=str, help="切片结束日期 YYYY-MM-DD(默认全量)") - parser.add_argument("--out", type=Path, default=PANEL_PATH, help="输出 panel parquet 路径") - parser.add_argument( - "--market-cache", - type=Path, - default=MARKET_HQ_PATH, - help="hq 行情缓存路径(由 fetch_market.py 生成)", - ) - parser.add_argument( - "--no-universe-mask", - action="store_true", - help="不做可交易/非ST过滤(默认过滤)", - ) - parser.add_argument( - "--with-fundamentals", - action="store_true", - help="构建后 PIT 并入季频基本面(需先 fetch_fundamentals)", - ) - parser.add_argument( - "--enrich-only", - 
action="store_true", - help="仅对已有 panel 做 enrich(不重建量价)", - ) - parser.add_argument( - "--quarterly", - type=Path, - default=FUNDAMENTAL_QUARTERLY_PATH, - help="季频基本面缓存路径", - ) - parser.add_argument( - "--disclosure", - type=Path, - default=DISCLOSURE_CALENDAR_PATH, - help="披露日历缓存路径", - ) - parser.add_argument( - "--no-disclosure-distance", - action="store_true", - help="不计算 funda_days_since_disclose 等披露距离特征", - ) - parser.add_argument( - "--with-industry", - action="store_true", - help="并入申万一级行业码 industry_sw_l1(缓存缺失时会联网拉取)", - ) - parser.add_argument( - "--refresh-industry", - action="store_true", - help="强制重新从 Tushare 拉取行业成员(忽略本地缓存)", - ) - parser.add_argument( - "--industry-path", - type=Path, - default=INDUSTRY_SW_PATH, - help="申万一级行业成员缓存路径", - ) - args = parser.parse_args() - - if args.enrich_only: - panel = load_panel(args.out) - panel = enrich_panel_fundamentals( - panel, - quarterly_path=args.quarterly, - disclosure_path=args.disclosure, - include_disclosure_features=not args.no_disclosure_distance, - ) - if args.with_industry: - panel = enrich_panel_industry( - panel, - membership_path=args.industry_path, - refresh=args.refresh_industry, - ) - save_panel(panel, args.out) - funda_cols = list_funda_columns(panel.columns) - print(f"已 enrich: {args.out} shape={panel.shape}") - if funda_cols: - print(f"基本面列 ({len(funda_cols)}): {funda_cols[:8]}{'...' 
if len(funda_cols) > 8 else ''}") - if args.with_industry: - print("已并入行业列: industry_sw_l1") - return - - build_panel( - start=args.start, - end=args.end, - out_path=args.out, - market_path=args.market_cache, - universe_mask=not args.no_universe_mask, - with_fundamentals=args.with_fundamentals, - quarterly_path=args.quarterly, - disclosure_path=args.disclosure, - include_disclosure_features=not args.no_disclosure_distance, - with_industry=args.with_industry, - industry_path=args.industry_path, - refresh_industry=args.refresh_industry, - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/eval_factor.py b/scripts/eval_factor.py deleted file mode 100644 index 2732368d..00000000 --- a/scripts/eval_factor.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python3 -""" -DSL 因子调试求值 -- python scripts/eval_factor.py --expr "TS_MEAN($adj_close, 20)" -- python scripts/eval_factor.py --expr-file examples/ma20.dsl -注意: PowerShell 请用单引号包裹 --expr,或用 --expr-file -""" - -from __future__ import annotations - -import argparse -import re -import sys -import time -from pathlib import Path - -import pandas as pd - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import PANEL_PATH # noqa: E402 -from seekalpha.data.panel import load_panel, slice_panel # noqa: E402 -from seekalpha.dsl import eval_factor # noqa: E402 -from seekalpha.dsl.eval import collect_aux_intervals_from_expr # noqa: E402 -from seekalpha.factor import evaluate_factor # noqa: E402 -from seekalpha.factor.report import format_factor_report_json, print_factor_report # noqa: E402 -from seekalpha.factor.types import DEFAULT_LABEL_COL # noqa: E402 - -_SHELL_STRIPPED_DOLLAR_RE = re.compile(r"(? 
str: - if expr_file is not None: - path = expr_file if expr_file.is_absolute() else ROOT / expr_file - if not path.is_file(): - raise FileNotFoundError(f"表达式文件不存在: {path}") - return path.read_text(encoding="utf-8").strip() - if expr is None: - raise ValueError("必须提供 --expr 或 --expr-file") - if expr == "-": - return sys.stdin.read().strip() - return expr.strip() - - -def _warn_if_shell_stripped(expr: str) -> None: - if _SHELL_STRIPPED_DOLLAR_RE.search(expr): - print( - "警告: 表达式里出现孤立的 @周期,可能是 shell 吃掉了 $列名。\n" - " 推荐: python scripts/eval_factor.py --expr-file examples/ma20.dsl", - file=sys.stderr, - ) - - -def _summary(series: pd.Series, sample: int) -> None: - vals = series.to_numpy(dtype=float, copy=False) - finite = pd.notna(vals) - coverage = float(finite.mean()) if len(vals) else 0.0 - print(f"shape: {series.shape}") - print(f"coverage: {coverage:.4f}") - print(f"sample (后 {sample} 行):") - for key, val in series.iloc[-sample:].items(): - dt, inst = key - shown = "NaN" if pd.isna(val) else f"{float(val):.6g}" - print(f" {dt} {inst} -> {shown}") - - -def main() -> None: - parser = argparse.ArgumentParser(description="DSL 表达式调试求值 / IC 报告") - parser.add_argument("--panel", type=Path, default=PANEL_PATH) - parser.add_argument("--report", action="store_true", help="输出 IC/ICIR/RANKIC/MLS 报告") - parser.add_argument("--json", action="store_true", help="--report 时以 JSON 输出") - parser.add_argument( - "--label-col", - type=str, - default=DEFAULT_LABEL_COL, - help="--report 时使用的标签列", - ) - group = parser.add_mutually_exclusive_group() - group.add_argument("--expr", type=str) - group.add_argument("--expr-file", type=Path) - parser.add_argument("--sample", type=int, default=5) - parser.add_argument( - "--start-time", - type=str, - default=None, - help="metrics 切片起始(DSL 仍在全量 panel 上求值)", - ) - parser.add_argument( - "--end-time", - type=str, - default=None, - help="metrics 切片结束(DSL 仍在全量 panel 上求值)", - ) - args = parser.parse_args() - - if args.expr is None and args.expr_file is 
None: - args.expr = "TS_MEAN($adj_close, 20)" - - expr = load_expr(expr=args.expr, expr_file=args.expr_file) - _warn_if_shell_stripped(expr) - - t_panel_load = time.perf_counter() - panel_full = load_panel(args.panel) - print(f"panel loaded in {(time.perf_counter() - t_panel_load) * 1000:.1f}ms") - print(f"panel: {args.panel} shape={panel_full.shape}") - - aux_tags = collect_aux_intervals_from_expr(expr) - if aux_tags: - print(f"辅频: {aux_tags}") - - print(f"expr:\n{expr}\n") - - if args.report: - t0 = time.perf_counter() - metrics = evaluate_factor( - expr, - panel_full, - label_col=args.label_col, - start=args.start_time, - end=args.end_time, - ) - if args.json: - print(format_factor_report_json(metrics)) - else: - print_factor_report(metrics) - print(f"elapsed_ms: {(time.perf_counter() - t0) * 1000:.1f}") - return - - panel = slice_panel(panel_full, start=args.start_time, end=args.end_time) - if args.start_time or args.end_time: - print(f"eval slice: shape={panel.shape}") - - t0 = time.perf_counter() - out = eval_factor(expr, panel) - _summary(out, args.sample) - print(f"elapsed_ms: {(time.perf_counter() - t0) * 1000:.1f}") - - -if __name__ == "__main__": - main() - # good - r""" - D:\AlphaAgent-Stock\data\factors\expressions\intraday_overnight_gap.dsl - D:\AlphaAgent-Stock\data\factors\expressions\crowd_eff_fluency_vol18.dsl - D:\AlphaAgent-Stock\data\factors\expressions\hl_div_amt_smooth20.dsl - D:\AlphaAgent-Stock\data\factors\expressions\shadow_corr_diff_30.dsl - D:\AlphaAgent-Stock\data\factors\expressions\cs_mom60_w_amt_filter.dsl # 多头不明显,空头明显,但排序还可以 - D:\AlphaAgent-Stock\data\factors\expressions\near_extreme_rev_min10.dsl # 头部明显,排序不行 - D:\AlphaAgent-Stock\data\factors\expressions\idio_qspread_win_20.dsl # good - D:\AlphaAgent-Stock\data\factors\expressions\idio_tail_asym_20.dsl - D:\AlphaAgent-Stock\data\factors\expressions\gap_streak_weighted_rank.dsl - D:\AlphaAgent-Stock\data\factors\expressions\chip_peak_10_win_neut.dsl # 头部组一般 - - # 分布近似 - 
D:\AlphaAgent-Stock\data\factors\expressions\massasym_z_crowd_meanratio_z.dsl - - # 基本面 - .\artifacts\factorzoo\stock_1d\expressions\roe_lowvol_center_smooth.dsl - """ \ No newline at end of file diff --git a/scripts/factor_mining.py b/scripts/factor_mining.py deleted file mode 100644 index daf71910..00000000 --- a/scripts/factor_mining.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -"""LLM 股票因子挖掘 CLI。 - -示例: - uv sync --extra mining - uv run python scripts/factor_mining.py --panel artifacts/panel/panel_1d.parquet - -环境变量(仓库根 .env):OPENAI_API_KEY、OPENAI_API_BASE、MODEL。 -""" - -from __future__ import annotations - -import argparse -import json -import os -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -try: - from dotenv import load_dotenv -except ImportError: - load_dotenv = None # type: ignore[assignment,misc] - -from seekalpha.core.paths import FACTORZOO_DIR, PANEL_PATH # noqa: E402 -from seekalpha.factor.mining import MiningConfig, run_factor_mining # noqa: E402 -from seekalpha.factor.mining.context import StockEvalContext # noqa: E402 -from seekalpha.factor.mining.seed_factors import build_user_message_with_seed_factors # noqa: E402 -from seekalpha.factor.types import DEFAULT_LABEL_COL # noqa: E402 - - -def _load_env() -> None: - if load_dotenv is not None: - load_dotenv(ROOT / ".env") - - -def _parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser(description="LLM 股票因子挖掘") - p.add_argument("--panel", default=str(PANEL_PATH)) - p.add_argument("--train-start", default="2019-01-01") - p.add_argument("--train-end", default="2021-12-31") - p.add_argument("--val-start", default="2022-01-01") - p.add_argument("--val-end", default="2024-12-31") - p.add_argument("--label-col", default=DEFAULT_LABEL_COL) - p.add_argument( - "--no-fundamentals", - action="store_true", - help="不载入基本面列(funda_*),省内存;prompt 也会隐藏基本面字段", - ) - p.add_argument("--temperature", type=float, default=None) - 
p.add_argument("--max-tokens", type=int, default=8192) - p.add_argument("--max-turns", type=int, default=10) - p.add_argument("--max-tool-calls-per-round", type=int, default=8) - p.add_argument("--max-tool-workers", type=int, default=4) - p.add_argument( - "--max-parallel-eval", - type=int, - default=None, - help="同时进行的 train/val 评估上限;不传则读环境变量 MAX_PARALLEL_EVAL(默认 1)。建议与 --max-tool-workers 匹配", - ) - p.add_argument("--min-tool-call-rounds", type=int, default=3) - p.add_argument("--log-dir", default="logs/factor_mining") - p.add_argument( - "--user-message", - default="请在训练集上提出并迭代多个多行因子表达式,再于验证集上检验泛化;目标为提高 abs(IC)/RANKIC 与 ICIR,并兼顾月度稳健性。", - ) - p.add_argument("--user-file", type=Path, help="从文件读取 user 消息(覆盖 --user-message)") - p.add_argument( - "--seed-factor", - dest="seed_factors", - action="append", - nargs="+", - default=[], - metavar="PATH", - help="初始种子因子 .dsl 路径,可重复指定;单次可跟多个路径,如 --seed-factor a.dsl b.dsl", - ) - p.add_argument("--no-operator-catalog", action="store_true") - p.add_argument("--quiet", action="store_true") - p.add_argument("--factorlib", type=Path, default=None, help=f"默认 {FACTORZOO_DIR}") - p.add_argument("--no-submit", action="store_true") - p.add_argument("--max-cs-corr", type=float, default=0.8) - p.add_argument("--similar-top-k", type=int, default=3) - p.add_argument("--ingest-overwrite", action="store_true") - return p.parse_args() - - -def _resolve(path: str) -> Path: - p = Path(path) - return p if p.is_absolute() else ROOT / p - - -def main() -> int: - _load_env() - args = _parse_args() - - api_key = os.environ.get("OPENAI_API_KEY") - if not api_key: - print("错误:未设置 OPENAI_API_KEY,请在 .env 中配置", file=sys.stderr) - return 2 - - model = os.environ.get("MODEL") - if not model: - print("错误:未设置 MODEL,请在 .env 中配置", file=sys.stderr) - return 2 - - try: - from openai import OpenAI - except ImportError: - print("错误:请安装 openai(uv sync --extra mining)", file=sys.stderr) - return 2 - - base_url = os.environ.get("OPENAI_API_BASE") - client = 
OpenAI(api_key=api_key, **({"base_url": base_url} if base_url else {})) - - user_message = args.user_file.read_text(encoding="utf-8") if args.user_file else args.user_message - seed_paths = [Path(p) for batch in args.seed_factors for p in batch] - if seed_paths: - try: - user_message = build_user_message_with_seed_factors( - user_message, seed_paths, repo_root=ROOT - ) - except FileNotFoundError as exc: - print(f"错误:{exc}", file=sys.stderr) - return 2 - - config = MiningConfig( - eval=StockEvalContext( - panel_path=_resolve(args.panel), - train_start=args.train_start, - train_end=args.train_end, - val_start=args.val_start, - val_end=args.val_end, - label_col=args.label_col, - include_fundamentals=not args.no_fundamentals, - ), - model=model, - temperature=args.temperature, - max_tokens=args.max_tokens, - max_turns=args.max_turns, - max_tool_calls_per_round=args.max_tool_calls_per_round, - max_tool_workers=args.max_tool_workers, - max_parallel_eval=args.max_parallel_eval, - min_tool_call_rounds_before_allow_stop=args.min_tool_call_rounds, - factorlib_path=_resolve(str(args.factorlib)) if args.factorlib else None, - enable_submit=not args.no_submit, - max_cs_corr=args.max_cs_corr, - similar_top_k=args.similar_top_k, - ingest_overwrite=args.ingest_overwrite, - ) - - out = run_factor_mining( - config, - user_message, - client=client, - log_dir=args.log_dir, - include_operator_catalog=not args.no_operator_catalog, - verbose=not args.quiet, - ) - print(json.dumps(out, ensure_ascii=False, indent=2)) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/scripts/factor_mining_agentscope.py b/scripts/factor_mining_agentscope.py deleted file mode 100644 index bcb0d645..00000000 --- a/scripts/factor_mining_agentscope.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python3 -"""LLM 股票因子挖掘 CLI(AgentScope 版,终端流式输出)。 - -与 scripts/factor_mining.py 使用相同的 system prompt 与 eval/submit 工具语义; -模型思考、回复与工具结果通过 AgentScope reply_stream 实时打印。 - -示例: - uv sync 
--extra mining - uv run python scripts/factor_mining_agentscope.py \\ - --panel artifacts/panel/panel_1d.parquet - uv run python scripts/factor_mining_agentscope.py \\ - --panel artifacts/panel/panel_1d.parquet \\ - --seed-factor examples/factors/ma20_dev.dsl \\ - --user-message "在种子因子基础上继续优化" - -环境变量(仓库根 .env):OPENAI_API_KEY、OPENAI_API_BASE、MODEL。 -""" - -from __future__ import annotations - -import argparse -import asyncio -import json -import os -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -try: - from dotenv import load_dotenv -except ImportError: - load_dotenv = None # type: ignore[assignment,misc] - -from seekalpha.core.paths import FACTORZOO_DIR, PANEL_PATH # noqa: E402 -from seekalpha.factor.mining import MiningConfig # noqa: E402 -from seekalpha.factor.mining.agentscope_run import run_factor_mining_agentscope # noqa: E402 -from seekalpha.factor.mining.context import StockEvalContext # noqa: E402 -from seekalpha.factor.mining.seed_factors import build_user_message_with_seed_factors # noqa: E402 -from seekalpha.factor.types import DEFAULT_LABEL_COL # noqa: E402 - - -def _load_env() -> None: - if load_dotenv is not None: - load_dotenv(ROOT / ".env") - - -def _parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser(description="LLM 股票因子挖掘(AgentScope 流式 CLI)") - p.add_argument("--panel", default=str(PANEL_PATH)) - p.add_argument("--train-start", default="2018-01-01") - p.add_argument("--train-end", default="2020-12-31") - p.add_argument("--val-start", default="2021-01-01") - p.add_argument("--val-end", default="2023-12-31") - p.add_argument("--label-col", default=DEFAULT_LABEL_COL) - p.add_argument( - "--no-fundamentals", - action="store_true", - help="不载入基本面列(funda_*),省内存;prompt 也会隐藏基本面字段(适合只挖价量因子)", - ) - p.add_argument("--temperature", type=float, default=None) - p.add_argument("--max-tokens", type=int, default=8192) - p.add_argument( - "--max-turns", - type=int, - default=5, - 
help=( - "外层重进 agent 的次数上限(每次 = 一整轮 ReAct,模型自愿停手才回外层);" - "同时间接决定 ReAct 内循环上限 max(max_turns*max_tool_calls_per_round, max_turns, 20)。" - "注意:它无法打断进行中的单次 reply_stream,实际运行长度主要由内循环上限与模型何时停手决定" - ), - ) - p.add_argument("--max-tool-calls-per-round", type=int, default=8) - p.add_argument("--max-tool-workers", type=int, default=4) - p.add_argument( - "--max-parallel-eval", - type=int, - default=None, - help="同时进行的 train/val 评估上限;不传则读环境变量 MAX_PARALLEL_EVAL(默认 1)。建议与 --max-tool-workers 匹配", - ) - p.add_argument("--min-tool-call-rounds", type=int, default=3) - p.add_argument("--log-dir", default="logs/factor_mining") - p.add_argument( - "--user-message", - default="请在训练集上提出并迭代多个多行因子表达式,再于验证集上检验泛化;目标为提高 abs(IC)/RANKIC 与 ICIR,并兼顾月度稳健性。", - ) - p.add_argument("--user-file", type=Path, help="从文件读取 user 消息(覆盖 --user-message)") - p.add_argument( - "--seed-factor", - dest="seed_factors", - action="append", - nargs="+", - default=[], - metavar="PATH", - help="初始种子因子 .dsl 路径,可重复指定;单次可跟多个路径,如 --seed-factor a.dsl b.dsl", - ) - p.add_argument("--no-operator-catalog", action="store_true", help="不在 system prompt 中注入算子清单") - p.add_argument("--quiet", action="store_true", help="不在终端流式打印(仍写 JSONL 日志)") - p.add_argument("--factorlib", type=Path, default=None, help=f"factorzoo 根目录(默认 {FACTORZOO_DIR})") - p.add_argument("--no-submit", action="store_true", help="禁用 submit_factor 交付工具") - p.add_argument("--max-cs-corr", type=float, default=0.8, help="submit 截面去重 |corr| 上限") - p.add_argument("--similar-top-k", type=int, default=3, help="查重失败时返回的最相似因子数") - p.add_argument("--ingest-overwrite", action="store_true", help="submit 时覆盖已存在 factor_id") - return p.parse_args() - - -def _resolve(path: str) -> Path: - p = Path(path) - return p if p.is_absolute() else ROOT / p - - -def main() -> int: - _load_env() - args = _parse_args() - - api_key = os.environ.get("OPENAI_API_KEY") - if not api_key: - print("错误:未设置 OPENAI_API_KEY,请在 .env 中配置", file=sys.stderr) - return 2 - - model = os.environ.get("MODEL") 
- if not model: - print("错误:未设置 MODEL,请在 .env 中配置", file=sys.stderr) - return 2 - - try: - import agentscope # noqa: F401 - except ImportError: - print("错误:请安装 agentscope(uv sync --extra mining)", file=sys.stderr) - return 2 - - user_message = args.user_file.read_text(encoding="utf-8") if args.user_file else args.user_message - seed_paths = [Path(p) for batch in args.seed_factors for p in batch] - if seed_paths: - try: - user_message = build_user_message_with_seed_factors( - user_message, seed_paths, repo_root=ROOT - ) - except FileNotFoundError as exc: - print(f"错误:{exc}", file=sys.stderr) - return 2 - base_url = os.environ.get("OPENAI_API_BASE") - - config = MiningConfig( - eval=StockEvalContext( - panel_path=_resolve(args.panel), - train_start=args.train_start, - train_end=args.train_end, - val_start=args.val_start, - val_end=args.val_end, - label_col=args.label_col, - include_fundamentals=not args.no_fundamentals, - ), - model=model, - temperature=args.temperature, - max_tokens=args.max_tokens, - max_turns=args.max_turns, - max_tool_calls_per_round=args.max_tool_calls_per_round, - max_tool_workers=args.max_tool_workers, - max_parallel_eval=args.max_parallel_eval, - min_tool_call_rounds_before_allow_stop=args.min_tool_call_rounds, - factorlib_path=_resolve(str(args.factorlib)) if args.factorlib else None, - enable_submit=not args.no_submit, - max_cs_corr=args.max_cs_corr, - similar_top_k=args.similar_top_k, - ingest_overwrite=args.ingest_overwrite, - ) - - out = asyncio.run( - run_factor_mining_agentscope( - config, - user_message, - api_key=api_key, - base_url=base_url, - log_dir=args.log_dir, - include_operator_catalog=not args.no_operator_catalog, - verbose=not args.quiet, - ) - ) - print(json.dumps(out, ensure_ascii=False, indent=2)) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/scripts/factorlib_info.py b/scripts/factorlib_info.py deleted file mode 100644 index edaa7e4a..00000000 --- a/scripts/factorlib_info.py +++ 
/dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python3 -"""查看因子库 catalog 与抽样摘要。""" - -from __future__ import annotations - -import argparse -import json -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import FACTORZOO_DIR # noqa: E402 -from seekalpha.factor import FactorZoo # noqa: E402 -from seekalpha.factor.zoo import SimilarityMatrix # noqa: E402 - - -def main() -> None: - parser = argparse.ArgumentParser(description="因子库信息") - parser.add_argument("--lib", type=Path, default=FACTORZOO_DIR) - parser.add_argument("--factor-id", type=str, default=None) - parser.add_argument("--json", action="store_true", help="JSON 输出") - args = parser.parse_args() - - zoo = FactorZoo.open(args.lib) - sim = SimilarityMatrix(zoo.paths, zoo.manifest.max_factors) - - if args.factor_id: - meta = zoo.catalog.get(args.factor_id) - if meta is None: - raise SystemExit(f"因子不存在: {args.factor_id}") - payload = { - "factor_id": meta.factor_id, - "name": meta.name, - "expr": meta.expr, - "col_idx": meta.col_idx, - "status": meta.status.value, - "finite_count": meta.finite_count, - "created_at": meta.created_at, - "extra": meta.extra, - } - if args.json: - print(json.dumps(payload, ensure_ascii=False, indent=2)) - else: - print(f"factor_id: {meta.factor_id}") - print(f"name: {meta.name}") - print(f"col_idx: {meta.col_idx}") - print(f"status: {meta.status.value}") - print(f"finite_count: {meta.finite_count}") - print(f"expr:\n{meta.expr}") - return - - rows = [] - for fid in zoo.catalog.list_factor_ids(): - meta = zoo.catalog.get(fid) - if meta is None: - continue - rows.append( - { - "factor_id": fid, - "name": meta.name, - "col_idx": meta.col_idx, - "finite_count": meta.finite_count, - "coverage": meta.finite_count / zoo.manifest.n_rows, - } - ) - - if args.json: - print( - json.dumps( - { - "lib": str(zoo.paths.root), - "n_rows": zoo.manifest.n_rows, - "n_factors": zoo.n_factors, - "index_hash": 
zoo.manifest.index_hash, - "factors": rows, - "similarity_meta": sim.load_meta(), - }, - ensure_ascii=False, - indent=2, - ) - ) - else: - print(f"lib: {zoo.paths.root}") - print(f"n_rows={zoo.manifest.n_rows} n_factors={zoo.n_factors}") - print(f"index_hash={zoo.manifest.index_hash}") - print(f"panel_path={zoo.manifest.panel_path}") - for row in rows: - print( - f" {row['factor_id']}: col={row['col_idx']} " - f"coverage={row['coverage']:.4f} name={row['name']}" - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/fetch_fundamentals.py b/scripts/fetch_fundamentals.py deleted file mode 100644 index 8beb746b..00000000 --- a/scripts/fetch_fundamentals.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python3 -"""从 Tushare 拉取季频 fina_indicator 并写入 fundamental 缓存。 - -默认(VIP):每期 fina_indicator_vip 拉**全 A 股**,合并后**每期落盘**。 -panel enrich 时按 panel 内 instrument 自然 join,无需在拉数阶段指定 universe。 - -示例: - uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 - uv run python scripts/fetch_fundamentals.py --periods 20240331 20240630 - # 附带三大表(income/balancesheet/cashflow,需 VIP 5000 积分): - uv run python scripts/fetch_fundamentals.py --start 2015-01-01 --end 2026-12-31 --with-statements - # 积分不足、无 VIP 时逐股慢拉(须指定 universe): - uv run python scripts/fetch_fundamentals.py --start 2023-01-01 --end 2024-06-30 --universe zz1000 --no-vip -""" - -from __future__ import annotations - -import argparse -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import DISCLOSURE_CALENDAR_PATH, FUNDAMENTAL_QUARTERLY_PATH # noqa: E402 -from seekalpha.data import tushare_client # noqa: E402 -from seekalpha.data.fundamental_fetch import ( # noqa: E402 - fetch_and_save_periods, - quarter_periods_between, -) -from seekalpha.data.tushare_client import get_pro # noqa: E402 -from seekalpha.data.universe import fetch_index_members, resolve_index_code # noqa: E402 - - -def 
_resolve_ts_codes(universe: str, start: str, end: str) -> list[str]: - pro = get_pro() - return fetch_index_members(pro, resolve_index_code(universe), start, end) - - -def _resolve_periods(args: argparse.Namespace) -> list[str]: - if args.periods and (args.start or args.end): - raise SystemExit("请只使用 --periods 或 --start/--end 之一") - if args.periods: - return sorted(args.periods) - if args.start and args.end: - periods = quarter_periods_between(args.start, args.end) - if not periods: - raise SystemExit(f"区间 {args.start} ~ {args.end} 内无标准季报季末") - return periods - raise SystemExit("须指定 --start/--end 或 --periods") - - -def main() -> None: - parser = argparse.ArgumentParser(description="拉取 Tushare 季频财务指标缓存") - parser.add_argument("--start", type=str, help="起始日期 YYYY-MM-DD(含区间内全部季报季末)") - parser.add_argument("--end", type=str, help="结束日期 YYYY-MM-DD") - parser.add_argument( - "--periods", - type=str, - nargs="+", - default=None, - help="显式报告期列表,如 20240331 20240630(与 --start/--end 二选一)", - ) - parser.add_argument( - "--quarterly-out", - type=Path, - default=FUNDAMENTAL_QUARTERLY_PATH, - help="季频 parquet 输出路径", - ) - parser.add_argument( - "--disclosure-out", - type=Path, - default=DISCLOSURE_CALENDAR_PATH, - help="披露日历 parquet 输出路径", - ) - parser.add_argument( - "--universe", - type=str, - default=None, - help="仅 --no-vip 时生效:限定逐股拉取的成分池", - ) - parser.add_argument("--sleep", type=float, default=0.35, help="Tushare 请求间隔秒") - parser.add_argument( - "--no-vip", - action="store_true", - help="禁用 fina_indicator_vip,强制按股票逐只拉取(慢,仅积分不足时用)", - ) - parser.add_argument( - "--with-statements", - action="store_true", - help="额外拉取三大表(income/balancesheet/cashflow)并入季频缓存的 funda_fs_* 列", - ) - parser.add_argument("--max-retries", type=int, default=5) - parser.add_argument("--timeout", type=int, default=60) - args = parser.parse_args() - - tushare_client.configure(max_retries=args.max_retries, timeout=args.timeout) - - periods = _resolve_periods(args) - range_start = args.start or 
f"{periods[0][:4]}-{periods[0][4:6]}-{periods[0][6:8]}" - range_end = args.end or f"{periods[-1][:4]}-{periods[-1][4:6]}-{periods[-1][6:8]}" - - print(f"报告期共 {len(periods)} 个: {periods[0]} ~ {periods[-1]}") - - use_vip = not args.no_vip - if use_vip: - if args.universe: - print( - f"提示: VIP 模式忽略 --universe {args.universe!r}," - "全市场落盘;build_panel enrich 时按 panel 成分 join" - ) - ts_codes = None - print("拉取模式: vip 全市场(每期 1 次请求,每期合并后落盘)") - else: - if not args.universe: - parser.error("--no-vip 须配合 --universe 指定成分池") - ts_codes = _resolve_ts_codes(args.universe, range_start, range_end) - print(f"拉取模式: 逐股({len(ts_codes)} 只 × {len(periods)} 期)") - - if args.with_statements: - print("附加拉取: 三大表 income / balancesheet / cashflow(funda_fs_* 原始值)") - - fetch_and_save_periods( - periods, - ts_codes=ts_codes, - quarterly_path=args.quarterly_out, - disclosure_path=args.disclosure_out, - sleep_sec=args.sleep, - verbose=True, - use_vip=use_vip, - with_statements=args.with_statements, - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/fetch_market.py b/scripts/fetch_market.py deleted file mode 100644 index 138e561c..00000000 --- a/scripts/fetch_market.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 -"""从 Tushare 拉取日频行情并写入 market hq 缓存(artifacts/market/daily_hq.parquet)。 - -与 fetch_fundamentals.py 对称:本脚本**只拉取行情、落盘 hq 缓存**,不建 panel。 -建 panel 请随后运行 scripts/build_panel.py(离线,从 hq 缓存构建)。 - -示例: - # ZZ1000 成分并集全量: - uv run python scripts/fetch_market.py --start 2015-01-01 --end 2026-06-30 --universe zz1000 - # 全市场按日: - uv run python scripts/fetch_market.py --start 2024-01-01 --end 2024-01-31 --universe none - # 增量补最新交易日: - uv run python scripts/fetch_market.py --update --universe zz1000 -""" - -from __future__ import annotations - -import argparse -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import MARKET_HQ_PATH # noqa: E402 -from seekalpha.data import 
tushare_client # noqa: E402 -from seekalpha.data.market_fetch import fetch_and_save_market, update_market_cache # noqa: E402 - - -def main() -> None: - parser = argparse.ArgumentParser(description="拉取 Tushare 日频行情 → hq 缓存") - parser.add_argument("--start", type=str, help="起始日期 YYYY-MM-DD") - parser.add_argument("--end", type=str, help="结束日期 YYYY-MM-DD") - parser.add_argument("--out", type=Path, default=MARKET_HQ_PATH, help="hq 缓存 parquet 输出路径") - parser.add_argument( - "--universe", - type=str, - default="zz1000", - help="指数成分池,如 zz1000 / zz500 / hs300;传 none 表示全市场按日拉取", - ) - parser.add_argument( - "--update", - action="store_true", - help="增量更新:从缓存末日起补至最新交易日(自动填 gap)", - ) - parser.add_argument( - "--dates", - type=str, - nargs="+", - default=None, - help="增量指定交易日,如 2026-06-28(配合 --update)", - ) - parser.add_argument("--sleep", type=float, default=0.35, help="Tushare 请求间隔秒") - parser.add_argument("--batch-size", type=int, default=40, help="按股票池拉取时每批股票数") - parser.add_argument( - "--refresh-members", - action="store_true", - help="忽略成分缓存,重新从 Tushare 拉取指数成分(artifacts/index/)", - ) - parser.add_argument("--max-retries", type=int, default=5, help="网络超时/限流最大重试次数") - parser.add_argument("--timeout", type=int, default=60, help="单次 Tushare HTTP 请求超时秒数") - args = parser.parse_args() - - tushare_client.configure(max_retries=args.max_retries, timeout=args.timeout) - universe = None if args.universe.lower() == "none" else args.universe - - if args.update: - new_hq, backfill_since = update_market_cache( - out_path=args.out, - universe=universe, - dates=args.dates, - sleep_sec=args.sleep, - batch_size=args.batch_size, - ) - if new_hq.empty: - print("无新交易日,hq 缓存已是最新") - else: - print(f"增量完成: +{new_hq.shape[0]} 行(backfill_since={backfill_since})") - return - - if not args.start or not args.end: - parser.error("全量拉取需指定 --start 和 --end;增量请用 --update") - - fetch_and_save_market( - args.start, - args.end, - out_path=args.out, - universe=universe, - batch_size=args.batch_size, - 
sleep_sec=args.sleep, - refresh_members=args.refresh_members, - ) - - -if __name__ == "__main__": - main() diff --git a/scripts/ingest_factors.py b/scripts/ingest_factors.py deleted file mode 100644 index 98628529..00000000 --- a/scripts/ingest_factors.py +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/env python3 -"""因子入库:registry / --expr-file / --expr-dir → DSL 求值 → 指标 → 查重 → factorzoo。 - -示例: - uv run python scripts/ingest_factors.py --expr-file examples/factors/ma_dev.dsl - uv run python scripts/ingest_factors.py --expr-dir artifacts/factorzoo/stock_1d/expressions - uv run python scripts/ingest_factors.py --registry configs/factors/registry.example.json --dry-run -""" - -from __future__ import annotations - -import argparse -import re -import sys -import time -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import FACTOR_REGISTRY_EXAMPLE, FACTORZOO_DIR # noqa: E402 -from seekalpha.data.panel import load_panel, slice_panel # noqa: E402 -from seekalpha.factor import ( # noqa: E402 - DEFAULT_INGEST_POLICY, - FactorZoo, - IngestPolicy, - IngestResult, - ingest_factor, - list_factor_entries, -) -from seekalpha.factor.report import format_factor_report_json, print_factor_report # noqa: E402 - - -def _slug_factor_id(name: str) -> str: - s = re.sub(r"[^a-zA-Z0-9_]+", "_", str(name).strip().lower()) - return re.sub(r"_+", "_", s).strip("_") or "factor" - - -def _load_expr(*, expr: str | None, expr_file: Path | None) -> str: - if expr_file is not None: - path = expr_file if expr_file.is_absolute() else ROOT / expr_file - if not path.is_file(): - raise FileNotFoundError(f"表达式文件不存在: {path}") - return path.read_text(encoding="utf-8").strip() - if expr is None: - raise ValueError("必须提供 --expr 或 --expr-file") - return expr.strip() - - -def _resolve_entries( - args: argparse.Namespace, -) -> list[tuple[str, str, str]]: - if args.expr_dir is not None: - expr_root = args.expr_dir if 
args.expr_dir.is_absolute() else ROOT / args.expr_dir - from seekalpha.factor.expr_store import list_expr_dir_entries - - entries = list_expr_dir_entries(expr_root) - if args.factor_id: - entries = [e for e in entries if e[0] == args.factor_id] - if not entries: - raise SystemExit(f"目录中无因子: {args.factor_id}") - return entries - - if args.expr_file is not None or args.expr is not None: - expr = _load_expr(expr=args.expr, expr_file=args.expr_file) - if args.factor_id: - factor_id = args.factor_id - elif args.name: - factor_id = _slug_factor_id(args.name) - elif args.expr_file is not None: - path = args.expr_file if args.expr_file.is_absolute() else ROOT / args.expr_file - factor_id = _slug_factor_id(path.stem) - else: - factor_id = "factor" - name = args.name or factor_id - return [(factor_id, name, expr)] - - entries = list_factor_entries(args.registry, repo_root=ROOT) - if args.factor_id: - entries = [e for e in entries if e[0] == args.factor_id] - if not entries: - raise SystemExit(f"registry 中无因子: {args.factor_id}") - return entries - - -def _print_ingest_result(r: IngestResult, *, json_out: bool = False) -> None: - if json_out: - payload = { - "factor_id": r.factor_id, - "stored": r.stored, - "col_idx": r.col_idx, - "skipped_reason": r.skipped_reason, - "metrics": r.metrics, - "similarity": r.similarity, - "extra": r.extra, - } - print(format_factor_report_json(payload)) - return - - print(f"\n因子: {r.factor_id}") - print_factor_report(r.metrics) - finite_ratio = r.metrics.get("finite_ratio") - if finite_ratio is not None: - print(f" {'finite_ratio (全 panel)':<16} {float(finite_ratio):>34.4f}") - skew = r.metrics.get("skew") - kurt = r.metrics.get("kurt") - if skew is not None and kurt is not None: - print(f" {'skew / kurt':<16} {float(skew):>15.4f} / {float(kurt):<15.4f}") - - if r.stored: - print(f"\n入库结果: 已入库 (col_idx={r.col_idx})") - else: - print(f"\n入库结果: 未入库 ({r.skipped_reason or 'unknown'})") - - if r.similarity: - max_corr = 
r.similarity.get("max_abs_corr") - if max_corr is not None: - print(f" max_cs_corr: {float(max_corr):.4f}") - for nb in r.similarity.get("top_neighbors") or []: - print(f" ~{nb.get('factor_id')} cs_corr={nb.get('cs_corr'):.4f}") - - -def main() -> None: - parser = argparse.ArgumentParser(description="因子入库(registry 或 --expr-file)") - parser.add_argument("--lib", type=Path, default=FACTORZOO_DIR, help="因子库根目录") - parser.add_argument( - "--registry", - type=Path, - default=FACTOR_REGISTRY_EXAMPLE, - help="因子清单 JSON(与 --expr-file 互斥)", - ) - parser.add_argument("--panel", type=Path, default=None, help="panel 路径(默认读 manifest)") - expr_group = parser.add_mutually_exclusive_group() - expr_group.add_argument("--expr-file", type=Path, help="DSL 表达式文件路径") - expr_group.add_argument("--expr", type=str, help="DSL 表达式字符串") - expr_group.add_argument( - "--expr-dir", - type=Path, - help="批量入库:目录下全部 *.dsl(factor_id=文件名 stem)", - ) - parser.add_argument("--name", type=str, default=None, help="因子显示名(配合 --expr-file)") - parser.add_argument("--factor-id", type=str, default=None, help="因子 ID;registry 模式下可筛选单个因子") - parser.add_argument("--overwrite", action="store_true") - parser.add_argument("--dry-run", action="store_true", help="只算指标与查重,不写入 factorzoo") - parser.add_argument("--json", action="store_true", help="以 JSON 输出指标与入库结果") - parser.add_argument("--max-cs-corr", type=float, default=0.8) - parser.add_argument("--similar-top-k", type=int, default=3) - parser.add_argument("--label-col", type=str, default=DEFAULT_INGEST_POLICY.label_col) - parser.add_argument("--train-start", type=str, default=DEFAULT_INGEST_POLICY.train_start) - parser.add_argument("--eval-end", type=str, default=DEFAULT_INGEST_POLICY.val_end) - parser.add_argument("--start-time", type=str, default=None) - parser.add_argument("--end-time", type=str, default=None) - parser.add_argument( - "--clip", - nargs=2, - type=float, - metavar=("LOWER", "UPPER"), - default=None, - help="入库前全局分位 clip,如 --clip 1 99", - ) - args = 
parser.parse_args() - - zoo = FactorZoo.open(args.lib) - panel_path = args.panel or Path(zoo.manifest.panel_path) - panel = load_panel(panel_path) - if args.start_time or args.end_time: - panel = slice_panel(panel, start=args.start_time, end=args.end_time) - if len(panel) != zoo.manifest.n_rows: - raise SystemExit( - f"panel 行数 {len(panel)} != 库 n_rows {zoo.manifest.n_rows};" - "请用全量 panel 初始化库,或去掉日期切片" - ) - panel = panel.sort_index() - - try: - entries = _resolve_entries(args) - except FileNotFoundError as e: - raise SystemExit(str(e)) from e - if not entries: - raise SystemExit("无待入库因子") - - clip_pct = (args.clip[0], args.clip[1]) if args.clip else None - policy = IngestPolicy( - train_start=args.train_start, - val_end=args.eval_end, - label_col=args.label_col, - max_cs_corr=args.max_cs_corr, - similar_top_k=args.similar_top_k, - clip_pct=clip_pct, - ) - t0 = time.perf_counter() - results = [] - for fid, name, expr in entries: - results.append( - ingest_factor( - zoo, - factor_id=fid, - name=name, - expr=expr, - panel=panel, - policy=policy, - overwrite=args.overwrite, - dry_run=args.dry_run, - ) - ) - - elapsed = time.perf_counter() - t0 - stored = sum(1 for r in results if r.stored) - skipped = len(results) - stored - if args.dry_run: - print("模式: dry-run(不写入 factorzoo)") - print(f"完成: stored={stored} skipped={skipped} elapsed={elapsed:.1f}s") - for r in results: - _print_ingest_result(r, json_out=args.json) - - -if __name__ == "__main__": - main() diff --git a/scripts/init_factorlib.py b/scripts/init_factorlib.py deleted file mode 100644 index 52f6f5ba..00000000 --- a/scripts/init_factorlib.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -"""初始化股票日频因子库:panel → canonical index + manifest.""" - -from __future__ import annotations - -import argparse -import sys -import time -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import FACTORZOO_DIR, PANEL_PATH # noqa: E402 
-from seekalpha.data.panel import load_panel # noqa: E402 -from seekalpha.factor.zoo import init_library # noqa: E402 - - -def main() -> None: - parser = argparse.ArgumentParser(description="初始化 factorzoo 因子库") - parser.add_argument("--output", type=Path, default=FACTORZOO_DIR, help="因子库根目录") - parser.add_argument("--panel", type=Path, default=PANEL_PATH, help="panel parquet 路径") - parser.add_argument("--n-sample-rows", type=int, default=200_000) - parser.add_argument("--max-factors", type=int, default=2048) - parser.add_argument("--sample-seed", type=int, default=42) - args = parser.parse_args() - - t0 = time.perf_counter() - panel = load_panel(args.panel) - paths, manifest, index = init_library( - args.output, - panel=panel, - panel_path=args.panel.resolve(), - n_sample_rows=min(args.n_sample_rows, len(panel)), - max_factors=args.max_factors, - sample_seed=args.sample_seed, - ) - elapsed = time.perf_counter() - t0 - - print(f"因子库: {paths.root}") - print(f"panel: {args.panel} n_rows={manifest.n_rows}") - print(f"n_sample_rows={manifest.n_sample_rows} max_factors={manifest.max_factors}") - print(f"index_hash={manifest.index_hash}") - print(f"shards={len(index.shards)}") - print(f"耗时: {elapsed:.1f}s") - - -if __name__ == "__main__": - main() diff --git a/scripts/realign_factorlib.py b/scripts/realign_factorlib.py deleted file mode 100644 index f01a82ce..00000000 --- a/scripts/realign_factorlib.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python3 -"""panel 更新后增量/全量 realign 因子库。""" - -from __future__ import annotations - -import argparse -import json -import sys -import time -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import FACTORZOO_DIR, PANEL_PATH # noqa: E402 -from seekalpha.data.panel import load_panel # noqa: E402 -from seekalpha.factor.types import DEFAULT_INGEST_POLICY, IngestPolicy # noqa: E402 -from seekalpha.factor.zoo.realign import ( # noqa: E402 - 
DEFAULT_OVERLAP_VERIFY_DAYS, - DEFAULT_WARMUP_DAYS, - DEFAULT_WARMUP_RETRY_DAYS, - incremental_realign_factorlib_to_panel, - realign_factorlib_to_panel, -) - - -def main() -> None: - parser = argparse.ArgumentParser(description="panel 更新后 realign 因子库") - parser.add_argument("--lib", type=Path, default=FACTORZOO_DIR, help="因子库根目录") - parser.add_argument("--panel", type=Path, default=None, help="panel 路径(默认读 manifest)") - parser.add_argument( - "--full", - action="store_true", - help="跳过错量路径,强制全量 realign", - ) - parser.add_argument("--warmup-days", type=int, default=DEFAULT_WARMUP_DAYS) - parser.add_argument("--warmup-retry-days", type=int, default=DEFAULT_WARMUP_RETRY_DAYS) - parser.add_argument( - "--overlap-verify-days", - type=int, - default=DEFAULT_OVERLAP_VERIFY_DAYS, - help="update 前最后 K 个交易日做 overlap 精确校验(默认 20)", - ) - parser.add_argument("--train-start", type=str, default=DEFAULT_INGEST_POLICY.train_start) - parser.add_argument("--eval-end", type=str, default=DEFAULT_INGEST_POLICY.val_end) - parser.add_argument("--label-col", type=str, default=DEFAULT_INGEST_POLICY.label_col) - parser.add_argument("--dry-run", action="store_true", help="只校验 overlap,不写 memmap/index") - args = parser.parse_args() - - from seekalpha.factor import FactorZoo - - zoo = FactorZoo.open(args.lib, verify_hash=False) - panel_path = args.panel or Path(zoo.manifest.panel_path) - panel = load_panel(panel_path).sort_index() - - policy = IngestPolicy( - train_start=args.train_start, - val_end=args.eval_end, - label_col=args.label_col, - ) - - t0 = time.perf_counter() - if args.full: - info = realign_factorlib_to_panel( - args.lib, - panel=panel, - panel_path=panel_path, - policy=policy, - ) - else: - info = incremental_realign_factorlib_to_panel( - args.lib, - panel=panel, - panel_path=panel_path, - policy=policy, - warmup_days=args.warmup_days, - warmup_retry_days=args.warmup_retry_days, - overlap_verify_days=args.overlap_verify_days, - dry_run=args.dry_run, - ) - elapsed = 
time.perf_counter() - t0 - - print(json.dumps(info, ensure_ascii=False, indent=2, default=str)) - print(f"elapsed={elapsed:.1f}s") - mode = info.get("mode", "?") - if args.dry_run: - print("dry-run: 未写入 factorzoo") - print(f"mode={mode}") - - -if __name__ == "__main__": - main() diff --git a/scripts/sync_factor_exprs.py b/scripts/sync_factor_exprs.py deleted file mode 100644 index b8bb9b27..00000000 --- a/scripts/sync_factor_exprs.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -"""从 factorzoo catalog 同步 DSL 到 {lib}/expressions/。 - -示例: - uv run python scripts/sync_factor_exprs.py - uv run python scripts/sync_factor_exprs.py --lib artifacts/factorzoo/stock_1d --dry-run -""" - -from __future__ import annotations - -import argparse -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import FACTORZOO_DIR # noqa: E402 -from seekalpha.factor import FactorZoo # noqa: E402 -from seekalpha.factor.expr_store import export_zoo_expressions # noqa: E402 - - -def main() -> int: - parser = argparse.ArgumentParser(description="同步 factorzoo → {lib}/expressions") - parser.add_argument("--lib", type=Path, default=FACTORZOO_DIR, help="因子库根目录") - parser.add_argument( - "--out", - type=Path, - default=None, - help="DSL 输出目录(默认 {lib}/expressions)", - ) - parser.add_argument("--no-overwrite", action="store_true", help="已存在 .dsl 时跳过") - parser.add_argument("--dry-run", action="store_true", help="只打印,不写文件") - args = parser.parse_args() - - zoo = FactorZoo.open(args.lib) - out_dir = args.out or (args.lib / "expressions") - factor_ids = zoo.catalog.list_factor_ids() - if not factor_ids: - print("因子库为空,无 DSL 可导出") - return 0 - - if args.dry_run: - for fid in factor_ids: - meta = zoo.catalog.get(fid) - path = out_dir / f"{fid}.dsl" - print(f" {fid} -> {path}") - print(f"共 {len(factor_ids)} 个(dry-run)") - return 0 - - written = export_zoo_expressions( - zoo, - expr_dir=out_dir, - overwrite=not 
args.no_overwrite, - ) - print(f"已写入 {len(written)} 个 DSL -> {out_dir.resolve()}") - for fid, path in written: - print(f" {fid}.dsl") - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/scripts/update_panel.py b/scripts/update_panel.py deleted file mode 100644 index 3b67839f..00000000 --- a/scripts/update_panel.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python3 -"""增量更新:一条命令完成「增量拉取行情 → 追加 hq 缓存 → panel 尾部增量重建 → 离线 re-enrich」。 - -流程: - 1. update_market_cache: 检测缺失交易日 → 从 Tushare 拉取 → 追加写入 daily_hq.parquet - 2. update_panel_from_hq: 新增行 merge 进 panel,从缺口前一交易日起重算 ret/label, - 并从本地缓存离线并入 funda_* / industry_sw_l1 - -示例: - uv run python scripts/update_panel.py --universe zz1000 --with-fundamentals --with-industry - uv run python scripts/update_panel.py --universe zz1000 --dates 2026-06-30 -""" - -from __future__ import annotations - -import argparse -import sys -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[1] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import ( # noqa: E402 - DISCLOSURE_CALENDAR_PATH, - FUNDAMENTAL_QUARTERLY_PATH, - INDUSTRY_SW_PATH, - MARKET_HQ_PATH, - PANEL_PATH, -) -from seekalpha.data import tushare_client # noqa: E402 -from seekalpha.data.market_fetch import update_market_cache # noqa: E402 -from seekalpha.data.panel import update_panel_from_hq # noqa: E402 - - -def main() -> None: - parser = argparse.ArgumentParser(description="增量更新 hq 缓存 + panel") - parser.add_argument("--panel", type=Path, default=PANEL_PATH, help="panel parquet 路径") - parser.add_argument("--market-cache", type=Path, default=MARKET_HQ_PATH, help="hq 缓存路径") - parser.add_argument( - "--universe", - type=str, - default="zz1000", - help="指数成分池;传 none 表示全市场按日拉取", - ) - parser.add_argument( - "--dates", - type=str, - nargs="+", - default=None, - help="增量指定交易日,如 2026-06-30(默认自动检测缺失日)", - ) - parser.add_argument( - "--with-fundamentals", - action="store_true", - help="并入季频基本面(离线读缓存 PIT 展开)", - ) - 
parser.add_argument( - "--with-industry", - action="store_true", - help="并入申万一级行业码 industry_sw_l1(离线读缓存)", - ) - parser.add_argument( - "--no-disclosure-distance", - action="store_true", - help="不计算 funda_days_since_disclose 等披露距离特征", - ) - parser.add_argument("--quarterly", type=Path, default=FUNDAMENTAL_QUARTERLY_PATH) - parser.add_argument("--disclosure", type=Path, default=DISCLOSURE_CALENDAR_PATH) - parser.add_argument("--industry-path", type=Path, default=INDUSTRY_SW_PATH) - parser.add_argument("--sleep", type=float, default=0.35, help="Tushare 请求间隔秒") - parser.add_argument("--batch-size", type=int, default=40) - parser.add_argument("--max-retries", type=int, default=5) - parser.add_argument("--timeout", type=int, default=60) - args = parser.parse_args() - - tushare_client.configure(max_retries=args.max_retries, timeout=args.timeout) - universe = None if args.universe.lower() == "none" else args.universe - - print("== 步骤 1/2: 增量拉取行情 → hq 缓存 ==") - new_hq, backfill_since = update_market_cache( - out_path=args.market_cache, - universe=universe, - dates=args.dates, - sleep_sec=args.sleep, - batch_size=args.batch_size, - ) - if new_hq.empty: - print("无新交易日,panel 已是最新,无需更新") - return - - print(f"== 步骤 2/2: panel 增量重建(backfill_since={backfill_since})==") - update_panel_from_hq( - new_hq, - backfill_since, - out_path=args.panel, - with_fundamentals=args.with_fundamentals, - quarterly_path=args.quarterly, - disclosure_path=args.disclosure, - include_disclosure_features=not args.no_disclosure_distance, - with_industry=args.with_industry, - industry_path=args.industry_path, - ) - - -if __name__ == "__main__": - main() diff --git a/seekalpha/__init__.py b/seekalpha/__init__.py deleted file mode 100644 index 94cfdfa6..00000000 --- a/seekalpha/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""SeekAlpha:多因子策略研究 + 实盘框架。""" - -__version__ = "0.1.0" diff --git a/seekalpha/core/__init__.py b/seekalpha/core/__init__.py deleted file mode 100644 index 4aaa7de7..00000000 --- 
a/seekalpha/core/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""核心类型、路径、配置。""" - -from seekalpha.core.paths import ARTIFACTS_DIR, PANEL_PATH, ROOT -from seekalpha.core.types import OUTPUT_COLUMNS - -__all__ = ["ARTIFACTS_DIR", "OUTPUT_COLUMNS", "PANEL_PATH", "ROOT"] diff --git a/seekalpha/core/config.py b/seekalpha/core/config.py deleted file mode 100644 index 3aa94d31..00000000 --- a/seekalpha/core/config.py +++ /dev/null @@ -1,22 +0,0 @@ -"""YAML 配置加载。""" - -from __future__ import annotations - -from pathlib import Path -from typing import Any - -import yaml - -from seekalpha.core.paths import CONFIGS_DIR - - -def load_yaml(path: Path | str) -> dict[str, Any]: - """加载 YAML 配置文件。""" - p = Path(path) - if not p.is_absolute(): - p = CONFIGS_DIR / p - if not p.is_file(): - raise FileNotFoundError(f"配置文件不存在: {p}") - with open(p, encoding="utf-8") as f: - data = yaml.safe_load(f) - return data or {} diff --git a/seekalpha/core/hash.py b/seekalpha/core/hash.py deleted file mode 100644 index 9aed9679..00000000 --- a/seekalpha/core/hash.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Panel 指纹,用于 factorzoo / bundle 绑定。""" - -from __future__ import annotations - -import hashlib - -import pandas as pd - - -def panel_index_hash(panel: pd.DataFrame) -> str: - """根据 (datetime, instrument) 行序计算短 hash。""" - if not isinstance(panel.index, pd.MultiIndex): - raise TypeError("panel 须为 MultiIndex(datetime, instrument)") - dt = panel.index.get_level_values("datetime").astype(str) - inst = panel.index.get_level_values("instrument").astype(str) - payload = "\n".join(f"{d}\t{i}" for d, i in zip(dt, inst, strict=False)) - return hashlib.sha256(payload.encode()).hexdigest()[:16] diff --git a/seekalpha/core/paths.py b/seekalpha/core/paths.py deleted file mode 100644 index 32535d7d..00000000 --- a/seekalpha/core/paths.py +++ /dev/null @@ -1,22 +0,0 @@ -"""仓库路径常量。""" - -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[2] -ARTIFACTS_DIR = ROOT / "artifacts" -MARKET_DIR = ARTIFACTS_DIR / 
"market" -MARKET_HQ_PATH = MARKET_DIR / "daily_hq.parquet" -PANEL_PATH = ARTIFACTS_DIR / "panel" / "panel_1d.parquet" -FUNDAMENTAL_DIR = ARTIFACTS_DIR / "fundamental" -FUNDAMENTAL_QUARTERLY_PATH = FUNDAMENTAL_DIR / "quarterly.parquet" -DISCLOSURE_CALENDAR_PATH = FUNDAMENTAL_DIR / "disclosure_calendar.parquet" -INDUSTRY_DIR = ARTIFACTS_DIR / "industry" -INDUSTRY_SW_PATH = INDUSTRY_DIR / "sw_l1_membership.parquet" -INDEX_DIR = ARTIFACTS_DIR / "index" -FACTORZOO_DIR = ARTIFACTS_DIR / "factorzoo" / "stock_1d" -FACTOR_EXPR_DIR = FACTORZOO_DIR / "expressions" -CONFIGS_DIR = ROOT / "configs" -FACTOR_REGISTRY_EXAMPLE = CONFIGS_DIR / "factors" / "registry.example.json" -MLS_FMB_PERCENTILES_PATH = FACTORZOO_DIR / "mls_fmb_percentiles.json" -MINING_REGISTRY_PATH = FACTORZOO_DIR / "mining_delivered_registry.json" -MINING_EXPR_DIR = FACTOR_EXPR_DIR diff --git a/seekalpha/core/types.py b/seekalpha/core/types.py deleted file mode 100644 index 0564c957..00000000 --- a/seekalpha/core/types.py +++ /dev/null @@ -1,87 +0,0 @@ -"""跨模块共享的数据结构定义。""" - -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum -from typing import Literal - -import pandas as pd - -# Tushare daily_basic 每日指标(除 close/total_mv/circ_mv 外,直接入库的原生字段) -# 单位:turnover_rate/turnover_rate_f/dv_ratio/dv_ttm 为 %,volume_ratio 为倍数, -# pe/pe_ttm/pb/ps/ps_ttm 为比值,total_share/float_share/free_share 为万股。 -DAILY_BASIC_COLUMNS = [ - "turnover_rate", - "turnover_rate_f", - "volume_ratio", - "pe", - "pe_ttm", - "pb", - "ps", - "ps_ttm", - "dv_ratio", - "dv_ttm", - "total_share", - "float_share", - "free_share", -] - -# Panel 输出列(与 AlphaAgent-Stock 保持一致) -OUTPUT_COLUMNS = [ - "open", - "high", - "low", - "close", - "adj_open", - "adj_high", - "adj_low", - "adj_close", - "adjfactor", - "volume", - "amount", - "float_cap", - "tot_cap", - *DAILY_BASIC_COLUMNS, - "is_trade", - "not_st", - "ret", - "vwap", - "adj_vwap", - "label_1d_close_to_close", - "label_1d_open_to_open", - 
"label_10d_close_to_close", - "label_20d_close_to_close", -] - -Panel = pd.DataFrame -AlphaTable = pd.DataFrame -TargetBook = pd.DataFrame - - -class OrderSide(str, Enum): - buy = "buy" - sell = "sell" - - -@dataclass -class OrderIntent: - """执行层订单意图(与 broker 无关)。""" - - instrument: str - side: OrderSide - volume: int - price_type: Literal["limit", "market"] = "limit" - limit_price: float | None = None - - -@dataclass -class StrategyBundle: - """策略版本化产物 manifest。""" - - name: str - version: str - factor_exprs: dict[str, str] - model_path: str | None - portfolio_config: dict - panel_hash: str diff --git a/seekalpha/data/__init__.py b/seekalpha/data/__init__.py deleted file mode 100644 index 2fce9b6a..00000000 --- a/seekalpha/data/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -"""数据层:Tushare 拉数(market_fetch / fundamental_fetch)+ Panel 离线构建(panel)。""" - -from seekalpha.data.fundamental import enrich_panel_fundamentals, list_funda_columns -from seekalpha.data.index_members import ( - load_index_members, - resolve_index_members_cached, -) -from seekalpha.data.market_fetch import ( - fetch_and_save_market, - load_market_hq, - save_market_hq, - update_market_cache, -) -from seekalpha.data.panel import ( - build_panel, - build_panel_from_hq, - load_panel, - save_panel, - slice_panel, - update_panel_from_hq, -) -from seekalpha.data.tushare_client import get_pro - -__all__ = [ - "build_panel", - "build_panel_from_hq", - "enrich_panel_fundamentals", - "fetch_and_save_market", - "get_pro", - "list_funda_columns", - "load_index_members", - "load_market_hq", - "load_panel", - "resolve_index_members_cached", - "save_market_hq", - "save_panel", - "slice_panel", - "update_market_cache", - "update_panel_from_hq", -] diff --git a/seekalpha/data/fundamental.py b/seekalpha/data/fundamental.py deleted file mode 100644 index 7ca2d729..00000000 --- a/seekalpha/data/fundamental.py +++ /dev/null @@ -1,398 +0,0 @@ -"""季频基本面 PIT 展开与披露日历特征(对齐 AlphaAgent-Stock 语义)。""" - -from __future__ import 
annotations - -from collections.abc import Sequence -from pathlib import Path - -import numpy as np -import pandas as pd - -from seekalpha.core.paths import DISCLOSURE_CALENDAR_PATH, FUNDAMENTAL_QUARTERLY_PATH - -# 财报科目中文列名 → DSL 列名(与 AlphaAgent 一致,供未来三大表接入) -FUNDAMENTAL_STATEMENT_COLUMN_MAP: dict[str, str] = { - "总资产": "funda_fs_total_assets", - "总负债": "funda_fs_total_liabilities", - "流动资产": "funda_fs_current_assets", - "流动负债": "funda_fs_current_liabilities", - "非流动负债": "funda_fs_noncurrent_liabilities", - "股东权益合计": "funda_fs_total_equity", - "股东权益合计含少数": "funda_fs_total_equity_incl_mi", - "股东权益及负债总计": "funda_fs_total_liab_equity", - "未分配利润": "funda_fs_retained_earnings", - "息税前利润": "funda_fs_ebit", - "留存收益": "funda_fs_retained_income", - "应交税费": "funda_fs_taxes_payable", - "其他应付款": "funda_fs_other_payables", - "应付职工薪酬": "funda_fs_payroll_payable", - "在建工程": "funda_fs_construction_in_progress", - "盈余公积金": "funda_fs_surplus_reserve", - "其他综合收益": "funda_fs_other_comprehensive_income", - "营业收入_季度": "funda_fs_oper_revenue_q", - "总营业成本_季度": "funda_fs_oper_cost_q", - "所得税_季度": "funda_fs_income_tax_q", - "营业税金及附加_季度": "funda_fs_tax_surcharge_q", - "归母净利润_季度": "funda_fs_net_profit_parent_q", - "持续经营净利润_季度": "funda_fs_net_profit_continuing_q", - "母公司综合收益_季度": "funda_fs_comprehensive_income_parent_q", - "利息费用_季度": "funda_fs_interest_expense_q", - "销售费用_季度": "funda_fs_selling_expense_q", - "稀释每股收益_季度": "funda_fs_eps_diluted_q", - "基本每股收益_季度": "funda_fs_eps_basic_q", - "少数股东损益_季度": "funda_fs_minority_interest_q", - "经营现金净流_季度": "funda_fs_ocf_net_q", - "经营现金流入_季度": "funda_fs_ocf_inflow_q", - "投资现金净流_季度": "funda_fs_icf_net_q", - "综合收益_季度": "funda_fs_comprehensive_income_q", - "其他收入_季度": "funda_fs_other_income_q", - "购建长期资产支付现金_季度": "funda_fs_capex_cash_q", - "期末现金等价物_季度": "funda_fs_cash_equiv_end_q", - "支付职工现金_季度": "funda_fs_cash_paid_employees_q", - "支付税费_季度": "funda_fs_cash_paid_taxes_q", - "固定资产折旧_季度累计": "funda_fs_depreciation_q_ytd", - "间接法经营活动现金流量净额_季度累计": 
"funda_fs_ocf_indirect_q_ytd", - "营运资本": "funda_fs_working_capital", -} - -DISCLOSURE_DISTANCE_COLUMNS = ( - "funda_days_since_disclose", - "funda_days_since_quarter_start", -) - -_QUARTER_PERIOD_START_MD = { - (3, 31): (1, 1), - (6, 30): (4, 1), - (9, 30): (7, 1), - (12, 31): (10, 1), -} - - -def _load_quarterly_fundamentals(path: Path | str) -> pd.DataFrame: - """读取季末基本面宽表,索引 (report_end, instrument)。""" - raw_path = Path(path).expanduser() - if not raw_path.is_file(): - raise FileNotFoundError(f"基本面文件不存在: {raw_path}") - - raw = pd.read_parquet(raw_path) - if "instrument" not in raw.index.names and "code" in raw.index.names: - raw = raw.rename_axis(index={"code": "instrument", "datetime": "report_end"}) - elif raw.index.names[0] == "datetime": - raw = raw.rename_axis(index={"datetime": "report_end"}) - - rename = { - col: FUNDAMENTAL_STATEMENT_COLUMN_MAP[col] - for col in raw.columns - if col in FUNDAMENTAL_STATEMENT_COLUMN_MAP - } - if rename: - raw = raw.rename(columns=rename) - - allowed = set(FUNDAMENTAL_STATEMENT_COLUMN_MAP.values()) - unknown = [ - c - for c in raw.columns - if not str(c).startswith("funda_") and c not in allowed - ] - if unknown: - raise ValueError(f"{raw_path} 含未识别列: {sorted(unknown)}") - - report_end = pd.to_datetime(raw.index.get_level_values("report_end")) - instrument = raw.index.get_level_values("instrument") - raw.index = pd.MultiIndex.from_arrays( - [report_end, instrument], - names=["report_end", "instrument"], - ) - return raw.sort_index() - - -def load_disclosure_calendar(path: Path | str) -> pd.DataFrame: - """宽表 (report_end × instrument) → long: report_end, instrument, disclosure。""" - wide = pd.read_parquet(path) - wide.index = pd.to_datetime(wide.index) - long = wide.stack(future_stack=True).rename("disclosure").reset_index() - long.columns = ["report_end", "instrument", "disclosure"] - long["disclosure"] = pd.to_datetime(long["disclosure"], errors="coerce") - return long.dropna(subset=["disclosure"]) - - -def 
validate_quarter_report_ends(path: Path | str) -> None: - """确认披露映射表行索引均为标准 A 股季报季末。""" - ends = pd.to_datetime(pd.read_parquet(path).index) - bad = [d for d in ends if (d.month, d.day) not in _QUARTER_PERIOD_START_MD] - if bad: - sample = ", ".join(str(x.date()) for x in bad[:5]) - raise ValueError(f"非标准季报季末 index: {sample} ...") - - -def quarter_period_start(trade_day: pd.Timestamp) -> pd.Timestamp: - """交易日所属报告区间的首日(严格 PIT,纯日历边界)。""" - ts = pd.Timestamp(trade_day).normalize() - month = ts.month - if month <= 3: - return pd.Timestamp(ts.year, 1, 1) - if month <= 6: - return pd.Timestamp(ts.year, 4, 1) - if month <= 9: - return pd.Timestamp(ts.year, 7, 1) - return pd.Timestamp(ts.year, 10, 1) - - -def _map_disclosure_to_effective_trade_dates( - events: pd.DataFrame, - trade_dates_by_inst: dict[str, pd.DatetimeIndex], -) -> pd.DataFrame: - """披露日历日 → 信息可交易的首个交易日(D 之后首个 bar)。""" - chunks: list[pd.DataFrame] = [] - meta_cols = {"report_end", "instrument", "disclosure"} - value_cols = [c for c in events.columns if c not in meta_cols] - - for inst, grp in events.groupby("instrument", sort=False): - trade_dates = trade_dates_by_inst.get(inst) - if trade_dates is None or len(trade_dates) == 0: - continue - - td_arr = trade_dates.values.astype("datetime64[ns]") - disc = pd.to_datetime(grp["disclosure"]).values.astype("datetime64[ns]") - pos = np.searchsorted(td_arr, disc, side="right") - ok = pos < len(td_arr) - if not ok.any(): - continue - - part = grp.loc[ok, list(meta_cols | set(value_cols))].copy() - part["datetime"] = trade_dates.take(pos[ok]) - chunks.append(part) - - if not chunks: - return pd.DataFrame(columns=["datetime", "instrument", *value_cols]) - return pd.concat(chunks, ignore_index=True) - - -def expand_quarterly_fundamentals_pit( - panel: pd.DataFrame, - fundamentals_path: Path | str, - disclosure_map_path: Path | str, - *, - dtype: str = "float32", -) -> pd.DataFrame: - """季末基本面 + 披露映射 → 严格 PIT 日频宽表,left join 到 panel。""" - if panel.index.names != 
["datetime", "instrument"]: - raise ValueError(f"panel 索引须为 (datetime, instrument),当前: {panel.index.names}") - - raw = _load_quarterly_fundamentals(fundamentals_path) - value_cols = list(raw.columns) - overlap = set(panel.columns) & set(value_cols) - if overlap: - raise ValueError(f"panel 已含基本面列: {sorted(overlap)}") - - validate_quarter_report_ends(disclosure_map_path) - - cal = load_disclosure_calendar(disclosure_map_path) - events = raw.reset_index().merge(cal, on=["report_end", "instrument"], how="inner") - - trade_dates_by_inst = { - inst: grp.index.get_level_values("datetime").unique().sort_values() - for inst, grp in panel.groupby(level="instrument", sort=False) - } - effective = _map_disclosure_to_effective_trade_dates(events, trade_dates_by_inst) - if effective.empty: - out = panel.copy() - for col in value_cols: - out[col] = np.nan - return out - - effective = effective.sort_values(["instrument", "datetime", "report_end"]) - effective = effective.drop_duplicates(["instrument", "datetime"], keep="last") - - chunks: list[pd.DataFrame] = [] - for inst, grp in effective.groupby("instrument", sort=False): - trade_dates = trade_dates_by_inst[inst] - daily = grp.set_index("datetime")[value_cols].reindex(trade_dates).ffill() - idx = pd.MultiIndex.from_product( - [trade_dates, [inst]], - names=["datetime", "instrument"], - ) - chunks.append(daily.set_index(idx)) - - expanded = pd.concat(chunks).sort_index() - out = panel.join(expanded, how="left") - for col in value_cols: - if pd.api.types.is_numeric_dtype(out[col]): - out[col] = out[col].astype(dtype) - return out.sort_index() - - -def _disclosure_effective_trade_positions( - trade_dates: pd.DatetimeIndex, - disclosure_dates: np.ndarray, -) -> np.ndarray: - """实际披露日历日 → 信息可交易的首个交易日位置(严格晚于披露日的下一交易日)。""" - td = trade_dates.values.astype("datetime64[ns]") - disc = np.sort(np.unique(disclosure_dates.astype("datetime64[ns]"))) - if disc.size == 0: - return np.array([], dtype=np.int64) - - pos = np.searchsorted(td, 
disc, side="right") - pos = pos[(pos > 0) & (pos < len(td))] - return np.unique(pos.astype(np.int64)) - - -def _disclosure_days_since( - trade_dates: pd.DatetimeIndex, - disclosure_dates: np.ndarray, -) -> np.ndarray: - """按单只股票交易日历,计算距上一期实际披露生效日的交易日天数。""" - n = len(trade_dates) - if n == 0: - return np.array([], dtype=float) - - disc_pos = _disclosure_effective_trade_positions(trade_dates, disclosure_dates) - if disc_pos.size == 0: - return np.full(n, np.nan) - - idx = np.arange(n, dtype=np.int64) - prev_i = np.searchsorted(disc_pos, idx, side="right") - 1 - days_since = np.full(n, np.nan, dtype=float) - has_prev = prev_i >= 0 - days_since[has_prev] = idx[has_prev] - disc_pos[prev_i[has_prev]] - return days_since - - -def _days_since_quarter_start(trade_dates: pd.DatetimeIndex) -> np.ndarray: - """各交易日距所属季报区间首日的交易日天数(区间内首日=0)。""" - n = len(trade_dates) - if n == 0: - return np.array([], dtype=float) - - td_ns = trade_dates.values.astype("datetime64[ns]") - starts = np.array( - [np.datetime64(quarter_period_start(pd.Timestamp(d)), "ns") for d in trade_dates], - dtype="datetime64[ns]", - ) - idx = np.arange(n, dtype=np.int64) - pos = np.searchsorted(td_ns, starts, side="left") - out = np.full(n, np.nan, dtype=float) - ok = pos <= idx - out[ok] = (idx[ok] - pos[ok]).astype(float) - return out - - -def append_disclosure_distance_features( - panel: pd.DataFrame, - disclosure_map_path: Path | str, - *, - dtype: str = "float32", -) -> pd.DataFrame: - """并入距上一期财报实际披露生效日的交易日天数。""" - if panel.index.names != ["datetime", "instrument"]: - raise ValueError(f"panel 索引须为 (datetime, instrument),当前: {panel.index.names}") - - overlap = set(panel.columns) & set(DISCLOSURE_DISTANCE_COLUMNS) - if overlap: - raise ValueError(f"panel 已含披露距离列: {sorted(overlap)}") - - path = Path(disclosure_map_path).expanduser() - if not path.is_file(): - raise FileNotFoundError(f"披露日映射文件不存在: {path}") - - cal = load_disclosure_calendar(path) - disc_by_inst = { - inst: np.sort(grp["disclosure"].unique()) 
- for inst, grp in cal.groupby("instrument", sort=False) - } - - since_chunks: list[pd.Series] = [] - for inst, grp in panel.groupby(level="instrument", sort=False): - trade_dates = grp.index.get_level_values("datetime").unique().sort_values() - disc = disc_by_inst.get(inst) - if disc is None or len(disc) == 0: - since = np.full(len(trade_dates), np.nan) - else: - since = _disclosure_days_since(trade_dates, disc) - idx = pd.MultiIndex.from_product( - [trade_dates, [inst]], - names=["datetime", "instrument"], - ) - since_chunks.append(pd.Series(since, index=idx, dtype=float)) - - since_s = pd.concat(since_chunks).sort_index() - out = panel.copy() - out["funda_days_since_disclose"] = since_s.astype(dtype) - return out.sort_index() - - -def append_quarter_period_features( - panel: pd.DataFrame, - disclosure_map_path: Path | str | None = None, - *, - dtype: str = "float32", -) -> pd.DataFrame: - """并入距当前季报区间首日的交易日天数(与披露映射表季报划分一致,严格 PIT)。""" - if panel.index.names != ["datetime", "instrument"]: - raise ValueError(f"panel 索引须为 (datetime, instrument),当前: {panel.index.names}") - - if "funda_days_since_quarter_start" in panel.columns: - raise ValueError("panel 已含 funda_days_since_quarter_start") - - if disclosure_map_path is not None: - validate_quarter_report_ends(disclosure_map_path) - - chunks: list[pd.Series] = [] - for inst, grp in panel.groupby(level="instrument", sort=False): - trade_dates = grp.index.get_level_values("datetime").unique().sort_values() - vals = _days_since_quarter_start(trade_dates) - idx = pd.MultiIndex.from_product( - [trade_dates, [inst]], - names=["datetime", "instrument"], - ) - chunks.append(pd.Series(vals, index=idx, dtype=float)) - - out = panel.copy() - out["funda_days_since_quarter_start"] = pd.concat(chunks).sort_index().astype(dtype) - return out.sort_index() - - -def enrich_panel_fundamentals( - panel: pd.DataFrame, - *, - quarterly_path: Path | str = FUNDAMENTAL_QUARTERLY_PATH, - disclosure_path: Path | str = DISCLOSURE_CALENDAR_PATH, - 
include_disclosure_features: bool = True, - dtype: str = "float32", -) -> pd.DataFrame: - """将季频基本面 PIT 展开并 left join 到 panel。 - - 幂等:先删除 panel 中已有的 ``funda_*`` 列再重新展开,因此重复 enrich、 - 或缓存新增列(如 ``--with-statements`` 后的 ``funda_fs_*``)时都会全量刷新。 - """ - stale = [c for c in panel.columns if str(c).startswith("funda_")] - if stale: - panel = panel.drop(columns=stale) - - panel = expand_quarterly_fundamentals_pit( - panel, - quarterly_path, - disclosure_path, - dtype=dtype, - ) - if include_disclosure_features: - panel = append_disclosure_distance_features( - panel, - disclosure_path, - dtype=dtype, - ) - panel = append_quarter_period_features( - panel, - disclosure_path, - dtype=dtype, - ) - return panel - - -def list_funda_columns(columns: Sequence[str]) -> list[str]: - """返回列名中的基本面相关列。""" - return sorted( - c - for c in columns - if str(c).startswith("funda_") - ) diff --git a/seekalpha/data/fundamental_fetch.py b/seekalpha/data/fundamental_fetch.py deleted file mode 100644 index e3b0fef0..00000000 --- a/seekalpha/data/fundamental_fetch.py +++ /dev/null @@ -1,534 +0,0 @@ -"""从 Tushare 拉取季频财务指标并写入 quarterly / disclosure 缓存。""" - -from __future__ import annotations - -import time -from pathlib import Path - -import pandas as pd - -from seekalpha.core.paths import DISCLOSURE_CALENDAR_PATH, FUNDAMENTAL_DIR, FUNDAMENTAL_QUARTERLY_PATH -from seekalpha.data.fundamental import validate_quarter_report_ends -from seekalpha.data.tushare_client import call_with_retry, get_pro - -# fina_indicator 字段 → panel 列名 -FINA_INDICATOR_COLUMN_MAP: dict[str, str] = { - "roe": "funda_roe", - "roa": "funda_roa", - "debt_to_assets": "funda_debt_to_assets", - "netprofit_yoy": "funda_netprofit_yoy", - "or_yoy": "funda_or_yoy", - "tr_yoy": "funda_tr_yoy", - "bps": "funda_bps", - "eps": "funda_eps", - "grossprofit_margin": "funda_grossprofit_margin", - "netprofit_margin": "funda_netprofit_margin", - "ocfps": "funda_ocfps", - "working_capital": "funda_fs_working_capital", - "ebit": "funda_fs_ebit", - 
"rd_exp": "funda_fs_rd_exp", - "profit_dedt": "funda_profit_dedt", - "current_ratio": "funda_current_ratio", - "quick_ratio": "funda_quick_ratio", -} - -FINA_INDICATOR_API_FIELDS = ( - "ts_code,ann_date,end_date," - + ",".join(FINA_INDICATOR_COLUMN_MAP.keys()) -) - -# ---------------------------------------------------------------------------- -# 三大表(income / balancesheet / cashflow)字段 → panel 列名 -# -# 口径说明(按 Tushare 原始值存储,不做单季差分): -# * 资产负债表:时点值(无后缀)。 -# * 利润表 / 现金流量表:Tushare 返回**年初至今累计值**,列名统一带 `_ytd` -# 后缀以示区分(Q1=当季,H1/Q3/年报为累计);期末/期初现金余额为时点值。 -# 仅取 report_type='1'(合并报表);同 (ts_code, end_date) 保留 ann_date 最新一条。 -# ---------------------------------------------------------------------------- - -INCOME_COLUMN_MAP: dict[str, str] = { - "total_revenue": "funda_fs_total_revenue_ytd", - "revenue": "funda_fs_oper_revenue_ytd", - "total_cogs": "funda_fs_total_cogs_ytd", - "oper_cost": "funda_fs_oper_cost_ytd", - "sell_exp": "funda_fs_selling_expense_ytd", - "admin_exp": "funda_fs_admin_expense_ytd", - "fin_exp": "funda_fs_finance_expense_ytd", - "int_exp": "funda_fs_interest_expense_ytd", - "biz_tax_surchg": "funda_fs_tax_surcharge_ytd", - "operate_profit": "funda_fs_operate_profit_ytd", - "total_profit": "funda_fs_total_profit_ytd", - "income_tax": "funda_fs_income_tax_ytd", - "n_income": "funda_fs_net_profit_ytd", - "n_income_attr_p": "funda_fs_net_profit_parent_ytd", - "minority_gain": "funda_fs_minority_interest_ytd", - "t_compr_income": "funda_fs_comprehensive_income_ytd", - "compr_inc_attr_p": "funda_fs_comprehensive_income_parent_ytd", - "basic_eps": "funda_fs_eps_basic_ytd", - "diluted_eps": "funda_fs_eps_diluted_ytd", -} - -BALANCESHEET_COLUMN_MAP: dict[str, str] = { - "total_assets": "funda_fs_total_assets", - "total_cur_assets": "funda_fs_current_assets", - "total_nca": "funda_fs_noncurrent_assets", - "total_liab": "funda_fs_total_liabilities", - "total_cur_liab": "funda_fs_current_liabilities", - "total_ncl": "funda_fs_noncurrent_liabilities", - 
"total_hldr_eqy_exc_min_int": "funda_fs_total_equity", - "total_hldr_eqy_inc_min_int": "funda_fs_total_equity_incl_mi", - "total_liab_hldr_eqy": "funda_fs_total_liab_equity", - "minority_int": "funda_fs_minority_interest_equity", - "money_cap": "funda_fs_money_cap", - "notes_receiv": "funda_fs_notes_receivable", - "accounts_receiv": "funda_fs_accounts_receivable", - "inventories": "funda_fs_inventories", - "fix_assets": "funda_fs_fixed_assets", - "cip": "funda_fs_construction_in_progress", - "intan_assets": "funda_fs_intangible_assets", - "goodwill": "funda_fs_goodwill", - "r_and_d": "funda_fs_rd_capitalized", - "st_borr": "funda_fs_short_term_borrow", - "lt_borr": "funda_fs_long_term_borrow", - "bond_payable": "funda_fs_bond_payable", - "notes_payable": "funda_fs_notes_payable", - "acct_payable": "funda_fs_accounts_payable", - "adv_receipts": "funda_fs_advance_receipts", - "taxes_payable": "funda_fs_taxes_payable", - "payroll_payable": "funda_fs_payroll_payable", - "oth_payable": "funda_fs_other_payables", - "undistr_porfit": "funda_fs_retained_earnings", - "surplus_rese": "funda_fs_surplus_reserve", - "cap_rese": "funda_fs_capital_reserve", - "total_share": "funda_fs_total_share", - "oth_comp_income": "funda_fs_other_comprehensive_income", -} - -CASHFLOW_COLUMN_MAP: dict[str, str] = { - "c_fr_sale_sg": "funda_fs_cash_from_sales_ytd", - "c_inf_fr_operate_a": "funda_fs_ocf_inflow_ytd", - "c_paid_goods_s": "funda_fs_cash_paid_goods_ytd", - "c_paid_to_for_empl": "funda_fs_cash_paid_employees_ytd", - "c_paid_for_taxes": "funda_fs_cash_paid_taxes_ytd", - "st_cash_out_act": "funda_fs_ocf_outflow_ytd", - "n_cashflow_act": "funda_fs_ocf_net_ytd", - "c_pay_acq_const_fiolta": "funda_fs_capex_ytd", - "c_paid_invest": "funda_fs_cash_paid_invest_ytd", - "n_cashflow_inv_act": "funda_fs_icf_net_ytd", - "c_recp_borrow": "funda_fs_cash_from_borrow_ytd", - "c_prepay_amt_borr": "funda_fs_cash_repay_debt_ytd", - "n_cash_flows_fnc_act": "funda_fs_fcf_net_ytd", - "free_cashflow": 
"funda_fs_free_cashflow_ytd", - "n_incr_cash_cash_equ": "funda_fs_cash_net_incr_ytd", - "depr_fa_coga_dpba": "funda_fs_depreciation_ytd", - "amort_intang_assets": "funda_fs_amortization_intangible_ytd", - "im_net_cashflow_oper_act": "funda_fs_ocf_indirect_ytd", - "c_cash_equ_beg_period": "funda_fs_cash_equiv_beg", - "c_cash_equ_end_period": "funda_fs_cash_equiv_end", -} - - -class StatementSpec: - """一张财报表的拉取规格。""" - - __slots__ = ("name", "api", "vip_api", "column_map") - - def __init__(self, name: str, api: str, vip_api: str, column_map: dict[str, str]) -> None: - self.name = name - self.api = api - self.vip_api = vip_api - self.column_map = column_map - - @property - def api_fields(self) -> str: - return "ts_code,ann_date,end_date,report_type," + ",".join(self.column_map.keys()) - - -STATEMENT_SPECS: tuple[StatementSpec, ...] = ( - StatementSpec("income", "income", "income_vip", INCOME_COLUMN_MAP), - StatementSpec("balancesheet", "balancesheet", "balancesheet_vip", BALANCESHEET_COLUMN_MAP), - StatementSpec("cashflow", "cashflow", "cashflow_vip", CASHFLOW_COLUMN_MAP), -) - -_STANDARD_QUARTER_ENDS = ("0331", "0630", "0930", "1231") -_QUARTER_END_MD = ((3, 31), (6, 30), (9, 30), (12, 31)) - - -def quarter_periods_between(start: str, end: str) -> list[str]: - """返回 [start, end] 内所有标准 A 股季报季末(YYYYMMDD)。""" - start_ts = pd.Timestamp(start).normalize() - end_ts = pd.Timestamp(end).normalize() - if start_ts > end_ts: - raise ValueError(f"start 不能晚于 end: {start} > {end}") - - periods: list[str] = [] - for year in range(start_ts.year, end_ts.year + 1): - for month, day in _QUARTER_END_MD: - qe = pd.Timestamp(year=year, month=month, day=day) - if start_ts <= qe <= end_ts: - periods.append(qe.strftime("%Y%m%d")) - return periods - - -def _normalize_period(period: str) -> str: - p = period.replace("-", "") - if len(p) != 8: - raise ValueError(f"period 须为 YYYYMMDD,收到: {period!r}") - if p[4:] not in _STANDARD_QUARTER_ENDS: - raise ValueError(f"非标准季报季末: {period!r}") - return p 
- - -def _dedupe_fina_raw(df: pd.DataFrame) -> pd.DataFrame: - """同一 (ts_code, end_date) 保留 ann_date 最新的一条。""" - if df.empty: - return df - out = df.copy() - out["ann_date"] = pd.to_datetime(out["ann_date"], errors="coerce") - out["end_date"] = pd.to_datetime(out["end_date"], errors="coerce") - out = out.dropna(subset=["ts_code", "end_date", "ann_date"]) - out = out.sort_values(["ts_code", "end_date", "ann_date"]) - return out.groupby(["ts_code", "end_date"], as_index=False).tail(1) - - -def fetch_fina_indicator_period( - period: str, - *, - ts_codes: list[str] | None = None, - sleep_sec: float = 0.35, - verbose: bool = True, - use_vip: bool = True, -) -> pd.DataFrame: - """拉取单个报告期的 fina_indicator 原始表。 - - use_vip=True 时用 ``fina_indicator_vip`` 拉**全市场**(每期 1 次请求,完整落盘)。 - use_vip=False 时按 ts_codes 逐股拉取(须指定股票列表)。 - """ - period = _normalize_period(period) - pro = get_pro() - - if use_vip: - if verbose: - print(f" fina_indicator_vip period={period}(全市场)") - raw = call_with_retry( - pro.fina_indicator_vip, - period=period, - fields=FINA_INDICATOR_API_FIELDS, - label=f"fina_indicator_vip_{period}", - ) - time.sleep(sleep_sec) - return _dedupe_fina_raw(raw) - - if not ts_codes: - raise ValueError("无 VIP 权限时须指定 ts_codes(--no-vip 且 --universe)") - - chunks: list[pd.DataFrame] = [] - n = len(ts_codes) - for i, code in enumerate(ts_codes): - if verbose and (i == 0 or (i + 1) % 50 == 0 or i + 1 == n): - print(f" fina_indicator [{i + 1}/{n}] {code} period={period}") - part = call_with_retry( - pro.fina_indicator, - ts_code=code, - period=period, - fields=FINA_INDICATOR_API_FIELDS, - label=f"fina_indicator_{code}_{period}", - ) - if part is not None and not part.empty: - chunks.append(part) - time.sleep(sleep_sec) - - if not chunks: - return pd.DataFrame() - non_empty = [c for c in chunks if not c.empty] - if not non_empty: - return pd.DataFrame() - return _dedupe_fina_raw(pd.concat(non_empty, ignore_index=True)) - - -def raw_fina_to_quarterly(raw: pd.DataFrame) -> 
pd.DataFrame: - """fina_indicator 原始表 → (report_end, instrument) 索引的 panel 列。""" - if raw.empty: - return pd.DataFrame() - - df = raw.copy() - df["instrument"] = df["ts_code"] - df["report_end"] = pd.to_datetime(df["end_date"]) - rename = {k: v for k, v in FINA_INDICATOR_COLUMN_MAP.items() if k in df.columns} - df = df.rename(columns=rename) - value_cols = list(rename.values()) - out = df.set_index(["report_end", "instrument"])[value_cols] - return out.sort_index() - - -def fetch_statement_period( - spec: StatementSpec, - period: str, - *, - ts_codes: list[str] | None = None, - sleep_sec: float = 0.35, - verbose: bool = True, - use_vip: bool = True, -) -> pd.DataFrame: - """拉取单个报告期的一张三大表原始数据(income/balancesheet/cashflow)。 - - use_vip=True 时用 ``*_vip`` 接口拉**全市场**(每期 1 次请求)。 - use_vip=False 时按 ts_codes 逐股拉取(须指定股票列表)。 - """ - period = _normalize_period(period) - pro = get_pro() - - if use_vip: - if verbose: - print(f" {spec.vip_api} period={period}(全市场)") - raw = call_with_retry( - getattr(pro, spec.vip_api), - period=period, - fields=spec.api_fields, - label=f"{spec.vip_api}_{period}", - ) - time.sleep(sleep_sec) - return _dedupe_statement_raw(raw) - - if not ts_codes: - raise ValueError("无 VIP 权限时须指定 ts_codes(--no-vip 且 --universe)") - - chunks: list[pd.DataFrame] = [] - n = len(ts_codes) - for i, code in enumerate(ts_codes): - if verbose and (i == 0 or (i + 1) % 50 == 0 or i + 1 == n): - print(f" {spec.api} [{i + 1}/{n}] {code} period={period}") - part = call_with_retry( - getattr(pro, spec.api), - ts_code=code, - period=period, - fields=spec.api_fields, - label=f"{spec.api}_{code}_{period}", - ) - if part is not None and not part.empty: - chunks.append(part) - time.sleep(sleep_sec) - - non_empty = [c for c in chunks if not c.empty] - if not non_empty: - return pd.DataFrame() - return _dedupe_statement_raw(pd.concat(non_empty, ignore_index=True)) - - -def _dedupe_statement_raw(df: pd.DataFrame) -> pd.DataFrame: - """三大表原始表:仅留合并报表(report_type='1'),同 (ts_code, 
end_date) 取 ann_date 最新。""" - if df is None or df.empty: - return pd.DataFrame() - out = df.copy() - if "report_type" in out.columns: - out = out[out["report_type"].astype(str) == "1"] - out["ann_date"] = pd.to_datetime(out["ann_date"], errors="coerce") - out["end_date"] = pd.to_datetime(out["end_date"], errors="coerce") - out = out.dropna(subset=["ts_code", "end_date", "ann_date"]) - if out.empty: - return pd.DataFrame() - out = out.sort_values(["ts_code", "end_date", "ann_date"]) - return out.groupby(["ts_code", "end_date"], as_index=False).tail(1) - - -def raw_statement_to_quarterly(raw: pd.DataFrame, column_map: dict[str, str]) -> pd.DataFrame: - """三大表原始表 → (report_end, instrument) 索引的 panel 列(保留原始值)。 - - 内部先做 report_type='1' 过滤 + 同 (ts_code, end_date) 取 ann_date 最新, - 因此对未去重的原始表也安全(幂等)。 - """ - if raw is None or raw.empty: - return pd.DataFrame() - - df = _dedupe_statement_raw(raw) - if df.empty: - return pd.DataFrame() - df["instrument"] = df["ts_code"] - df["report_end"] = pd.to_datetime(df["end_date"]) - rename = {k: v for k, v in column_map.items() if k in df.columns} - if not rename: - return pd.DataFrame() - df = df.rename(columns=rename) - value_cols = list(rename.values()) - out = df.set_index(["report_end", "instrument"])[value_cols] - out = out.apply(pd.to_numeric, errors="coerce") - return out[~out.index.duplicated(keep="last")].sort_index() - - -def _join_quarterly_columns(base: pd.DataFrame, extra: pd.DataFrame) -> pd.DataFrame: - """按 (report_end, instrument) 索引列向合并(outer join)。""" - if base.empty: - return extra - if extra.empty: - return base - dup = [c for c in extra.columns if c in base.columns] - extra_clean = extra.drop(columns=dup) if dup else extra - return base.join(extra_clean, how="outer") - - -def raw_fina_to_disclosure_events(raw: pd.DataFrame) -> pd.DataFrame: - """从 fina_indicator 提取披露日历 long 表。""" - if raw.empty: - return pd.DataFrame(columns=["report_end", "instrument", "disclosure"]) - - df = raw.copy() - df["report_end"] = 
pd.to_datetime(df["end_date"]) - df["instrument"] = df["ts_code"] - df["disclosure"] = pd.to_datetime(df["ann_date"], errors="coerce") - return df[["report_end", "instrument", "disclosure"]].dropna() - - -def disclosure_events_to_wide(events: pd.DataFrame) -> pd.DataFrame: - """long 披露表 → 宽表 (report_end × instrument)。""" - if events.empty: - return pd.DataFrame() - wide = events.pivot_table( - index="report_end", - columns="instrument", - values="disclosure", - aggfunc="last", - ) - wide.index = pd.to_datetime(wide.index) - return wide.sort_index() - - -def merge_quarterly(existing: pd.DataFrame, new: pd.DataFrame) -> pd.DataFrame: - """合并季频缓存,同键以 new 为准。""" - if existing.empty: - return new.sort_index() - if new.empty: - return existing.sort_index() - combined = pd.concat([existing, new]) - return combined[~combined.index.duplicated(keep="last")].sort_index() - - -def merge_disclosure_wide(existing: pd.DataFrame, new: pd.DataFrame) -> pd.DataFrame: - """合并披露宽表:新数据覆盖同 (report_end, instrument) 单元格。""" - if existing.empty: - return new.sort_index() - if new.empty: - return existing.sort_index() - - all_index = existing.index.union(new.index) - all_cols = existing.columns.union(new.columns) - base = existing.reindex(index=all_index, columns=all_cols) - overlay = new.reindex(index=all_index, columns=all_cols) - return base.combine_first(overlay).sort_index() - - -def save_quarterly(df: pd.DataFrame, path: Path | str = FUNDAMENTAL_QUARTERLY_PATH) -> Path: - out = Path(path) - out.parent.mkdir(parents=True, exist_ok=True) - df.to_parquet(out) - return out - - -def save_disclosure_calendar(wide: pd.DataFrame, path: Path | str = DISCLOSURE_CALENDAR_PATH) -> Path: - out = Path(path) - out.parent.mkdir(parents=True, exist_ok=True) - wide.to_parquet(out) - if not wide.empty: - validate_quarter_report_ends(out) - return out - - -def load_quarterly_cache(path: Path | str = FUNDAMENTAL_QUARTERLY_PATH) -> pd.DataFrame: - p = Path(path) - if not p.is_file(): - return 
pd.DataFrame() - df = pd.read_parquet(p) - if df.index.names != ["report_end", "instrument"]: - if "datetime" in df.index.names: - df = df.rename_axis(index={"datetime": "report_end"}) - if "code" in df.index.names: - df = df.rename_axis(index={"code": "instrument"}) - return df.sort_index() - - -def load_disclosure_wide(path: Path | str = DISCLOSURE_CALENDAR_PATH) -> pd.DataFrame: - p = Path(path) - if not p.is_file(): - return pd.DataFrame() - wide = pd.read_parquet(p) - wide.index = pd.to_datetime(wide.index) - return wide.sort_index() - - -def fetch_and_save_periods( - periods: list[str], - *, - ts_codes: list[str] | None = None, - quarterly_path: Path | str = FUNDAMENTAL_QUARTERLY_PATH, - disclosure_path: Path | str = DISCLOSURE_CALENDAR_PATH, - sleep_sec: float = 0.35, - verbose: bool = True, - use_vip: bool = True, - with_statements: bool = False, -) -> tuple[pd.DataFrame, pd.DataFrame]: - """拉取多个报告期并增量写入缓存。 - - with_statements=True 时额外拉取三大表(income/balancesheet/cashflow), - 按 (report_end, instrument) 列向合并进同一份季频缓存;PIT 展开逻辑通用, - 这些 ``funda_fs_*`` 列会随 fina 指标一起展开为日频。 - """ - quarterly_acc = load_quarterly_cache(quarterly_path) - disclosure_acc = load_disclosure_wide(disclosure_path) - - for period in periods: - period = _normalize_period(period) - if verbose: - print(f"拉取 fina_indicator: {period}") - raw = fetch_fina_indicator_period( - period, - ts_codes=ts_codes, - sleep_sec=sleep_sec, - verbose=verbose, - use_vip=use_vip, - ) - - q = raw_fina_to_quarterly(raw) if not raw.empty else pd.DataFrame() - wide = disclosure_events_to_wide(raw_fina_to_disclosure_events(raw)) if not raw.empty else pd.DataFrame() - - if with_statements: - for spec in STATEMENT_SPECS: - if verbose: - print(f"拉取 {spec.name}: {period}") - raw_s = fetch_statement_period( - spec, - period, - ts_codes=ts_codes, - sleep_sec=sleep_sec, - verbose=verbose, - use_vip=use_vip, - ) - q = _join_quarterly_columns(q, raw_statement_to_quarterly(raw_s, spec.column_map)) - if wide.empty and not 
raw_s.empty: - wide = disclosure_events_to_wide(raw_fina_to_disclosure_events(raw_s)) - - if q.empty: - if verbose: - print(f" 警告: {period} 无数据") - continue - - quarterly_acc = merge_quarterly(quarterly_acc, q) - if not wide.empty: - disclosure_acc = merge_disclosure_wide(disclosure_acc, wide) - save_quarterly(quarterly_acc, quarterly_path) - save_disclosure_calendar(disclosure_acc, disclosure_path) - if verbose: - n_inst = q.index.get_level_values("instrument").nunique() - print( - f" 本期 +{len(q)} 条({n_inst} 只股票,{q.shape[1]} 列)" - f" → 已落盘 cumulative={quarterly_acc.shape}" - ) - - if verbose: - print(f"季频缓存: {quarterly_path} shape={quarterly_acc.shape}") - print(f"披露缓存: {disclosure_path} shape={disclosure_acc.shape}") - return quarterly_acc, disclosure_acc - - -def ensure_fundamental_dir() -> Path: - FUNDAMENTAL_DIR.mkdir(parents=True, exist_ok=True) - return FUNDAMENTAL_DIR diff --git a/seekalpha/data/index_members.py b/seekalpha/data/index_members.py deleted file mode 100644 index 2a9a6f04..00000000 --- a/seekalpha/data/index_members.py +++ /dev/null @@ -1,173 +0,0 @@ -"""指数成分(universe)缓存:把指数成分股按月快照落盘,供拉取阶段复用与复现。 - -- 缓存 schema:long 表 ``[trade_date, instrument]``(每月一份成分快照)。 -- 全量拉取时按月抓 ``index_weight`` 快照并落盘;相同区间再拉可直接读缓存,免于重复请求。 -- 仅用于**拉取阶段**决定拉哪些股票;离线建 panel 不依赖本缓存。 -""" - -from __future__ import annotations - -import time -from pathlib import Path - -import pandas as pd - -from seekalpha.core.paths import INDEX_DIR -from seekalpha.data.tushare_client import get_pro -from seekalpha.data.universe import fetch_index_members, resolve_index_code - -MEMBER_COLUMNS = ["trade_date", "instrument"] - - -def index_members_path(index: str, *, base_dir: Path | str = INDEX_DIR) -> Path: - """成分缓存路径,如 artifacts/index/000852_SH_members.parquet。""" - code = resolve_index_code(index).replace(".", "_") - return Path(base_dir) / f"{code}_members.parquet" - - -def load_index_members(index: str | None = None, *, path: Path | str | None = None) -> pd.DataFrame: - 
"""读取成分快照缓存;不存在时返回空表。""" - p = Path(path) if path is not None else index_members_path(index or "") - if not p.is_file(): - return pd.DataFrame(columns=MEMBER_COLUMNS) - df = pd.read_parquet(p) - df["trade_date"] = pd.to_datetime(df["trade_date"]) - df["instrument"] = df["instrument"].astype(str) - return df.sort_values(MEMBER_COLUMNS).reset_index(drop=True) - - -def save_index_members( - df: pd.DataFrame, index: str | None = None, *, path: Path | str | None = None -) -> Path: - p = Path(path) if path is not None else index_members_path(index or "") - p.parent.mkdir(parents=True, exist_ok=True) - df.sort_values(MEMBER_COLUMNS).reset_index(drop=True).to_parquet(p) - return p - - -def merge_members(old: pd.DataFrame, new: pd.DataFrame) -> pd.DataFrame: - """合并成分快照,按 (trade_date, instrument) 去重。""" - if old is None or old.empty: - return new.sort_values(MEMBER_COLUMNS).reset_index(drop=True) - if new is None or new.empty: - return old.sort_values(MEMBER_COLUMNS).reset_index(drop=True) - both = pd.concat([old, new]).drop_duplicates(subset=MEMBER_COLUMNS) - return both.sort_values(MEMBER_COLUMNS).reset_index(drop=True) - - -def members_union(cache: pd.DataFrame, start: str, end: str) -> list[str]: - """缓存中 [start, end] 内所有快照的成分并集。""" - if cache is None or cache.empty: - return [] - td = cache["trade_date"] - mask = (td >= pd.Timestamp(start)) & (td <= pd.Timestamp(end)) - return sorted(cache.loc[mask, "instrument"].astype(str).unique()) - - -def _cache_covers(cache: pd.DataFrame, start: str, end: str) -> bool: - """缓存是否覆盖 [start, end](月粒度:已含 start 所在月至 end 所在月的快照)。 - - 月快照总落在月末,故按 period[M] 比较:缓存最早月 <= start 月且最晚月 >= end 月。 - """ - if cache is None or cache.empty: - return False - months = cache["trade_date"].dt.to_period("M") - start_m = pd.Timestamp(start).to_period("M") - end_m = pd.Timestamp(end).to_period("M") - return months.min() <= start_m and months.max() >= end_m - - -def fetch_monthly_snapshots( - pro, - index: str, - start: str, - end: str, - *, - sleep_sec: 
float = 0.35, - verbose: bool = True, -) -> pd.DataFrame: - """按月抓 index_weight 快照,返回 long 表 [trade_date, instrument]。""" - index_code = resolve_index_code(index) - start_ts = pd.Timestamp(start) - end_ts = pd.Timestamp(end) - month_starts = pd.date_range(start_ts.replace(day=1), end_ts.replace(day=1), freq="MS") - - rows: list[dict] = [] - for i, m in enumerate(month_starts): - snap = min(m + pd.offsets.MonthEnd(0), end_ts) - if snap < start_ts: - continue - d = snap.strftime("%Y%m%d") - df = pro.index_weight(index_code=index_code, start_date=d, end_date=d) - if df is not None and not df.empty: - snap_norm = pd.Timestamp(snap).normalize() - for code in df["con_code"].dropna().astype(str).unique(): - rows.append({"trade_date": snap_norm, "instrument": code}) - if verbose and (i + 1) % 12 == 0: - print(f" index_weight 月快照 {i + 1}/{len(month_starts)} 月, 累计 {len(rows)} 行") - if sleep_sec > 0: - time.sleep(sleep_sec) - - return pd.DataFrame(rows, columns=MEMBER_COLUMNS) - - -def resolve_index_members_cached( - index: str, - start: str, - end: str, - *, - pro=None, - path: Path | str | None = None, - refresh: bool = False, - sleep_sec: float = 0.35, - verbose: bool = True, -) -> list[str]: - """解析 [start, end] 指数成分并集,优先读缓存;缺失/未覆盖时拉取并落盘。 - - 返回成分股 ts_code 列表。缓存以月快照形式持久化于 artifacts/index/。 - """ - cache_path = Path(path) if path is not None else index_members_path(index) - cache = load_index_members(path=cache_path) - - if not refresh and _cache_covers(cache, start, end): - members = members_union(cache, start, end) - if members: - if verbose: - print(f" 成分来源: 缓存 {cache_path.name}, 共 {len(members)} 只") - return members - - if pro is None: - pro = get_pro() - - snaps = fetch_monthly_snapshots(pro, index, start, end, sleep_sec=sleep_sec, verbose=verbose) - if snaps.empty: - # 月快照为空:回退到 universe 的多路解析(index_member 等),并以单份快照持久化 - members = fetch_index_members(pro, index, start, end, sleep_sec=sleep_sec, verbose=verbose) - snaps = pd.DataFrame( - {"trade_date": 
pd.Timestamp(end).normalize(), "instrument": list(members)}, - columns=MEMBER_COLUMNS, - ) - - cache = merge_members(cache, snaps) - save_index_members(cache, path=cache_path) - members = members_union(cache, start, end) - if verbose: - print(f" 成分来源: index_weight(monthly) 已缓存 → {cache_path.name}, 共 {len(members)} 只") - return members - - -def append_snapshot( - index: str, - trade_date: str, - instruments: list[str] | set[str], - *, - path: Path | str | None = None, -) -> Path: - """把某一交易日的成分快照追加进缓存(增量更新时用)。""" - cache_path = Path(path) if path is not None else index_members_path(index) - snap = pd.DataFrame( - {"trade_date": pd.Timestamp(trade_date).normalize(), "instrument": sorted(set(instruments))}, - columns=MEMBER_COLUMNS, - ) - cache = merge_members(load_index_members(path=cache_path), snap) - return save_index_members(cache, path=cache_path) diff --git a/seekalpha/data/industry.py b/seekalpha/data/industry.py deleted file mode 100644 index d44d0cff..00000000 --- a/seekalpha/data/industry.py +++ /dev/null @@ -1,176 +0,0 @@ -"""申万一级(SW2021)行业分类:拉取成员(带 in/out 日期) → 严格 PIT 映射为 panel 行业码列。 - -产出列 ``industry_sw_l1``:整数编码(1..N,按行业 index_code 排序,可复现),落为 float32, -未归类样本为 NaN。DSL 里可直接用 ``$industry_sw_l1`` 做行业中性: -``CS_NEUTRALIZE(factor, $industry_sw_l1)``(行业码是离散组号,勿再套 CS_BUCKET)。 - -数据源 Tushare:``index_classify``(行业目录) + ``index_member``(个股成员,含 in/out 日期)。 -PIT:按 in_date/out_date 用 merge_asof(backward) 把每个交易日映射到当日有效行业,无前视。 -""" - -from __future__ import annotations - -import time -from pathlib import Path - -import numpy as np -import pandas as pd - -from seekalpha.core.paths import INDUSTRY_SW_PATH -from seekalpha.data.tushare_client import call_with_retry, get_pro - -INDUSTRY_COLUMN = "industry_sw_l1" -SW_SRC = "SW2021" -SW_LEVEL = "L1" - -_MEMBERSHIP_COLUMNS = ["instrument", "industry_code", "industry_name", "in_date", "out_date"] - - -def fetch_sw_l1_membership( - pro=None, - *, - sleep_sec: float = 0.35, - verbose: bool = True, -) -> pd.DataFrame: - """拉 SW2021 
一级行业成员,返回 long 表。 - - 列: ``instrument, industry_code(int 1..N), industry_name, in_date, out_date``。 - 整数码按行业 ``index_code`` 排序分配,保证跨次运行稳定。 - """ - if pro is None: - pro = get_pro() - - classify = call_with_retry( - pro.index_classify, - level=SW_LEVEL, - src=SW_SRC, - label="index_classify_L1", - ) - if classify is None or classify.empty: - raise RuntimeError("index_classify 返回空:检查 Tushare 权限/积分(申万行业需相应权限)") - - name_col = "industry_name" if "industry_name" in classify.columns else "name" - classify = classify.dropna(subset=["index_code"]).sort_values("index_code").reset_index(drop=True) - code_map = {code: i + 1 for i, code in enumerate(classify["index_code"])} - name_map = dict(zip(classify["index_code"], classify[name_col])) - - rows: list[pd.DataFrame] = [] - n = len(classify) - for i, idx_code in enumerate(classify["index_code"]): - if verbose and (i == 0 or (i + 1) % 10 == 0 or i + 1 == n): - print(f" index_member [{i + 1}/{n}] {idx_code} {name_map[idx_code]}") - mem = call_with_retry( - pro.index_member, - index_code=idx_code, - is_new="", - label=f"index_member_{idx_code}", - ) - time.sleep(sleep_sec) - if mem is None or mem.empty: - continue - m = mem.copy() - m["instrument"] = m["con_code"] - m["industry_code"] = code_map[idx_code] - m["industry_name"] = name_map[idx_code] - m["in_date"] = pd.to_datetime(m["in_date"], errors="coerce") - m["out_date"] = pd.to_datetime(m["out_date"], errors="coerce") - rows.append(m[_MEMBERSHIP_COLUMNS]) - - if not rows: - raise RuntimeError("index_member 全部为空:无法构建行业映射") - - out = pd.concat(rows, ignore_index=True) - out = out.dropna(subset=["instrument", "in_date"]) - return out.sort_values(["instrument", "in_date"]).reset_index(drop=True) - - -def save_membership(df: pd.DataFrame, path: Path | str = INDUSTRY_SW_PATH) -> Path: - out = Path(path) - out.parent.mkdir(parents=True, exist_ok=True) - df.to_parquet(out) - return out - - -def load_membership(path: Path | str = INDUSTRY_SW_PATH) -> pd.DataFrame: - p = Path(path) - 
if not p.is_file(): - return pd.DataFrame(columns=_MEMBERSHIP_COLUMNS) - df = pd.read_parquet(p) - df["in_date"] = pd.to_datetime(df["in_date"], errors="coerce") - df["out_date"] = pd.to_datetime(df["out_date"], errors="coerce") - return df - - -def sw_l1_code_map(membership: pd.DataFrame) -> dict[int, str]: - """行业整数码 → 行业名称。""" - if membership.empty: - return {} - pairs = membership[["industry_code", "industry_name"]].dropna().drop_duplicates() - return {int(c): str(nm) for c, nm in zip(pairs["industry_code"], pairs["industry_name"])} - - -def build_industry_column( - panel: pd.DataFrame, - membership: pd.DataFrame, - *, - dtype: str = "float32", -) -> pd.Series: - """按 (in_date, out_date) 严格 PIT 映射,返回与 panel 同索引的行业码列。""" - if panel.index.names != ["datetime", "instrument"]: - raise ValueError(f"panel 索引须为 (datetime, instrument),当前: {panel.index.names}") - - empty = pd.Series(np.nan, index=panel.index, name=INDUSTRY_COLUMN, dtype=dtype) - if membership is None or membership.empty: - return empty - - left = panel.index.to_frame(index=False)[["datetime", "instrument"]] - left["_row"] = np.arange(len(left)) - left = left.sort_values("datetime", kind="mergesort") - - right = membership[["instrument", "in_date", "out_date", "industry_code"]].dropna( - subset=["in_date"] - ) - right = right.sort_values("in_date", kind="mergesort") - if right.empty: - return empty - - merged = pd.merge_asof( - left, - right, - left_on="datetime", - right_on="in_date", - by="instrument", - direction="backward", - ) - # 已离开该行业(datetime 晚于 out_date)的样本置空,避免延用过期分类。 - invalid = merged["out_date"].notna() & (merged["datetime"] > merged["out_date"]) - merged.loc[invalid, "industry_code"] = np.nan - - merged = merged.sort_values("_row", kind="mergesort") - return pd.Series( - merged["industry_code"].to_numpy(), - index=panel.index, - name=INDUSTRY_COLUMN, - ).astype(dtype) - - -def enrich_panel_industry( - panel: pd.DataFrame, - *, - membership_path: Path | str = INDUSTRY_SW_PATH, - refresh: 
bool = False, - dtype: str = "float32", - sleep_sec: float = 0.35, - verbose: bool = True, -) -> pd.DataFrame: - """把申万一级行业码 left-join 到 panel(缺缓存或 refresh 时自动从 Tushare 拉取并落盘)。""" - membership = pd.DataFrame() if refresh else load_membership(membership_path) - if membership.empty: - membership = fetch_sw_l1_membership(sleep_sec=sleep_sec, verbose=verbose) - save_membership(membership, membership_path) - - out = panel.copy() - if INDUSTRY_COLUMN in out.columns: - out = out.drop(columns=[INDUSTRY_COLUMN]) - out[INDUSTRY_COLUMN] = build_industry_column(panel, membership, dtype=dtype) - return out.sort_index() diff --git a/seekalpha/data/market_fetch.py b/seekalpha/data/market_fetch.py deleted file mode 100644 index 7020e23f..00000000 --- a/seekalpha/data/market_fetch.py +++ /dev/null @@ -1,728 +0,0 @@ -"""从 Tushare 拉取日频行情并写入 market hq 缓存。 - -与 ``fundamental_fetch`` 对称:本模块**只负责联网拉取 + 落盘 hq 缓存**, -不做 panel 构建。panel 由 ``seekalpha.data.panel`` 从 hq 缓存离线构建。 - -hq 宽表 schema:索引 (datetime, instrument),列 -``open/high/low/close/adjfactor/volume/amount/float_cap/tot_cap`` + -daily_basic 每日指标(turnover_rate/pe_ttm/pb/ps_ttm/dv_ttm/total_share/... 
见 -``DAILY_BASIC_COLUMNS``)+ ``is_trade/is_st/not_st``。 -""" - -from __future__ import annotations - -import time -from pathlib import Path - -import numpy as np -import pandas as pd - -from seekalpha.core.paths import MARKET_HQ_PATH -from seekalpha.core.types import DAILY_BASIC_COLUMNS -from seekalpha.data.index_members import append_snapshot, resolve_index_members_cached -from seekalpha.data.tushare_client import get_pro -from seekalpha.data.universe import ( - apply_is_st, - fetch_index_members_for_dates, - fetch_st_table, -) - -# daily_basic 请求字段(circ_mv/total_mv → float_cap/tot_cap;其余原样入库) -DAILY_BASIC_FIELDS = ",".join(["ts_code", "trade_date", "circ_mv", "total_mv", *DAILY_BASIC_COLUMNS]) - -# hq 缓存列顺序(含 is_st,供 build 侧 filter_universe 使用) -HQ_COLUMNS = [ - "open", - "high", - "low", - "close", - "adjfactor", - "volume", - "amount", - "float_cap", - "tot_cap", - *DAILY_BASIC_COLUMNS, - "is_trade", - "is_st", - "not_st", -] - - -def _format_yyyymmdd(d: str) -> str: - return d.replace("-", "") - - -# --------------------------------------------------------------------------- -# 交易日历 -# --------------------------------------------------------------------------- -def _fetch_trade_dates(pro, start: str, end: str) -> list[str]: - cal = pro.trade_cal( - exchange="SSE", - start_date=_format_yyyymmdd(start), - end_date=_format_yyyymmdd(end), - is_open="1", - ) - return sorted(cal["cal_date"].tolist()) - - -def _prev_trade_date(pro, date: str) -> str | None: - """给定 YYYY-MM-DD 或 YYYYMMDD,返回上一个交易日 YYYY-MM-DD。""" - td = _format_yyyymmdd(date) - cal = pro.trade_cal( - exchange="SSE", - start_date=(pd.Timestamp(td) - pd.Timedelta(days=30)).strftime("%Y%m%d"), - end_date=td, - is_open="1", - ) - if cal is None or cal.empty: - return None - open_days = sorted(cal["cal_date"].tolist()) - prior = [d for d in open_days if d < td] - if not prior: - return None - last = prior[-1] - return f"{last[:4]}-{last[4:6]}-{last[6:8]}" - - -def _latest_trade_date(pro, *, end: str | None = 
None) -> str: - """不晚于 end(默认今天)的最近一个 SSE 交易日,返回 YYYY-MM-DD。""" - end_ts = pd.Timestamp(end) if end is not None else pd.Timestamp.today() - cal = pro.trade_cal( - exchange="SSE", - start_date=(end_ts - pd.Timedelta(days=10)).strftime("%Y%m%d"), - end_date=end_ts.strftime("%Y%m%d"), - is_open="1", - ) - last = cal["cal_date"].max() - return f"{last[:4]}-{last[4:6]}-{last[6:8]}" - - -def _next_trade_date(pro, date: str) -> str | None: - """给定 YYYY-MM-DD 或 YYYYMMDD,返回下一个交易日 YYYY-MM-DD。""" - td = _format_yyyymmdd(date) - cal = pro.trade_cal( - exchange="SSE", - start_date=td, - end_date=(pd.Timestamp(td) + pd.Timedelta(days=30)).strftime("%Y%m%d"), - is_open="1", - ) - if cal is None or cal.empty: - return None - open_days = sorted(cal["cal_date"].tolist()) - later = [d for d in open_days if d > td] - if not later: - return None - nxt = later[0] - return f"{nxt[:4]}-{nxt[4:6]}-{nxt[6:8]}" - - -def _panel_missing_trade_dates(pro, frame: pd.DataFrame, latest: str) -> list[str]: - """frame 覆盖区间 [min, latest] 内缺失的 SSE 交易日(含中间空洞),YYYY-MM-DD。 - - frame 可以是 hq 缓存或 panel,只要索引含 datetime 层。 - """ - dt = frame.index.get_level_values("datetime") - frame_min = pd.Timestamp(dt.min()).normalize() - latest_ts = pd.Timestamp(latest).normalize() - if frame_min > latest_ts: - return [] - - all_trade = _fetch_trade_dates(pro, frame_min.strftime("%Y-%m-%d"), latest) - existing = {pd.Timestamp(t).normalize() for t in dt.unique()} - - missing: list[str] = [] - for td in all_trade: - ts = pd.Timestamp(f"{td[:4]}-{td[4:6]}-{td[6:8]}").normalize() - if ts not in existing: - missing.append(ts.strftime("%Y-%m-%d")) - return missing - - -def _group_contiguous_trade_dates(pro, iso_dates: list[str]) -> list[tuple[str, str]]: - """将缺失交易日列表合并为若干闭区间 [start, end](单次 trade_cal,避免逐日查询)。""" - if not iso_dates: - return [] - sorted_iso = sorted(iso_dates) - yyyymmdd_list = _fetch_trade_dates(pro, sorted_iso[0], sorted_iso[-1]) - trade_idx = {f"{d[:4]}-{d[4:6]}-{d[6:8]}": i for i, d in 
enumerate(yyyymmdd_list)} - - ranges: list[tuple[str, str]] = [] - start = end = sorted_iso[0] - for d in sorted_iso[1:]: - prev_i = trade_idx.get(end) - cur_i = trade_idx.get(d) - if prev_i is not None and cur_i is not None and cur_i == prev_i + 1: - end = d - else: - ranges.append((start, end)) - start = end = d - ranges.append((start, end)) - return ranges - - -def _expand_update_dates(pro, dates: list[str]) -> tuple[list[str], str | None]: - """增量更新日期扩展:在 user dates 基础上加入最早日期的上一交易日。 - - 返回 (sorted fetch dates, backfill_since YYYY-MM-DD)。 - """ - normalized = sorted({_format_yyyymmdd(d) for d in dates}) - iso_dates = [f"{d[:4]}-{d[4:6]}-{d[6:8]}" for d in normalized] - prev = _prev_trade_date(pro, iso_dates[0]) - fetch_set = set(normalized) - if prev is not None: - fetch_set.add(_format_yyyymmdd(prev)) - fetch_sorted = sorted(fetch_set) - fetch_iso = [f"{d[:4]}-{d[4:6]}-{d[6:8]}" for d in fetch_sorted] - backfill_since = prev if prev is not None else iso_dates[0] - return fetch_iso, backfill_since - - -# --------------------------------------------------------------------------- -# 原始行情拉取 -# --------------------------------------------------------------------------- -def _merge_raw_daily( - daily: pd.DataFrame, - adj: pd.DataFrame, - basic: pd.DataFrame, - st_table: pd.DataFrame, - *, - fill_adj: bool = True, -) -> pd.DataFrame: - """合并 daily + adj_factor + daily_basic + stock_st 为 hq 宽表行。 - - fill_adj=True 时,缺失 adj_factor 直接填 1.0(历史行为,适用于按日拉取); - fill_adj=False 时保留 NaN,交由调用方按单股 ffill/bfill(避免伪造尺度断层)。 - """ - if daily is None or daily.empty: - return pd.DataFrame() - - df = daily.copy() - if adj is not None and not adj.empty: - df = df.merge( - adj[["ts_code", "trade_date", "adj_factor"]], - on=["ts_code", "trade_date"], - how="left", - ) - else: - df["adj_factor"] = 1.0 - - if basic is not None and not basic.empty: - df = df.merge(basic, on=["ts_code", "trade_date"], how="left") - else: - df["circ_mv"] = 0.0 - df["total_mv"] = 0.0 - - df = apply_is_st(df, 
st_table) - - df["adjfactor"] = df["adj_factor"].fillna(1.0) if fill_adj else df["adj_factor"] - df["volume"] = df["vol"] - df["float_cap"] = df["circ_mv"].fillna(0) * 10000 - df["tot_cap"] = df["total_mv"].fillna(0) * 10000 - df["is_trade"] = (df["volume"] > 0).astype("int8") - - # daily_basic 每日指标:缺失(basic 为空或未返回该字段)时置 NaN - for col in DAILY_BASIC_COLUMNS: - if col not in df.columns: - df[col] = np.nan - - df["datetime"] = pd.to_datetime(df["trade_date"]) - df["instrument"] = df["ts_code"] - - cols = ["datetime", "instrument", *HQ_COLUMNS] - return df[cols].set_index(["datetime", "instrument"]) - - -def _fetch_one_day(pro, trade_date: str) -> pd.DataFrame: - """拉取单个交易日全市场数据并合并为 hq 行。""" - daily = pro.daily(trade_date=trade_date) - if daily is None or daily.empty: - return pd.DataFrame() - - adj = pro.adj_factor(trade_date=trade_date) - basic = pro.daily_basic(trade_date=trade_date, fields=DAILY_BASIC_FIELDS) - st_table = fetch_st_table(pro, trade_date=trade_date) - - return _merge_raw_daily(daily, adj, basic, st_table) - - -def _year_chunks(start: str, end: str) -> list[tuple[str, str]]: - """按自然年切分日期区间,避免单次 daily 行数超限。""" - start_ts = pd.Timestamp(start) - end_ts = pd.Timestamp(end) - chunks: list[tuple[str, str]] = [] - for year in range(start_ts.year, end_ts.year + 1): - chunk_start = max(start_ts, pd.Timestamp(f"{year}-01-01")) - chunk_end = min(end_ts, pd.Timestamp(f"{year}-12-31")) - if chunk_start <= chunk_end: - chunks.append((chunk_start.strftime("%Y-%m-%d"), chunk_end.strftime("%Y-%m-%d"))) - return chunks - - -def _select_daily_basic(basic: pd.DataFrame, codes: set[str] | list[str]) -> pd.DataFrame: - """从按 trade_date 缓存的 daily_basic 中筛出指定股票。""" - cols = ["ts_code", "trade_date", "circ_mv", "total_mv"] - if basic is None or basic.empty: - return pd.DataFrame(columns=cols) - code_set = set(codes) - return basic[basic["ts_code"].isin(code_set)].copy() - - -def _fetch_daily_basic_for_dates( - pro, - trade_dates: list[str], - *, - codes: set[str] | 
list[str] | None = None, - sleep_sec: float = 0.35, - verbose: bool = False, -) -> pd.DataFrame: - """按 trade_date 拉 daily_basic。 - - Tushare 的 daily_basic 不支持「多 ts_code + start/end 区间」组合,会返回空表。 - """ - cols = ["ts_code", "trade_date", "circ_mv", "total_mv"] - if not trade_dates: - return pd.DataFrame(columns=cols) - - code_set = set(codes) if codes is not None else None - chunks: list[pd.DataFrame] = [] - n = len(trade_dates) - for i, td in enumerate(trade_dates): - if verbose and (i == 0 or i + 1 == n or (i + 1) % 50 == 0): - print(f" daily_basic [{i + 1}/{n}] {td}") - df = pro.daily_basic(trade_date=td, fields=DAILY_BASIC_FIELDS) - if df is not None and not df.empty: - if code_set is not None: - df = df[df["ts_code"].isin(code_set)] - if not df.empty: - chunks.append(df) - if sleep_sec > 0: - time.sleep(sleep_sec) - - if not chunks: - return pd.DataFrame(columns=cols) - return pd.concat(chunks, ignore_index=True) - - -def fetch_hq_for_pool( - start: str, - end: str, - members: list[str], - *, - batch_size: int = 40, - sleep_sec: float = 0.35, - verbose: bool = True, -) -> pd.DataFrame: - """按股票池拉取 [start, end] 行情(daily + adj + basic + stock_st)。 - - **逐只股票、全区间**拉取 daily / adj_factor / daily_basic,避免「多 ts_code × 长区间」 - 单次请求超过 Tushare 6000 行上限被静默截断(截断会丢失约 35% 行情行)。 - 参数 batch_size 已弃用(保留以兼容旧调用),当前按单股拉取。 - """ - del batch_size # 兼容旧签名,当前逐股拉取 - pro = get_pro() - if not members: - raise ValueError("股票池为空") - - d0 = _format_yyyymmdd(start) - d1 = _format_yyyymmdd(end) - st_table = fetch_st_table(pro, start_date=d0, end_date=d1) - - n = len(members) - chunks: list[pd.DataFrame] = [] - total_rows = 0 - for i, code in enumerate(members, start=1): - daily = pro.daily(ts_code=code, start_date=d0, end_date=d1) - if daily is None or daily.empty: - if verbose and (i % 100 == 0 or i == n): - print(f" [{i}/{n}] 逐股拉取,累计 {total_rows} 行") - if sleep_sec > 0: - time.sleep(sleep_sec) - continue - - adj = pro.adj_factor(ts_code=code, start_date=d0, end_date=d1) - basic_df = 
pro.daily_basic( - ts_code=code, - start_date=d0, - end_date=d1, - fields=DAILY_BASIC_FIELDS, - ) - hq = _merge_raw_daily(daily, adj, basic_df, st_table, fill_adj=False) - if not hq.empty: - hq = hq.sort_index() - # 单股内 adjfactor 前后向填充;整只缺失才退回 1.0(避免伪造尺度断层) - hq["adjfactor"] = hq["adjfactor"].ffill().bfill().fillna(1.0) - chunks.append(hq) - total_rows += len(hq) - - if verbose and (i % 100 == 0 or i == n): - print(f" [{i}/{n}] 逐股拉取,累计 {total_rows} 行") - if sleep_sec > 0: - time.sleep(sleep_sec) - - if not chunks: - raise ValueError("股票池拉取未获得任何行情数据") - - return pd.concat(chunks).sort_index() - - -def fetch_hq_for_index( - start: str, - end: str, - index: str = "zz1000", - *, - batch_size: int = 40, - sleep_sec: float = 0.35, - verbose: bool = True, - refresh_members: bool = False, -) -> pd.DataFrame: - """拉取指数成分并集在 [start, end] 的全部股票行情(成分股按月快照缓存到 artifacts/index/)。""" - pro = get_pro() - members = resolve_index_members_cached( - index, - start, - end, - pro=pro, - refresh=refresh_members, - sleep_sec=sleep_sec, - verbose=verbose, - ) - if verbose: - print(f"指数 {index}: {len(members)} 只股票({start} ~ {end} 成分并集)") - return fetch_hq_for_pool( - start, - end, - members, - batch_size=batch_size, - sleep_sec=sleep_sec, - verbose=verbose, - ) - - -def fetch_hq_from_tushare( - start: str, - end: str, - *, - sleep_sec: float = 0.35, - verbose: bool = True, -) -> pd.DataFrame: - """从 Tushare 按交易日拉取全市场行情(原始 hq 格式)。""" - pro = get_pro() - trade_dates = _fetch_trade_dates(pro, start, end) - if not trade_dates: - raise ValueError(f"区间 {start} ~ {end} 无交易日") - - chunks: list[pd.DataFrame] = [] - for i, td in enumerate(trade_dates): - if verbose: - print(f" [{i + 1}/{len(trade_dates)}] {td}") - day_df = _fetch_one_day(pro, td) - if not day_df.empty: - chunks.append(day_df) - if sleep_sec > 0: - time.sleep(sleep_sec) - - if not chunks: - raise ValueError("未拉取到任何行情数据") - - return pd.concat(chunks).sort_index() - - -# 
--------------------------------------------------------------------------- -# hq 缓存 IO -# --------------------------------------------------------------------------- -def save_market_hq(hq: pd.DataFrame, path: Path | str = MARKET_HQ_PATH) -> Path: - """写出 hq 缓存 parquet。""" - out = Path(path) - out.parent.mkdir(parents=True, exist_ok=True) - hq.sort_index().to_parquet(out) - return out - - -def load_market_hq(path: Path | str = MARKET_HQ_PATH) -> pd.DataFrame: - """加载 hq 缓存 parquet;不存在时返回空表。""" - p = Path(path) - if not p.is_file(): - return pd.DataFrame() - hq = pd.read_parquet(p) - if "instrument" not in hq.index.names and "code" in hq.index.names: - hq = hq.rename_axis(index={"code": "instrument"}) - dt = hq.index.get_level_values("datetime") - if not pd.api.types.is_datetime64_any_dtype(dt): - inst = hq.index.get_level_values("instrument") - hq.index = pd.MultiIndex.from_arrays( - [pd.to_datetime(dt), inst], names=["datetime", "instrument"] - ) - return hq.sort_index() - - -def merge_market_hq(old: pd.DataFrame, new: pd.DataFrame) -> pd.DataFrame: - """合并 hq 缓存,同 (datetime, instrument) 以 new 为准。""" - if old is None or old.empty: - return new.sort_index() - if new is None or new.empty: - return old.sort_index() - merged = pd.concat([old, new]) - merged = merged[~merged.index.duplicated(keep="last")] - return merged.sort_index() - - -# --------------------------------------------------------------------------- -# 编排:全量 / 增量 -# --------------------------------------------------------------------------- -def fetch_and_save_market( - start: str, - end: str, - *, - out_path: Path | str = MARKET_HQ_PATH, - universe: str | None = "zz1000", - batch_size: int = 40, - sleep_sec: float = 0.35, - verbose: bool = True, - refresh_members: bool = False, -) -> pd.DataFrame: - """全量/区间拉取行情并写入 hq 缓存(与已有缓存 merge,同键 keep last)。""" - if verbose: - mode = f"指数池 {universe}" if universe else "全市场按日" - print(f"fetch_market: {start} ~ {end} ({mode})") - - if universe: - hq = 
fetch_hq_for_index( - start, - end, - universe, - batch_size=batch_size, - sleep_sec=sleep_sec, - verbose=verbose, - refresh_members=refresh_members, - ) - else: - hq = fetch_hq_from_tushare(start, end, sleep_sec=sleep_sec, verbose=verbose) - - existing = load_market_hq(out_path) - merged = merge_market_hq(existing, hq) - save_market_hq(merged, out_path) - if verbose: - n_inst = merged.index.get_level_values("instrument").nunique() - print(f"已保存 hq 缓存: {out_path} shape={merged.shape} 股票数={n_inst}") - return merged - - -def update_market_cache( - *, - out_path: Path | str = MARKET_HQ_PATH, - universe: str | None = "zz1000", - dates: list[str] | None = None, - sleep_sec: float = 0.35, - batch_size: int = 40, - verbose: bool = True, -) -> tuple[pd.DataFrame, str | None]: - """增量拉取新交易日并追加写入 hq 缓存。 - - - dates=None:检测 [缓存最早日, 最新交易日] 内全部缺失日并批量回填。 - - 返回 (new_hq, backfill_since);无新数据时返回 (空表, None)。 - """ - pro = get_pro() - existing = load_market_hq(out_path) - - bulk_fill = False - gap_ranges: list[tuple[str, str]] | None = None - - if dates is None: - latest = _latest_trade_date(pro) - if not existing.empty: - dates = _panel_missing_trade_dates(pro, existing, latest) - if not dates: - if verbose: - hq_max = existing.index.get_level_values("datetime").max() - print(f"hq 缓存已完整: 末日 {hq_max.date()},最新交易日 {latest},无缺失") - return pd.DataFrame(), None - bulk_fill = True - gap_ranges = _group_contiguous_trade_dates(pro, dates) - if verbose: - print( - f"检测到缺口: 共 {len(dates)} 个交易日,{len(gap_ranges)} 段" - f" ({dates[0]} ~ {dates[-1]}),按股票池批量拉取" - ) - else: - dates = [latest] - - chunks: list[pd.DataFrame] = [] - backfill_since: str | None - - if bulk_fill: - assert gap_ranges is not None - backfill_since = _prev_trade_date(pro, dates[0]) or dates[0] - for start, end in gap_ranges: - if verbose: - print(f"update_market_cache: 批量拉取 {start} ~ {end}") - if universe: - hq = fetch_hq_for_index( - start, end, universe, batch_size=batch_size, sleep_sec=sleep_sec, verbose=verbose - ) - 
else: - hq = fetch_hq_from_tushare(start, end, sleep_sec=sleep_sec, verbose=verbose) - if not hq.empty: - chunks.append(hq) - else: - fetch_dates, backfill_since = _expand_update_dates(pro, dates) - - pool: set[str] | None = None - if universe: - pool = fetch_index_members_for_dates( - pro, universe, fetch_dates, sleep_sec=sleep_sec, verbose=verbose - ) - # 持久化本次成分快照,日期取增量区间末日 - append_snapshot(universe, fetch_dates[-1], pool) - - for d in fetch_dates: - td = _format_yyyymmdd(d) - if verbose: - tag = " (回填上一交易日)" if d == backfill_since and d not in dates else "" - print(f"update_market_cache: 拉取 {td}{tag}") - hq_day = _fetch_one_day(pro, td) - if hq_day.empty: - print(f" 警告: {td} 无数据,跳过") - continue - if pool is not None: - inst = hq_day.index.get_level_values("instrument") - hq_day = hq_day[inst.isin(pool)] - if not hq_day.empty: - chunks.append(hq_day) - if sleep_sec > 0: - time.sleep(sleep_sec) - - if not chunks: - raise ValueError("增量拉取未获得任何数据") - - new_hq = pd.concat(chunks).sort_index() - new_hq = new_hq[~new_hq.index.duplicated(keep="last")] - - merged = merge_market_hq(existing, new_hq) - save_market_hq(merged, out_path) - if verbose: - print(f"已增量更新 hq 缓存: {out_path} +{new_hq.shape[0]} 行 → {merged.shape}") - return new_hq, backfill_since - - -# --------------------------------------------------------------------------- -# adjfactor 修补(联网重拉单股 adj_factor) -# --------------------------------------------------------------------------- -def _fetch_adj_factor_for_instrument(pro, instrument: str, start: str, end: str) -> pd.Series: - """拉取单股 adj_factor,返回 datetime 索引 Series。""" - d0 = _format_yyyymmdd(start) - d1 = _format_yyyymmdd(end) - adj = pro.adj_factor(ts_code=instrument, start_date=d0, end_date=d1) - if adj is None or adj.empty: - return pd.Series(dtype=float) - adj = adj.copy() - adj["datetime"] = pd.to_datetime(adj["trade_date"]) - return ( - adj.drop_duplicates(subset=["datetime"], keep="last") - .set_index("datetime")["adj_factor"] - .sort_index() - ) 
- - -def repair_panel_adjfactor( - panel: pd.DataFrame, - *, - instruments: list[str] | None = None, - min_real_factor: float = 1.5, - candidate_mode: str = "jump", - sleep_sec: float = 0.35, - verbose: bool = True, - pro=None, -) -> tuple[pd.DataFrame, dict[str, int | float]]: - """对可疑股票按 Tushare 单股重拉 adj_factor,重算 adj/ret/label。 - - candidate_mode: - - ``jump``(默认): 仅尺度断层 - - ``adj_one``: 宽口径(易误报) - """ - from seekalpha.data.panel import ( - _rederive_adj_price_columns, - _rederive_since, - count_suspect_adjfactor_rows, - find_adjfactor_jump_instruments, - find_suspect_adjfactor_instruments, - ) - - if pro is None: - pro = get_pro() - - panel = panel.copy() - if instruments is not None: - targets = instruments - elif candidate_mode == "adj_one": - targets = find_suspect_adjfactor_instruments(panel, min_real_factor=min_real_factor) - else: - targets = find_adjfactor_jump_instruments(panel) - stats: dict[str, int | float] = { - "n_target_instruments": len(targets), - "n_rows_adj_one_before": count_suspect_adjfactor_rows(panel, targets), - "n_cells_updated": 0, - "n_instruments_patched": 0, - "n_rows_adj_one_after": 0, - } - if not targets: - if verbose: - print("无可疑 adjfactor 股票,跳过修补(panel 与 API 可能已一致)") - return panel, stats - - if verbose: - print( - f"修补 adjfactor: {len(targets)} 只股票," - f"adj≈1 行 {stats['n_rows_adj_one_before']} 条" - ) - - updated_cells = 0 - patched_inst = 0 - for i, inst in enumerate(targets, start=1): - if inst not in panel.index.get_level_values("instrument"): - continue - sub = panel.xs(inst, level="instrument") - start = sub.index.min().strftime("%Y-%m-%d") - end = sub.index.max().strftime("%Y-%m-%d") - api_adj = _fetch_adj_factor_for_instrument(pro, inst, start, end) - if api_adj.empty: - if verbose: - print(f" [{i}/{len(targets)}] {inst} API 无 adj_factor,跳过") - if sleep_sec > 0: - time.sleep(sleep_sec) - continue - - common = sub.index.intersection(api_adj.index) - if common.empty: - if verbose: - print(f" [{i}/{len(targets)}] {inst} 与 
panel 无交集,跳过") - if sleep_sec > 0: - time.sleep(sleep_sec) - continue - - old = panel.loc[(common, inst), "adjfactor"].astype(float) - new = api_adj.loc[common].astype(float) - changed = ~np.isclose(old.values, new.values, rtol=0, atol=1e-6, equal_nan=True) - n_changed = int(changed.sum()) - if n_changed: - panel.loc[(common[changed], inst), "adjfactor"] = new.loc[common[changed]].values - updated_cells += n_changed - patched_inst += 1 - - if verbose and (i % 50 == 0 or i == len(targets)): - print(f" [{i}/{len(targets)}] 已处理,累计更新 {updated_cells} 单元格") - if sleep_sec > 0: - time.sleep(sleep_sec) - - panel = _rederive_adj_price_columns(panel) - since = panel.index.get_level_values("datetime").min() - panel = _rederive_since(panel, since) - - stats["n_cells_updated"] = updated_cells - stats["n_instruments_patched"] = patched_inst - stats["n_rows_adj_one_after"] = count_suspect_adjfactor_rows(panel, targets) - if verbose: - print( - f"完成: 更新 {patched_inst} 只股票 / {updated_cells} 个 adjfactor 单元格;" - f"adj≈1 行 {stats['n_rows_adj_one_before']} → {stats['n_rows_adj_one_after']}" - ) - return panel, stats diff --git a/seekalpha/data/panel.py b/seekalpha/data/panel.py deleted file mode 100644 index b5d3d8e0..00000000 --- a/seekalpha/data/panel.py +++ /dev/null @@ -1,482 +0,0 @@ -"""Panel 构建与持久化(离线)。 - -本模块**不联网**:panel 由本地 hq 缓存(`artifacts/market/daily_hq.parquet`) -离线构建。行情拉取见 `seekalpha.data.market_fetch`。 - -- 历史全量:build_panel(读 hq 缓存)→ parquet -- 实盘增量:update_panel_from_hq(新增交易日的 hq → 尾部 merge + 重算) -- 衍生列逻辑与 AlphaAgent-Stock 保持一致 -""" - -from __future__ import annotations - -from pathlib import Path - -import numpy as np -import pandas as pd - -from seekalpha.core.paths import PANEL_PATH -from seekalpha.core.types import OUTPUT_COLUMNS -from seekalpha.data.universe import filter_universe - -DEFAULT_PANEL_PATH = PANEL_PATH - -# label_{N}d_close_to_close:T+1 收盘 → T+(N+1) 收盘 -CLOSE_TO_CLOSE_LABEL_HOLD_DAYS = (1, 10, 20) - - -def close_to_close_label_name(hold_days: int) -> 
str: - return f"label_{hold_days}d_close_to_close" - - -_DERIVED_COLUMNS = ( - "ret", - "label_1d_open_to_open", - *(close_to_close_label_name(n) for n in CLOSE_TO_CLOSE_LABEL_HOLD_DAYS), -) - - -def _coerce_datetime_index(panel: pd.DataFrame) -> pd.DataFrame: - """确保 MultiIndex datetime 层为 DatetimeIndex。""" - if not isinstance(panel.index, pd.MultiIndex): - return panel - if panel.index.names[0] != "datetime": - return panel - - dt = panel.index.get_level_values("datetime") - if not pd.api.types.is_datetime64_any_dtype(dt): - dt = pd.to_datetime(dt) - inst = panel.index.get_level_values("instrument") - panel = panel.copy() - panel.index = pd.MultiIndex.from_arrays([dt, inst], names=["datetime", "instrument"]) - return panel.sort_index() - - -def slice_panel( - panel: pd.DataFrame, - *, - start: str | None = None, - end: str | None = None, -) -> pd.DataFrame: - """按 datetime 闭区间 [start, end] 切片。""" - if start is None and end is None: - return panel - - dt = panel.index.get_level_values("datetime") - mask = pd.Series(True, index=panel.index) - if start is not None: - mask &= dt >= pd.Timestamp(start) - if end is not None: - mask &= dt <= pd.Timestamp(end) - return panel.loc[mask] - - -def _calc_label_1d_open_to_open(adj_open: pd.Series) -> pd.Series: - open_t1 = adj_open.shift(-1) - open_t2 = adj_open.shift(-2) - denom = open_t1.replace(0, np.nan) - return (open_t2 - open_t1) / denom - - -def _calc_label_nd_close_to_close(adj_close: pd.Series, hold_days: int) -> pd.Series: - """T+1 收盘 → T+(hold_days+1) 收盘。例:hold_days=10 即 T+1 close 到 T+11 close。""" - entry = adj_close.shift(-1) - exit_ = adj_close.shift(-(hold_days + 1)) - denom = entry.replace(0, np.nan) - return (exit_ - entry) / denom - - -def _derive_base_columns(df: pd.DataFrame) -> pd.DataFrame: - """从原始行情宽表衍生 adj_*、vwap 等(不含 ret / label)。""" - df = df.copy() - df = df.rename_axis(index={"code": "instrument"}) - - for col in ("open", "high", "low", "close"): - df[f"adj_{col}"] = df[col] * df["adjfactor"] - - 
if "isTrade" in df.columns: - df = df.rename(columns={"isTrade": "is_trade", "notST": "not_st"}) - - vol = df["volume"].replace(0, np.nan) - df["vwap"] = df["amount"] / vol - df["adj_vwap"] = df["vwap"] * df["adjfactor"] - return df - - -def _add_derived_columns(df: pd.DataFrame) -> pd.DataFrame: - """在完整时间序列上计算 ret / label。""" - df = df.copy() - df["ret"] = df.groupby(level="instrument", sort=False)["adj_close"].pct_change(fill_method=None) - - g_close = df.groupby(level="instrument", sort=False)["adj_close"] - for hold_days in CLOSE_TO_CLOSE_LABEL_HOLD_DAYS: - col = close_to_close_label_name(hold_days) - df[col] = g_close.transform(lambda s, d=hold_days: _calc_label_nd_close_to_close(s, d)) - - df["label_1d_open_to_open"] = df.groupby(level="instrument", sort=False)[ - "adj_open" - ].transform(_calc_label_1d_open_to_open) - return df - - -def _finalize_panel(df: pd.DataFrame, *, dtype: str = "float32") -> pd.DataFrame: - # 兼容缺列的旧 hq 缓存 / 合成数据:缺失的输出列置 NaN - for col in OUTPUT_COLUMNS: - if col not in df.columns: - df[col] = np.nan - panel = df[OUTPUT_COLUMNS].copy() - numeric_cols = [c for c in OUTPUT_COLUMNS if c not in ("is_trade", "not_st")] - for col in numeric_cols: - panel[col] = panel[col].astype(dtype) - - panel = panel.sort_index() - panel = _coerce_datetime_index(panel) - - assert panel.index.names == ["datetime", "instrument"] - assert not panel.index.duplicated().any() - return panel - - -def _derive_panel_columns(df: pd.DataFrame, *, dtype: str = "float32") -> pd.DataFrame: - """从原始行情宽表衍生 adj_*、ret、vwap、label 列。""" - df = _derive_base_columns(df) - df = _add_derived_columns(df) - return _finalize_panel(df, dtype=dtype) - - -def _panel_base_from_hq( - hq: pd.DataFrame, - *, - universe_mask: bool = True, - dtype: str = "float32", -) -> pd.DataFrame: - """hq → panel 基础列(ret / label 置 NaN,供增量 merge 后统一重算)。""" - df = hq.copy() - if universe_mask: - df = filter_universe(df) - if df.empty: - return df - - df = _derive_base_columns(df) - for col in 
_DERIVED_COLUMNS: - df[col] = np.nan - - return _finalize_panel(df, dtype=dtype) - - -def _ensure_derived_columns(panel: pd.DataFrame, *, dtype: str = "float32") -> pd.DataFrame: - """补齐缺失的 ret / label 列(panel schema 升级时用)。""" - panel = panel.copy() - for col in _DERIVED_COLUMNS: - if col not in panel.columns: - panel[col] = np.nan - panel[col] = panel[col].astype(dtype) - return panel - - -def backfill_panel_derived_columns(panel: pd.DataFrame, *, dtype: str = "float32") -> pd.DataFrame: - """全量重算 ret / 全部 label 列。""" - if panel.empty: - return panel - panel = _ensure_derived_columns(panel, dtype=dtype) - since = panel.index.get_level_values("datetime").min() - return _rederive_since(panel, since, dtype=dtype) - - -def _rederive_since(panel: pd.DataFrame, since: pd.Timestamp, *, dtype: str = "float32") -> pd.DataFrame: - """基于 panel 内 adj 列,从 since 起重算 ret / label(用全历史 groupby,避免前视缺失)。""" - if panel.empty: - return panel - - panel = _ensure_derived_columns(panel, dtype=dtype) - since = pd.Timestamp(since) - dt = panel.index.get_level_values("datetime") - mask = dt >= since - if not mask.any(): - return panel - - full_ret = panel.groupby(level="instrument", sort=False)["adj_close"].pct_change(fill_method=None) - - g_close = panel.groupby(level="instrument", sort=False)["adj_close"] - full_labels_c2c = { - close_to_close_label_name(hold_days): g_close.transform( - lambda s, d=hold_days: _calc_label_nd_close_to_close(s, d) - ) - for hold_days in CLOSE_TO_CLOSE_LABEL_HOLD_DAYS - } - - full_label_o = panel.groupby(level="instrument", sort=False)["adj_open"].transform( - _calc_label_1d_open_to_open - ) - - panel.loc[mask, "ret"] = full_ret.loc[mask].astype(dtype) - for col, series in full_labels_c2c.items(): - panel.loc[mask, col] = series.loc[mask].astype(dtype) - panel.loc[mask, "label_1d_open_to_open"] = full_label_o.loc[mask].astype(dtype) - return panel - - -def build_panel_from_hq( - hq: pd.DataFrame, - *, - start: str | None = None, - end: str | None = None, - 
universe_mask: bool = True, - dtype: str = "float32", -) -> pd.DataFrame: - """从 (datetime, code) 行情宽表构建 panel。""" - df = hq.copy() - if start is not None or end is not None: - dt = pd.to_datetime(df.index.get_level_values(0)) - mask = pd.Series(True, index=df.index) - if start is not None: - mask &= dt >= pd.Timestamp(start) - if end is not None: - mask &= dt <= pd.Timestamp(end) - df = df.loc[mask] - - if universe_mask: - df = filter_universe(df) - - if df.empty: - return df - - df = _derive_base_columns(df) - df = _add_derived_columns(df) - return _finalize_panel(df, dtype=dtype) - - -def _enrich_panel( - panel: pd.DataFrame, - *, - with_fundamentals: bool, - quarterly_path, - disclosure_path, - include_disclosure_features: bool, - with_industry: bool, - industry_path, - refresh_industry: bool, - verbose: bool = True, -) -> pd.DataFrame: - """离线 enrich:从本地缓存并入 funda_* / industry_sw_l1 列。""" - if with_fundamentals: - from seekalpha.core.paths import DISCLOSURE_CALENDAR_PATH, FUNDAMENTAL_QUARTERLY_PATH - from seekalpha.data.fundamental import enrich_panel_fundamentals - - panel = enrich_panel_fundamentals( - panel, - quarterly_path=quarterly_path or FUNDAMENTAL_QUARTERLY_PATH, - disclosure_path=disclosure_path or DISCLOSURE_CALENDAR_PATH, - include_disclosure_features=include_disclosure_features, - ) - - if with_industry: - from seekalpha.data.industry import enrich_panel_industry - - ind_kwargs: dict = {"refresh": refresh_industry, "verbose": verbose} - if industry_path is not None: - ind_kwargs["membership_path"] = industry_path - panel = enrich_panel_industry(panel, **ind_kwargs) - - return panel - - -def build_panel( - *, - start: str | None = None, - end: str | None = None, - out_path: Path | str | None = None, - market_path: Path | str | None = None, - universe_mask: bool = True, - with_fundamentals: bool = False, - quarterly_path: Path | str | None = None, - disclosure_path: Path | str | None = None, - include_disclosure_features: bool = True, - 
with_industry: bool = False, - industry_path: Path | str | None = None, - refresh_industry: bool = False, - verbose: bool = True, -) -> pd.DataFrame: - """从本地 hq 缓存离线构建 panel(不联网)→ 可选写出 parquet。""" - from seekalpha.core.paths import MARKET_HQ_PATH - from seekalpha.data.market_fetch import load_market_hq - - if market_path is None: - market_path = MARKET_HQ_PATH - - hq = load_market_hq(market_path) - if hq.empty: - raise FileNotFoundError( - f"hq 缓存不存在或为空: {market_path};请先运行 " - "scripts/fetch_market.py 拉取行情" - ) - if verbose: - print(f"build_panel(offline): hq={hq.shape} from {market_path}") - - panel = build_panel_from_hq(hq, start=start, end=end, universe_mask=universe_mask) - panel = _enrich_panel( - panel, - with_fundamentals=with_fundamentals, - quarterly_path=quarterly_path, - disclosure_path=disclosure_path, - include_disclosure_features=include_disclosure_features, - with_industry=with_industry, - industry_path=industry_path, - refresh_industry=refresh_industry, - verbose=verbose, - ) - - if out_path is not None: - save_panel(panel, out_path) - if verbose: - n_inst = panel.index.get_level_values("instrument").nunique() - print(f"已保存: {out_path} shape={panel.shape} 股票数={n_inst}") - - return panel - - -def update_panel_from_hq( - new_hq: pd.DataFrame, - backfill_since: str | pd.Timestamp, - *, - out_path: Path | str = DEFAULT_PANEL_PATH, - universe_mask: bool = True, - with_fundamentals: bool = False, - quarterly_path: Path | str | None = None, - disclosure_path: Path | str | None = None, - include_disclosure_features: bool = True, - with_industry: bool = False, - industry_path: Path | str | None = None, - refresh_industry: bool = False, - verbose: bool = True, -) -> pd.DataFrame: - """离线增量:新增交易日的 hq → panel 尾部 merge + 从 backfill_since 重算 ret/label。 - - new_hq 为 `update_market_cache` 返回的增量 hq;backfill_since 为缺口首日的上一交易日。 - """ - out_path = Path(out_path) - base = _panel_base_from_hq(new_hq, universe_mask=universe_mask) - if base.empty: - raise ValueError("增量 
hq 过滤后为空,无可更新数据") - - if out_path.is_file(): - old = load_panel(out_path) - merged = pd.concat([old, base]) - merged = merged[~merged.index.duplicated(keep="last")].sort_index() - else: - merged = base - - merged = _rederive_since(merged, pd.Timestamp(backfill_since)) - merged = _enrich_panel( - merged, - with_fundamentals=with_fundamentals, - quarterly_path=quarterly_path, - disclosure_path=disclosure_path, - include_disclosure_features=include_disclosure_features, - with_industry=with_industry, - industry_path=industry_path, - refresh_industry=refresh_industry, - verbose=verbose, - ) - - save_panel(merged, out_path) - if verbose: - print(f"已增量更新 panel: {out_path} shape={merged.shape}(自 {backfill_since} 起重算)") - return merged - - -def load_panel(path: Path | str = DEFAULT_PANEL_PATH) -> pd.DataFrame: - """加载 panel parquet。""" - p = Path(path) - if not p.is_file(): - raise FileNotFoundError(f"panel 不存在: {p}") - panel = pd.read_parquet(p) - if "instrument" not in panel.index.names and "code" in panel.index.names: - panel = panel.rename_axis(index={"code": "instrument"}) - return _coerce_datetime_index(panel) - - -def save_panel(panel: pd.DataFrame, path: Path | str) -> Path: - """写出 panel parquet。""" - out = Path(path) - out.parent.mkdir(parents=True, exist_ok=True) - panel.to_parquet(out) - return out - - -# --------------------------------------------------------------------------- -# adjfactor 诊断(纯函数,不联网;修补见 market_fetch.repair_panel_adjfactor) -# --------------------------------------------------------------------------- -def find_suspect_adjfactor_instruments( - panel: pd.DataFrame, - *, - min_real_factor: float = 1.5, -) -> list[str]: - """宽口径候选:曾有 adjfactor>min_real_factor,且仍存在 adjfactor≈1 的行。 - - 新股上市初期 adjfactor=1 也符合此条件,**误报多**;修补请用 find_adjfactor_jump_instruments。 - """ - if panel.empty: - return [] - - inst_max = panel.groupby(level="instrument")["adjfactor"].max() - candidates = inst_max[inst_max > min_real_factor].index - suspects: list[str] = [] - for 
inst in candidates: - s = panel.xs(inst, level="instrument")["adjfactor"] - if (s <= 1.0 + 1e-6).any(): - suspects.append(str(inst)) - return sorted(suspects) - - -def find_adjfactor_jump_instruments( - panel: pd.DataFrame, - *, - low: float = 1.01, - high: float = 1.5, - max_close_move: float = 0.25, -) -> list[str]: - """窄口径候选:相邻交易日 adjfactor 从≈1 跳到≥high(或反向),且 raw close 涨跌幅不大。 - - 对应 merge 失败导致的尺度断层(如 600601 的 1.0 → 5764);正常上市/除权不会命中。 - """ - if panel.empty: - return [] - - suspects: list[str] = [] - for inst in panel.index.get_level_values("instrument").unique(): - s = panel.xs(inst, level="instrument").sort_index() - adj = s["adjfactor"].to_numpy(dtype=float, copy=False) - close = s["close"].to_numpy(dtype=float, copy=False) - if len(adj) < 2: - continue - for i in range(len(adj) - 1): - if close[i] <= 0: - continue - if abs(close[i + 1] / close[i] - 1.0) > max_close_move: - continue - if adj[i] <= low and adj[i + 1] >= high: - suspects.append(str(inst)) - break - if adj[i] >= high and adj[i + 1] <= low: - suspects.append(str(inst)) - break - return sorted(set(suspects)) - - -def count_suspect_adjfactor_rows(panel: pd.DataFrame, instruments: list[str]) -> int: - """指定股票列表中 adjfactor≈1 的行数。""" - if not instruments: - return 0 - inst_idx = panel.index.get_level_values("instrument") - mask = inst_idx.isin(instruments) & (panel["adjfactor"] <= 1.0 + 1e-6) - return int(mask.sum()) - - -def _rederive_adj_price_columns(panel: pd.DataFrame, *, dtype: str = "float32") -> pd.DataFrame: - """按 adjfactor 重算 adj_* / adj_vwap。""" - panel = panel.copy() - for col in ("open", "high", "low", "close"): - panel[f"adj_{col}"] = (panel[col] * panel["adjfactor"]).astype(dtype) - panel["adj_vwap"] = (panel["vwap"] * panel["adjfactor"]).astype(dtype) - return panel diff --git a/seekalpha/data/tushare_client.py b/seekalpha/data/tushare_client.py deleted file mode 100644 index 696f7d77..00000000 --- a/seekalpha/data/tushare_client.py +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env 
python3 -""" -Tushare 客户端 -- 从项目根目录 .env 加载 TUSHARE_TOKEN -- 返回 pro_api 对象供 data 层调用 -- 网络超时 / 限流等可恢复错误自动重试(指数退避) -""" - -from __future__ import annotations - -import os -import random -import time -from pathlib import Path -from typing import Any, Callable, TypeVar - -import requests -import tushare as ts -from dotenv import load_dotenv - -ROOT = Path(__file__).resolve().parents[2] -ENV_FILE = ROOT / ".env" - -T = TypeVar("T") - -RETRYABLE_NETWORK_ERRORS = ( - requests.exceptions.ConnectionError, - requests.exceptions.Timeout, - requests.exceptions.ChunkedEncodingError, - TimeoutError, -) - -# Tushare 限流 / 临时性 API 报错关键词 -RETRYABLE_API_MARKERS = ( - "最多访问", - "请稍后再试", - "频繁", - "限流", - "timeout", - "timed out", - "连接", - "繁忙", -) - -_config: dict[str, float | int] = { - "max_retries": int(os.getenv("TUSHARE_MAX_RETRIES", "5")), - "timeout": int(os.getenv("TUSHARE_TIMEOUT", "60")), - "retry_base_delay": float(os.getenv("TUSHARE_RETRY_BASE_DELAY", "2.0")), - "retry_max_delay": float(os.getenv("TUSHARE_RETRY_MAX_DELAY", "120.0")), -} - - -def configure( - *, - max_retries: int | None = None, - timeout: int | None = None, - retry_base_delay: float | None = None, - retry_max_delay: float | None = None, -) -> None: - """覆盖 Tushare 重试 / 超时配置(供 CLI 或脚本调用)。""" - if max_retries is not None: - _config["max_retries"] = max_retries - if timeout is not None: - _config["timeout"] = timeout - if retry_base_delay is not None: - _config["retry_base_delay"] = retry_base_delay - if retry_max_delay is not None: - _config["retry_max_delay"] = retry_max_delay - - -def _read_token() -> str: - """从 .env 或环境变量读取 token。""" - load_dotenv(ENV_FILE) - token = os.getenv("TUSHARE_TOKEN", "") - return token.strip().strip('"').strip("'") - - -def _is_retryable(exc: Exception) -> bool: - if isinstance(exc, RETRYABLE_NETWORK_ERRORS): - return True - msg = str(exc) - return any(marker in msg for marker in RETRYABLE_API_MARKERS) - - -def _retry_delay(attempt: int) -> float: - base = 
float(_config["retry_base_delay"]) - cap = float(_config["retry_max_delay"]) - delay = min(cap, base * (2**attempt)) - # 轻微抖动,避免并发任务同一时刻重试 - return delay + random.uniform(0, min(1.0, delay * 0.1)) - - -def call_with_retry( - func: Callable[..., T], - *args: Any, - label: str | None = None, - **kwargs: Any, -) -> T: - """对单次 Tushare 调用做指数退避重试。""" - max_retries = int(_config["max_retries"]) - name = label or getattr(func, "__name__", "tushare") - last_exc: Exception | None = None - - for attempt in range(max_retries + 1): - try: - return func(*args, **kwargs) - except Exception as exc: - last_exc = exc - if attempt >= max_retries or not _is_retryable(exc): - raise - delay = _retry_delay(attempt) - print( - f" [retry {attempt + 1}/{max_retries}] {name}: {exc}," - f"{delay:.1f}s 后重试", - flush=True, - ) - time.sleep(delay) - - assert last_exc is not None - raise last_exc - - -class _RetryingPro: - """包装 Tushare pro_api,为各接口调用注入重试。""" - - def __init__(self, pro: Any) -> None: - self._pro = pro - - def __getattr__(self, name: str) -> Any: - attr = getattr(self._pro, name) - if not callable(attr): - return attr - - def wrapper(*args: Any, **kwargs: Any) -> Any: - return call_with_retry(attr, *args, label=name, **kwargs) - - return wrapper - - -def get_pro(): - """初始化并返回 Tushare pro_api(带自动重试)。""" - token = _read_token() - if not token: - raise ValueError( - f"未找到 TUSHARE_TOKEN。请在 {ENV_FILE} 中配置," - "格式: TUSHARE_TOKEN=your_token" - ) - timeout = int(_config["timeout"]) - pro = ts.pro_api(token, timeout=timeout) - max_retries = int(_config["max_retries"]) - if max_retries <= 0: - return pro - return _RetryingPro(pro) diff --git a/seekalpha/data/universe.py b/seekalpha/data/universe.py deleted file mode 100644 index 8568f035..00000000 --- a/seekalpha/data/universe.py +++ /dev/null @@ -1,602 +0,0 @@ -"""Universe:指数成分、ST/停牌过滤。""" - - - -from __future__ import annotations - - - -import time - -import warnings - - - -import numpy as np - -import pandas as pd - - - -# 常用指数 Tushare 
代码 - -INDEX_CODES: dict[str, str] = { - - "zz1000": "000852.SH", # 中证1000 - - "zz500": "000905.SH", - - "hs300": "000300.SH", - -} - - - - - -def resolve_index_code(name: str) -> str: - - """解析指数别名或原始 ts_code。""" - - key = name.strip().lower() - - if key in INDEX_CODES: - - return INDEX_CODES[key] - - if name.endswith(".SH") or name.endswith(".SZ"): - - return name - - raise ValueError(f"未知指数: {name},可选: {list(INDEX_CODES)}") - - - - - -def _to_yyyymmdd(d: str) -> str: - - return d.replace("-", "")[:8] - - - - - -def _members_from_index_member( - - df: pd.DataFrame, - - start: str, - - end: str, - -) -> list[str]: - - """ - - 从 index_member 结果筛选与 [start, end] 有交集的成分股。 - - 字段: con_code, in_date, out_date(out_date 空表示仍在成分内) - - """ - - start_s = _to_yyyymmdd(start) - - end_s = _to_yyyymmdd(end) - - out: set[str] = set() - - - - for _, row in df.iterrows(): - - code = row.get("con_code") - - if not code or pd.isna(code): - - continue - - in_d = str(row.get("in_date", "")).replace("-", "")[:8] - - if not in_d or in_d == "nan": - - continue - - - - raw_out = row.get("out_date") - - if raw_out is None or (isinstance(raw_out, float) and pd.isna(raw_out)): - - out_d = "99991231" - - else: - - out_d = str(raw_out).replace("-", "")[:8] - - if not out_d or out_d == "nan": - - out_d = "99991231" - - - - # 区间有交集: in_date <= end 且 out_date >= start - - if in_d <= end_s and out_d >= start_s: - - out.add(str(code)) - - - - return sorted(out) - - - - - -def _fetch_members_index_member( - - pro, - - index_code: str, - - start: str, - - end: str, - -) -> list[str]: - - """pro.index_member:含 in_date/out_date 的全历史成分。""" - - df = pro.index_member(index_code=index_code, is_new="") - - if df is None or df.empty: - - return [] - - return _members_from_index_member(df, start, end) - - - - - -def _fetch_members_index_weight_range( - pro, - index_code: str, - start: str, - end: str, -) -> list[str]: - """pro.index_weight 区间查询,返回 con_code 并集。""" - d0 = _to_yyyymmdd(start) - d1 = 
_to_yyyymmdd(end) - df = pro.index_weight(index_code=index_code, start_date=d0, end_date=d1) - if df is None or df.empty: - return [] - return sorted(df["con_code"].dropna().astype(str).unique()) - - -def _fetch_members_index_weight_at( - pro, - index_code: str, - trade_date: str, - *, - lookback_days: int = 60, - sleep_sec: float = 0, -) -> list[str]: - """ - 取 trade_date 当日 index_weight;若无数据则向前 lookback_days 内取最近一日快照。 - """ - td = _to_yyyymmdd(trade_date) - df = pro.index_weight(index_code=index_code, start_date=td, end_date=td) - if df is not None and not df.empty: - return sorted(df["con_code"].dropna().astype(str).unique()) - - start = (pd.Timestamp(td) - pd.Timedelta(days=lookback_days)).strftime("%Y%m%d") - df = pro.index_weight(index_code=index_code, start_date=start, end_date=td) - if sleep_sec > 0: - time.sleep(sleep_sec) - if df is None or df.empty: - return [] - - latest = str(df["trade_date"].max()) - sub = df[df["trade_date"] == latest] - return sorted(sub["con_code"].dropna().astype(str).unique()) - - -def _fetch_members_index_weight_monthly( - - pro, - - index_code: str, - - start: str, - - end: str, - - *, - - sleep_sec: float = 0.35, - - verbose: bool = False, - -) -> list[str]: - - """ - - pro.index_weight 按月快照循环(宽区间查询有 ~7000 行上限,会丢历史)。 - - """ - - start_ts = pd.Timestamp(start) - - end_ts = pd.Timestamp(end) - - month_starts = pd.date_range( - - start_ts.replace(day=1), - - end_ts.replace(day=1), - - freq="MS", - - ) - - - - seen: set[str] = set() - - for i, m in enumerate(month_starts): - - snap = min(m + pd.offsets.MonthEnd(0), end_ts) - - if snap < start_ts: - - continue - - d = snap.strftime("%Y%m%d") - - df = pro.index_weight(index_code=index_code, start_date=d, end_date=d) - - if df is not None and not df.empty: - - seen |= set(df["con_code"].dropna().astype(str)) - - if verbose and (i + 1) % 12 == 0: - - print(f" index_weight 进度 {i + 1}/{len(month_starts)} 月, 累计 {len(seen)} 只") - - if sleep_sec > 0: - - time.sleep(sleep_sec) - - - - 
return sorted(seen) - - - - - -def fetch_index_members( - - pro, - - index: str, - - start: str, - - end: str, - - *, - - sleep_sec: float = 0.35, - - verbose: bool = True, - -) -> list[str]: - - """ - - 获取 [start, end] 期间曾出现在指数中的全部股票(并集)。 - - - - 优先 index_member(纳入/剔除日期,适合长历史); - - 若无数据则回退 index_weight 按月循环。 - - """ - - index_code = resolve_index_code(index) - - - - members = _fetch_members_index_member(pro, index_code, start, end) - - source = "index_member" - - if not members: - - if verbose: - - print(f" index_member 无数据,回退 index_weight...") - - span = (pd.Timestamp(end) - pd.Timestamp(start)).days - - if span <= 62: - - members = _fetch_members_index_weight_range(pro, index_code, start, end) - - source = "index_weight(range)" - - if not members: - - members = _fetch_members_index_weight_at( - - pro, index_code, end, lookback_days=90, sleep_sec=sleep_sec - - ) - - source = "index_weight(latest)" - - if not members: - - members = _fetch_members_index_weight_monthly( - - pro, - - index_code, - - start, - - end, - - sleep_sec=sleep_sec, - - verbose=verbose, - - ) - - source = "index_weight(monthly)" - - - - if not members: - - raise ValueError( - - f"无法获取指数成分: {index_code} {start} ~ {end}," - - "请检查 Tushare 积分(index_member / index_weight)" - - ) - - - - if verbose: - - print(f" 成分来源: {source}, 共 {len(members)} 只") - - return members - - - - - -def fetch_index_members_for_dates( - - pro, - - index: str, - - dates: list[str], - - *, - - lookback_days: int = 60, - - sleep_sec: float = 0.35, - - verbose: bool = True, - -) -> set[str]: - - """ - - 增量更新用:对多个交易日分别取 index_weight 快照并集。 - - 单日无数据时向前 lookback_days 内取最近可用成分。 - - """ - - index_code = resolve_index_code(index) - - pool: set[str] = set() - - for d in sorted({_to_yyyymmdd(x) for x in dates}): - - iso = f"{d[:4]}-{d[4:6]}-{d[6:8]}" - - members = _fetch_members_index_weight_at( - - pro, - - index_code, - - iso, - - lookback_days=lookback_days, - - sleep_sec=sleep_sec, - - ) - - if members: - - pool |= set(members) 
- - if verbose: - - print(f" {iso} 成分 {len(members)} 只") - - elif verbose: - - print(f" 警告: {iso} index_weight 无成分数据") - - if sleep_sec > 0: - - time.sleep(sleep_sec) - - - - if not pool: - - raise ValueError( - - f"无法获取指数成分: {index_code} dates={dates}," - - "请检查 Tushare 积分(index_weight)或指定 --dates 为已有数据的交易日" - - ) - - if verbose: - - print(f" 成分来源: index_weight(按日), 并集 {len(pool)} 只") - - return pool - - - - - -def fetch_st_table( - - pro, - - *, - - trade_date: str | None = None, - - start_date: str | None = None, - - end_date: str | None = None, - -) -> pd.DataFrame: - - """ - - 拉取 Tushare stock_st,返回 ts_code / trade_date / is_st(=1) 表。 - - 无 ST 记录时返回空表(列齐全)。 - - """ - - kwargs: dict[str, str] = {} - - if trade_date is not None: - - kwargs["trade_date"] = _to_yyyymmdd(trade_date) - - if start_date is not None: - - kwargs["start_date"] = _to_yyyymmdd(start_date) - - if end_date is not None: - - kwargs["end_date"] = _to_yyyymmdd(end_date) - - - - empty = pd.DataFrame(columns=["ts_code", "trade_date", "is_st"]) - - if not kwargs: - - return empty - - - - try: - - df = pro.stock_st(fields="ts_code,trade_date", **kwargs) - - except Exception as exc: - - warnings.warn(f"stock_st 拉取失败: {exc},当日 is_st 按 0 处理", stacklevel=2) - - return empty - - - - if df is None or df.empty: - - return empty - - - - out = df[["ts_code", "trade_date"]].drop_duplicates().copy() - - out["is_st"] = np.int8(1) - - return out - - - - - -def apply_is_st(df: pd.DataFrame, st_table: pd.DataFrame) -> pd.DataFrame: - - """按日 stock_st 结果写入 is_st / not_st。""" - - out = df.copy() - - if st_table is None or st_table.empty: - - out["is_st"] = np.int8(0) - - else: - - st = st_table[["ts_code", "trade_date", "is_st"]].drop_duplicates() - - out = out.merge(st, on=["ts_code", "trade_date"], how="left") - - out["is_st"] = out["is_st"].fillna(0).astype(np.int8) - - out["not_st"] = (1 - out["is_st"]).astype(np.int8) - - return out - - - - - -def mark_not_st(names: pd.Series) -> pd.Series: - - """根据股票名称标记 
not_st(1=非ST,0=ST)。仅作兼容/测试,生产请用 apply_is_st。""" - - is_st = names.str.contains(r"ST", case=False, na=False) - - return (~is_st).astype("int8") - - - - - -def filter_universe(df: pd.DataFrame, *, universe_mask: bool = True) -> pd.DataFrame: - - """过滤可交易、非 ST 样本(优先 is_st 日度标记)。""" - - if not universe_mask: - - return df - - if "is_st" in df.columns: - - st_ok = df["is_st"] == 0 - - else: - - st_ok = df["not_st"] == 1 - - return df[(df["is_trade"] == 1) & st_ok] - - diff --git a/seekalpha/dsl/__init__.py b/seekalpha/dsl/__init__.py deleted file mode 100644 index 2fd62a98..00000000 --- a/seekalpha/dsl/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -"""因子 DSL:解析多行表达式并在面板数据上求值。""" - -from seekalpha.dsl.core.errors import MultiLineFactorEvalError -from seekalpha.dsl.eval import ( - compile_multi_line_factor, - eval_factor, - eval_multi_line_factor, -) - -__all__ = [ - "MultiLineFactorEvalError", - "compile_multi_line_factor", - "eval_factor", - "eval_multi_line_factor", -] diff --git a/seekalpha/dsl/catalog.py b/seekalpha/dsl/catalog.py deleted file mode 100644 index ebd847d6..00000000 --- a/seekalpha/dsl/catalog.py +++ /dev/null @@ -1,29 +0,0 @@ -"""DSL 算子清单:供 eval、挖掘 prompt 等模块共享。""" - -from __future__ import annotations - -import inspect - -from seekalpha.dsl.registry import build_operator_namespace - - -def list_operator_names() -> list[str]: - return sorted(build_operator_namespace()) - - -def operator_catalog_markdown() -> str: - ns = build_operator_namespace() - lines: list[str] = [] - for name in sorted(ns): - fn = ns[name] - try: - sig = str(inspect.signature(fn)) - except (TypeError, ValueError): - sig = "(...)" - doc = (inspect.getdoc(fn) or "").strip().splitlines() - summary = doc[0].strip() if doc else "" - line = f"- `{name}{sig}`" - if summary: - line += f" — {summary}" - lines.append(line) - return "\n".join(lines) diff --git a/seekalpha/dsl/core/.gitignore b/seekalpha/dsl/core/.gitignore deleted file mode 100644 index fd4ab837..00000000 --- 
a/seekalpha/dsl/core/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -# C++ extension build artifacts -*.so -*.pyd -build/ -dist/ -*.egg-info/ -MANIFEST -__pycache__/ -*.py[cod] diff --git a/seekalpha/dsl/core/README.md b/seekalpha/dsl/core/README.md deleted file mode 100644 index 3999d3c1..00000000 --- a/seekalpha/dsl/core/README.md +++ /dev/null @@ -1,203 +0,0 @@ -# AlphaAgent DSL Core - -股票因子表达式 DSL 核心:负责把文本表达式编译成可执行 Python 代码,并在 `(datetime, instrument)` 面板上求值。提供 Numba / 纯 Python 加速后端。 - -## 职责范围 - -- **表达式解析**:把多行 DSL(如 `x = DELTA($close,1); TS_MEAN(x, 10)`)编译成 Python 代码。 -- **时序算子**:`TS_*`、`DELTA`、`SLOPE` 等,按 **instrument** 分组计算。 -- **截面算子**:`RANK`、`CS_ZSCORE`、`CS_DEMEAN`、`CS_WINSORIZE`、`CS_BUCKET`、`CS_NEUTRALIZE`,按 **datetime** 截面计算。 -- **动态窗口**:按品种分组的时间序列滚动(dynamic window),支持变长窗口。 -- **混频支持**:主频日 panel + 辅周期(`@1d` / `@1w`),自动广播对齐。 - -## 设计:三层后端 - -| 后端 | 位置 | 触发条件 | 特点 | -|---|---|---|---| -| C++ | `fam_accel.cpp` + `setup.py` | `FUTURE_ALPHA_MINER_ACCEL_BACKEND=cxx` 或自动检测到 `aqra.dsl._shared.dsl_core._fam_accel` | OpenMP 并行,`double` 内部计算,`float32` 输入输出 | -| Numba | `accel.py` / `dyn_window.py` 中 `@njit` 函数 | C++ 未安装或强制 `numba` | JIT 编译,纯 `float32` | -| 纯 Python | 同上函数的回退分支 | 无 Numba 无 C++ | 解释执行,兼容性好 | - -默认自动选择:C++ > Numba > Python。当前 `aqra/dsl/accel.py` 会尝试导入本目录下的 `_fam_accel` 扩展并注入后端。 - -## 精度策略 - -- 算子内部默认使用 `float32`(中间数组、输出数组)。 -- C++ 扩展通过 `pybind11::forcecast` 接受 `float32` 输入,内部按 `double` 计算,输出再 cast 回 `float32`。 -- 行情 / label 加载后自动 downcast 数值列到 `float32`(见 `aqra/api/data_loader.py::_downcast_floats`)。 -- 因此 **Python/Numba 路径与 C++ 路径的 float32 输出在 f32 精度下一致**(详见一致性测试)。 - -## 文件与代码位置 - -### 算子层(`operators.py`) - -面向 DSL 的算子入口,例如 `TS_MEAN`、`DELTA`、`EMA`、`WMA`、`TS_CORR`、`CHIP` 等。 - -| 功能 | 入口 | 位置 | -|---|---|---| -| 一元滚动算子(TS_MEAN/STD/SUM/MIN/MAX/MEDIAN/SKEW/KURT/PROD) | `TS_*` 系列 | `operators.py` | -| 时序秩(窗口内) | `TS_RANK` | `operators.py` | -| 截面秩 / 标准化 | `RANK`、`CS_ZSCORE`、`CS_DEMEAN`、`CS_WINSORIZE`、`CS_BUCKET`、`CS_NEUTRALIZE` | `operators.py` | -| 滞后/差分/涨跌幅 
| `DELTA`、`TS_SHIFT`、`TS_PCTCHANGE` | `operators.py` | -| EMA / WMA | `EMA`、`WMA` | `operators.py:852` 起 | -| 相关/协方差 | `TS_CORR`、`TS_COV` | `operators.py` | -| 筹码分布 | `CHIP_*` 系列(日频 uniform/cyq/tri) | `operators.py` / `chip_daily.py` | -| 局部极值 | `TS_ARGMAX`、`TS_ARGMIN`、`TS_LOCAL_PEAK` 等 | `operators.py` | -| 自变量引用解析 | `dollar_ref_to_pyname` | `parser.py` | - -### 加速后端(`accel.py`) - -底层向量化算子,按后端自动分发。 - -| 功能 | 入口 | 位置 | -|---|---|---| -| 固定窗滚动聚合 | `roll_fixed` | `accel.py:129` | -| EMA | `ema` | `accel.py:385` | -| WMA | `wma` | `accel.py:408` | -| 滞后/差分/涨跌幅 | `shift_fixed` / `delta` / `pctchange` | `accel.py:358` / `454` / `490` | -| 局部极值位置/值 | `arg_local_extreme` / `local_extreme_value` | `accel.py:564` / `594` | -| 滚动协方差/相关系数 | `roll_cov_fixed` / `roll_corr_fixed` | `accel.py:913` / `928` | -| 滚动分位数 | `roll_quantile_fixed` | `accel.py:986` | -| 互信息 | `roll_mutual_info_lag_fixed` | `accel.py:1616` | -| 效率比率 | `roll_efficiency_ratio_fixed` | `accel.py` | -| 排列熵 | `roll_permutation_entropy_fixed` | `accel.py:2512` | -| 筹码指标 | `roll_chip_metric_fixed` / `chip_daily.py` | `accel.py` / `chip_daily.py` | -| C++ 后端可用性探测 | `accel_available` | `accel.py:72` | - -### 动态窗口(`dyn_window.py`) - -按品种(instrument)分组做时间序列运算,再写回原始索引。 - -| 功能 | 入口 | 位置 | -|---|---|---| -| 通用动态窗口聚合 | `roll_dynamic` | `dyn_window.py` | -| 动态滞后 | `delay_dynamic` | `dyn_window.py` | -| 动态 ARG 极值 | `arg_extreme_dynamic` | `dyn_window.py:451` | - -### 其他核心文件 - -| 文件 | 作用 | -|---|---| -| `parser.py` | DSL 文本 → Python 代码;`$col` / `$col@freq` 解析 | -| `ops_kit.py` | 面板分组工具:`gb_instrument`、`gb_datetime`、`per_*` 包装 | -| `resample.py` | 辅周期面板构建、主频对齐广播 | -| `intervals.py` | 周期归一化(`1m` / `5m` / `1h` / `1d` 等) | -| `errors.py` | 结构化异常 `MultiLineFactorEvalError` | -| `fam_accel.cpp` | C++ 加速核源码,与 `DSL_CORE` 子仓库一起提交 | - -## 目录结构 - -```text -aqra/dsl/_shared/dsl_core/ - __init__.py - operators.py # DSL 算子入口 - accel.py # 加速后端分发 + Numba 实现 - dyn_window.py # 动态窗口(按 instrument 分组) - parser.py # 表达式解析 - ops_kit.py # 小工具 - 
resample.py # 混频对齐 - chip_daily.py # 日频筹码分布内核(uniform/cyq/tri) - intervals.py # 周期归一化 - errors.py # 异常类型 - fam_accel.cpp # C++ 加速核源码 - .gitignore # 忽略 *.so / build / -``` - -## 快速开始 - -```python -import numpy as np -import pandas as pd -from aqra.dsl.evaluator import eval_multi_line_factor - -idx = pd.MultiIndex.from_product( - [pd.date_range("2024-01-01", periods=100, freq="min"), ["A", "B"]], - names=["datetime", "instrument"], -) -panel = pd.DataFrame( - { - "open": np.random.rand(len(idx)).astype(np.float32), - "close": np.random.rand(len(idx)).astype(np.float32), - }, - index=idx, -) - -expr = """ -x = DELTA($close, 1) -TS_MEAN(x, 10) -""" -result = eval_multi_line_factor(expr, panel) -``` - -切换后端: - -```python -import os -os.environ["FUTURE_ALPHA_MINER_ACCEL_BACKEND"] = "cxx" # 或 "numba" / "python" -# 然后重新导入 aqra.dsl.accel -``` - -## 编译 C++ 扩展 - -在项目根目录执行: - -```bash -uv run python setup.py build_ext --inplace -``` - -编译产物会生成在: - -```text -aqra/dsl/_shared/dsl_core/_fam_accel.cpython-311-x86_64-linux-gnu.so -``` - -跳过 C++(纯 Python/Numba): - -```bash -AQRA_SKIP_CXX=1 uv run python setup.py build_ext --inplace -``` - -## 一致性测试 - -验证 C++ 与 Numba 路径在 `float32` 输入下输出一致: - -```bash -uv run python scripts/test_dsl_core_f32_consistency.py -``` - -输出包括: - -- 30+ 个 `accel` 直接函数的最大绝对误差、相对最大误差、Pearson 相关。 -- 20 个常见 DSL 表达式的双后端对比。 - -结果会保存到: - -```text -data/test_artifacts/dsl_core_f32_consistency.csv -``` - -## 日频筹码算子(`CHIP_*`) - -默认 **CYQ 换手衰减**(`method='cyq'`、`nbins=64`),标准 6 参: - -```text -CHIP_PEAK_LOC($adj_close, $adj_low, $adj_high, $volume, window, $float_cap) -``` - -| 位置 | 含义 | -|------|------| -| 1–3 | `$adj_close` / `$adj_low` / `$adj_high` | -| 4 | `$volume` | -| 5 | 窗口(交易日,推荐 20~120) | -| 6 | `$float_cap`(CYQ 换手率分母) | -| 7 | 可选 `nbins`(默认 64) | -| 8+ | 可选 `method`(仅 `'tri'`/`'uniform'`);`'tri'` 时第 6 参改传 `$vwap` | - -内核见 `chip_daily.py`。 - -## 当前限制 - -- C++ 核内部仍是 `double` 计算,只是把输入输出 cast 成 `float32`;内存占用没有真正减半。 -- 动态窗口算子(`dyn_window.py`)目前走 Numba/Python 
路径,未接入 C++ 扩展。 -- 部分复杂算子(chip、permutation entropy)在 C++ 与 Numba 路径上可能存在 `float32` 舍入差异,但相关系数为 1.0。 -- C++ 扩展依赖 `pybind11` 和系统编译器(GCC + OpenMP),Windows/macOS 需要单独调整编译参数。 diff --git a/seekalpha/dsl/core/__init__.py b/seekalpha/dsl/core/__init__.py deleted file mode 100644 index 5ec2ee8c..00000000 --- a/seekalpha/dsl/core/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Shared DSL runtime core. - -This package is the boundary that can later be replaced by a git submodule. -Modules inside this package should avoid importing project package names such -as ``aqra`` or ``future_alpha_miner``. -""" diff --git a/seekalpha/dsl/core/accel.py b/seekalpha/dsl/core/accel.py deleted file mode 100644 index b8a64ef4..00000000 --- a/seekalpha/dsl/core/accel.py +++ /dev/null @@ -1,2808 +0,0 @@ -"""C++ 加速后端管理:统一检测、环境变量控制、双后端调用接口。 - -环境变量: -- FUTURE_ALPHA_MINER_ACCEL_BACKEND=cxx: 强制使用 C++ 扩展 -- FUTURE_ALPHA_MINER_ACCEL_BACKEND=numba: 强制使用 Numba -- 未设置或 C++ 未编译: 自动选择可用后端 -- C++ 路径下 ``n_jobs``/``parallel`` 参数为 ``None`` 时 **默认启用 OpenMP 并行**; - 传入 ``parallel=False`` 可强制串行。 -""" -from __future__ import annotations - -import importlib -import os -from typing import Optional, Union - -import numpy as np -import pandas as pd - -from . 
import chip_daily as _chip_daily - -_fam_accel = None # type: ignore[misc] - - -def _load_cxx_backend(): - """Load the optional C++ backend without binding the core to one project.""" - module_name = os.environ.get("AQRA_DSL_CXX_BACKEND", "").strip() - if not module_name: - return None - try: - return importlib.import_module(module_name) - except ImportError: - return None - - -def set_cxx_backend(module) -> None: - """Inject a compiled backend module from the project adapter layer.""" - global _fam_accel, _HAS_CXX, _CXX_ROLL_FIXED_MAX_OP - _fam_accel = module - _HAS_CXX = module is not None - _CXX_ROLL_FIXED_MAX_OP = _cxx_roll_fixed_max_op() - - -_fam_accel = _load_cxx_backend() -_HAS_CXX = _fam_accel is not None - -try: - from numba import njit, prange - _HAS_NUMBA = True -except ImportError: - _HAS_NUMBA = False - prange = range # type: ignore[misc, assignment] - - def njit(*args, **kwargs): - def _wrap(f): - return f - return _wrap if not args else args[0] - - -def _use_cxx_backend() -> bool: - """根据环境变量和可用性决定是否使用 C++ 后端。""" - backend = os.environ.get("FUTURE_ALPHA_MINER_ACCEL_BACKEND", "").strip().lower() - if backend == "cxx": - if not _HAS_CXX: - raise RuntimeError("C++ extension requested but not available. 
" - "Install python3-devel and rebuild with 'pip install -e .'") - return True - if backend == "numba": - return False - # Auto: use C++ if available - return _HAS_CXX - - -def accel_available() -> dict: - """返回加速后端可用性信息。""" - return { - "cxx": _HAS_CXX, - "numba": _HAS_NUMBA, - "active": "cxx" if _use_cxx_backend() else ("numba" if _HAS_NUMBA else "python"), - } - - -# ============================================================================= -# Fixed-Window Rolling: MEAN, STD, SUM, MIN, MAX, RANK, VAR, MEDIAN, SKEW, KURT, PROD -# ============================================================================= - -_OP_MAP_FIXED = { - "mean": 0, - "std": 1, - "sum": 2, - "min": 3, - "max": 4, - "rank_pct": 5, - "var": 6, - "median": 7, - "skew": 8, - "kurt": 9, - "prod": 10, -} - - -def _cxx_has(name: str) -> bool: - """C++ 扩展是否已编译并包含指定符号(用于新算子的平滑回退)。""" - return _HAS_CXX and hasattr(_fam_accel, name) - - -def _cxx_parallel(parallel: Optional[bool]) -> bool: - """传给 C++ 内核的 OpenMP 开关:``parallel is None`` 时默认启用并行。""" - return True if parallel is None else bool(parallel) - - -def _cxx_roll_fixed_max_op() -> int: - """一次性探测当前编译的 C++ 内核 ``roll_fixed`` 支持到哪个 op, - 避免旧扩展与新 Python 层(op=9 kurt)版本错位。""" - if not _HAS_CXX: - return -1 - probe = np.zeros(4, dtype=np.float32) - max_op = 8 - for op in range(9, 15): - try: - _fam_accel.roll_fixed(probe, 1, op, 1, False) - max_op = op - except Exception: - break - return max_op - - -_CXX_ROLL_FIXED_MAX_OP = _cxx_roll_fixed_max_op() - - -def roll_fixed( - vals: np.ndarray, - window: int, - kind: str, - *, - ddof: int = 1, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗滚动聚合,自动选择 C++ 或 Numba 后端。 - - Args: - vals: 1-D float64 数组 - window: 窗口长度 - kind: "mean", "std", "sum", "min", "max", "rank_pct", "var", "median", "skew", "kurt", "prod" - ddof: 标准差自由度(默认 1) - parallel: 是否并行;``None`` 时在 C++ 后端下默认 ``True``(OpenMP) - """ - use_cxx = _use_cxx_backend() - op = _OP_MAP_FIXED.get(kind) - if op is None: - raise 
ValueError(f"Unknown kind: {kind}") - - if use_cxx and op <= _CXX_ROLL_FIXED_MAX_OP: - return np.asarray(_fam_accel.roll_fixed(vals, window, op, ddof, _cxx_parallel(parallel)), dtype=np.float32) - - # Numba fallback (also used when C++ extension is older than current Python layer) - return _roll_fixed_numba(vals, window, op, ddof) - - -@njit(cache=True) -def _roll_fixed_numba(vals: np.ndarray, window: int, op: int, ddof: int) -> np.ndarray: - """Numba 实现的固定窗滚动(与 C++ 语义一致)。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - if op == 0: # mean - s = 0.0 - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: # not nan - s += v - c += 1 - out[i] = s / c if c > 0 else np.nan - elif op == 1: # std - s = 0.0 - sq = 0.0 - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - s += v - sq += v * v - c += 1 - if c > ddof: - mean = s / c - var = (sq - 2 * mean * s + c * mean * mean) / (c - ddof) - out[i] = np.sqrt(max(0.0, var)) - else: - out[i] = np.nan - elif op == 2: # sum - s = 0.0 - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - s += v - c += 1 - out[i] = s if c > 0 else np.nan - elif op == 3: # min - m = np.inf - for j in range(lo, i + 1): - v = vals[j] - if v == v and v < m: - m = v - out[i] = m if m != np.inf else np.nan - elif op == 4: # max - m = -np.inf - for j in range(lo, i + 1): - v = vals[j] - if v == v and v > m: - m = v - out[i] = m if m != -np.inf else np.nan - elif op == 5: # rank_pct — 与 pandas ``rolling().rank(pct=True)``:average rank / nvalid - nvalid = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - nvalid += 1 - if nvalid == 0: - out[i] = np.nan - else: - curr = vals[i] - if curr != curr: - out[i] = np.nan - else: - less = 0 - equal = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - if v < curr: - less += 1 - elif v == curr: - equal += 1 - rank_low = float(less + 1) - rank_high = 
float(less + equal) - rank_avg = (rank_low + rank_high) / 2.0 - out[i] = rank_avg / float(nvalid) - elif op == 6: # var - s = 0.0 - sq = 0.0 - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - s += v - sq += v * v - c += 1 - if c > ddof: - mean = s / c - var = (sq - 2 * mean * s + c * mean * mean) / (c - ddof) - out[i] = max(0.0, var) - else: - out[i] = np.nan - elif op == 7: # median - wlen = i - lo + 1 - tmp = np.empty(wlen, dtype=np.float32) - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - tmp[c] = v - c += 1 - if c == 0: - out[i] = np.nan - else: - buf = np.sort(tmp[:c]) - if c % 2 == 1: - out[i] = buf[c // 2] - else: - out[i] = (buf[c // 2 - 1] + buf[c // 2]) / 2.0 - elif op == 8: # skew — pandas nanops.nanskew - wlen = i - lo + 1 - tmp = np.empty(wlen, dtype=np.float32) - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - tmp[c] = v - c += 1 - if c < 3: - out[i] = np.nan - else: - buf = tmp[:c] - sm = 0.0 - for t in range(c): - sm += buf[t] - mean = sm / c - m2 = 0.0 - m3 = 0.0 - for t in range(c): - d = buf[t] - mean - d2 = d * d - m2 += d2 - m3 += d2 * d - if m2 == 0.0: - out[i] = 0.0 - else: - out[i] = ( - (c * np.sqrt(c - 1) / (c - 2)) * (m3 / (m2 ** 1.5)) - ) - elif op == 9: # kurt — pandas rolling().kurt() adjusted Fisher–Pearson (excess) - wlen = i - lo + 1 - tmp = np.empty(wlen, dtype=np.float32) - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - tmp[c] = v - c += 1 - if c < 4: - out[i] = np.nan - else: - buf = tmp[:c] - sm = 0.0 - for t in range(c): - sm += buf[t] - mean = sm / c - m2 = 0.0 - m4 = 0.0 - for t in range(c): - d = buf[t] - mean - d2 = d * d - m2 += d2 - m4 += d2 * d2 - if m2 == 0.0: - out[i] = 0.0 - else: - nf = float(c) - numer = nf * (nf - 1.0) * (nf + 1.0) * m4 - denom = (nf - 2.0) * (nf - 3.0) * m2 * m2 - adj = 3.0 * (nf - 1.0) * (nf - 1.0) / ((nf - 2.0) * (nf - 3.0)) - out[i] = numer / denom - adj - elif op == 10: # prod — NaN as 1 - p = 1.0 - for j in range(lo, i + 1): - v 
= vals[j] - p *= v if v == v else 1.0 - out[i] = p - else: - out[i] = np.nan - - return out - - -# ============================================================================= -# Fixed shift (DELAY 整数窗) -# ============================================================================= - - -def shift_fixed( - vals: np.ndarray, - periods: int, - parallel: Optional[bool] = None, -) -> np.ndarray: - """与 ``pandas.Series.shift(periods)`` 对齐的 1-D 滞后;C++ 可用时用 ``shift_fixed`` 内核。""" - p = int(periods) - if p < 0: - p = 0 - use_cxx = _use_cxx_backend() - if use_cxx: - return np.asarray( - _fam_accel.shift_fixed(vals, p, _cxx_parallel(parallel)), dtype=np.float32 - ) - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - for i in range(n): - j = i - p - out[i] = vals[j] if j >= 0 else np.nan - return out - - -# ============================================================================= -# EMA (Exponential Moving Average) -# ============================================================================= - - -def ema(vals: np.ndarray, span: int, parallel: Optional[bool] = None) -> np.ndarray: - """与 ``pandas.Series.ewm(span, min_periods=1, adjust=False).mean()`` 一致。 - - 在 C++ 扩展可用且未强制 Numba 时使用 ``_fam_accel.ema``;否则回退到 pandas。 - """ - use_cxx = _use_cxx_backend() - if use_cxx: - return np.asarray( - _fam_accel.ema(vals, span, _cxx_parallel(parallel)), dtype=np.float32 - ) - return ( - pd.Series(vals, dtype=np.float32) - .ewm(span=span, min_periods=1, adjust=False) - .mean() - .to_numpy(dtype=np.float32, copy=False) - ) - - -# ============================================================================= -# WMA (Weighted Moving Average) -# ============================================================================= - - -def wma(vals: np.ndarray, window: int, parallel: Optional[bool] = None) -> np.ndarray: - """WMA,自动选择 C++ 或 Numba 后端。""" - use_cxx = _use_cxx_backend() - - if use_cxx: - return np.asarray(_fam_accel.wma(vals, window, _cxx_parallel(parallel)), 
dtype=np.float32) - - return _wma_numba(vals, window) - - -@njit(cache=True) -def _wma_numba(vals: np.ndarray, window: int) -> np.ndarray: - """与 ``function_registry`` 原 pandas 实现一致:窗口长 L 时使用 ``1..window`` 的后 L 项作权。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - L = i - lo + 1 - first_w = float(window - L + 1) - - weighted_sum = 0.0 - weight_sum = 0.0 - for t in range(L): - j = lo + t - v = vals[j] - if v == v: - weight = first_w + float(t) - weighted_sum += v * weight - weight_sum += weight - - out[i] = weighted_sum / weight_sum if weight_sum > 0 else np.nan - - return out - - -# ============================================================================= -# DELTA (Difference) -# ============================================================================= - - -def delta(vals: np.ndarray, periods: int = 1, parallel: Optional[bool] = None) -> np.ndarray: - """DELTA,自动选择 C++ 或 Numba 后端。""" - use_cxx = _use_cxx_backend() - - if use_cxx: - return np.asarray(_fam_accel.delta(vals, periods, _cxx_parallel(parallel)), dtype=np.float32) - - return _delta_numba(vals, periods) - - -@njit(cache=True) -def _delta_numba(vals: np.ndarray, periods: int) -> np.ndarray: - """Numba DELTA 实现。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - j = i - periods - if j >= 0: - curr = vals[i] - prev = vals[j] - if curr == curr and prev == prev: # both not nan - out[i] = curr - prev - else: - out[i] = np.nan - else: - out[i] = np.nan - - return out - - -# ============================================================================= -# TS_PCTCHANGE (Percent Change) -# ============================================================================= - - -def pctchange(vals: np.ndarray, periods: int = 1, parallel: Optional[bool] = None) -> np.ndarray: - """TS_PCTCHANGE,自动选择 C++ 或 Numba 后端。""" - use_cxx = _use_cxx_backend() - - if use_cxx: - 
return np.asarray(_fam_accel.pctchange(vals, periods, _cxx_parallel(parallel)), dtype=np.float32) - - return _pctchange_numba(vals, periods) - - -@njit(cache=True) -def _pctchange_numba(vals: np.ndarray, periods: int) -> np.ndarray: - """Numba TS_PCTCHANGE 实现。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - j = i - periods - if j >= 0: - curr = vals[i] - prev = vals[j] - if curr == curr and prev == prev and prev != 0.0: - r = (curr - prev) / prev - # Handle inf - if np.isinf(r): - out[i] = np.nan - else: - out[i] = r - else: - out[i] = np.nan - else: - out[i] = np.nan - - return out - - -# ============================================================================= -# TS_LAST_ARGPEAK / … (confirmed centered local extremes) + 最大左右振幅选峰/谷 -# ============================================================================= - - -@njit(cache=True) -def _marks_center_extreme_numba( - vals: np.ndarray, half_window: int, want_max: int -) -> np.ndarray: - """某位置 ``j`` 为中心、窗 ``[j-w,j+w]`` 内的局部峰(1) / 谷(0);并列取窗内最右侧。""" - n = vals.shape[0] - marks = np.zeros(n, dtype=np.uint8) - if n == 0 or half_window < 1: - return marks - for j in range(half_window, n - half_window): - center = vals[j] - if center != center: - continue - lo = j - half_window - hi = j + half_window - ok = True - if want_max == 1: - for t in range(lo, hi + 1): - v = vals[t] - if v == v and (v > center or (t > j and v == center)): - ok = False - break - else: - for t in range(lo, hi + 1): - v = vals[t] - if v == v and (v < center or (t > j and v == center)): - ok = False - break - if ok: - marks[j] = 1 - return marks - - -def arg_local_extreme( - vals: np.ndarray, - half_window: int = 10, - *, - want_max: bool, - parallel: Optional[bool] = None, -) -> np.ndarray: - """最近一次已确认中心局部极值距今 bar 数。 - - 定义某位置 ``j`` 为 peak/trough 的条件是:``vals[j]`` 在 - ``[j-half_window, j+half_window]`` 内分别为最高/最低值。为了避免数据窥探, - 只有在 ``j+half_window`` 时刻之后,该拐点才会被确认并出现在输出里。 - - 
对并列极值采用“右侧优先”消歧:若窗口内右边还有相同高/低点,则当前 ``j`` 不算, - 仅保留该中心窗内最右侧的那个极值点。 - """ - hw = int(half_window) - if hw < 1: - raise ValueError("half_window must be >= 1") - use_cxx = _use_cxx_backend() - - if use_cxx: - return np.asarray( - _fam_accel.arg_local_extreme(vals, hw, 1 if want_max else 0, _cxx_parallel(parallel)), - dtype=np.float32, - ) - - return _arg_local_extreme_numba(vals, hw, 1 if want_max else 0) - - -def local_extreme_value( - vals: np.ndarray, - half_window: int = 10, - *, - want_max: bool, - parallel: Optional[bool] = None, -) -> np.ndarray: - """最近一次已确认中心局部峰/谷的价格值。""" - hw = int(half_window) - if hw < 1: - raise ValueError("half_window must be >= 1") - use_cxx = _use_cxx_backend() - - if use_cxx: - return np.asarray( - _fam_accel.local_extreme_value( - vals, hw, 1 if want_max else 0, _cxx_parallel(parallel) - ), - dtype=np.float32, - ) - - return _local_extreme_value_numba(vals, hw, 1 if want_max else 0) - - -@njit(cache=True) -def _arg_local_extreme_numba(vals: np.ndarray, half_window: int, want_max: int) -> np.ndarray: - """Numba 版本的已确认局部峰/谷定位。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - out[i] = np.nan - - if n == 0 or half_window < 1: - return out - - marks = _marks_center_extreme_numba(vals, half_window, want_max) - - last = -1 - for i in range(n): - cand = i - half_window - if cand >= 0 and marks[cand] == 1: - last = cand - if last >= 0: - out[i] = float(i - last) - - return out - - -@njit(cache=True) -def _local_extreme_value_numba( - vals: np.ndarray, half_window: int, want_max: int -) -> np.ndarray: - """Numba 版本的最近一次已确认局部峰/谷价格。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - out[i] = np.nan - - if n == 0 or half_window < 1: - return out - - marks = _marks_center_extreme_numba(vals, half_window, want_max) - - last = -1 - for i in range(n): - cand = i - half_window - if cand >= 0 and marks[cand] == 1: - last = cand - if last >= 0: - out[i] = vals[last] - - return out - 
- -# ============================================================================= -# 三 K 线顶/底分型(高+低联合判定),带 1 根确认延迟 -# ============================================================================= - - -@njit(cache=True) -def _fractal_chan_3bar_marks(high: np.ndarray, low: np.ndarray, want_top: int) -> np.ndarray: - """中心下标 ``j`` 上是否形成分型(严格不等);与 bar 1,2,3 = j-1,j,j+1 对应。""" - n = high.shape[0] - marks = np.zeros(n, dtype=np.uint8) - for j in range(1, n - 1): - h0 = high[j - 1] - h1 = high[j] - h2 = high[j + 1] - l0 = low[j - 1] - l1 = low[j] - l2 = low[j + 1] - if not (h0 == h0 and h1 == h1 and h2 == h2 and l0 == l0 and l1 == l1 and l2 == l2): - continue - if want_top == 1: - if h0 < h1 and h1 > h2 and l0 < l1 and l1 > l2: - marks[j] = 1 - else: - if h0 > h1 and h1 < h2 and l0 > l1 and l1 < l2: - marks[j] = 1 - return marks - - -@njit(cache=True) -def _fractal_chan_last_from_marks( - high: np.ndarray, - low: np.ndarray, - marks: np.ndarray, - want_top: int, - want_arg: int, -) -> np.ndarray: - """分型中心 ``j`` 仅在 ``i=j+1``(第三根收盘)后确认;与 TS_LAST_ARGPEAK 的滞后对齐方式一致。""" - n = high.shape[0] - out = np.empty(n, dtype=np.float32) - for i in range(n): - out[i] = np.nan - last = -1 - for i in range(n): - cand = i - 1 - if cand >= 1 and cand < n - 1 and marks[cand] == 1: - last = cand - if last >= 0: - if want_arg == 1: - out[i] = float(i - last) - else: - out[i] = high[last] if want_top == 1 else low[last] - return out - - -def fractal_chan_3bar_last( - high: np.ndarray, - low: np.ndarray, - *, - want_top_fractal: bool, - want_arg: bool, -) -> np.ndarray: - """三 K 线顶/底分型:双输入 high/low,输出距今 bar 数或分型中枢价(顶=中 K 高、底=中 K 低)。""" - h = np.asarray(high, dtype=np.float32) - l = np.asarray(low, dtype=np.float32) - if h.shape != l.shape: - raise ValueError("fractal_chan_3bar_last: high and low must have the same shape") - wt = 1 if want_top_fractal else 0 - wa = 1 if want_arg else 0 - marks = _fractal_chan_3bar_marks(h, l, wt) - return _fractal_chan_last_from_marks(h, l, marks, wt, wa) - - 
-def _rolling_ending_min_max( - vals: np.ndarray, half_window: int -) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """对每个 ``j``:``[j-w,j]`` 与 ``[j,j+w]`` 上的 ``min`` / ``max``(O(n) ``rolling``,含反序)。""" - s = pd.Series(np.asarray(vals, dtype=np.float32), copy=False) - w = int(half_window) - if w < 1: - raise ValueError("half_window must be >= 1") - k = w + 1 - left_min = s.rolling(window=k, min_periods=1).min().to_numpy(dtype=np.float32, copy=False) - left_max = s.rolling(window=k, min_periods=1).max().to_numpy(dtype=np.float32, copy=False) - sr = s.iloc[::-1] - rmin = sr.rolling(window=k, min_periods=1).min().iloc[::-1].to_numpy( - dtype=np.float32, copy=False - ) - rmax = sr.rolling(window=k, min_periods=1).max().iloc[::-1].to_numpy( - dtype=np.float32, copy=False - ) - return left_min, rmin, left_max, rmax - - -def _maxamp_from_marks( - vals: np.ndarray, - half_window: int, - want_max: bool, -) -> tuple[np.ndarray, np.ndarray]: - """在已确认峰/谷中,选左右振幅和最大的那一个;与 ``_marks_center_extreme_numba`` 一致。""" - w = int(half_window) - n = len(vals) - out_arg = np.full(n, np.nan, dtype=np.float32) - out_val = np.full(n, np.nan, dtype=np.float32) - if n == 0 or w < 1: - return out_arg, out_val - - marks = _marks_center_extreme_numba( - np.asarray(vals, dtype=np.float32), w, 1 if want_max else 0 - ) - lmin, rmin, lmax, rmax = _rolling_ending_min_max(vals, w) - - best_a = -np.inf - best_j = -1 - for i in range(n): - cj = i - w - if 2 * w <= i < n and marks[cj] == 1: - vj = float(vals[cj]) - if want_max: - a = 2.0 * vj - float(lmin[cj]) - float(rmin[cj]) - else: - a = float(lmax[cj]) + float(rmax[cj]) - 2.0 * vj - if np.isfinite(a) and ( - a > best_a or (a == best_a and cj > best_j) - ): - best_a, best_j = a, cj - if best_j >= 0: - out_arg[i] = float(i - best_j) - out_val[i] = float(vals[best_j]) - - return out_arg, out_val - - -def maxamp_arg_local_extreme( - vals: np.ndarray, - half_window: int = 10, - *, - want_max: bool, -) -> np.ndarray: - """在已确认局部峰/谷中,选(左峰谷距 + 
右峰谷距)最大者,输出距今 bar 数。 - - 不在 C++ 中实现,始终与 ``rolling``/marks 的 Numba 路径一致;避免 C++ 与 Python 双份语义。 - """ - a, _ = _maxamp_from_marks(vals, int(half_window), want_max) - return a - - -def maxamp_local_extreme_value( - vals: np.ndarray, - half_window: int = 10, - *, - want_max: bool, -) -> np.ndarray: - """在已确认局部峰/谷中,选左右振幅和最大者,输出该点价格。""" - _, v = _maxamp_from_marks(vals, int(half_window), want_max) - return v - - -# ============================================================================= -# Rolling Covariance & Correlation (bivariate, fixed window) -# ============================================================================= - - -@njit(cache=True) -def _roll_cov_numba(xvals: np.ndarray, yvals: np.ndarray, window: int, ddof: int) -> np.ndarray: - n = xvals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - sx = 0.0 - sy = 0.0 - sxy = 0.0 - c = 0 - for j in range(lo, i + 1): - x = xvals[j] - y = yvals[j] - if x == x and y == y: # both not NaN - sx += x - sy += y - sxy += x * y - c += 1 - if c > ddof: - mx = sx / c - my = sy / c - out[i] = (sxy - c * mx * my) / (c - ddof) - else: - out[i] = np.nan - - return out - - -@njit(cache=True) -def _roll_corr_numba(xvals: np.ndarray, yvals: np.ndarray, window: int) -> np.ndarray: - n = xvals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - sx = 0.0 - sy = 0.0 - sxx = 0.0 - syy = 0.0 - sxy = 0.0 - c = 0 - for j in range(lo, i + 1): - x = xvals[j] - y = yvals[j] - if x == x and y == y: - sx += x - sy += y - sxx += x * x - syy += y * y - sxy += x * y - c += 1 - if c < 2: - out[i] = np.nan - else: - cn = float(c) - mx = sx / cn - my = sy / cn - vx = sxx - cn * mx * mx - vy = syy - cn * my * my - if vx <= 0.0 or vy <= 0.0: - out[i] = np.nan - else: - cov = sxy - cn * mx * my - out[i] = cov / np.sqrt(vx * vy) - - return out - - -def 
roll_cov_fixed( - xvals: np.ndarray, - yvals: np.ndarray, - window: int, - *, - ddof: int = 1, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗滚动协方差,自动选择 C++ 或 Numba 后端。""" - use_cxx = _use_cxx_backend() - if use_cxx: - return np.asarray(_fam_accel.roll_cov_fixed(xvals, yvals, window, ddof, _cxx_parallel(parallel)), dtype=np.float32) - return _roll_cov_numba(xvals, yvals, window, ddof) - - -def roll_corr_fixed( - xvals: np.ndarray, - yvals: np.ndarray, - window: int, - *, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗滚动 Pearson 相关系数,自动选择 C++ 或 Numba 后端。""" - use_cxx = _use_cxx_backend() - if use_cxx: - return np.asarray(_fam_accel.roll_corr_fixed(xvals, yvals, window, _cxx_parallel(parallel)), dtype=np.float32) - return _roll_corr_numba(xvals, yvals, window) - - -# ============================================================================= -# Rolling Quantile (fixed window, linear interpolation) -# ============================================================================= - - -@njit(cache=True) -def _roll_quantile_numba(vals: np.ndarray, window: int, q: float) -> np.ndarray: - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - wlen = i - lo + 1 - tmp = np.empty(wlen, dtype=np.float32) - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - tmp[c] = v - c += 1 - if c == 0: - out[i] = np.nan - else: - buf = np.sort(tmp[:c]) - k = float(c) - pos = q * (k - 1.0) - if pos < 0.0: - pos = 0.0 - if pos > k - 1.0: - pos = k - 1.0 - lo_i = int(np.floor(pos)) - hi_i = int(np.ceil(pos)) - frac = pos - float(lo_i) - out[i] = buf[lo_i] * (1.0 - frac) + buf[hi_i] * frac - - return out - - -def roll_quantile_fixed( - vals: np.ndarray, - window: int, - q: float, - *, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗滚动 q 分位数(线性插值,等价 pandas ``rolling(w, min_periods=1).quantile(q)``)。""" - if not (0.0 <= q <= 1.0): - raise 
ValueError("q must be in [0, 1]") - if _use_cxx_backend() and _cxx_has("roll_quantile_fixed"): - return np.asarray(_fam_accel.roll_quantile_fixed(vals, window, q, _cxx_parallel(parallel)), dtype=np.float32) - return _roll_quantile_numba(vals, window, float(q)) - - -# ============================================================================= -# Event-driven kernels: TS_SINCE, TS_RUNLENGTH, TS_CROSS -# ============================================================================= - - -@njit(cache=True) -def _ts_since_numba(cond: np.ndarray) -> np.ndarray: - """距上一次 truthy(有限且非零)的 bar 数;首个事件前为 NaN。""" - n = cond.shape[0] - out = np.empty(n, dtype=np.float32) - last = -1 - for i in range(n): - v = cond[i] - if v == v and v != 0.0: - last = i - if last >= 0: - out[i] = float(i - last) - else: - out[i] = np.nan - return out - - -def ts_since(cond: np.ndarray) -> np.ndarray: - """距上一次 cond 为真(有限且非零)的 bar 数。""" - if _use_cxx_backend() and _cxx_has("ts_since"): - return np.asarray(_fam_accel.ts_since(cond), dtype=np.float32) - return _ts_since_numba(cond) - - -@njit(cache=True) -def _ts_since_nth_numba(cond: np.ndarray, nth: int) -> np.ndarray: - """距倒数第 nth 次 truthy 事件的 bar 数;nth=1 同 ``_ts_since_numba``。""" - n = cond.shape[0] - n_ev = max(1, int(nth)) - out = np.empty(n, dtype=np.float32) - for i in range(n): - seen = 0 - target = -1 - for j in range(i, -1, -1): - v = cond[j] - if v == v and v != 0.0: - seen += 1 - if seen == n_ev: - target = j - break - if target >= 0: - out[i] = float(i - target) - else: - out[i] = np.nan - return out - - -def ts_since_nth(cond: np.ndarray, nth: int) -> np.ndarray: - """距倒数第 ``nth`` 次 truthy 事件的 bar 数(``nth=1`` 同 ``ts_since``)。""" - n_ev = max(1, int(nth)) - if _use_cxx_backend() and _cxx_has("ts_since_nth"): - return np.asarray(_fam_accel.ts_since_nth(cond, n_ev), dtype=np.float32) - return _ts_since_nth_numba(cond, n_ev) - - -@njit(cache=True) -def _ts_runlength_numba(vals: np.ndarray, direction: int) -> np.ndarray: - """连续严格上行 / 下行 
bar 数;NaN 位置输出 NaN 并重置计数。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - run = 0 - for i in range(n): - curr = vals[i] - if not (curr == curr): - run = 0 - out[i] = np.nan - continue - if i == 0: - run = 0 - out[i] = 0.0 - continue - prev = vals[i - 1] - if not (prev == prev): - run = 0 - out[i] = 0.0 - continue - if direction > 0: - hit = curr > prev - else: - hit = curr < prev - if hit: - run += 1 - else: - run = 0 - out[i] = float(run) - return out - - -def ts_runlength(vals: np.ndarray, direction: int) -> np.ndarray: - """连续严格上涨 (direction=1) 或下跌 (direction=-1) 的根数。""" - if direction not in (1, -1): - raise ValueError("direction must be 1 (up) or -1 (down)") - if _use_cxx_backend() and _cxx_has("ts_runlength"): - return np.asarray(_fam_accel.ts_runlength(vals, direction), dtype=np.float32) - return _ts_runlength_numba(vals, direction) - - -@njit(cache=True) -def _ts_cross_numba(x: np.ndarray, y: np.ndarray, direction: int) -> np.ndarray: - """上穿 (direction=1) / 下穿 (direction=-1) 事件;输出 0/1,缺失为 NaN。""" - n = x.shape[0] - out = np.empty(n, dtype=np.float32) - if n > 0: - out[0] = 0.0 - for i in range(1, n): - xc = x[i] - yc = y[i] - xp = x[i - 1] - yp = y[i - 1] - if not (xc == xc and yc == yc and xp == xp and yp == yp): - out[i] = np.nan - continue - if direction > 0: - out[i] = 1.0 if (xp <= yp and xc > yc) else 0.0 - else: - out[i] = 1.0 if (xp >= yp and xc < yc) else 0.0 - return out - - -def ts_cross(x: np.ndarray, y: np.ndarray, direction: int) -> np.ndarray: - """上穿 (direction=1) / 下穿 (direction=-1):返回 0/1 / NaN 的 1-D 面板。""" - if direction not in (1, -1): - raise ValueError("direction must be 1 (above) or -1 (below)") - if _use_cxx_backend() and _cxx_has("ts_cross"): - return np.asarray(_fam_accel.ts_cross(x, y, direction), dtype=np.float32) - return _ts_cross_numba(x, y, direction) - - -# Event rolling: op 0=count, 1=rate, 2=any, 3=all (truthy = finite && != 0) - - -@njit(cache=True) -def _ts_event_roll_numba(cond: np.ndarray, window: int, 
op: int) -> np.ndarray: - n = cond.shape[0] - out = np.empty(n, dtype=np.float32) - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - finite_cnt = 0 - truthy_cnt = 0 - for j in range(lo, i + 1): - v = cond[j] - if v == v: - finite_cnt += 1 - if v != 0.0: - truthy_cnt += 1 - if finite_cnt == 0: - out[i] = np.nan - elif op == 0: - out[i] = float(truthy_cnt) - elif op == 1: - out[i] = float(truthy_cnt) / float(finite_cnt) - elif op == 2: - out[i] = 1.0 if truthy_cnt > 0 else 0.0 - elif op == 3: - out[i] = 1.0 if truthy_cnt == finite_cnt else 0.0 - else: - out[i] = np.nan - return out - - -@njit(cache=True) -def _ts_event_roll_dyn_numba(cond: np.ndarray, wvals: np.ndarray, op: int) -> np.ndarray: - n = cond.shape[0] - out = np.empty(n, dtype=np.float32) - for i in range(n): - w = int(wvals[i]) - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - finite_cnt = 0 - truthy_cnt = 0 - for j in range(lo, i + 1): - v = cond[j] - if v == v: - finite_cnt += 1 - if v != 0.0: - truthy_cnt += 1 - if finite_cnt == 0: - out[i] = np.nan - elif op == 0: - out[i] = float(truthy_cnt) - elif op == 1: - out[i] = float(truthy_cnt) / float(finite_cnt) - elif op == 2: - out[i] = 1.0 if truthy_cnt > 0 else 0.0 - elif op == 3: - out[i] = 1.0 if truthy_cnt == finite_cnt else 0.0 - else: - out[i] = np.nan - return out - - -def ts_event_roll(cond: np.ndarray, window: int, op: int) -> np.ndarray: - """固定窗事件滚动:op 0=count, 1=rate, 2=any, 3=all。""" - if op not in (0, 1, 2, 3): - raise ValueError("op must be 0=count, 1=rate, 2=any, 3=all") - w = max(1, int(window)) - if _use_cxx_backend() and _cxx_has("ts_event_roll"): - return np.asarray(_fam_accel.ts_event_roll(cond, w, op), dtype=np.float32) - return _ts_event_roll_numba(cond, w, op) - - -def ts_event_roll_dyn(cond: np.ndarray, wvals: np.ndarray, op: int) -> np.ndarray: - """动态窗事件滚动;``wvals`` 为每 bar 窗长(≥1 整数)。""" - if op not in (0, 1, 2, 3): - raise ValueError("op must be 0=count, 
1=rate, 2=any, 3=all") - return _ts_event_roll_dyn_numba(cond, wvals.astype(np.int64, copy=False), op) - - -@njit(cache=True) -def _ts_streak_numba(cond: np.ndarray) -> np.ndarray: - n = cond.shape[0] - out = np.empty(n, dtype=np.float32) - run = 0 - for i in range(n): - v = cond[i] - if not (v == v): - run = 0 - out[i] = np.nan - continue - if v != 0.0: - run += 1 - out[i] = float(run) - else: - run = 0 - out[i] = 0.0 - return out - - -def ts_streak(cond: np.ndarray) -> np.ndarray: - """当前连续 truthy 根数;NaN 重置。""" - if _use_cxx_backend() and _cxx_has("ts_streak"): - return np.asarray(_fam_accel.ts_streak(cond), dtype=np.float32) - return _ts_streak_numba(cond) - - -# ============================================================================= -# TS_ARGMEDIAN — 窗口内最接近中位数的位置(距今 bar 数) -# ============================================================================= - - -@njit(cache=True) -def _arg_median_numba(vals: np.ndarray, window: int) -> np.ndarray: - """返回窗口内值最接近中位数的 bar 距今数(0=当前)。 - 若有多个相同距离,取最近(索引最大)的。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - # 收集有效值及其原始索引偏移 - wlen = i - lo + 1 - tmp = np.empty(wlen, dtype=np.float32) - idx_map = np.empty(wlen, dtype=np.int64) - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - tmp[c] = v - idx_map[c] = j - lo # 0-based offset within window - c += 1 - - if c == 0: - out[i] = np.nan - continue - - # 求中位数 - buf = tmp[:c] - med = np.median(buf) - - # 找最接近中位数的元素,距离相同取最近的(索引大的优先) - best_dist = 1e308 - best_offset = 0 - for k in range(c): - d = abs(tmp[k] - med) - offset = idx_map[k] - if d < best_dist or (d == best_dist and offset > best_offset): - best_dist = d - best_offset = offset - - out[i] = float(wlen - 1 - best_offset) - return out - - -def arg_median_fixed( - vals: np.ndarray, - window: int, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗 TS_ARGMEDIAN:返回窗口内最接近中位数的 bar 
距今数。""" - return _arg_median_numba(vals, window) - - -# ============================================================================= -# TS_ARGNTH — 窗口内第 n 大/小的位置(距今 bar 数) -# ============================================================================= - - -@njit(cache=True) -def _arg_nth_numba(vals: np.ndarray, window: int, n: int, ascending: bool, unique: bool = False) -> np.ndarray: - """返回窗口内第 n 大 (ascending=False) 或第 n 小 (ascending=True) 的 bar 距今数。 - n >= 1;若有效值不足 n 个(unique=True 时为不足 n 个不同值),输出 NaN。 - 若有重复值: - - unique=False: 取位置最近(索引最大)的 - - unique=True: 跳过重复值,找严格第 n 个不同的值""" - n = max(1, n) - out = np.empty(vals.shape[0], dtype=np.float32) - for i in range(vals.shape[0]): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - wlen = i - lo + 1 - # 收集有效值及其原始索引偏移 - max_valid = wlen - val_buf = np.empty(max_valid, dtype=np.float32) - idx_buf = np.empty(max_valid, dtype=np.int64) - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - val_buf[c] = v - idx_buf[c] = j - lo # offset within window - c += 1 - - if c == 0: - out[i] = np.nan - continue - - # 排序 - for ii in range(c): - for jj in range(ii + 1, c): - vi, vj = val_buf[ii], val_buf[jj] - ii_idx, jj_idx = idx_buf[ii], idx_buf[jj] - if not ascending: # descending: 大的在前 - # 降序:值大的在前,值相同则偏移大的在前(更近) - if vj > vi or (vj == vi and jj_idx > ii_idx): - val_buf[ii], val_buf[jj] = vj, vi - idx_buf[ii], idx_buf[jj] = jj_idx, ii_idx - else: # ascending: 小的在前 - # 升序:值小的在前,值相同则偏移大的在前 - if vj < vi or (vj == vi and jj_idx > ii_idx): - val_buf[ii], val_buf[jj] = vj, vi - idx_buf[ii], idx_buf[jj] = jj_idx, ii_idx - - if unique: - # 去重:只保留每个值的第一个(排序后最近的) - uniq_val = np.empty(c, dtype=np.float32) - uniq_idx = np.empty(c, dtype=np.int64) - uniq_c = 0 - last_val = np.nan - for k in range(c): - if val_buf[k] != last_val: - uniq_val[uniq_c] = val_buf[k] - uniq_idx[uniq_c] = idx_buf[k] - uniq_c += 1 - last_val = val_buf[k] - if uniq_c < n: - out[i] = np.nan - continue - target_offset = 
uniq_idx[n - 1] - else: - if c < n: - out[i] = np.nan - continue - target_offset = idx_buf[n - 1] - - out[i] = float(wlen - 1 - target_offset) - return out - -def arg_nth_fixed( - vals: np.ndarray, - window: int, - n: int, - ascending: bool, - unique: bool = False, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗 TS_ARGNTH:第 n 大 (ascending=False) 或第 n 小 (ascending=True) 的 bar 距今数。 - Args: - ascending: 排序方向,False=降序(大的在前),True=升序(小的在前) - unique: 若为 True,跳过重复值,找严格第 n 个不同的值 - """ - return _arg_nth_numba(vals, window, n, ascending, unique) - - -# ============================================================================= -# Rolling Spearman (rank) Correlation — bivariate, fixed window -# ============================================================================= - - -@njit(cache=True) -def _roll_rankcorr_numba(xvals: np.ndarray, yvals: np.ndarray, window: int) -> np.ndarray: - """与 C++ ``roll_rankcorr_fixed_impl`` 语义一致: - 每根 bar 收集窗口内 (x,y) 有效对,对 x、y 分别算平均秩(等分布 ``rank(method='average')``), - 再对秩序列求 Pearson 相关系数。有效对数 <2 或任一维方差为零输出 NaN。""" - n = xvals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - wlen = i - lo + 1 - xb = np.empty(wlen, dtype=np.float32) - yb = np.empty(wlen, dtype=np.float32) - c = 0 - for j in range(lo, i + 1): - xv = xvals[j] - yv = yvals[j] - if xv == xv and yv == yv: - xb[c] = xv - yb[c] = yv - c += 1 - - if c < 2: - out[i] = np.nan - continue - - xr = np.empty(c, dtype=np.float32) - yr = np.empty(c, dtype=np.float32) - for k in range(c): - less_x = 0 - eq_x = 0 - less_y = 0 - eq_y = 0 - xk = xb[k] - yk = yb[k] - for j in range(c): - xj = xb[j] - yj = yb[j] - if xj < xk: - less_x += 1 - elif xj == xk: - eq_x += 1 - if yj < yk: - less_y += 1 - elif yj == yk: - eq_y += 1 - xr[k] = (2.0 * less_x + eq_x + 1) / 2.0 - yr[k] = (2.0 * less_y + eq_y + 1) / 2.0 - - sx = 0.0 - sy = 0.0 - sxx = 0.0 - syy = 0.0 - sxy = 0.0 - for k in 
range(c): - xr_k = xr[k] - yr_k = yr[k] - sx += xr_k - sy += yr_k - sxx += xr_k * xr_k - syy += yr_k * yr_k - sxy += xr_k * yr_k - cn = float(c) - mx = sx / cn - my = sy / cn - vx = sxx - cn * mx * mx - vy = syy - cn * my * my - if vx <= 0.0 or vy <= 0.0: - out[i] = np.nan - else: - cov = sxy - cn * mx * my - out[i] = cov / np.sqrt(vx * vy) - - return out - - -def roll_rankcorr_fixed( - xvals: np.ndarray, - yvals: np.ndarray, - window: int, - *, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗滚动 Spearman(秩)相关系数,自动选择 C++ 或 Numba 后端。""" - if _use_cxx_backend() and _cxx_has("roll_rankcorr_fixed"): - return np.asarray(_fam_accel.roll_rankcorr_fixed(xvals, yvals, window, _cxx_parallel(parallel)), dtype=np.float32) - return _roll_rankcorr_numba(xvals, yvals, window) - - -# ============================================================================= -# Rolling mutual information — bivariate, price vs lagged volume (histogram / rank bins) -# ============================================================================= - - -@njit(cache=True) -def _roll_mutual_info_lag_numba( - close: np.ndarray, - volume: np.ndarray, - window: int, - lag: int, - n_bins: int, - min_pairs: int, -) -> np.ndarray: - """窗内估计 I(close(t); volume(t-lag)):对窗内有效样本分别做秩分箱,再算 Shannon MI(nat)。 - - 每个时点 t:取 j ∈ [t-window+1, t] 且 j≥lag,配对 (close[j], volume[j-lag]); - 跳过非有限值;有效对数 < min_pairs 输出 NaN。 - """ - n = close.shape[0] - out = np.empty(n, dtype=np.float32) - B = n_bins - if B < 2: - for i in range(n): - out[i] = np.nan - return out - - lag_i = lag - if lag_i < 0: - lag_i = 0 - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - max_c = i - lo + 1 - xb = np.empty(max_c, dtype=np.float32) - yb = np.empty(max_c, dtype=np.float32) - c = 0 - for j in range(lo, i + 1): - if j < lag_i: - continue - xv = close[j] - yv = volume[j - lag_i] - if xv == xv and yv == yv: - xb[c] = xv - yb[c] = yv - c += 1 - - if c < min_pairs: - out[i] = np.nan - 
continue - - bx = np.empty(c, dtype=np.int64) - by = np.empty(c, dtype=np.int64) - - order_x = np.argsort(xb[:c], kind="mergesort") - for p in range(c): - orig = int(order_x[p]) - bb = (p * B) // c - if bb >= B: - bb = B - 1 - bx[orig] = bb - - order_y = np.argsort(yb[:c], kind="mergesort") - for p in range(c): - orig = int(order_y[p]) - bb = (p * B) // c - if bb >= B: - bb = B - 1 - by[orig] = bb - - nb2 = B * B - cnt = np.zeros(nb2, dtype=np.float32) - for k in range(c): - idx = bx[k] * B + by[k] - cnt[idx] += 1.0 - - px = np.zeros(B, dtype=np.float32) - py = np.zeros(B, dtype=np.float32) - total = float(c) - for ix in range(B): - for iy in range(B): - pxy = cnt[ix * B + iy] / total - px[ix] += pxy - py[iy] += pxy - - eps = 1e-15 - mi = 0.0 - for ix in range(B): - for iy in range(B): - pxy = cnt[ix * B + iy] / total - if pxy > 0.0: - mi += pxy * ( - np.log(pxy + eps) - - np.log(px[ix] + eps) - - np.log(py[iy] + eps) - ) - if mi < 0.0: - mi = 0.0 - out[i] = mi - - return out - - -def roll_mutual_info_lag_fixed( - close: np.ndarray, - volume: np.ndarray, - window: int, - lag: int, - *, - n_bins: int = 8, - min_pairs: Optional[int] = None, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗滚动互信息 close(t) vs volume(t-lag);秩分箱 + 列联表 MI。 - - 后端:优先 C++(与 Numba 同一数值语义);否则 Numba;均无则回落到解释执行的 - ``_roll_mutual_info_lag_numba``(无 Numba 时)。 - """ - if int(n_bins) < 2: - raise ValueError("n_bins must be >= 2") - w = max(1, int(window)) - lag_i = max(0, int(lag)) - B = int(n_bins) - mp = int(min_pairs) if min_pairs is not None else max(B + 2, 8) - if mp < 2: - raise ValueError("min_pairs must be >= 2") - c = np.asarray(close, dtype=np.float32) - v = np.asarray(volume, dtype=np.float32) - if c.shape[0] != v.shape[0]: - raise ValueError("close and volume must have the same length") - if _use_cxx_backend() and _cxx_has("roll_mutual_info_lag_fixed"): - return np.asarray( - _fam_accel.roll_mutual_info_lag_fixed( - c, v, w, lag_i, B, mp, _cxx_parallel(parallel) - ), - 
dtype=np.float32, - ) - # Numba / 纯 Python:未对 parallel 做 OpenMP;与 historical 行为一致 - return _roll_mutual_info_lag_numba(c, v, w, lag_i, B, mp) - - -# ============================================================================= -# Kaufman Efficiency Ratio — univariate, fixed window -# ============================================================================= - - -@njit(cache=True) -def _efficiency_ratio_segment(vals: np.ndarray, lo: int, i: int) -> float: - """窗口 [lo, i] 内 Kaufman ER;无效时返回 NaN。""" - first_valid = -1 - last_valid = -1 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - if first_valid < 0: - first_valid = j - last_valid = j - - if first_valid < 0 or last_valid == first_valid: - return np.nan - - total_path = 0.0 - prev = vals[first_valid] - for j in range(first_valid + 1, last_valid + 1): - v = vals[j] - if v == v: - d = v - prev - if d < 0.0: - d = -d - total_path += d - prev = v - - if total_path == 0.0: - return np.nan - - net = vals[last_valid] - vals[first_valid] - if net < 0.0: - net = -net - return net / total_path - - -@njit(cache=True) -def _roll_efficiency_ratio_numba(vals: np.ndarray, window: int) -> np.ndarray: - """ER = |窗口首末价差| / 窗口内逐 bar 绝对变化之和;固定窗。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - - for i in range(n): - w = window - if w < 2: - w = 2 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - er = _efficiency_ratio_segment(vals, lo, i) - out[i] = er if er == er else np.nan - - return out - - -@njit(cache=True) -def _roll_efficiency_ratio_dynamic_numba(vals: np.ndarray, wvals: np.ndarray) -> np.ndarray: - """逐 bar 窗长的 Kaufman ER;``wvals[i]`` 为 bar i 处回看长度(≥2)。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - if wvals.shape[0] != n: - for i in range(n): - out[i] = np.nan - return out - - for i in range(n): - w = int(wvals[i]) - if w < 2: - w = 2 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - er = _efficiency_ratio_segment(vals, lo, i) - out[i] = er if er == er else np.nan - - return out - - -def 
roll_efficiency_ratio_fixed( - vals: np.ndarray, - window: int, - *, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗 Kaufman Efficiency Ratio,自动选择 C++ 或 Numba 后端。""" - if int(window) < 2: - raise ValueError("window must be >= 2 for efficiency ratio") - if _use_cxx_backend() and _cxx_has("roll_efficiency_ratio_fixed"): - return np.asarray(_fam_accel.roll_efficiency_ratio_fixed(vals, window, _cxx_parallel(parallel)), dtype=np.float32) - return _roll_efficiency_ratio_numba(vals, int(window)) - - -def roll_efficiency_ratio_dynamic(vals: np.ndarray, wvals: np.ndarray) -> np.ndarray: - """动态窗 Kaufman ER;``wvals`` 与 ``vals`` 等长,逐 bar 窗长(由 ``dynamic_window_int_series`` 预处理)。""" - w_arr = np.asarray(wvals, dtype=np.int64).ravel() - if w_arr.shape[0] != np.asarray(vals).shape[0]: - raise ValueError("dynamic window length must match values length") - return _roll_efficiency_ratio_dynamic_numba( - np.asarray(vals, dtype=np.float32).ravel(), - w_arr, - ) - - -# Wick efficiency — lagged upper/lower wick coupling (cross-time, non DELTA-able) -# ============================================================================= - - -@njit(cache=True) -def _wick_efficiency_numba( - open_: np.ndarray, - high: np.ndarray, - low: np.ndarray, - close: np.ndarray, - lag: int, - eps: float, -) -> np.ndarray: - """影线能量效率:当前上影线 × (t-k) 下影线 / (当期实体 × 前期实体 + eps)。 - - UP_WICK(t) = high - max(open, close) - DN_WICK(t) = min(open, close) - low - BODY(t) = |close - open| - OUT[t] = UP_WICK(t) * DN_WICK(t-k) / (BODY(t) * BODY(t-k) + eps) - - i < lag 或与 t、t-k 任一侧 OHLC 非有限时为 NaN。lag >= 1。 - """ - n = open_.shape[0] - kk = int(lag) - out = np.empty(n, dtype=np.float32) - if kk < 1: - for i in range(n): - out[i] = np.nan - return out - - for i in range(n): - if i < kk: - out[i] = np.nan - continue - j = i - kk - ot = open_[i] - ht = high[i] - lt = low[i] - ct = close[i] - oj = open_[j] - hj = high[j] - lj = low[j] - cj = close[j] - if not (ot == ot and ht == ht and lt == lt and ct == ct): - 
out[i] = np.nan - continue - if not (oj == oj and hj == hj and lj == lj and cj == cj): - out[i] = np.nan - continue - - oc_max_t = ot if ot >= ct else ct - up_wick = ht - oc_max_t - oc_min_j = oj if oj <= cj else cj - dn_wick_j = oc_min_j - lj - - body_t = ct - ot - if body_t < 0.0: - body_t = -body_t - body_j = cj - oj - if body_j < 0.0: - body_j = -body_j - - denom = body_t * body_j + eps - out[i] = (up_wick * dn_wick_j) / denom - - return out - - -def wick_efficiency_fixed( - open_: np.ndarray, - high: np.ndarray, - low: np.ndarray, - close: np.ndarray, - lag: int, - *, - eps: float = 1e-12, -) -> np.ndarray: - """WICK_EFFICIENCY:四维 OHLC + 滞后 k。""" - n = open_.shape[0] - if not (high.shape[0] == n and low.shape[0] == n and close.shape[0] == n): - raise ValueError("open, high, low, close must have same length") - if int(lag) < 1: - raise ValueError("lag must be >= 1 for WICK_EFFICIENCY") - eps_f = float(eps) - if _use_cxx_backend() and _cxx_has("wick_efficiency_fixed"): - return np.asarray(_fam_accel.wick_efficiency_fixed(open_, high, low, close, int(lag), eps_f), dtype=np.float32) - return _wick_efficiency_numba(open_, high, low, close, int(lag), eps_f) - - -# KLINE_GEOMETRY — OHLC 矩阵行 SVD 奇异值比 σ₂/σ₁(形态混乱度) -# ============================================================================= - - -def roll_kline_geometry( - o: np.ndarray, - h: np.ndarray, - l: np.ndarray, - c: np.ndarray, - window: Union[int, float, np.ndarray], - *, - eps: float = 1e-15, -) -> np.ndarray: - """固定窗或逐 bar 窗长:窗口内 ``X∈R^{k×4}``(每行 o,h,l,c),``σ₂/σ₁``,∈ [0,1]。 - - 需要有效窗长 ``≥ 2`` 且窗内 OHLC 全有限;``σ₁≤eps`` 时为 NaN。使用 ``numpy.linalg.svd`` - (``full_matrices=False``),无 C++/Numba 路径。 - - ``window`` 可为标量(``int`` / ``float`` / numpy 标量)或与 ``o`` 等长的整数向量。 - """ - if o.shape[0] != h.shape[0] or o.shape[0] != l.shape[0] or o.shape[0] != c.shape[0]: - raise ValueError("open, high, low, close must have the same length") - - n = int(o.shape[0]) - out = np.full(n, np.nan, dtype=np.float32) - eps_f = 
float(eps) - - wa = np.asarray(window) - if wa.ndim == 0: - w0 = int(wa) - if w0 < 2: - raise ValueError("window must be >= 2") - for i in range(n): - wi = w0 if w0 <= i + 1 else i + 1 - if wi < 2: - continue - lo = i + 1 - wi - X = np.empty((wi, 4), dtype=np.float32) - X[:, 0] = o[lo : i + 1] - X[:, 1] = h[lo : i + 1] - X[:, 2] = l[lo : i + 1] - X[:, 3] = c[lo : i + 1] - if not np.isfinite(X).all(): - continue - try: - _, s, _ = np.linalg.svd(X, full_matrices=False) - except np.linalg.LinAlgError: - continue - if s.size < 2 or float(s[0]) <= eps_f: - continue - out[i] = float(s[1]) / float(s[0]) - return out - - w_arr = np.maximum(wa.astype(np.int64, copy=False).ravel(), 1) - if w_arr.shape[0] != n: - raise ValueError("dynamic window length must match OHLC length") - for i in range(n): - wi = int(w_arr[i]) - if wi > i + 1: - wi = i + 1 - if wi < 2: - continue - lo = i + 1 - wi - X = np.empty((wi, 4), dtype=np.float32) - X[:, 0] = o[lo : i + 1] - X[:, 1] = h[lo : i + 1] - X[:, 2] = l[lo : i + 1] - X[:, 3] = c[lo : i + 1] - if not np.isfinite(X).all(): - continue - try: - _, s, _ = np.linalg.svd(X, full_matrices=False) - except np.linalg.LinAlgError: - continue - if s.size < 2 or float(s[0]) <= eps_f: - continue - out[i] = float(s[1]) / float(s[0]) - - return out - - -def roll_kline_geometry_fixed( - o: np.ndarray, - h: np.ndarray, - l: np.ndarray, - c: np.ndarray, - window: int, - *, - eps: float = 1e-15, -) -> np.ndarray: - """``roll_kline_geometry(..., window=int, ...)`` 别名。""" - return roll_kline_geometry(o, h, l, c, int(window), eps=eps) - - -@njit(cache=True) -def _roll_permutation_entropy_numba( - vals: np.ndarray, window: int, order: int -) -> np.ndarray: - """Bandt-Pompe 排列熵:窗口内所有长度 order 的子序列按序数排列模式计数, - 再按 Shannon 熵归一化到 [0,1](除以 log(order!))。 - 子序列内含 NaN 则该模式不计;总数为 0 输出 NaN。""" - n = vals.shape[0] - m = order - out = np.empty(n, dtype=np.float32) - - # log(order!) 
for m in [2,7] - if m == 2: - norm = 0.6931471805599453 - elif m == 3: - norm = 1.791759469228055 - elif m == 4: - norm = 3.1780538303479458 - elif m == 5: - norm = 4.787491742782046 - elif m == 6: - norm = 6.579251212010101 - else: # m == 7 - norm = 8.525161361065414 - - pow_size = 1 - for _ in range(m): - pow_size *= m - - ranks = np.empty(m, dtype=np.int64) - - for i in range(n): - w = window - if w < m: - w = m - if w > i + 1: - w = i + 1 - lo = i + 1 - w - wlen = i - lo + 1 - - if wlen < m: - out[i] = np.nan - continue - - counts = np.zeros(pow_size, dtype=np.int64) - total = 0 - - for start in range(lo, i - m + 2): - valid = True - for k in range(m): - v = vals[start + k] - if not (v == v): - valid = False - break - if not valid: - continue - - for k in range(m): - r = 0 - vk = vals[start + k] - for j in range(m): - if j == k: - continue - vj = vals[start + j] - if vj < vk or (vj == vk and j < k): - r += 1 - ranks[k] = r - - pid = 0 - for k in range(m): - pid = pid * m + ranks[k] - counts[pid] += 1 - total += 1 - - if total == 0: - out[i] = np.nan - continue - - H = 0.0 - inv_tot = 1.0 / float(total) - for k in range(pow_size): - c_k = counts[k] - if c_k > 0: - p = c_k * inv_tot - H -= p * np.log(p) - out[i] = H / norm - - return out - - -def roll_permutation_entropy_fixed( - vals: np.ndarray, - window: int, - order: int = 3, - *, - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定窗 Bandt-Pompe 排列熵,自动选择 C++ 或 Numba 后端。``order`` 典型 3–5。""" - if int(window) < 2: - raise ValueError("window must be >= 2") - if int(order) < 2 or int(order) > 7: - raise ValueError("order must be in [2, 7]") - if _use_cxx_backend() and _cxx_has("roll_permutation_entropy_fixed"): - return np.asarray(_fam_accel.roll_permutation_entropy_fixed( - vals, int(window), int(order), _cxx_parallel(parallel) - ), dtype=np.float32) - return _roll_permutation_entropy_numba(vals, int(window), int(order)) - - -# ============================================================================= 
-# Chip distribution metrics (daily: uniform / cyq / triangular) -# ============================================================================= - -_CHIP_OP = _chip_daily.CHIP_OP -chip_wass_implementation_id = _chip_daily.chip_wass_implementation_id -chip_peak_sharpness_impl_id = _chip_daily.chip_peak_sharpness_impl_id -chip_bimodal_impl_id = _chip_daily.chip_bimodal_impl_id - - -def roll_chip_metric_fixed( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - op: str, - method: str = "cyq", - *, - parallel: Optional[bool] = None, -) -> np.ndarray: - """日频筹码密度指标(uniform / cyq / triangular)。""" - del parallel - if int(window) < 1: - raise ValueError("window must be >= 1") - if int(nbins) < 2: - raise ValueError("nbins must be >= 2") - return _chip_daily.roll_chip_metric_daily( - close, volume, low, high, aux, int(window), int(nbins), op, method - ) - - -def _broadcast_chip_win_vec(x: Union[int, np.ndarray], n: int, *, floor: int) -> np.ndarray: - """标量或可迭代 → 长度 ``n`` 的 int64 向量。""" - arr = np.asarray(x, dtype=np.int64).reshape(-1) - if arr.size == 1: - v = max(floor, int(arr[0])) - return np.full(n, v, dtype=np.int64) - if arr.size != n: - raise ValueError(f"window vector length mismatch: expected {n}, got {arr.size}") - return np.maximum(arr, floor).astype(np.int64, copy=False) - - -def roll_chip_wass_dist( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - wa: Union[int, np.ndarray], - wb: Union[int, np.ndarray], - rho: Union[int, np.ndarray], - nbins: int, - implementation: str = "moment", - method: str = "cyq", - *, - parallel: Optional[bool] = None, -) -> np.ndarray: - """日频筹码双窗漂移。""" - del parallel - nb = int(nbins) - if nb < 2: - raise ValueError("nbins must be >= 2") - n = int(close.shape[0]) - for arr in (volume, low, high, aux): - if arr.shape[0] != n: - raise ValueError("chip wass arrays must have the same length") - wa_v = 
_broadcast_chip_win_vec(wa, n, floor=1) - wb_v = _broadcast_chip_win_vec(wb, n, floor=1) - rho_v = _broadcast_chip_win_vec(rho, n, floor=0) - return _chip_daily.roll_chip_wass_dist_daily( - close, volume, low, high, aux, wa_v, wb_v, rho_v, nb, implementation, method - ) - - -def roll_chip_wass_dist_fixed( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - lag: int = 0, - *, - implementation: str = "moment", - method: str = "cyq", - parallel: Optional[bool] = None, -) -> np.ndarray: - """固定整窗双窗漂移。""" - del parallel - if int(window) < 1: - raise ValueError("window must be >= 1") - if int(nbins) < 2: - raise ValueError("nbins must be >= 2") - if int(lag) < 0: - raise ValueError("lag must be >= 0") - w = int(window) - return roll_chip_wass_dist( - close, volume, low, high, aux, w, w, int(lag), int(nbins), implementation, method - ) - - -def roll_chip_peak_sharpness_fixed( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - implementation: str = "curvature", - method: str = "cyq", - *, - parallel: Optional[bool] = None, -) -> np.ndarray: - """日频主峰尖锐度。""" - del parallel - if int(window) < 1: - raise ValueError("window must be >= 1") - if int(nbins) < 2: - raise ValueError("nbins must be >= 2") - return _chip_daily.roll_chip_peak_sharpness_daily( - close, volume, low, high, aux, int(window), int(nbins), implementation, method - ) - - -def roll_chip_bimodal_fixed( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - implementation: str = "simple", - method: str = "cyq", - *, - lambda_scale: float = 1.0, - parallel: Optional[bool] = None, -) -> np.ndarray: - """日频双峰结构得分。""" - del parallel - if int(window) < 1: - raise ValueError("window must be >= 1") - if int(nbins) < 2: - raise ValueError("nbins must be >= 2") - return 
_chip_daily.roll_chip_bimodal_daily( - close, - volume, - low, - high, - aux, - int(window), - int(nbins), - implementation, - method, - lambda_scale=lambda_scale, - ) - - -# VOLUME_CLOCK_VPIN — Volume-Synchronized PIN -# ============================================================================= - - -def vpin_classification_id(name: str) -> int: - """``tick``(默认,平盘沿用上一方向)/ ``lee_ready``(平盘 50/50 分拆)。""" - k = str(name).strip().lower().replace("-", "_") - if k == "tick": - return 0 - if k in ("lee_ready", "leeready", "lr"): - return 1 - raise ValueError('classification must be "tick" or "lee_ready"') - - -@njit(cache=True) -def _vpin_classify_volume( - price: float, - price_prev: float, - vol: float, - cls_id: int, - last_sign: int, -) -> tuple: - """返回 (buy_vol, sell_vol, new_last_sign)。""" - if not (price == price) or not (vol == vol) or vol <= 0.0: - return 0.0, 0.0, last_sign - if not (price_prev == price_prev): - return 0.0, 0.0, last_sign - - if price > price_prev: - return vol, 0.0, 1 - if price < price_prev: - return 0.0, vol, -1 - - if cls_id == 0: - if last_sign > 0: - return vol, 0.0, last_sign - if last_sign < 0: - return 0.0, vol, last_sign - half = 0.5 * vol - return half, half, last_sign - - half = 0.5 * vol - return half, half, last_sign - - -@njit(cache=True) -def _vpin_push_imbalance( - imb_buf: np.ndarray, - n_in_buf: int, - window: int, - bucket_buy: float, - bucket_sell: float, - eps: float, -) -> int: - total = bucket_buy + bucket_sell - if total <= eps: - return n_in_buf - imb = abs(bucket_buy - bucket_sell) / (total + eps) - if imb > 1.0: - imb = 1.0 - elif imb < 0.0: - imb = 0.0 - w = window - if n_in_buf < w: - imb_buf[n_in_buf] = imb - return n_in_buf + 1 - for k in range(w - 1): - imb_buf[k] = imb_buf[k + 1] - imb_buf[w - 1] = imb - return w - - -@njit(cache=True) -def _vpin_buf_mean( - imb_buf: np.ndarray, n_in_buf: int, window: int, min_buckets: int -) -> float: - """滚动桶 imbalance 均值;``min_buckets`` 为最少已满桶数,不足则 NaN。""" - mb = 
min_buckets - if mb < 1: - mb = 1 - if mb > window: - mb = window - if n_in_buf < mb: - return np.nan - s = 0.0 - for k in range(n_in_buf): - s += imb_buf[k] - return s / float(n_in_buf) - - -@njit(cache=True) -def _vpin_add_to_bucket( - rem_buy: float, - rem_sell: float, - bucket_buy: float, - bucket_sell: float, - bucket_fill: float, - bsize: float, - imb_buf: np.ndarray, - n_in_buf: int, - window: int, - eps: float, -) -> tuple: - """将剩余买卖量灌入当前桶;满桶则结算。""" - while rem_buy + rem_sell > eps: - space = bsize - bucket_fill - if space <= eps: - n_in_buf = _vpin_push_imbalance(imb_buf, n_in_buf, window, bucket_buy, bucket_sell, eps) - bucket_buy = 0.0 - bucket_sell = 0.0 - bucket_fill = 0.0 - continue - - chunk = rem_buy + rem_sell - if chunk <= space + eps: - bucket_buy += rem_buy - bucket_sell += rem_sell - bucket_fill += chunk - rem_buy = 0.0 - rem_sell = 0.0 - if bucket_fill >= bsize - eps: - n_in_buf = _vpin_push_imbalance(imb_buf, n_in_buf, window, bucket_buy, bucket_sell, eps) - bucket_buy = 0.0 - bucket_sell = 0.0 - bucket_fill = 0.0 - else: - ratio = space / chunk - take_buy = rem_buy * ratio - take_sell = rem_sell * ratio - bucket_buy += take_buy - bucket_sell += take_sell - bucket_fill += space - rem_buy -= take_buy - rem_sell -= take_sell - n_in_buf = _vpin_push_imbalance(imb_buf, n_in_buf, window, bucket_buy, bucket_sell, eps) - bucket_buy = 0.0 - bucket_sell = 0.0 - bucket_fill = 0.0 - return rem_buy, rem_sell, bucket_buy, bucket_sell, bucket_fill, n_in_buf - - -@njit(cache=True) -def _volume_clock_vpin_numba( - price: np.ndarray, - volume: np.ndarray, - window: int, - bucket_size: float, - cls_id: int, - eps: float, - min_buckets: int, -) -> np.ndarray: - """成交量同步 VPIN:固定成交量桶 + 买卖分类,滚动桶 imbalance 均值。""" - n = price.shape[0] - out = np.empty(n, dtype=np.float32) - w = window - if w < 1: - w = 1 - bsize = bucket_size - if bsize <= eps: - for i in range(n): - out[i] = np.nan - return out - - imb_buf = np.zeros(w, dtype=np.float32) - n_in_buf = 0 - 
bucket_buy = 0.0 - bucket_sell = 0.0 - bucket_fill = 0.0 - last_sign = 0 - - for i in range(n): - p = price[i] - p_prev = price[i - 1] if i > 0 else np.nan - buy_v, sell_v, last_sign = _vpin_classify_volume(p, p_prev, volume[i], cls_id, last_sign) - - rem_buy = buy_v - rem_sell = sell_v - rem_buy, rem_sell, bucket_buy, bucket_sell, bucket_fill, n_in_buf = _vpin_add_to_bucket( - rem_buy, - rem_sell, - bucket_buy, - bucket_sell, - bucket_fill, - bsize, - imb_buf, - n_in_buf, - w, - eps, - ) - - out[i] = _vpin_buf_mean(imb_buf, n_in_buf, w, min_buckets) - - return out - - -def volume_clock_vpin_fixed( - price: np.ndarray, - volume: np.ndarray, - window: int, - bucket_size: float, - classification: str = "tick", - *, - min_buckets: int = 5, - eps: float = 1e-12, - parallel: Optional[bool] = None, -) -> np.ndarray: - """Volume-Synchronized PIN ∈ [0,1];``window`` 为成交量桶个数(非 bar 数)。""" - del parallel - if int(window) < 1: - raise ValueError("window must be >= 1") - if float(bucket_size) <= 0.0: - raise ValueError("bucket_size must be > 0") - if price.shape[0] != volume.shape[0]: - raise ValueError("price and volume must have the same length") - cls = vpin_classification_id(classification) - mb = int(min_buckets) - if mb < 1: - raise ValueError("min_buckets must be >= 1") - return _volume_clock_vpin_numba( - price.astype(np.float32, copy=False), - volume.astype(np.float32, copy=False), - int(window), - float(bucket_size), - cls, - float(eps), - mb, - ) - - -# Generalized crowding (CROWD_*) — rolling dimension-bucket statistics -# ============================================================================= - -# op: 0=share, 1=mean_ratio, 2=contrast, 3=rank_weighted -# bucket_mode: 0=quantile (side high/low + split), 1=equal_freq (n_buckets + bucket_idx) -# side_high: 1=high tail, 0=low tail (quantile mode only) - - -@njit(cache=True) -def _crowd_sorted_quantile(buf: np.ndarray, c: int, q: float) -> float: - if c <= 0: - return np.nan - k = float(c) - pos = q * (k - 1.0) - 
if pos < 0.0: - pos = 0.0 - if pos > k - 1.0: - pos = k - 1.0 - lo_i = int(np.floor(pos)) - hi_i = int(np.ceil(pos)) - if lo_i == hi_i: - return buf[lo_i] - frac = pos - float(lo_i) - return buf[lo_i] * (1.0 - frac) + buf[hi_i] * frac - - -@njit(cache=True) -def _crowd_avg_rank(vals: np.ndarray, c: int, ranks: np.ndarray) -> None: - for k in range(c): - vk = vals[k] - less = 0 - eq = 0 - for j in range(c): - vj = vals[j] - if vj < vk: - less += 1 - elif vj == vk: - eq += 1 - ranks[k] = (2.0 * less + eq + 1) / 2.0 - - -@njit(cache=True) -def _crowd_in_target_quantile( - dim_val: float, - dim_buf: np.ndarray, - c: int, - split_q: float, - side_high: int, -) -> bool: - tmp = np.empty(c, dtype=np.float32) - for k in range(c): - tmp[k] = dim_buf[k] - tmp_sorted = np.sort(tmp) - thr = _crowd_sorted_quantile(tmp_sorted, c, split_q) - if side_high == 1: - return dim_val >= thr - return dim_val < thr - - -@njit(cache=True) -def _crowd_equal_freq_bucket(rank: float, n: int, n_buckets: int) -> int: - b = int((rank - 1.0) / float(n) * float(n_buckets)) - if b < 0: - b = 0 - if b >= n_buckets: - b = n_buckets - 1 - return b - - -@njit(cache=True) -def _crowd_in_target_equal_freq( - dim_val: float, - dim_buf: np.ndarray, - c: int, - n_buckets: int, - bucket_idx0: int, -) -> bool: - ranks = np.empty(c, dtype=np.float32) - _crowd_avg_rank(dim_buf, c, ranks) - for k in range(c): - if dim_buf[k] == dim_val: - b = _crowd_equal_freq_bucket(ranks[k], c, n_buckets) - if b == bucket_idx0: - return True - return False - - -@njit(cache=True) -def _crowd_is_target( - dim_val: float, - dim_buf: np.ndarray, - c: int, - bucket_mode: int, - split_q: float, - side_high: int, - n_buckets: int, - bucket_idx0: int, -) -> bool: - if bucket_mode == 0: - return _crowd_in_target_quantile(dim_val, dim_buf, c, split_q, side_high) - return _crowd_in_target_equal_freq(dim_val, dim_buf, c, n_buckets, bucket_idx0) - - -@njit(cache=True) -def _crowd_mark_target( - dim_buf: np.ndarray, - c: int, - in_target: 
np.ndarray, - bucket_mode: int, - split_q: float, - side_high: int, - n_buckets: int, - bucket_idx0: int, -) -> None: - """窗口内一次标记目标桶(避免 share/mean_ratio 内重复 sort)。""" - if bucket_mode == 0: - tmp = np.empty(c, dtype=np.float32) - for k in range(c): - tmp[k] = dim_buf[k] - thr = _crowd_sorted_quantile(np.sort(tmp), c, split_q) - for k in range(c): - if side_high == 1: - in_target[k] = dim_buf[k] >= thr - else: - in_target[k] = dim_buf[k] < thr - else: - ranks = np.empty(c, dtype=np.float32) - _crowd_avg_rank(dim_buf, c, ranks) - for k in range(c): - b = _crowd_equal_freq_bucket(ranks[k], c, n_buckets) - in_target[k] = b == bucket_idx0 - - -@njit(cache=True) -def _roll_crowd_numba( - dim: np.ndarray, - attr: np.ndarray, - weight: np.ndarray, - window: int, - op: int, - bucket_mode: int, - split_q: float, - side_high: int, - n_buckets: int, - bucket_idx0: int, - min_valid: int, - use_attr: int, - use_weight: int, -) -> np.ndarray: - n = dim.shape[0] - out = np.empty(n, dtype=np.float32) - eps = 1e-12 - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - - wlen = i - lo + 1 - dim_buf = np.empty(wlen, dtype=np.float32) - attr_buf = np.empty(wlen, dtype=np.float32) - wgt_buf = np.empty(wlen, dtype=np.float32) - c = 0 - for j in range(lo, i + 1): - d = dim[j] - if d != d: - continue - if use_attr == 1: - a = attr[j] - if a != a: - continue - else: - a = 0.0 - if use_weight == 1: - wg = weight[j] - if wg != wg or wg <= 0.0: - continue - else: - wg = 1.0 - dim_buf[c] = d - attr_buf[c] = a - wgt_buf[c] = wg - c += 1 - - if c < min_valid: - out[i] = np.nan - continue - - if op == 3: - if c < 2: - out[i] = np.nan - continue - ranks = np.empty(c, dtype=np.float32) - _crowd_avg_rank(dim_buf, c, ranks) - denom = 0.0 - numer = 0.0 - for k in range(c): - denom += wgt_buf[k] - if denom <= eps: - out[i] = np.nan - continue - inv_nm1 = 1.0 / float(c - 1) - for k in range(c): - rn = (ranks[k] - 1.0) * inv_nm1 - numer += rn * attr_buf[k] 
* wgt_buf[k] - out[i] = numer / denom - continue - - if op == 2: - tmp = np.empty(c, dtype=np.float32) - for k in range(c): - tmp[k] = dim_buf[k] - tmp_sorted = np.sort(tmp) - thr = _crowd_sorted_quantile(tmp_sorted, c, split_q) - sum_h = 0.0 - cnt_h = 0 - sum_l = 0.0 - cnt_l = 0 - for k in range(c): - if dim_buf[k] >= thr: - sum_h += attr_buf[k] - cnt_h += 1 - else: - sum_l += attr_buf[k] - cnt_l += 1 - if cnt_h == 0 or cnt_l == 0: - out[i] = np.nan - else: - out[i] = (sum_h / float(cnt_h)) - (sum_l / float(cnt_l)) - continue - - in_tgt = np.empty(c, dtype=np.int8) - _crowd_mark_target( - dim_buf, - c, - in_tgt, - bucket_mode, - split_q, - side_high, - n_buckets, - bucket_idx0, - ) - - if op == 0: - sum_all = 0.0 - sum_tgt = 0.0 - for k in range(c): - sum_all += wgt_buf[k] - if in_tgt[k]: - sum_tgt += wgt_buf[k] - if sum_all <= eps or sum_tgt <= eps: - out[i] = np.nan - else: - out[i] = sum_tgt / sum_all - continue - - # mean_ratio - sum_all_a = 0.0 - sum_tgt_a = 0.0 - cnt_all = 0 - cnt_tgt = 0 - for k in range(c): - sum_all_a += attr_buf[k] - cnt_all += 1 - if in_tgt[k]: - sum_tgt_a += attr_buf[k] - cnt_tgt += 1 - if cnt_all == 0 or cnt_tgt == 0: - out[i] = np.nan - continue - mean_all = sum_all_a / float(cnt_all) - mean_tgt = sum_tgt_a / float(cnt_tgt) - if abs(mean_all) <= eps: - out[i] = np.nan - else: - out[i] = mean_tgt / mean_all - - return out - - -def crowd_op_id(name: str) -> int: - m = { - "share": 0, - "mean_ratio": 1, - "contrast": 2, - "rank_weighted": 3, - } - key = str(name).strip().lower() - if key not in m: - raise ValueError(f"unknown crowd op: {name!r}") - return m[key] - - -def crowd_side_id(side: str) -> int: - s = str(side).strip().lower() - if s == "high": - return 1 - if s == "low": - return 0 - raise ValueError(f"side must be 'high' or 'low', got {side!r}") - - -def roll_crowd_fixed( - dim: np.ndarray, - attr: np.ndarray, - weight: np.ndarray, - window: int, - op: str, - *, - bucket_mode: str = "quantile", - side: str = "high", - split: 
float = 0.5, - n_buckets: int = 2, - bucket_idx: int = 1, - min_valid: int = 0, - use_attr: bool = True, - use_weight: bool = True, - parallel: Optional[bool] = None, -) -> np.ndarray: - """滚动广义拥挤度统计(Numba;无前视)。""" - del parallel - w = int(window) - if w < 1: - raise ValueError("window must be >= 1") - op_id = crowd_op_id(op) - mv = int(min_valid) - if mv < 1: - mv = max(3, w // 4) - sq = float(split) - if not (0.0 < sq < 1.0): - raise ValueError("split must be in (0, 1)") - bm = 0 if str(bucket_mode).strip().lower() == "quantile" else 1 - nb = int(n_buckets) - bidx0 = int(bucket_idx) - 1 - if bm == 1: - if nb < 2: - raise ValueError("n_buckets must be >= 2") - if bidx0 < 0 or bidx0 >= nb: - raise ValueError("bucket_idx must be in [1, n_buckets]") - sh = crowd_side_id(side) - d = dim.astype(np.float32, copy=False) - a = attr.astype(np.float32, copy=False) if use_attr else np.zeros_like(d) - wg = weight.astype(np.float32, copy=False) if use_weight else np.ones_like(d) - if not (d.shape[0] == a.shape[0] == wg.shape[0]): - raise ValueError("dim, attr, weight must have the same length") - return _roll_crowd_numba( - d, - a, - wg, - w, - op_id, - bm, - sq, - sh, - nb, - bidx0, - mv, - 1 if use_attr else 0, - 1 if use_weight else 0, - ) diff --git a/seekalpha/dsl/core/chip_daily.py b/seekalpha/dsl/core/chip_daily.py deleted file mode 100644 index a11cd07c..00000000 --- a/seekalpha/dsl/core/chip_daily.py +++ /dev/null @@ -1,820 +0,0 @@ -"""日频筹码分布内核:uniform / cyq / triangular 三种构建方式。""" -from __future__ import annotations - -import numpy as np - -try: - from numba import njit -except ImportError: # pragma: no cover - def njit(*args, **kwargs): - def deco(fn): - return fn - return deco - -# method: 0=uniform, 1=cyq, 2=triangular -_CHIP_METHOD = { - "uniform": 0, - "cyq": 1, - "tri": 2, - "triangular": 2, -} - -# metric op: 0=peak_loc, 1=entropy, 2=com_w_gap, 3=mass_asym, 4=peak_sharpness -CHIP_OP = { - "peak_loc": 0, - "entropy": 1, - "com_w_gap": 2, - "mass_asym": 3, - 
"peak_sharpness": 4, -} - - -def chip_method_id(method: str) -> int: - k = str(method).strip().lower() - if k not in _CHIP_METHOD: - raise ValueError( - 'method must be "uniform", "cyq", or "tri" (alias: "triangular")' - ) - return _CHIP_METHOD[k] - - -def chip_wass_implementation_id(name: str) -> int: - k = str(name).strip().lower() - if k == "moment": - return 0 - if k in ("transport", "w1", "earth"): - return 1 - raise ValueError( - 'implementation must be "moment" or "transport" (aliases: "w1", "earth")' - ) - - -def chip_peak_sharpness_impl_id(name: str) -> int: - k = str(name).strip().lower() - if k in ("curvature", "curv", "s_curv"): - return 0 - if k in ("fwhm", "s_fwhm"): - return 1 - if k in ("combined", "sharp", "s_sharp"): - return 2 - raise ValueError('implementation must be "curvature", "fwhm", or "combined"') - - -def chip_bimodal_impl_id(name: str) -> int: - k = str(name).strip().lower() - if k in ("simple", "ratio"): - return 0 - if k in ("dip", "hartigan"): - return 1 - raise ValueError('implementation must be "simple" or "dip"') - - -@njit(cache=True) -def _chip_pmin_pmax(lo_arr: np.ndarray, hi_arr: np.ndarray, lo: int, hi: int, eps: float): - pmin = np.inf - pmax = -np.inf - for j in range(lo, hi + 1): - lj = lo_arr[j] - hj = hi_arr[j] - if lj == lj and hj == hj and hj >= lj: - if lj < pmin: - pmin = lj - if hj > pmax: - pmax = hj - ok = (pmax - pmin) > eps - return pmin, pmax, ok - - -@njit(cache=True) -def _chip_bin_index(price: float, pmin: float, bin_w: float, nbins: int) -> int: - b = int((price - pmin) / bin_w) - if b < 0: - b = 0 - if b >= nbins: - b = nbins - 1 - return b - - -@njit(cache=True) -def _chip_add_uniform_range( - q: np.ndarray, - weight: float, - low_p: float, - high_p: float, - pmin: float, - bin_w: float, - nbins: int, -) -> None: - if weight <= 0.0 or not (low_p == low_p) or not (high_p == high_p): - return - if high_p < low_p: - return - b_lo = _chip_bin_index(low_p, pmin, bin_w, nbins) - b_hi = _chip_bin_index(high_p, 
pmin, bin_w, nbins) - n_cov = b_hi - b_lo + 1 - if n_cov < 1: - return - per = weight / float(n_cov) - for b in range(b_lo, b_hi + 1): - q[b] += per - - -@njit(cache=True) -def _chip_add_triangular_range( - q: np.ndarray, - weight: float, - low_p: float, - high_p: float, - peak_p: float, - pmin: float, - bin_w: float, - nbins: int, - eps: float, -) -> None: - if weight <= 0.0 or not (low_p == low_p) or not (high_p == high_p) or not (peak_p == peak_p): - return - if high_p <= low_p + eps: - b = _chip_bin_index(peak_p, pmin, bin_w, nbins) - q[b] += weight - return - span = high_p - low_p - wsum = 0.0 - ws = np.empty(nbins, dtype=np.float64) - for k in range(nbins): - ws[k] = 0.0 - for k in range(nbins): - center = pmin + (k + 0.5) * bin_w - if center < low_p or center > high_p: - continue - if center <= peak_p: - denom = peak_p - low_p - if denom <= eps: - wk = 1.0 - else: - wk = (center - low_p) / denom - else: - denom = high_p - peak_p - if denom <= eps: - wk = 1.0 - else: - wk = (high_p - center) / denom - ws[k] = wk - wsum += wk - if wsum <= eps: - return - inv = weight / wsum - for k in range(nbins): - q[k] += ws[k] * inv - - -@njit(cache=True) -def _chip_turnover_rate(close_p: float, volume: float, cap: float, eps: float) -> float: - if not (close_p == close_p) or not (volume == volume) or not (cap == cap): - return 0.0 - if cap <= eps or volume <= 0.0: - return 0.0 - tr = (close_p * volume) / cap - if tr > 0.999: - tr = 0.999 - if tr < 0.0: - tr = 0.0 - return tr - - -@njit(cache=True) -def _chip_build_hist_window( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - lo: int, - hi: int, - nbins: int, - method_id: int, - q: np.ndarray, - eps: float, -) -> tuple: - """构建窗口 [lo,hi] 归一化筹码直方图 q。返回 (pmin, bin_w, ok)。""" - pmin, pmax, ok = _chip_pmin_pmax(low, high, lo, hi, eps) - if not ok: - return pmin, 0.0, False - bin_w = (pmax - pmin) / float(nbins) - for k in range(nbins): - q[k] = 0.0 - - if method_id == 1: - 
for j in range(lo, hi + 1): - lj = low[j] - hj = high[j] - if not (lj == lj and hj == hj and hj >= lj): - continue - tr = _chip_turnover_rate(close[j], volume[j], aux[j], eps) - if tr <= 0.0: - continue - for k in range(nbins): - q[k] *= (1.0 - tr) - _chip_add_uniform_range(q, tr, lj, hj, pmin, bin_w, nbins) - elif method_id == 2: - for j in range(lo, hi + 1): - lj = low[j] - hj = high[j] - vj = volume[j] - pj = aux[j] - if not (lj == lj and hj == hj and vj == vj and pj == pj and vj > 0.0): - continue - if hj < lj: - continue - peak = pj - if peak < lj: - peak = lj - if peak > hj: - peak = hj - _chip_add_triangular_range(q, vj, lj, hj, peak, pmin, bin_w, nbins, eps) - else: - for j in range(lo, hi + 1): - lj = low[j] - hj = high[j] - vj = volume[j] - if not (lj == lj and hj == hj and vj == vj and vj > 0.0): - continue - if hj < lj: - continue - _chip_add_uniform_range(q, vj, lj, hj, pmin, bin_w, nbins) - - total = 0.0 - for k in range(nbins): - total += q[k] - if total <= eps: - return pmin, bin_w, False - inv = 1.0 / total - for k in range(nbins): - q[k] *= inv - return pmin, bin_w, True - - -@njit(cache=True) -def _chip_metric_from_q( - q: np.ndarray, - pmin: float, - bin_w: float, - nbins: int, - P: float, - op: int, - eps: float, -) -> float: - if op == 0: - istar = 0 - qmax = q[0] - for k in range(1, nbins): - if q[k] > qmax: - qmax = q[k] - istar = k - peak_price = pmin + (istar + 0.5) * bin_w - if abs(P) > eps: - return (peak_price - P) / P - return np.nan - if op == 1: - H = 0.0 - for k in range(nbins): - p = q[k] - if p > 0.0: - H -= p * np.log(p) - norm = np.log(float(nbins)) - if norm > 0.0: - return H / norm - return np.nan - if op == 2: - bar_p = 0.0 - for k in range(nbins): - center = pmin + (k + 0.5) * bin_w - bar_p += center * q[k] - if abs(P) > eps: - return (bar_p - P) / P - return np.nan - if op == 3: - pos = (P - pmin) / bin_w - if pos < 0.0: - pos = 0.0 - if pos > float(nbins): - pos = float(nbins) - bP = int(pos) - if bP >= nbins: - bP = nbins 
- 1 - frac_below = pos - float(bP) - if frac_below < 0.0: - frac_below = 0.0 - if frac_below > 1.0: - frac_below = 1.0 - below = 0.0 - for k in range(bP): - below += q[k] - below += q[bP] * frac_below - return below - (1.0 - below) - if op == 4: - istar = 0 - qmax = q[0] - for k in range(1, nbins): - if q[k] > qmax: - qmax = q[k] - istar = k - if istar == 0: - qL = q[istar + 1] - qR = q[istar + 1] - elif istar == nbins - 1: - qL = q[istar - 1] - qR = q[istar - 1] - else: - qL = q[istar - 1] - qR = q[istar + 1] - return (2.0 * qmax - qL - qR) / (qmax + eps) - return np.nan - - -@njit(cache=False) -def roll_chip_metric_daily_numba( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - op: int, - method_id: int, -) -> np.ndarray: - n = close.shape[0] - out = np.empty(n, dtype=np.float32) - eps = 1e-12 - q = np.zeros(nbins, dtype=np.float32) - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - P = close[i] - if not (P == P): - out[i] = np.nan - continue - - pmin, bin_w, ok = _chip_build_hist_window( - close, volume, low, high, aux, lo, i, nbins, method_id, q, eps - ) - if not ok: - if op == 4: - out[i] = 1.0 - elif op in (0, 1, 2, 3): - out[i] = 0.0 - else: - out[i] = np.nan - continue - out[i] = _chip_metric_from_q(q, pmin, bin_w, nbins, P, op, eps) - - return out - - -@njit(cache=True) -def _chip_peak_curvature(q: np.ndarray, nbins: int, eps: float) -> float: - istar = 0 - qmax = q[0] - for k in range(1, nbins): - if q[k] > qmax: - qmax = q[k] - istar = k - if istar == 0: - qL = q[istar + 1] - qR = q[istar + 1] - elif istar == nbins - 1: - qL = q[istar - 1] - qR = q[istar - 1] - else: - qL = q[istar - 1] - qR = q[istar + 1] - return (2.0 * qmax - qL - qR) / (qmax + eps) - - -@njit(cache=True) -def _chip_peak_fwhm_width(q: np.ndarray, nbins: int, bin_w: float, eps: float) -> float: - istar = 0 - qmax = q[0] - for k in range(1, nbins): - if 
q[k] > qmax: - qmax = q[k] - istar = k - half = 0.5 * qmax - left = istar - while left > 0 and q[left - 1] >= half: - left -= 1 - right = istar - while right < nbins - 1 and q[right + 1] >= half: - right += 1 - return (float(right - left) + 1.0) * bin_w - - -@njit(cache=True) -def _chip_peak_fwhm(q: np.ndarray, nbins: int, bin_w: float, span: float, eps: float) -> float: - fwhm = _chip_peak_fwhm_width(q, nbins, bin_w, eps) - if span <= eps: - return 1.0 - val = fwhm / span - if val > 1.0: - val = 1.0 - if val < 0.0: - val = 0.0 - return val - - -@njit(cache=True) -def _chip_sigma_close_vol( - close: np.ndarray, volume: np.ndarray, lo: int, hi: int, eps: float -) -> float: - total = 0.0 - mean = 0.0 - for j in range(lo, hi + 1): - v = close[j] - m = volume[j] - if (v == v) and (m == m) and m > 0.0: - mean += v * m - total += m - if total <= 0.0: - return eps - mean /= total - var = 0.0 - for j in range(lo, hi + 1): - v = close[j] - m = volume[j] - if (v == v) and (m == m) and m > 0.0: - d = v - mean - var += m * d * d - var /= total - if var <= 0.0: - return eps - return np.sqrt(var) + eps - - -@njit(cache=True) -def _chip_hist_dip(q: np.ndarray, nbins: int) -> float: - csum = 0.0 - for k in range(nbins): - csum += q[k] - if csum <= 0.0: - return 0.0 - inv = 1.0 / csum - max_dev = 0.0 - c = 0.0 - for k in range(nbins): - c += q[k] * inv - uni = float(k + 1) / float(nbins) - dev = c - uni - if dev < 0.0: - dev = -dev - if dev > max_dev: - max_dev = dev - return max_dev - - -@njit(cache=False) -def roll_chip_peak_sharpness_daily_numba( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - impl: int, - method_id: int, -) -> np.ndarray: - n = close.shape[0] - out = np.empty(n, dtype=np.float32) - eps = 1e-12 - q = np.zeros(nbins, dtype=np.float32) - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - if not (close[i] == close[i]): - out[i] = 
np.nan - continue - - pmin, bin_w, ok = _chip_build_hist_window( - close, volume, low, high, aux, lo, i, nbins, method_id, q, eps - ) - if not ok: - out[i] = 1.0 - continue - - pmax = pmin + bin_w * float(nbins) - span = pmax - pmin - s_curv = _chip_peak_curvature(q, nbins, eps) - if impl == 0: - out[i] = s_curv - elif impl == 1: - out[i] = _chip_peak_fwhm(q, nbins, bin_w, span, eps) - else: - fwhm_price = _chip_peak_fwhm_width(q, nbins, bin_w, eps) - sigma = _chip_sigma_close_vol(close, volume, lo, i, eps) - decay = np.exp(-fwhm_price / (4.0 * sigma)) - val = s_curv * decay - if val < 0.0: - val = 0.0 - out[i] = val - - return out - - -@njit(cache=False) -def roll_chip_bimodal_daily_numba( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - impl: int, - method_id: int, - lambda_scale: float, -) -> np.ndarray: - n = close.shape[0] - out = np.empty(n, dtype=np.float32) - eps = 1e-12 - q = np.zeros(nbins, dtype=np.float32) - lam = lambda_scale - if lam <= eps: - lam = 1.0 - - for i in range(n): - w = window - if w < 1: - w = 1 - if w > i + 1: - w = i + 1 - lo = i + 1 - w - if not (close[i] == close[i]): - out[i] = np.nan - continue - - pmin, bin_w, ok = _chip_build_hist_window( - close, volume, low, high, aux, lo, i, nbins, method_id, q, eps - ) - if not ok: - out[i] = 0.0 - continue - - if impl == 1: - out[i] = _chip_hist_dip(q, nbins) - continue - - istar = 0 - qmax = q[0] - for k in range(1, nbins): - if q[k] > qmax: - qmax = q[k] - istar = k - p_star = pmin + (istar + 0.5) * bin_w - - istar2 = -1 - q2max = 0.0 - for k in range(nbins): - if k >= istar - 1 and k <= istar + 1: - continue - if q[k] > q2max: - q2max = q[k] - istar2 = k - - if istar2 < 0 or q2max <= eps: - out[i] = 0.0 - continue - - R_peak = q2max / (qmax + eps) - p_2 = pmin + (istar2 + 0.5) * bin_w - sigma = _chip_sigma_close_vol(close, volume, lo, i, eps) - D_peak = abs(p_star - p_2) / sigma - out[i] = R_peak * 
np.exp(-(D_peak * D_peak) / (2.0 * lam * lam)) - - return out - - -@njit(cache=False) -def roll_chip_wass_dist_daily_numba( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - wa: np.ndarray, - wb: np.ndarray, - rho: np.ndarray, - nbins: int, - impl: int, - method_id: int, -) -> np.ndarray: - n = close.shape[0] - out = np.empty(n, dtype=np.float32) - eps = 1e-12 - q_curr = np.zeros(nbins, dtype=np.float32) - q_lag = np.zeros(nbins, dtype=np.float32) - - for i in range(n): - w_a = wa[i] - if w_a < 1: - w_a = 1 - if w_a > i + 1: - w_a = i + 1 - w_b = wb[i] - if w_b < 1: - w_b = 1 - rho_i = rho[i] - if rho_i < 0: - rho_i = 0 - - hi_a = i - lo_a = hi_a + 1 - w_a - hi_b = i - rho_i - if hi_b < 0: - out[i] = np.nan - continue - if w_b > hi_b + 1: - w_b = hi_b + 1 - lo_b = hi_b + 1 - w_b - if lo_a < 0 or lo_b < 0: - out[i] = np.nan - continue - - P = close[i] - if not (P == P): - out[i] = np.nan - continue - - pmin_a, pmax_a, ok_a = _chip_pmin_pmax(low, high, lo_a, hi_a, eps) - pmin_b, pmax_b, ok_b = _chip_pmin_pmax(low, high, lo_b, hi_b, eps) - if not ok_a or not ok_b: - out[i] = np.nan - continue - - pmin = pmin_a - if pmin_b < pmin: - pmin = pmin_b - pmax = pmax_a - if pmax_b > pmax: - pmax = pmax_b - bin_w = (pmax - pmin) / float(nbins) - if bin_w <= eps: - out[i] = np.nan - continue - span = pmax - pmin - - _, _, ok_ca = _chip_build_hist_window( - close, volume, low, high, aux, lo_a, hi_a, nbins, method_id, q_curr, eps - ) - _, _, ok_cb = _chip_build_hist_window( - close, volume, low, high, aux, lo_b, hi_b, nbins, method_id, q_lag, eps - ) - if not ok_ca or not ok_cb: - out[i] = np.nan - continue - - if impl == 0: - mean_a = 0.0 - mean_b = 0.0 - for k in range(nbins): - ck = pmin + (k + 0.5) * bin_w - mean_a += ck * q_curr[k] - mean_b += ck * q_lag[k] - sig = (mean_a - mean_b) / (abs(P) + eps) - if sig > 1.0: - sig = 1.0 - elif sig < -1.0: - sig = -1.0 - out[i] = sig - else: - partial = 0.0 - raw = 0.0 - for k in 
range(nbins - 1): - partial += q_curr[k] - q_lag[k] - raw += abs(partial) - raw *= bin_w - val = raw / (span + eps) - if val > 1.0: - val = 1.0 - elif val < 0.0: - val = 0.0 - out[i] = val - - return out - - -def roll_chip_metric_daily( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - op: str, - method: str, -) -> np.ndarray: - op_id = CHIP_OP.get(op) - if op_id is None: - raise ValueError(f"Unknown chip op: {op}") - mid = chip_method_id(method) - arrays = [ - close.astype(np.float32, copy=False), - volume.astype(np.float32, copy=False), - low.astype(np.float32, copy=False), - high.astype(np.float32, copy=False), - aux.astype(np.float32, copy=False), - ] - n = arrays[0].shape[0] - for a in arrays[1:]: - if a.shape[0] != n: - raise ValueError("chip daily arrays must have the same length") - return roll_chip_metric_daily_numba( - arrays[0], arrays[1], arrays[2], arrays[3], arrays[4], - int(window), int(nbins), int(op_id), int(mid), - ) - - -def roll_chip_peak_sharpness_daily( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - implementation: str, - method: str, -) -> np.ndarray: - impl = chip_peak_sharpness_impl_id(implementation) - mid = chip_method_id(method) - return roll_chip_peak_sharpness_daily_numba( - close.astype(np.float32, copy=False), - volume.astype(np.float32, copy=False), - low.astype(np.float32, copy=False), - high.astype(np.float32, copy=False), - aux.astype(np.float32, copy=False), - int(window), int(nbins), int(impl), int(mid), - ) - - -def roll_chip_bimodal_daily( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - window: int, - nbins: int, - implementation: str, - method: str, - *, - lambda_scale: float = 1.0, -) -> np.ndarray: - impl = chip_bimodal_impl_id(implementation) - mid = chip_method_id(method) - return 
roll_chip_bimodal_daily_numba( - close.astype(np.float32, copy=False), - volume.astype(np.float32, copy=False), - low.astype(np.float32, copy=False), - high.astype(np.float32, copy=False), - aux.astype(np.float32, copy=False), - int(window), int(nbins), int(impl), int(mid), float(lambda_scale), - ) - - -def roll_chip_wass_dist_daily( - close: np.ndarray, - volume: np.ndarray, - low: np.ndarray, - high: np.ndarray, - aux: np.ndarray, - wa: np.ndarray, - wb: np.ndarray, - rho: np.ndarray, - nbins: int, - implementation: str, - method: str, -) -> np.ndarray: - impl = chip_wass_implementation_id(implementation) - mid = chip_method_id(method) - return roll_chip_wass_dist_daily_numba( - close.astype(np.float32, copy=False), - volume.astype(np.float32, copy=False), - low.astype(np.float32, copy=False), - high.astype(np.float32, copy=False), - aux.astype(np.float32, copy=False), - wa.astype(np.int64, copy=False), - wb.astype(np.int64, copy=False), - rho.astype(np.int64, copy=False), - int(nbins), int(impl), int(mid), - ) diff --git a/seekalpha/dsl/core/dyn_window.py b/seekalpha/dsl/core/dyn_window.py deleted file mode 100644 index ea83bb4d..00000000 --- a/seekalpha/dsl/core/dyn_window.py +++ /dev/null @@ -1,545 +0,0 @@ -"""动态窗滚动、滞后与 ARG 极值加速:按品种分组算时间序列 → ``get_indexer`` 写回 → 可选 Numba(njit/prange),否则 NumPy。""" -from __future__ import annotations - -import os -from typing import Callable, Optional - -import numpy as np -import pandas as pd - -from .accel import ( - _CXX_ROLL_FIXED_MAX_OP, - _use_cxx_backend, -) - -try: - from numba import njit, prange - - _HAS_NUMBA = True -except ImportError: - _HAS_NUMBA = False - prange = range # type: ignore[misc, assignment] - - def njit(*args, **kwargs): - def _wrap(f): - return f - - return _wrap if not args else args[0] - - -def _use_cxx_for_dynamic_kernels() -> bool: - """与 accel_backend 相同策略:可用且未强制 numba 时用 C++。""" - return _use_cxx_backend() and _accel_backend() is not None - - -def _accel_backend(): - from . 
import accel - - return accel._fam_accel - - -def _numba_parallel_mode() -> Optional[bool]: - """None = 按行数阈值自动;False = 强制串行内核;True = 强制并行内核。""" - v = os.environ.get("FUTURE_ALPHA_MINER_NUMBA_PARALLEL", "").strip().lower() - if v in ("", "auto"): - return None - if v in ("0", "false", "no", "off"): - return False - if v in ("1", "true", "yes", "on", "force"): - return True - return None - - -def _numba_parallel_min_rows() -> int: - s = os.environ.get("FUTURE_ALPHA_MINER_NUMBA_PARALLEL_MIN_ROWS", "").strip() - if not s: - return 4096 - try: - return max(1, int(s)) - except ValueError: - return 4096 - - -def _use_numba_parallel_for_length(n: int) -> bool: - if not _HAS_NUMBA or n < 1: - return False - mode = _numba_parallel_mode() - if mode is False: - return False - if mode is True: - return True - return n >= _numba_parallel_min_rows() - - -# --- Numba:变长窗口 min / max / sum / mean --------------------------------- - - -@njit(cache=True) -def _roll_dyn_mm_sm_numba(vals: np.ndarray, wvals: np.ndarray, op: int) -> np.ndarray: - """op: 0=min, 1=max, 2=sum, 3=mean""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - for i in range(n): - wi = wvals[i] - if wi < 1: - wi = 1 - if wi > i + 1: - wi = i + 1 - lo = i + 1 - wi - if op == 0: - m = np.inf - for j in range(lo, i + 1): - v = vals[j] - if v == v: - if v < m: - m = v - out[i] = m if m != np.inf else np.nan - elif op == 1: - m = -np.inf - for j in range(lo, i + 1): - v = vals[j] - if v == v: - if v > m: - m = v - out[i] = m if m != -np.inf else np.nan - elif op == 2: - s = 0.0 - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - s += v - c += 1 - out[i] = s if c > 0 else np.nan - else: - s = 0.0 - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - s += v - c += 1 - out[i] = (s / c) if c > 0 else np.nan - return out - - -if _HAS_NUMBA: - - @njit(cache=True, parallel=True) - def _roll_dyn_mm_sm_numba_parallel(vals: np.ndarray, wvals: np.ndarray, op: int) -> np.ndarray: - """与 
_roll_dyn_mm_sm_numba 相同,外层 ``i`` 用 ``prange`` 并行。""" - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - for i in prange(n): - wi = wvals[i] - if wi < 1: - wi = 1 - if wi > i + 1: - wi = i + 1 - lo = i + 1 - wi - if op == 0: - m = np.inf - for j in range(lo, i + 1): - v = vals[j] - if v == v: - if v < m: - m = v - out[i] = m if m != np.inf else np.nan - elif op == 1: - m = -np.inf - for j in range(lo, i + 1): - v = vals[j] - if v == v: - if v > m: - m = v - out[i] = m if m != -np.inf else np.nan - elif op == 2: - s = 0.0 - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - s += v - c += 1 - out[i] = s if c > 0 else np.nan - else: - s = 0.0 - c = 0 - for j in range(lo, i + 1): - v = vals[j] - if v == v: - s += v - c += 1 - out[i] = (s / c) if c > 0 else np.nan - return out - - -@njit(cache=True) -def _delay_dyn_numba(vals: np.ndarray, lags: np.ndarray) -> np.ndarray: - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - for i in range(n): - k = lags[i] - if k < 0: - out[i] = np.nan - continue - j = i - k - if j >= 0: - out[i] = vals[j] - else: - out[i] = np.nan - return out - - -if _HAS_NUMBA: - - @njit(cache=True, parallel=True) - def _delay_dyn_numba_parallel(vals: np.ndarray, lags: np.ndarray) -> np.ndarray: - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - for i in prange(n): - k = lags[i] - if k < 0: - out[i] = np.nan - continue - j = i - k - if j >= 0: - out[i] = vals[j] - else: - out[i] = np.nan - return out - - -@njit(cache=True) -def _arg_extreme_dyn_numba(vals: np.ndarray, wvals: np.ndarray, want_max: int) -> np.ndarray: - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - for i in range(n): - wi = wvals[i] - if wi < 1: - wi = 1 - if wi > i + 1: - wi = i + 1 - lo = i + 1 - wi - if want_max: - best_j = lo - best_v = vals[lo] - for j in range(lo + 1, i + 1): - v = vals[j] - if v == v and (not (best_v == best_v) or v > best_v): - best_v = v - best_j = j - if best_v == best_v: - out[i] = float((i - best_j)) - else: 
- out[i] = np.nan - else: - best_j = lo - best_v = vals[lo] - for j in range(lo + 1, i + 1): - v = vals[j] - if v == v and (not (best_v == best_v) or v < best_v): - best_v = v - best_j = j - if best_v == best_v: - out[i] = float((i - best_j)) - else: - out[i] = np.nan - return out - - -if _HAS_NUMBA: - - @njit(cache=True, parallel=True) - def _arg_extreme_dyn_numba_parallel( - vals: np.ndarray, wvals: np.ndarray, want_max: int - ) -> np.ndarray: - n = vals.shape[0] - out = np.empty(n, dtype=np.float32) - for i in prange(n): - wi = wvals[i] - if wi < 1: - wi = 1 - if wi > i + 1: - wi = i + 1 - lo = i + 1 - wi - if want_max: - best_j = lo - best_v = vals[lo] - for j in range(lo + 1, i + 1): - v = vals[j] - if v == v and (not (best_v == best_v) or v > best_v): - best_v = v - best_j = j - if best_v == best_v: - out[i] = float((i - best_j)) - else: - out[i] = np.nan - else: - best_j = lo - best_v = vals[lo] - for j in range(lo + 1, i + 1): - v = vals[j] - if v == v and (not (best_v == best_v) or v < best_v): - best_v = v - best_j = j - if best_v == best_v: - out[i] = float((i - best_j)) - else: - out[i] = np.nan - return out - - -# 与 ``cpp/fam_accel.cpp`` 中 ``dyn_op_to_roll_fixed_op`` 一致:0=min … 8=skew, 9=kurt, 10=prod -_DYN_OP_MAP = { - "min": 0, - "max": 1, - "sum": 2, - "mean": 3, - "std": 4, - "var": 5, - "median": 6, - "rank_pct": 7, - "skew": 8, - "kurt": 9, - "prod": 10, -} - -# Numba 仅实现前四种;其余在无 C++ 时用 ``_roll_segment_py`` -_DYN_NUMBA_KINDS = frozenset({"min", "max", "sum", "mean"}) - -# 兼容旧测试:仅含与 Numba 参考一致的四种动态 op -_OP_MAP = {"min": 0, "max": 1, "sum": 2, "mean": 3} - - -def _delay_segment_py(vals: np.ndarray, lags: np.ndarray) -> np.ndarray: - """逐元素动态滞后(与 ``shift_dynamic`` 无 Numba 分支同语义);供测试与 C++ 对照。""" - n = len(vals) - o = np.empty(n, dtype=np.float32) - for i in range(n): - k = int(lags[i]) - if k < 0: - o[i] = np.nan - continue - j = i - k - o[i] = vals[j] if j >= 0 else np.nan - return o - - -def _roll_segment_py(vals: np.ndarray, wvals: np.ndarray, 
kind: str, ddof: int) -> np.ndarray: - n = len(vals) - out = np.empty(n, dtype=np.float32) - for i in range(n): - wi = int(wvals[i]) - wi = max(1, min(wi, i + 1)) - lo = i + 1 - wi - sl = vals[lo : i + 1] - if kind == "min": - out[i] = float(np.nanmin(sl)) - elif kind == "max": - out[i] = float(np.nanmax(sl)) - elif kind == "sum": - out[i] = float(np.nansum(sl)) - elif kind == "mean": - out[i] = float(np.nanmean(sl)) - elif kind == "std": - out[i] = float(np.nanstd(sl, ddof=ddof)) if sl.size > 1 else 0.0 - elif kind == "var": - out[i] = float(np.nanvar(sl, ddof=ddof)) - elif kind == "median": - out[i] = float(np.nanmedian(sl)) - elif kind == "rank_pct": - out[i] = float(pd.Series(sl).rank(pct=True).iloc[-1]) - elif kind == "skew": - out[i] = float(pd.Series(sl).skew()) if len(sl) > 2 else np.nan - elif kind == "kurt": - out[i] = float(pd.Series(sl).kurt()) if len(sl) > 3 else np.nan - elif kind == "prod": - p = 1.0 - for v in sl: - if v == v: - p *= float(v) - out[i] = float(p) - else: - raise ValueError(kind) - return out - - -def rolling_dynamic( - df: pd.DataFrame, - win_df: pd.DataFrame, - wvals_fn: Callable[[pd.DataFrame, pd.Index], np.ndarray], - kind: str, - *, - ddof: int = 1, -) -> pd.DataFrame: - """动态滚动聚合;``wvals_fn`` 与 function_registry._dynamic_window_int_series 同类。""" - result = np.full(len(df), np.nan, dtype=np.float32) - use_numba = _HAS_NUMBA and kind in _DYN_NUMBA_KINDS - - for _, sub in df.groupby(level="instrument", sort=False): - sub = sub.sort_index(level="datetime") - wsub = win_df.reindex(sub.index) - wvals = wvals_fn(wsub, sub.index).astype(np.int64, copy=False) - vals = sub.iloc[:, 0].to_numpy(dtype=np.float32, copy=False) - pos = df.index.get_indexer(sub.index) - op_map_val = _DYN_OP_MAP.get(kind) - # 旧 C++ 扩展的 dyn_op_to_roll_fixed_op 仅支持到 _CXX_ROLL_FIXED_MAX_OP; - # 超出范围(例如 kurt=9)时回退,避免版本不匹配抛异常。 - cxx_supports = ( - _use_cxx_for_dynamic_kernels() - and op_map_val is not None - and op_map_val <= _CXX_ROLL_FIXED_MAX_OP - ) - if 
cxx_supports: - par = _use_numba_parallel_for_length(len(vals)) - o = np.asarray( - _accel_backend().roll_dyn_mm_sm(vals, wvals, op_map_val, par, ddof), dtype=np.float32 - ).reshape(-1) - elif use_numba: - op = _DYN_OP_MAP[kind] - if _use_numba_parallel_for_length(len(vals)): - o = _roll_dyn_mm_sm_numba_parallel(vals, wvals, op) - else: - o = _roll_dyn_mm_sm_numba(vals, wvals, op) - else: - o = _roll_segment_py(vals, wvals, kind, ddof) - result[pos] = o - - return pd.DataFrame(result, index=df.index, columns=df.columns[:1]) - - -def shift_dynamic( - df: pd.DataFrame, - lag_df: pd.DataFrame, - lags_fn: Callable[[pd.DataFrame, pd.Index], np.ndarray], -) -> pd.DataFrame: - """动态滞后;``lags_fn`` 应产生非负整数(含 0)。""" - result = np.full(len(df), np.nan, dtype=np.float32) - - for _, sub in df.groupby(level="instrument", sort=False): - sub = sub.sort_index(level="datetime") - lsub = lag_df.reindex(sub.index) - lags = lags_fn(lsub, sub.index).astype(np.int64, copy=False) - vals = sub.iloc[:, 0].to_numpy(dtype=np.float32, copy=False) - pos = df.index.get_indexer(sub.index) - if _use_cxx_for_dynamic_kernels(): - par = _use_numba_parallel_for_length(len(vals)) - o = np.asarray(_accel_backend().delay_dyn(vals, lags, par), dtype=np.float32).reshape( - -1 - ) - elif _HAS_NUMBA: - if _use_numba_parallel_for_length(len(vals)): - o = _delay_dyn_numba_parallel(vals, lags) - else: - o = _delay_dyn_numba(vals, lags) - else: - n = len(vals) - o = np.empty(n, dtype=np.float32) - for i in range(n): - k = int(lags[i]) - if k < 0: - o[i] = np.nan - continue - j = i - k - o[i] = vals[j] if j >= 0 else np.nan - result[pos] = o - - return pd.DataFrame(result, index=df.index, columns=df.columns[:1]) - - -def arg_extreme_dynamic( - df: pd.DataFrame, - win_df: pd.DataFrame, - wvals_fn: Callable[[pd.DataFrame, pd.Index], np.ndarray], - want_max: bool, -) -> pd.DataFrame: - result = np.full(len(df), np.nan, dtype=np.float32) - wm = 1 if want_max else 0 - - for _, sub in df.groupby(level="instrument", 
sort=False): - sub = sub.sort_index(level="datetime") - wsub = win_df.reindex(sub.index) - wvals = wvals_fn(wsub, sub.index).astype(np.int64, copy=False) - vals = sub.iloc[:, 0].to_numpy(dtype=np.float32, copy=False) - pos = df.index.get_indexer(sub.index) - if _use_cxx_for_dynamic_kernels(): - par = _use_numba_parallel_for_length(len(vals)) - o = np.asarray( - _accel_backend().arg_extreme_dyn(vals, wvals, wm, par), dtype=np.float32 - ).reshape(-1) - elif _HAS_NUMBA: - if _use_numba_parallel_for_length(len(vals)): - o = _arg_extreme_dyn_numba_parallel(vals, wvals, wm) - else: - o = _arg_extreme_dyn_numba(vals, wvals, wm) - else: - n = len(vals) - o = np.empty(n, dtype=np.float32) - for i in range(n): - wi = int(wvals[i]) - wi = max(1, min(wi, i + 1)) - sl = vals[i - wi + 1 : i + 1] - if want_max: - j = int(np.nanargmax(sl)) - else: - j = int(np.nanargmin(sl)) - o[i] = float(len(sl) - 1 - j) - result[pos] = o - - return pd.DataFrame(result, index=df.index, columns=df.columns[:1]) - - -def arg_extreme_fixed(df: pd.DataFrame, window: int, want_max: bool) -> pd.DataFrame: - """固定窗 TS_ARGMAX / TS_ARGMIN:与 ``rolling(W).apply(nanargmax)`` 语义一致,走 Numba 而非 pandas apply。 - - 窗宽第 i 根为 ``min(W, i+1)``,与 ``rolling(W, min_periods=1)`` 一致。 - """ - W = max(1, int(window)) - wm = 1 if want_max else 0 - result = np.full(len(df), np.nan, dtype=np.float32) - - for _, sub in df.groupby(level="instrument", sort=False): - sub = sub.sort_index(level="datetime") - vals = sub.iloc[:, 0].to_numpy(dtype=np.float32, copy=False) - n = len(vals) - wvals = np.minimum(W, np.arange(1, n + 1, dtype=np.int64)) - pos = df.index.get_indexer(sub.index) - if _use_cxx_for_dynamic_kernels(): - par = _use_numba_parallel_for_length(n) - o = np.asarray( - _accel_backend().arg_extreme_dyn(vals, wvals, wm, par), dtype=np.float32 - ).reshape(-1) - elif _HAS_NUMBA: - if _use_numba_parallel_for_length(n): - o = _arg_extreme_dyn_numba_parallel(vals, wvals, wm) - else: - o = _arg_extreme_dyn_numba(vals, wvals, wm) - 
else: - o = np.empty(n, dtype=np.float32) - for i in range(n): - wi = int(wvals[i]) - wi = max(1, min(wi, i + 1)) - sl = vals[i - wi + 1 : i + 1] - if want_max: - j = int(np.nanargmax(sl)) - else: - j = int(np.nanargmin(sl)) - o[i] = float(len(sl) - 1 - j) - result[pos] = o - - return pd.DataFrame(result, index=df.index, columns=df.columns[:1]) - - -def numba_available() -> bool: - return _HAS_NUMBA - - -def numba_parallel_config() -> dict: - """当前 Numba 并行策略(便于排查性能)。""" - return { - "has_numba": _HAS_NUMBA, - "parallel_mode": _numba_parallel_mode(), - "parallel_min_rows_default": 4096, - "parallel_min_rows_effective": _numba_parallel_min_rows(), - } diff --git a/seekalpha/dsl/core/errors.py b/seekalpha/dsl/core/errors.py deleted file mode 100644 index c7f848ac..00000000 --- a/seekalpha/dsl/core/errors.py +++ /dev/null @@ -1,51 +0,0 @@ -"""多行因子求值专用异常:MultiLineFactorEvalError 在解析、符号绑定、执行、求值各阶段携带 phase、生成代码片段、用户源行号与 traceback,供上层统一展示或记录。""" -from __future__ import annotations - -from typing import List, Optional - - -class MultiLineFactorEvalError(Exception): - """parse / symbol / exec / eval 失败时携带生成代码行号等信息。""" - - def __init__( - self, - message: str, - *, - phase: str, - problem: Optional[str] = None, - exception_type: Optional[str] = None, - generated_code: Optional[str] = None, - generated_line_no: Optional[int] = None, - generated_line_text: Optional[str] = None, - user_source: Optional[str] = None, - user_line_no: Optional[int] = None, - user_line_text: Optional[str] = None, - traceback_text: Optional[str] = None, - ): - super().__init__(message) - self.phase = phase - self.problem = problem - self.exception_type = exception_type - self.generated_code = generated_code - self.generated_line_no = generated_line_no - self.generated_line_text = generated_line_text - self.user_source = user_source - self.user_line_no = user_line_no - self.user_line_text = user_line_text - self.traceback_text = traceback_text - - def __str__(self) -> str: - head = self.args[0] if 
self.args else "MultiLineFactorEvalError" - if not (self.generated_code and str(self.generated_code).strip()): - return head - err_line = self.generated_line_no - body: List[str] = [] - for i, ln in enumerate(self.generated_code.strip().split("\n"), start=1): - mark = "--> " if err_line is not None and i == err_line else " " - body.append(f"{mark}{i:4d} | {ln}") - return head + "\n" + "\n".join(body) - - -if __name__ == "__main__": - e = MultiLineFactorEvalError("demo", phase="parse", problem="示例") - print(e) diff --git a/seekalpha/dsl/core/intervals.py b/seekalpha/dsl/core/intervals.py deleted file mode 100644 index fc7bfbfb..00000000 --- a/seekalpha/dsl/core/intervals.py +++ /dev/null @@ -1,38 +0,0 @@ -"""周期归一化(混频 resample / evaluator 依赖)。""" -from __future__ import annotations - -import re - -_INTERVAL_RE = re.compile(r"^\s*(\d+)\s*([mhd])(?:in)?\s*$", re.IGNORECASE) - - -def normalize_bar_interval(value: str | int) -> str: - """把 ``5`` / ``5min`` / ``5m`` / ``1H`` 归一为 ``5m`` / ``1h``。""" - if isinstance(value, int): - if value <= 0: - raise ValueError(f"bar_interval 必须为正整数,收到: {value!r}") - return f"{value}m" - s = str(value).strip().lower() - if not s: - raise ValueError("bar_interval 不能为空") - m = _INTERVAL_RE.match(s) - if m is None: - raise ValueError(f"不支持的 bar_interval: {value!r}") - num = int(m.group(1)) - unit = m.group(2).lower() - return f"{num}{unit}" - - -def bar_interval_to_minutes(value: str | int) -> int: - tag = normalize_bar_interval(value) - m = _INTERVAL_RE.match(tag) - assert m is not None - num = int(m.group(1)) - unit = m.group(2).lower() - if unit == "m": - return num - if unit == "h": - return num * 60 - if unit == "d": - return num * 24 * 60 - raise ValueError(f"不支持的 bar_interval: {value!r}") diff --git a/seekalpha/dsl/core/operators.py b/seekalpha/dsl/core/operators.py deleted file mode 100644 index 36e37bf3..00000000 --- a/seekalpha/dsl/core/operators.py +++ /dev/null @@ -1,2480 +0,0 @@ -"""股票 DSL 算子:时序算子 per instrument、截面算子 per 
datetime;Window 为整数固定窗或单列 DataFrame 动态窗/滞后。""" -from __future__ import annotations - -from typing import Any, Callable, Union - -import numpy as np -import pandas as pd -from joblib import Parallel, delayed - -from .dyn_window import ( - arg_extreme_dynamic, - arg_extreme_fixed, - rolling_dynamic, - shift_dynamic, -) -from . import accel as _accel -from . import chip_daily as _chip_daily -from .ops_kit import ( - Window, - as_int_window as _as_int_window, - dynamic_window_int_series as _dynamic_window_int_series, - first_series as _first_series, - gb_instrument as _gb_instrument, - is_dynamic_window as _is_dynamic_window, - lag_int_series as _lag_int_series, - out_frame as _out_frame, - per_datetime_transform as _per_datetime_transform, - per_instrument_bivariate as _ts_bivariate_event_accel, - per_instrument_unary as _ts_unary_accel, - series_from_group as _series_from_group, -) - -# TS_CROSS_* 第二 operand 可为与之对齐的面板单列,或与 ADD 二元算子一致的 Python / NumPy 标量(按 x 索引广播) -TsCrossOperand = Union[pd.DataFrame, int, float, np.integer, np.floating] - - -def _as_ts_cross_y_panel(x: pd.DataFrame, y: TsCrossOperand) -> pd.DataFrame: - if isinstance(y, pd.DataFrame): - return y - col = x.columns[:1] - try: - v = float(y) - except (TypeError, ValueError) as e: - raise TypeError( - "TS_CROSS_ABOVE/BELOW 的 y 须为单列 DataFrame 或与 rank 对齐可 float() 的标量" - ) from e - return pd.DataFrame(v, index=x.index, columns=col) - - -def _chip_wass_win_series(w: Window, index: pd.Index) -> np.ndarray: - """筹码窗长:整数或单列动态窗 → 每 bar 整数长度 ≥1。""" - if _is_dynamic_window(w): - return _dynamic_window_int_series(w.reindex(index), index) - return np.full(len(index), max(1, int(w)), dtype=np.int64) - - -def _chip_wass_rho_series(rho_w: Window, index: pd.Index) -> np.ndarray: - """参照窗右端偏移 ρ(≥0):整数滞后或动态单列面板。""" - if _is_dynamic_window(rho_w): - return _lag_int_series(rho_w.reindex(index), index) - return np.full(len(index), max(0, int(rho_w)), dtype=np.int64) - - -def _ts_agg_fixed_accel(df: pd.DataFrame, window: int, 
kind: str, ddof: int = 1) -> pd.DataFrame: - """固定窗滚动聚合,使用 C++ 或 Numba 加速后端。 - - Args: - kind: "mean", "std", "sum", "min", "max", "rank_pct", "median", "var", "skew", "kurt", "prod" - """ - kind_map = { - "mean": "mean", - "std": "std", - "sum": "sum", - "min": "min", - "max": "max", - "rank_pct": "rank_pct", - "median": "median", - "var": "var", - "skew": "skew", - "kurt": "kurt", - "prod": "prod", - } - accel_kind = kind_map.get(kind) - if accel_kind is None: - raise ValueError(f"Unknown kind: {kind}") - - def _roll_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.roll_fixed(vals, window, accel_kind, ddof=ddof) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform(lambda x: _roll_accelerated(_series_from_group(x))) - return _out_frame(ser, df) - - -def _ts_agg( - df: pd.DataFrame, - window: Window, - agg_fixed: Callable[[int], pd.DataFrame], - *, - dyn_kind: str, - ddof: int = 1, -) -> pd.DataFrame: - if _is_dynamic_window(window): - return rolling_dynamic(df, window, _dynamic_window_int_series, dyn_kind, ddof=ddof) - # Use accelerated backend for fixed windows - return _ts_agg_fixed_accel(df, _as_int_window(window), dyn_kind, ddof=ddof) - - -def _shift_fixed(df: pd.DataFrame, periods: int) -> pd.DataFrame: - p = int(periods) - if p < 0: - raise ValueError("DELAY 的周期数不能为负") - - def _shift_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.shift_fixed(vals, p) - return pd.Series(out, index=s.index) - - return _gb_instrument(df).transform(lambda x: _shift_accelerated(_series_from_group(x))) - - -def _shift_dynamic(df: pd.DataFrame, lag_df: pd.DataFrame) -> pd.DataFrame: - """每行延迟 lag[i] 根(同 instrument 内整数位移;按时间序而非展平下标)。""" - return shift_dynamic(df, lag_df, _lag_int_series) - - -def _binary_op_panel_df(df1: pd.DataFrame, df2: pd.DataFrame, ufunc) -> pd.DataFrame: - if ( - df1.shape == df2.shape - and df1.index.equals(df2.index) - and 
df1.shape[1] == 1 - and df2.shape[1] == 1 - and list(df1.columns) != list(df2.columns) - ): - out = ufunc( - df1.iloc[:, 0].to_numpy(dtype=float, copy=False), - df2.iloc[:, 0].to_numpy(dtype=float, copy=False), - ) - return pd.DataFrame(out, index=df1.index, columns=df1.columns[:1]) - return ufunc(df1, df2) - - -def _is_ufunc_broadcast_scalar(x: object) -> bool: - """二元逐元素算子:与面板混算时允许的标量(含 numpy 标量,不含 str / 非标量数组)。""" - if x is None: - return False - if isinstance(x, (str, bytes, pd.DataFrame, pd.Series)): - return False - if isinstance(x, (bool, int, float, np.integer, np.floating, np.bool_)): - return True - if isinstance(x, np.ndarray) and x.shape == (): - return True - return bool(np.isscalar(x)) - - -def _broadcast_ufunc_panel_scalar( - df: pd.DataFrame, - scalar: object, - ufunc, - *, - scalar_left: bool, -) -> pd.DataFrame: - vals = df.to_numpy(dtype=float, copy=False) - s = float(np.asarray(scalar, dtype=np.float32)) - res = ufunc(s, vals) if scalar_left else ufunc(vals, s) - return pd.DataFrame(res, index=df.index, columns=df.columns) - - -def _binary_op_panel_mixed(a: object, b: object, ufunc) -> pd.DataFrame: - """与 ``ADD`` 类似:双面板走 ``_binary_op_panel_df``;任一侧为标量则向面板广播。""" - if isinstance(a, pd.DataFrame) and isinstance(b, pd.DataFrame): - return _binary_op_panel_df(a, b, ufunc) - if isinstance(a, pd.DataFrame) and _is_ufunc_broadcast_scalar(b): - return _broadcast_ufunc_panel_scalar(a, b, ufunc, scalar_left=False) - if isinstance(b, pd.DataFrame) and _is_ufunc_broadcast_scalar(a): - return _broadcast_ufunc_panel_scalar(b, a, ufunc, scalar_left=True) - out = ufunc(a, b) - if not isinstance(out, pd.DataFrame): - raise TypeError( - "二元算子需要至少一侧为 DataFrame 或双方为可 ufunc 的标量数组,得到 %s" % type(out).__name__ - ) - return out - - -# ----------------------------------------------------------------------------- -# 基础时序 -# ----------------------------------------------------------------------------- - - -def DELTA(df: pd.DataFrame, p: int = 1) -> pd.DataFrame: - """同品种差分 
diff(p);df 单列面板,p 为正整数步长。 - 优先使用 C++ 加速,否则回退到 pandas。""" - p = int(p) - - def _delta_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.delta(vals, p) - return pd.Series(out, index=s.index) - - return _gb_instrument(df).transform(lambda x: _delta_accelerated(_series_from_group(x))) - - -def DELAY(df: pd.DataFrame, p: Window) -> pd.DataFrame: - """滞后 shift;p 为 int 根数或非负,或为与 df 对齐的单列 DataFrame 表示逐行动态滞后。""" - if _is_dynamic_window(p): - return _shift_dynamic(df, p) - return _shift_fixed(df, _as_int_window(p)) - - -def TS_PCTCHANGE(df: pd.DataFrame, p: int = 1) -> pd.DataFrame: - """相对 p 根前的涨跌幅;±inf 置 NaN。 - 优先使用 C++ 加速,否则回退到 pandas。""" - p = int(p) - - def _pctchange_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.pctchange(vals, p) - return pd.Series(out, index=s.index) - - return _gb_instrument(df).transform(lambda x: _pctchange_accelerated(_series_from_group(x))) - - -def TS_CUMPROD( - df: pd.DataFrame, - base: Union[int, float] = 1.0, -) -> pd.DataFrame: - """逐品种从开始到当前的**连乘** ``cumprod``(非滚动窗)。 - - 常用于把收益率序列还原为指数**点位**: - 例如 ``TS_CUMPROD(ADD(1, $ret), 100)`` 将日收益还原为指数点位; - 即 ``base × ∏_{s≤t}(1+R_s)``;``NaN`` 因子按 **1** 跳过(该步不改变累计积)。 - - 第二参 ``base`` 为初始尺度(如 ``100`` 表示基点 100 起算)。 - """ - scale = float(base) - - def _cumprod_skip_nan(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=np.float32, copy=False) - acc = scale - out = np.empty(len(vals), dtype=np.float32) - for i, v in enumerate(vals): - if np.isfinite(v): - acc *= v - out[i] = acc - return pd.Series(out, index=s.index, dtype=np.float32) - - return _gb_instrument(df).transform( - lambda x: _cumprod_skip_nan(_series_from_group(x)) - ) - - -def EMA(df: pd.DataFrame, p: Window) -> pd.DataFrame: - """EWM,span=p(须可转 int,勿传动态 DataFrame)。 - 优先使用 C++ 加速,否则回退到 pandas。""" - span = _as_int_window(p) - - # Use C++ or Numba accelerated backend - def _ema_accelerated(s: pd.Series) -> pd.Series: - vals = 
s.to_numpy(dtype=float, copy=False) - out = _accel.ema(vals, span) - return pd.Series(out, index=s.index) - - return _gb_instrument(df).transform(lambda x: _ema_accelerated(_series_from_group(x))) - - -def WMA(df: pd.DataFrame, p: int = 20) -> pd.DataFrame: - """线性加权均线,近端权重大,窗口 p 根。 - 优先使用 C++ 加速,否则回退到 pandas。""" - p = max(1, int(p)) - - # Use C++ or Numba accelerated backend - def _wma_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.wma(vals, p) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform(lambda x: _wma_accelerated(_series_from_group(x))) - return _out_frame(ser, df) - - -def SMA(df: pd.DataFrame, m: Optional[float] = None, n: Optional[float] = None) -> pd.DataFrame: - """SMA(df,m) 滚动均线;SMA(df,m,n) 为 alpha=n/m 的 ewm。""" - if isinstance(m, (int, float)) and m is not None and n is None: - w = int(m) - - def _sma_mean(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.roll_fixed(vals, w, "mean") - return pd.Series(out, index=s.index) - - return _gb_instrument(df).transform(lambda x: _sma_mean(_series_from_group(x))) - if m is None or n is None: - raise ValueError("SMA 请使用 SMA(df, m) 指定整数均线周期,或提供 (m,n) 自定义递推") - alpha = float(n) / float(m) - return _gb_instrument(df).transform(lambda x: x.ewm(alpha=alpha, adjust=False).mean()) - - -def ABS(df: pd.DataFrame) -> pd.DataFrame: - """逐元素绝对值。""" - return df.abs() - - -def SIGN(df: pd.DataFrame) -> pd.DataFrame: - """逐元素符号。""" - return np.sign(df) - - -def NEG(df: pd.DataFrame) -> pd.DataFrame: - """逐元素取负;等价 ``MULTIPLY(df, -1)``。""" - return -df - - -def _cond_truthy_mask(cond: pd.DataFrame) -> np.ndarray: - """与 ``TS_SINCE`` 一致:有限且非零为真。""" - c = cond.iloc[:, 0].to_numpy(dtype=float, copy=False) - return np.isfinite(c) & (c != 0.0) - - -def _if_then_else_operand_array(template: pd.DataFrame, operand: object, arg_name: str) -> np.ndarray: - if isinstance(operand, pd.DataFrame): - op = operand - if 
op.shape[1] != 1: - op = op.iloc[:, :1] - if not op.index.equals(template.index): - op = op.reindex(template.index) - return op.iloc[:, 0].to_numpy(dtype=float, copy=False) - if _is_ufunc_broadcast_scalar(operand): - return np.full(len(template), float(np.asarray(operand, dtype=np.float32)), dtype=np.float32) - raise TypeError(f"IF_THEN_ELSE {arg_name} 须为面板 DataFrame 或数值标量,收到: {type(operand).__name__}") - - -def IF_THEN_ELSE(cond: pd.DataFrame, then_val: object, else_val: object = 0.0) -> pd.DataFrame: - """条件选择:``cond`` 为真取 ``then_val``,否则取 ``else_val``(默认 0)。 - - ``cond`` 须为单列面板;**真** = 有限且 ≠0(与 ``TS_SINCE`` 一致),比较结果请先 ``CAST(..., 'float64')``。 - ``then_val`` / ``else_val`` 可为同索引单列面板或数值标量。 - """ - if not isinstance(cond, pd.DataFrame): - raise TypeError("IF_THEN_ELSE 第一参数 cond 须为面板 DataFrame") - if cond.shape[1] < 1: - raise ValueError("IF_THEN_ELSE cond 须至少一列") - mask = _cond_truthy_mask(cond if cond.shape[1] == 1 else cond.iloc[:, :1]) - then_arr = _if_then_else_operand_array(cond, then_val, "then_val") - else_arr = _if_then_else_operand_array(cond, else_val, "else_val") - out = np.where(mask, then_arr, else_arr) - return pd.DataFrame(out, index=cond.index, columns=cond.columns[:1], dtype=np.float32) - - -def FILLNA(df: pd.DataFrame, value: float = 0.0) -> pd.DataFrame: - """非有限值(NaN/±inf)替换为 ``value``(默认 0);有限值不变。 - - 常用于滚动暖启动导致的缺失:在**最终因子**上 ``FILLNA(expr, 0)`` 表示缺失 bar - 不参与排序但覆盖率按 0 计入。 - """ - v = float(value) - out = df.astype(float).copy() - return out.where(np.isfinite(out), other=v) - - -def CAST(df: pd.DataFrame, dtype: str) -> pd.DataFrame: - """逐元素 ``astype``;常用于将比较/逻辑得到的 bool 面板转为 float 再参与算术。""" - if not isinstance(df, pd.DataFrame): - raise TypeError("CAST 第一参数须为面板 DataFrame") - d = str(dtype).strip() - if len(d) >= 2 and d[0] == d[-1] and d[0] in "'\"": - d = d[1:-1] - return df.astype(d) - - -def LOG(df: pd.DataFrame) -> pd.DataFrame: - """自然对数;0 先变 NaN。""" - return np.log(df.replace(0, np.nan)) - - -def SQRT(df: pd.DataFrame) -> pd.DataFrame: 
- """逐元素平方根。""" - return np.sqrt(df) - - -def EXP(df: pd.DataFrame) -> pd.DataFrame: - """逐元素 exp。""" - return np.exp(df) - - -def INV(df: pd.DataFrame) -> pd.DataFrame: - """逐元素倒数;0 变 NaN。""" - return 1.0 / df.replace(0, np.nan) - - -def POW(df: pd.DataFrame, n: float) -> pd.DataFrame: - """逐元素 x**n。""" - return np.power(df, float(n)) - - -# ----------------------------------------------------------------------------- -# TS_* :固定或动态窗口 -# ----------------------------------------------------------------------------- - - -def TS_MIN(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动最小值;window 为 int 或单列 DataFrame 动态窗宽。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).min() - ), - dyn_kind="min", - ) - - -def TS_MAX(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动最大值;window 同 TS_MIN。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).max() - ), - dyn_kind="max", - ) - - -def TS_MEAN(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动均值;window 同 TS_MIN。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).mean() - ), - dyn_kind="mean", - ) - - -def TS_SUM(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动求和;window 同 TS_MIN。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).sum() - ), - dyn_kind="sum", - ) - - -def TS_PROD(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动乘积;window 同 TS_MIN。窗口内 ``NaN`` 按乘法单位元 **1** 参与(不改变乘积)。 - 固定窗优先 C++/Numba;与 pandas ``rolling(...).prod()`` 等对 NaN 的约定不同。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).sum() - ), - dyn_kind="prod", - ) - - -def TS_STD(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动样本标准差(ddof=1);window 同 TS_MIN。""" - return _ts_agg( 
- df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).std() - ), - dyn_kind="std", - ddof=1, - ) - - -def TS_VAR(df: pd.DataFrame, window: Window, ddof: int = 1) -> pd.DataFrame: - """滚动方差;window 同 TS_MIN,ddof 默认 1。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).var(ddof=ddof) - ), - dyn_kind="var", - ddof=ddof, - ) - - -def TS_MEDIAN(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动中位数;window 同 TS_MIN。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).median() - ), - dyn_kind="median", - ) - - -def TS_RANK(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动分位秩 pct=True;window 同 TS_MIN。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).rank(pct=True) - ), - dyn_kind="rank_pct", - ) - - -def TS_SKEW(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动偏度;window 同 TS_MIN。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).skew() - ), - dyn_kind="skew", - ) - - -def TS_KURT(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动峰度(pandas adjusted Fisher–Pearson 超额峰度 G2);窗宽同 TS_MIN, - 有效样本数 < 4 输出 NaN,窗口内方差为 0 输出 0。""" - return _ts_agg( - df, - window, - lambda w: _gb_instrument(df).transform( - lambda x: x.rolling(w, min_periods=1).kurt() - ), - dyn_kind="kurt", - ) - - -def TS_QUANTILE(df: pd.DataFrame, window: int, q: float) -> pd.DataFrame: - """滚动 q 分位数(线性插值),等价 pandas ``rolling(w, min_periods=1).quantile(q)``。 - q ∈ [0,1];窗口内全 NaN 输出 NaN。""" - w = max(1, int(window)) - qf = float(q) - - def _quantile_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.roll_quantile_fixed(vals, w, qf) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform( - lambda x: 
_quantile_accelerated(_series_from_group(x)) - ) - return _out_frame(ser, df) - - -def TS_ZSCORE(df: pd.DataFrame, window: Window, ddof: int = 1) -> pd.DataFrame: - """滚动 z-score:``(x - TS_MEAN) / TS_STD``;std=0 时输出 NaN,ddof 默认 1。""" - w = _as_int_window(window) - - def _zscore_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - m = _accel.roll_fixed(vals, w, "mean") - sd = _accel.roll_fixed(vals, w, "std", ddof=ddof) - denom = np.where(sd == 0.0, np.nan, sd) - out = (vals - m) / denom - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform( - lambda x: _zscore_accelerated(_series_from_group(x)) - ) - return _out_frame(ser, df) - - -# ----------------------------------------------------------------------------- -# 事件驱动:TS_SINCE / TS_SINCE_N / TS_STREAK / TS_COUNT / TS_RATE / TS_ANY / TS_ALL / TS_RUNLENGTH_* / TS_CROSS_* / -# TS_MONTH_POS / PRICE_GAP_* / TS_LAST_ARGGAP -# ----------------------------------------------------------------------------- - - -def TS_SINCE(cond: pd.DataFrame) -> pd.DataFrame: - """距上一次 cond 为真的 bar 数;``cond`` 为面板 DataFrame(单列), - 非 NaN 且不等于 0 视为 True;首次事件前输出 NaN,发生当根输出 0。""" - return _ts_unary_accel(cond, _accel.ts_since) - - -def TS_SINCE_N(cond: pd.DataFrame, n: int) -> pd.DataFrame: - """距倒数第 ``n`` 次 cond 为真的 bar 数(``n=1`` 同 ``TS_SINCE``)。 - - 从当前 bar 向历史回溯计数事件:``n=1`` 为最近一次,``n=2`` 为倒数第二次,依此类推。 - 历史上不足 ``n`` 次事件时输出 NaN;落在该次事件当根时输出 0。 - **真** = 有限且 ≠0(与 ``TS_SINCE`` 一致);比较结果请先 ``CAST(..., 'float64')``。 - - 取该次事件当时的字段值:``DELAY(x, TS_SINCE_N(cond, n))``(``DELAY`` 第二参可为动态滞后列)。""" - if not isinstance(cond, pd.DataFrame): - raise TypeError("TS_SINCE_N 第一参数 cond 须为面板 DataFrame") - if cond.shape[1] < 1: - raise ValueError("cond 须至少一列") - n_ev = max(1, int(n)) - return _ts_unary_accel(cond, lambda v: _accel.ts_since_nth(v, n_ev)) - - -def TS_RUNLENGTH_UP(df: pd.DataFrame) -> pd.DataFrame: - """当前连续严格上涨根数(``x[i] > x[i-1]``);中断或 NaN 重置为 0。""" - return _ts_unary_accel(df, lambda v: 
_accel.ts_runlength(v, 1)) - - -def TS_RUNLENGTH_DOWN(df: pd.DataFrame) -> pd.DataFrame: - """当前连续严格下跌根数(``x[i] < x[i-1]``);中断或 NaN 重置为 0。""" - return _ts_unary_accel(df, lambda v: _accel.ts_runlength(v, -1)) - - -def _ts_cond_roll(cond: pd.DataFrame, window: Window, op: int) -> pd.DataFrame: - """事件滚动聚合;``op``:0=count,1=rate,2=any,3=all。真值规则同 ``TS_SINCE``。""" - if not isinstance(cond, pd.DataFrame): - raise TypeError("事件算子第一参数 cond 须为面板 DataFrame") - if cond.shape[1] < 1: - raise ValueError("cond 须至少一列") - if op not in (0, 1, 2, 3): - raise ValueError("内部 op 须为 0=count, 1=rate, 2=any, 3=all") - - if _is_dynamic_window(window): - result = np.full(len(cond), np.nan, dtype=np.float32) - for _, sub in _gb_instrument(cond): - idx = sub.index - wsub = window.reindex(idx) - vals = sub.iloc[:, 0].to_numpy(dtype=float, copy=False) - wvals = _dynamic_window_int_series(wsub, idx) - pos = cond.index.get_indexer(idx) - result[pos] = _accel.ts_event_roll_dyn(vals, wvals, op) - return pd.DataFrame(result, index=cond.index, columns=cond.columns[:1]) - - w = _as_int_window(window) - - def _roll(vals: np.ndarray) -> np.ndarray: - return _accel.ts_event_roll(vals, w, op) - - return _ts_unary_accel(cond, _roll) - - -def TS_COUNT(cond: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动窗内 ``cond`` 为真的 bar 数。 - - **真** = 有限且 ≠0(与 ``TS_SINCE`` 一致);比较结果请先 ``CAST(..., 'float64')``。 - 窗内无有限 ``cond`` 输出 NaN;有有限值但无真值时输出 0。 - ``window`` 为 int 或单列 DataFrame 动态窗宽。""" - return _ts_cond_roll(cond, window, 0) - - -def TS_RATE(cond: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动窗内 ``cond`` 真值占比 ∈ [0, 1]。 - - ``真 bar 数 / 有限 bar 数``;真值规则同 ``TS_SINCE``。窗内无有限 ``cond`` 为 NaN。""" - return _ts_cond_roll(cond, window, 1) - - -def TS_ANY(cond: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动窗内是否存在真 bar:有 → 1,无 → 0。 - - 真值规则同 ``TS_SINCE``;窗内无有限 ``cond`` 为 NaN。""" - return _ts_cond_roll(cond, window, 2) - - -def TS_ALL(cond: pd.DataFrame, window: Window) -> pd.DataFrame: - """滚动窗内是否全部为真:是 → 
1,否 → 0。 - - 真值规则同 ``TS_SINCE``;窗内无有限 ``cond`` 为 NaN。""" - return _ts_cond_roll(cond, window, 3) - - -def TS_STREAK(cond: pd.DataFrame) -> pd.DataFrame: - """当前连续为真的根数(含当根);假或 0 中断为 0,``cond`` 为 NaN 时输出 NaN 并重置。 - - 真值规则同 ``TS_SINCE``;比较结果请先 ``CAST(..., 'float64')``。""" - return _ts_unary_accel(cond, _accel.ts_streak) - - -def _month_progress_values(dt: pd.DatetimeIndex) -> np.ndarray: - """自然月进度 ∈ [0,1]:``(day-1)/(days_in_month-1)``;1 号=0,月末=1。""" - ts = pd.DatetimeIndex(dt) - day = ts.day.to_numpy(dtype=np.float32) - dim = ts.days_in_month.to_numpy(dtype=np.float32) - denom = np.maximum(dim - 1.0, 1.0) - return (day - 1.0) / denom - - -def TS_MONTH_POS(df: pd.DataFrame) -> pd.DataFrame: - """自然月进度 ∈ [0,1]:``(day-1)/(days_in_month-1)``;每月 1 日=0,该月最后一日=1。 - - 闰年 2 月自动按 29 天计。``df`` 仅用于索引对齐(取 ``datetime`` 层)。 - """ - if df.empty: - return pd.DataFrame(index=df.index, columns=df.columns[:1], dtype=np.float32) - dt = df.index.get_level_values("datetime") - vals = _month_progress_values(dt) - return pd.DataFrame(vals, index=df.index, columns=df.columns[:1], dtype=np.float32) - - -PRICE_GAP_DEFAULT_MIN_PCT: float = 0.0 - - -def _instrument_price_gap_state( - high: np.ndarray, - low: np.ndarray, - *, - min_pct: float, -) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """单品种 OHLC:相邻 bar 经典 K 线跳空检测,并向前传播缺口状态。 - - 向上缺口:``low[i] > high[i-1]``(区间 ``[high[i-1], low[i]]``,下沿/上沿)。 - 向下缺口:``high[i] < low[i-1]``(区间 ``[high[i], low[i-1]]``)。 - """ - n = len(high) - size = np.zeros(n, dtype=np.float32) - fill = np.full(n, np.nan, dtype=np.float32) - floor = np.full(n, np.nan, dtype=np.float32) - ceiling = np.full(n, np.nan, dtype=np.float32) - event = np.zeros(n, dtype=np.float32) - bars = np.full(n, np.nan, dtype=np.float32) - - if n == 0: - return size, fill, floor, ceiling, event, bars - - active = False - gap_dir = 0 - gap_floor = np.nan - gap_ceiling = np.nan - gap_height = 0.0 - signed_size = 0.0 - min_low = np.nan - max_high = np.nan - 
bars_count = np.nan - - min_pct_f = max(float(min_pct), 0.0) - - for i in range(n): - event[i] = 0.0 - formed_gap = False - - if i > 0: - prev_hi = high[i - 1] - prev_lo = low[i - 1] - hi_i = high[i] - lo_i = low[i] - - is_up = False - is_down = False - if ( - np.isfinite(prev_hi) - and np.isfinite(prev_lo) - and np.isfinite(hi_i) - and np.isfinite(lo_i) - and prev_hi != 0.0 - and prev_lo != 0.0 - ): - if lo_i > prev_hi: - up_size = (lo_i - prev_hi) / abs(prev_hi) - is_up = up_size >= min_pct_f - if hi_i < prev_lo: - down_size = (prev_lo - hi_i) / abs(prev_lo) - is_down = down_size >= min_pct_f - - if is_up and not is_down: - active = True - gap_dir = 1 - gap_floor = prev_hi - gap_ceiling = lo_i - gap_height = lo_i - prev_hi - signed_size = gap_height / abs(prev_hi) - min_low = lo_i - max_high = np.nan - bars_count = 0.0 - event[i] = 1.0 - formed_gap = True - elif is_down and not is_up: - active = True - gap_dir = -1 - gap_floor = hi_i - gap_ceiling = prev_lo - gap_height = prev_lo - hi_i - signed_size = -gap_height / abs(prev_lo) - max_high = hi_i - min_low = np.nan - bars_count = 0.0 - event[i] = -1.0 - formed_gap = True - elif active: - bars_count += 1.0 - if gap_dir > 0: - if np.isfinite(lo_i): - min_low = lo_i if not np.isfinite(min_low) else min(min_low, lo_i) - if gap_height > 1e-12: - if min_low <= gap_floor: - fill[i] = 1.0 - else: - fill[i] = np.clip( - (gap_ceiling - min_low) / gap_height, 0.0, 1.0 - ) - else: - fill[i] = 1.0 - elif gap_dir < 0: - if np.isfinite(hi_i): - max_high = hi_i if not np.isfinite(max_high) else max(max_high, hi_i) - if gap_height > 1e-12: - if max_high >= gap_ceiling: - fill[i] = 1.0 - else: - fill[i] = np.clip( - (max_high - gap_floor) / gap_height, 0.0, 1.0 - ) - else: - fill[i] = 1.0 - - size[i] = signed_size if active else 0.0 - if active: - floor[i] = gap_floor - ceiling[i] = gap_ceiling - bars[i] = bars_count - if not np.isfinite(fill[i]): - fill[i] = 0.0 if gap_height > 1e-12 else 1.0 - elif not formed_gap: - signed_size 
= 0.0 - bars_count = np.nan - - return size, fill, floor, ceiling, event, bars - - -def _price_gap_output( - open_df: pd.DataFrame, - high_df: pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - field: str, - min_pct: float = PRICE_GAP_DEFAULT_MIN_PCT, -) -> pd.DataFrame: - """OHLC 四列对齐后,按 ``field`` 输出缺口派生序列。""" - template = close_df - if template.empty: - return pd.DataFrame(index=template.index, columns=template.columns[:1], dtype=np.float32) - - for name, other in ( - ("open_df", open_df), - ("high_df", high_df), - ("low_df", low_df), - ): - if other.shape != template.shape or not other.index.equals(template.index): - raise ValueError(f"缺口算子要求 OHLC 四列同形同索引,{name} 不一致") - - valid = {"size", "fill", "floor", "ceiling", "event", "bars"} - if field not in valid: - raise ValueError(f"未知缺口字段: {field!r}") - - result = np.full(len(template), np.nan, dtype=np.float32) - for _, sub_c in _gb_instrument(close_df): - idx = sub_c.index - sub_h = high_df.reindex(idx) - sub_l = low_df.reindex(idx) - sz, fl, flr, clg, ev, br = _instrument_price_gap_state( - sub_h.iloc[:, 0].to_numpy(dtype=float, copy=False), - sub_l.iloc[:, 0].to_numpy(dtype=float, copy=False), - min_pct=float(min_pct), - ) - pick = { - "size": sz, - "fill": fl, - "floor": flr, - "ceiling": clg, - "event": ev, - "bars": br, - }[field] - pos = template.index.get_indexer(idx) - result[pos] = pick - - return pd.DataFrame(result, index=template.index, columns=template.columns[:1]) - - -def PRICE_GAP_SIZE( - open_df: pd.DataFrame, - high_df: pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - min_pct: float = PRICE_GAP_DEFAULT_MIN_PCT, -) -> pd.DataFrame: - """活跃缺口**带符号相对幅度**(向上为正、向下为负);无活跃缺口为 0。 - - 缺口判定:``low[t]>high[t-1]`` 向上;``high[t] pd.DataFrame: - """当前活跃缺口的**下沿价格**;无活跃缺口为 NaN。 - - 向上缺口 = ``high[t-1]``;向下缺口 = 形成当根 ``high[t]``。 - """ - return _price_gap_output(open_df, high_df, low_df, close_df, "floor", min_pct) - - -def PRICE_GAP_CEILING( - open_df: pd.DataFrame, - high_df: 
pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - min_pct: float = PRICE_GAP_DEFAULT_MIN_PCT, -) -> pd.DataFrame: - """当前活跃缺口的**上沿价格**;无活跃缺口为 NaN。 - - 向上缺口 = 形成当根 ``low[t]``;向下缺口 = ``low[t-1]``。 - """ - return _price_gap_output(open_df, high_df, low_df, close_df, "ceiling", min_pct) - - -def PRICE_GAP_FILL( - open_df: pd.DataFrame, - high_df: pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - min_pct: float = PRICE_GAP_DEFAULT_MIN_PCT, -) -> pd.DataFrame: - """当前活跃缺口的**回补比例** ∈ [0,1](0=未回补,1=完全回补);无活跃缺口为 NaN。""" - return _price_gap_output(open_df, high_df, low_df, close_df, "fill", min_pct) - - -def TS_LAST_ARGGAP( - open_df: pd.DataFrame, - high_df: pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - min_pct: float = PRICE_GAP_DEFAULT_MIN_PCT, -) -> pd.DataFrame: - """最近一次缺口形成距今 bar 数(形成当根=0);无活跃缺口为 NaN。 - - 缺口判定同 ``PRICE_GAP_EVENT``;语义同 ``TS_LAST_ARGPEAK`` 类「距今根数」算子。 - 新缺口形成时重置为 0。 - """ - return _price_gap_output(open_df, high_df, low_df, close_df, "bars", min_pct) - - -def TS_ARGGAP( - open_df: pd.DataFrame, - high_df: pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - min_pct: float = PRICE_GAP_DEFAULT_MIN_PCT, -) -> pd.DataFrame: - """[兼容] 同 ``TS_LAST_ARGGAP``。""" - return TS_LAST_ARGGAP(open_df, high_df, low_df, close_df, min_pct) - - -def PRICE_GAP_EVENT( - open_df: pd.DataFrame, - high_df: pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - min_pct: float = PRICE_GAP_DEFAULT_MIN_PCT, -) -> pd.DataFrame: - """缺口**形成当根**事件:+1 向上、-1 向下、0 无缺口;其余 bar 为 0。""" - return _price_gap_output(open_df, high_df, low_df, close_df, "event", min_pct) - - -def TS_CROSS_ABOVE(x: pd.DataFrame, y: TsCrossOperand) -> pd.DataFrame: - """上穿事件:``x[t-1] <= y[t-1] and x[t] > y[t]`` 为 1,否则 0;缺失为 NaN。 - - 第一个参数 ``x`` 必须是面板(pd.DataFrame,时间序列),不能是常数; - 第二个参数 ``y`` 可为与 ``x`` 同索引的单列面板,也可为 Python/NumPy 数值标量 - (按 ``x`` 索引广播,等价于 ``ADD(df, k)`` 的常数语义),常用于"上穿固定阈值", - 例如 ``TS_CROSS_ABOVE(compression_rank, 0.8)``。 - """ - 
return _ts_bivariate_event_accel( - x, - _as_ts_cross_y_panel(x, y), - lambda a, b: _accel.ts_cross(a, b, 1), - ) - - -def TS_CROSS_BELOW(x: pd.DataFrame, y: TsCrossOperand) -> pd.DataFrame: - """下穿事件:``x[t-1] >= y[t-1] and x[t] < y[t]`` 为 1,否则 0;缺失为 NaN。 - - 第一个参数 ``x`` 必须是面板(pd.DataFrame,时间序列),不能是常数; - 第二个参数 ``y`` 可为与 ``x`` 同索引的单列面板,也可为 Python/NumPy 数值标量 - (按 ``x`` 索引广播),常用于"下穿固定阈值", - 例如 ``TS_CROSS_BELOW(compression_rank, 0.2)``。 - """ - return _ts_bivariate_event_accel( - x, - _as_ts_cross_y_panel(x, y), - lambda a, b: _accel.ts_cross(a, b, -1), - ) - - -def _ts_bivariate_fixed( - df1: pd.DataFrame, - df2: pd.DataFrame, - window: int, - kernel, -) -> pd.DataFrame: - """双序列固定窗滚动(corr / cov),按品种分组后调用加速内核。""" - w = max(1, int(window)) - result = np.full(len(df1), np.nan, dtype=np.float32) - - for _, sub1 in _gb_instrument(df1): - idx = sub1.index - sub2 = df2.reindex(idx) - x = sub1.iloc[:, 0].to_numpy(dtype=float, copy=False) - y = sub2.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = df1.index.get_indexer(idx) - result[pos] = kernel(x, y, w) - - return pd.DataFrame(result, index=df1.index, columns=df1.columns[:1]) - - -def TS_CORR(df1: pd.DataFrame, df2: pd.DataFrame, window: int) -> pd.DataFrame: - """滚动 Pearson 相关系数;按品种分组,对 df1、df2 对应列在窗口内计算相关性。 - NaN 对会被跳过;有效对数 < 2 或任一序列方差为零时输出 NaN。""" - return _ts_bivariate_fixed(df1, df2, window, _accel.roll_corr_fixed) - - -def TS_COV(df1: pd.DataFrame, df2: pd.DataFrame, window: int, ddof: int = 1) -> pd.DataFrame: - """滚动协方差;按品种分组,对 df1、df2 对应列在窗口内计算样本协方差(ddof 默认 1)。 - NaN 对会被跳过;有效对数 ≤ ddof 时输出 NaN。""" - return _ts_bivariate_fixed( - df1, df2, window, - lambda x, y, w: _accel.roll_cov_fixed(x, y, w, ddof=ddof), - ) - - -def TS_RANKCORR(df1: pd.DataFrame, df2: pd.DataFrame, window: int) -> pd.DataFrame: - """滚动 Spearman(秩)相关系数:每个窗口内先对 df1、df2 各自取平均秩(等同 - ``rank(method='average')``),再对两组秩计算 Pearson 相关;对异常值鲁棒, - 捕捉单调关系。NaN 对跳过;有效对数 < 2 或任一维秩方差为零输出 NaN。""" - return _ts_bivariate_fixed(df1, df2, window, 
_accel.roll_rankcorr_fixed) - - -def MUTUAL_INFO_LAG( - df_close: pd.DataFrame, - df_volume: pd.DataFrame, - window: int, - lag: int, - *, - n_bins: int = 8, - min_pairs: int | None = None, -) -> pd.DataFrame: - """滚动直方图(秩分箱)估计 Shannon 互信息 I(close(t); volume(t-k)),单位为 nats。 - - **配对规则**:窗口 ``[t-window+1, t]`` 内对每个满足 j≥k 的 j 取 - (close[j], volume[j-k]),k 即 ``lag``(k=0 表示同 bar 的量价)。窗内先在有效样本上 - 对价格、成交量各自做等频秩映射到 ``n_bins`` 档,再对联合频数表计算 MI;可检出 - Pearson≈0 但存在的非线性耦合(如阈值效应)。**有效对数**不足 ``min_pairs`` - (默认 ``max(n_bins+2, 8)``)时为 NaN。""" - w = max(2, int(window)) - lag_i = max(0, int(lag)) - B = int(n_bins) - if B < 2: - raise ValueError("n_bins must be >= 2") - mp = int(min_pairs) if min_pairs is not None else max(B + 2, 8) - - result = np.full(len(df_close), np.nan, dtype=np.float32) - for _, sub_c in _gb_instrument(df_close): - idx = sub_c.index - sub_v = df_volume.reindex(idx) - c_arr = sub_c.iloc[:, 0].to_numpy(dtype=float, copy=False) - v_arr = sub_v.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = df_close.index.get_indexer(idx) - result[pos] = _accel.roll_mutual_info_lag_fixed( - c_arr, v_arr, w, lag_i, n_bins=B, min_pairs=mp - ) - - return pd.DataFrame( - result, index=df_close.index, columns=df_close.columns[:1] - ) - - -def TS_TREND_RANK(df: pd.DataFrame, window: int) -> pd.DataFrame: - """Mann-Kendall 风格秩趋势:每个窗口内对 x 值与"时间位置"做 Spearman 相关, - 得到 ∈ [-1, +1] 的**非参数单调趋势强度**——+1 表示窗口内单调上升、-1 单调下降、 - 0 无单调趋势。比 ``SLOPE`` / ``REGBETA`` 对尖刺、跳空、离群点鲁棒得多,与 - ``TS_EFFICIENCY_RATIO`` 互补(ER 测路径效率、本算子测单调性)。NaN 跳过; - 有效点数 < 2 输出 NaN。""" - w = max(2, int(window)) - result = np.full(len(df), np.nan, dtype=np.float32) - for _, sub in _gb_instrument(df): - idx = sub.index - vals = sub.iloc[:, 0].to_numpy(dtype=float, copy=False) - # 每品种独立的严格单调计数器;在任意窗口内它的平均秩恒为 1..c(c=窗内有效数), - # 因此 Spearman(x, counter) 恰好是 Mann-Kendall 风格的单调趋势度量。 - counter = np.arange(len(vals), dtype=np.float32) - pos = df.index.get_indexer(idx) - result[pos] = _accel.roll_rankcorr_fixed(vals, counter, w) - 
return pd.DataFrame(result, index=df.index, columns=df.columns[:1]) - - -def TS_EFFICIENCY_RATIO(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """Kaufman 效率比 ER∈[0,1];window 可为整数固定窗或单列 DataFrame 动态窗(如峰谷间距)。 - 定义:|窗末-窗首|/Σ|逐 bar 变化|;接近 1=单边趋势,接近 0=震荡;有效值不足或路径为 0 时为 NaN。""" - result = np.full(len(df), np.nan, dtype=np.float32) - - if _is_dynamic_window(window): - for _, sub in _gb_instrument(df): - idx = sub.index - vals = sub.iloc[:, 0].to_numpy(dtype=float, copy=False) - wsub = window.reindex(idx) - w_arr = _dynamic_window_int_series(wsub, idx) - pos = df.index.get_indexer(idx) - result[pos] = _accel.roll_efficiency_ratio_dynamic(vals, w_arr) - return pd.DataFrame(result, index=df.index, columns=df.columns[:1]) - - w = max(2, _as_int_window(window)) - - def _er_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.roll_efficiency_ratio_fixed(vals, w) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform( - lambda x: _er_accelerated(_series_from_group(x)) - ) - return _out_frame(ser, df) - - -def VOLUME_CLOCK_VPIN( - price: pd.DataFrame, - volume: pd.DataFrame, - window: int, - bucket_size: float, - classification: str = "tick", - min_buckets: int = 5, - sigma_window: int = 20, - eps: float = 1e-12, -) -> pd.DataFrame: - """成交量同步 VPIN(Volume-Synchronized Probability of Informed Trading)∈ [0,1]。 - - **成交量时钟**(非时间窗):按固定 ``bucket_size`` 切成交量桶;单 bar 可跨多桶或多 bar - 才满一桶,**不**等价于固定 K 线根数。逐 bar **从前向后**因果累积,无前视。 - - **买卖分类**(第 5 参 ``classification``,禁止 ``name=value``): - - - ``'tick'``(默认):涨→全买、跌→全卖;平盘沿用上一方向(首根 50/50) - - ``'lee_ready'``:涨→全买、跌→全卖;平盘恒 50/50 - - ``'bulk'``:量钟 BVC——``buy = vol·Φ(ΔP/σ_ΔP)``,``sell = vol - buy``;σ_ΔP 为过去 - bar 价格变化的滚动样本标准差(第 7 参 ``sigma_window``,默认 20;``tick``/``lee_ready`` 忽略) - - **每桶**结算 imbalance = ``|Buy-Sell|/(Buy+Sell)``;**t 时刻输出** = 最近 ``n`` 个 - **已满桶** imbalance 的均值(``n = min(已满桶数, window)``,窗口末位为最新完成桶)。 - **进行中的半桶不计入**;已满桶数 < ``min_buckets`` 时为 NaN(默认 
``min_buckets=5``, - 且自动截断为 ``min(min_buckets, window)``)。``window`` 为桶个数上限(非 bar 数)。 - 高≈单边主动/知情成交主导,低≈买卖均衡。price 建议 ``$adj_vwap``,volume 用 ``$volume``。""" - w = max(1, int(window)) - bsize = float(bucket_size) - if bsize <= 0.0: - raise ValueError("bucket_size must be > 0") - _accel.vpin_classification_id(classification) - mb = int(min_buckets) - if mb < 1: - raise ValueError("min_buckets must be >= 1") - sw = int(sigma_window) - if sw < 1: - raise ValueError("sigma_window must be >= 1") - eps_f = float(eps) - - result = np.full(len(price), np.nan, dtype=np.float32) - for _, sub_p in _gb_instrument(price): - idx = sub_p.index - sub_v = volume.reindex(idx) - p_arr = sub_p.iloc[:, 0].to_numpy(dtype=float, copy=False) - v_arr = sub_v.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = price.index.get_indexer(idx) - result[pos] = _accel.volume_clock_vpin_fixed( - p_arr, - v_arr, - w, - bsize, - classification, - min_buckets=mb, - eps=eps_f, - ) - - return pd.DataFrame(result, index=price.index, columns=price.columns[:1]) - - -def WICK_EFFICIENCY( - open_df: pd.DataFrame, - high_df: pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - lag: int, - *, - eps: float = 1e-12, -) -> pd.DataFrame: - """影线能量效率:当前柱上影线与 k 根前下影线的交叉耦合,按实体尺度归一化。 - - :: - UP_WICK(t) = high - max(open, close) - DN_WICK(t) = min(open, close) - low - BODY(t) = |close - open| - OUT(t) = UP_WICK(t) * DN_WICK(t-k) / (BODY(t) * BODY(t-k) + eps) - - 若前期下影线与当期上影线在区间边界上呈现对称吸收语义,比值往往偏大。**时间交叉乘法** - 结构无法由单序列 ``DELTA`` 单独生成。t < k 或任一侧 OHLC 非有限为 NaN。``lag`` ≥ 1。 - """ - k = int(lag) - if k < 1: - raise ValueError("lag must be >= 1") - - eps_f = float(eps) - idx0 = open_df.index - if not ( - high_df.index.equals(idx0) - and low_df.index.equals(idx0) - and close_df.index.equals(idx0) - ): - raise ValueError("WICK_EFFICIENCY: open/high/low/close panels must share the same index") - - result = np.full(len(open_df), np.nan, dtype=np.float32) - - for _, sub_o in _gb_instrument(open_df): - idx = 
sub_o.index - sub_h = high_df.reindex(idx) - sub_l = low_df.reindex(idx) - sub_c = close_df.reindex(idx) - o_arr = sub_o.iloc[:, 0].to_numpy(dtype=float, copy=False) - h_arr = sub_h.iloc[:, 0].to_numpy(dtype=float, copy=False) - l_arr = sub_l.iloc[:, 0].to_numpy(dtype=float, copy=False) - c_arr = sub_c.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = open_df.index.get_indexer(idx) - result[pos] = _accel.wick_efficiency_fixed(o_arr, h_arr, l_arr, c_arr, k, eps=eps_f) - - return pd.DataFrame(result, index=open_df.index, columns=open_df.columns[:1]) - - -def KLINE_GEOMETRY( - open_df: pd.DataFrame, - high_df: pd.DataFrame, - low_df: pd.DataFrame, - close_df: pd.DataFrame, - window: Window, - *, - eps: float = 1e-15, -) -> pd.DataFrame: - """窗口内将每根 K 线视为 ``(o,h,l,c)∈R^4``,堆成 ``X∈R^{k×4}`` 做 SVD,输出 ``σ₂/σ₁``。 - - 刻画形态从近似一维(比值 → 0)到多方向分散/震荡(比值 → 1)。``window`` 须 ≥ 2; - 可为固定整数或单列动态窗。窗内任一路 OHLC 非有限则该点为 NaN。""" - idx0 = open_df.index - if not ( - high_df.index.equals(idx0) - and low_df.index.equals(idx0) - and close_df.index.equals(idx0) - ): - raise ValueError("KLINE_GEOMETRY: open/high/low/close panels must share the same index") - - eps_f = float(eps) - result = np.full(len(open_df), np.nan, dtype=np.float32) - - if _is_dynamic_window(window): - for _, sub_o in _gb_instrument(open_df): - idx = sub_o.index - sub_h = high_df.reindex(idx) - sub_l = low_df.reindex(idx) - sub_c = close_df.reindex(idx) - o_arr = sub_o.iloc[:, 0].to_numpy(dtype=float, copy=False) - h_arr = sub_h.iloc[:, 0].to_numpy(dtype=float, copy=False) - l_arr = sub_l.iloc[:, 0].to_numpy(dtype=float, copy=False) - c_arr = sub_c.iloc[:, 0].to_numpy(dtype=float, copy=False) - wsub = window.reindex(idx) - w_arr = _dynamic_window_int_series(wsub, idx) - pos = open_df.index.get_indexer(idx) - result[pos] = _accel.roll_kline_geometry( - o_arr, h_arr, l_arr, c_arr, w_arr, eps=eps_f - ) - else: - w = _as_int_window(window) - if w < 2: - raise ValueError("KLINE_GEOMETRY: window must be >= 2") - for _, sub_o in 
_gb_instrument(open_df): - idx = sub_o.index - sub_h = high_df.reindex(idx) - sub_l = low_df.reindex(idx) - sub_c = close_df.reindex(idx) - o_arr = sub_o.iloc[:, 0].to_numpy(dtype=float, copy=False) - h_arr = sub_h.iloc[:, 0].to_numpy(dtype=float, copy=False) - l_arr = sub_l.iloc[:, 0].to_numpy(dtype=float, copy=False) - c_arr = sub_c.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = open_df.index.get_indexer(idx) - result[pos] = _accel.roll_kline_geometry_fixed( - o_arr, h_arr, l_arr, c_arr, w, eps=eps_f - ) - - return pd.DataFrame(result, index=open_df.index, columns=open_df.columns[:1]) - - -def TS_PERMUTATION_ENTROPY( - df: pd.DataFrame, window: int, order: int = 3 -) -> pd.DataFrame: - """Bandt-Pompe 排列熵:窗口内所有长度 ``order`` 的子序列按"序数模式"计数并做 - Shannon 熵,再除以 ``log(order!)`` 归一化到 [0, 1]。接近 1 表示序列无规律(白噪 - 声),接近 0 表示高度可预测(单调或周期)。``order`` 典型 3–5;``window`` 建议 - ≥ ``order!`` 以覆盖各种模式。窗口内无可用子序列输出 NaN。""" - w = max(2, int(window)) - m = int(order) - if m < 2 or m > 7: - raise ValueError("order must be in [2, 7]") - - def _pe_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.roll_permutation_entropy_fixed(vals, w, m) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform( - lambda x: _pe_accelerated(_series_from_group(x)) - ) - return _out_frame(ser, df) - - -def _chip_metric_daily( - close_df: pd.DataFrame, - low_df: pd.DataFrame, - high_df: pd.DataFrame, - volume_df: pd.DataFrame, - window: int, - float_cap_df: pd.DataFrame, - nbins: int, - method: str, - op: str, -) -> pd.DataFrame: - """日频筹码指标(默认 cyq + float_cap)。""" - w = max(1, int(window)) - nb = int(nbins) - if nb < 2: - raise ValueError("nbins must be >= 2") - _chip_daily.chip_method_id(method) - - result = np.full(len(close_df), np.nan, dtype=np.float32) - for _, sub_c in _gb_instrument(close_df): - idx = sub_c.index - sub_l = low_df.reindex(idx) - sub_h = high_df.reindex(idx) - sub_v = volume_df.reindex(idx) - sub_a = 
float_cap_df.reindex(idx) - close = sub_c.iloc[:, 0].to_numpy(dtype=float, copy=False) - low = sub_l.iloc[:, 0].to_numpy(dtype=float, copy=False) - high = sub_h.iloc[:, 0].to_numpy(dtype=float, copy=False) - vol = sub_v.iloc[:, 0].to_numpy(dtype=float, copy=False) - aux = sub_a.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = close_df.index.get_indexer(idx) - result[pos] = _accel.roll_chip_metric_fixed( - close, vol, low, high, aux, w, nb, op, method - ) - - return pd.DataFrame( - result, index=close_df.index, columns=close_df.columns[:1] - ) - - -def _chip_roll_daily( - close_df: pd.DataFrame, - low_df: pd.DataFrame, - high_df: pd.DataFrame, - volume_df: pd.DataFrame, - window: int, - float_cap_df: pd.DataFrame, - nbins: int, - method: str, - kernel, -) -> pd.DataFrame: - """按品种调用日频筹码 kernel(close, low, high, vol, aux, window, nbins)。""" - w = max(1, int(window)) - nb = int(nbins) - if nb < 2: - raise ValueError("nbins must be >= 2") - _chip_daily.chip_method_id(method) - - result = np.full(len(close_df), np.nan, dtype=np.float32) - for _, sub_c in _gb_instrument(close_df): - idx = sub_c.index - sub_l = low_df.reindex(idx) - sub_h = high_df.reindex(idx) - sub_v = volume_df.reindex(idx) - sub_a = float_cap_df.reindex(idx) - close = sub_c.iloc[:, 0].to_numpy(dtype=float, copy=False) - low = sub_l.iloc[:, 0].to_numpy(dtype=float, copy=False) - high = sub_h.iloc[:, 0].to_numpy(dtype=float, copy=False) - vol = sub_v.iloc[:, 0].to_numpy(dtype=float, copy=False) - aux = sub_a.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = close_df.index.get_indexer(idx) - result[pos] = kernel(close, low, high, vol, aux, w, nb) - - return pd.DataFrame( - result, index=close_df.index, columns=close_df.columns[:1] - ) - - -def CHIP_PEAK_LOC( - close: pd.DataFrame, - low: pd.DataFrame, - high: pd.DataFrame, - volume: pd.DataFrame, - window: int, - float_cap: pd.DataFrame, - nbins: int = 64, - method: str = "cyq", -) -> pd.DataFrame: - """筹码主峰价相对现价的偏离:``(p* - P) / P``(日频 CYQ,默认)。 - 
**DSL 位置参数**:``close, low, high, volume, window, float_cap, [nbins], [method]``。 - 默认 ``method='cyq'``、``nbins=64``;``tri`` 时第 6 参改传 ``$vwap``。""" - return _chip_metric_daily( - close, low, high, volume, window, float_cap, nbins, method, "peak_loc" - ) - - -def CHIP_ENTROPY( - close: pd.DataFrame, - low: pd.DataFrame, - high: pd.DataFrame, - volume: pd.DataFrame, - window: int, - float_cap: pd.DataFrame, - nbins: int = 64, - method: str = "cyq", -) -> pd.DataFrame: - """筹码密度归一化 Shannon 熵 ``H / log(nbins)`` ∈ [0, 1](日频,默认 cyq)。 - **DSL 位置参数**:``close, low, high, volume, window, float_cap, [nbins], [method]``。 - 推荐窗口 20~120 交易日。""" - return _chip_metric_daily( - close, low, high, volume, window, float_cap, nbins, method, "entropy" - ) - - -def CHIP_COM_W_GAP( - close: pd.DataFrame, - low: pd.DataFrame, - high: pd.DataFrame, - volume: pd.DataFrame, - window: int, - float_cap: pd.DataFrame, - nbins: int = 64, - method: str = "cyq", -) -> pd.DataFrame: - """筹码重心相对现价的偏离:``(bar_p - P) / P``(日频,默认 cyq)。 - **DSL 位置参数**:``close, low, high, volume, window, float_cap, [nbins], [method]``。""" - return _chip_metric_daily( - close, low, high, volume, window, float_cap, nbins, method, "com_w_gap" - ) - - -def CHIP_MASS_ASYM( - close: pd.DataFrame, - low: pd.DataFrame, - high: pd.DataFrame, - volume: pd.DataFrame, - window: int, - float_cap: pd.DataFrame, - nbins: int = 64, - method: str = "cyq", -) -> pd.DataFrame: - """以现价为界的上下筹码质量不对称度 ``M_below - M_above`` ∈ [-1, 1](日频,默认 cyq)。 - **DSL 位置参数**:``close, low, high, volume, window, float_cap, [nbins], [method]``。""" - return _chip_metric_daily( - close, low, high, volume, window, float_cap, nbins, method, "mass_asym" - ) - - -def CHIP_PEAK_SHARPNESS( - close: pd.DataFrame, - low: pd.DataFrame, - high: pd.DataFrame, - volume: pd.DataFrame, - window: int, - float_cap: pd.DataFrame, - nbins: int = 64, - implementation: str = "curvature", - method: str = "cyq", -) -> pd.DataFrame: - """主峰尖锐度(日频,默认 cyq):``curvature`` / ``fwhm`` / 
``combined``。 - **DSL 位置参数**:``close, low, high, volume, window, float_cap, [nbins], [implementation], [method]``。""" - _accel.chip_peak_sharpness_impl_id(implementation) - return _chip_roll_daily( - close, - low, - high, - volume, - window, - float_cap, - nbins, - method, - lambda c, l, h, v, a, w, nb: _accel.roll_chip_peak_sharpness_fixed( - c, v, l, h, a, w, nb, implementation, method - ), - ) - - -def CHIP_BIMODAL_SCORE( - close: pd.DataFrame, - low: pd.DataFrame, - high: pd.DataFrame, - volume: pd.DataFrame, - window: int, - float_cap: pd.DataFrame, - nbins: int = 64, - implementation: str = "simple", - lambda_scale: float = 1.0, - method: str = "cyq", -) -> pd.DataFrame: - """双峰结构得分(日频,默认 cyq):``simple`` 或 ``dip``。 - **DSL 位置参数**:``close, low, high, volume, window, float_cap, [nbins], [implementation], [lambda_scale], [method]``。""" - _accel.chip_bimodal_impl_id(implementation) - return _chip_roll_daily( - close, - low, - high, - volume, - window, - float_cap, - nbins, - method, - lambda c, l, h, v, a, w, nb: _accel.roll_chip_bimodal_fixed( - c, v, l, h, a, w, nb, implementation, method, lambda_scale=lambda_scale - ), - ) - - -def CHIP_WASS_DIST( - close: pd.DataFrame, - low: pd.DataFrame, - high: pd.DataFrame, - volume: pd.DataFrame, - window: Window, - float_cap: pd.DataFrame, - nbins: int = 64, - lag: Window = 0, - implementation: str = "moment", - method: str = "cyq", -) -> pd.DataFrame: - """当前窗与参照窗筹码直方图漂移(日频 CYQ,ρ=``lag``)。 - **DSL 位置参数**:``close, low, high, volume, window, float_cap, [nbins], [lag], [implementation], [method]``。""" - nb = int(nbins) - if nb < 2: - raise ValueError("nbins must be >= 2") - _accel.chip_wass_implementation_id(implementation) - _chip_daily.chip_method_id(method) - - result = np.full(len(close), np.nan, dtype=np.float32) - for _, sub_c in _gb_instrument(close): - idx = sub_c.index - sub_l = low.reindex(idx) - sub_h = high.reindex(idx) - sub_v = volume.reindex(idx) - sub_a = float_cap.reindex(idx) - close_arr = sub_c.iloc[:, 
0].to_numpy(dtype=float, copy=False) - low_arr = sub_l.iloc[:, 0].to_numpy(dtype=float, copy=False) - high_arr = sub_h.iloc[:, 0].to_numpy(dtype=float, copy=False) - vol_arr = sub_v.iloc[:, 0].to_numpy(dtype=float, copy=False) - aux_arr = sub_a.iloc[:, 0].to_numpy(dtype=float, copy=False) - w_arr = _chip_wass_win_series(window, idx) - rho_arr = _chip_wass_rho_series(lag, idx) - pos = close.index.get_indexer(idx) - result[pos] = _accel.roll_chip_wass_dist( - close_arr, - vol_arr, - low_arr, - high_arr, - aux_arr, - w_arr, - w_arr, - rho_arr, - nb, - implementation, - method, - ) - - return pd.DataFrame( - result, index=close.index, columns=close.columns[:1] - ) - - -def _ts_arg_extreme(df: pd.DataFrame, window: int, want_max: bool) -> pd.DataFrame: - """窗口内极值距今的 bar 数(0 表示当前 bar 即为极值)。固定窗走 Numba,避免 ``rolling.apply`` 全表 Python 回调。""" - return arg_extreme_fixed(df, window, want_max) - - -def _ts_arg_local_extreme_last(df: pd.DataFrame, half_window: int, want_max: bool) -> pd.DataFrame: - """最近一次已确认中心局部峰/谷距今 bar 数。 - - 某位置 ``j`` 只有在其前后各 ``half_window`` 根都齐备时,才会被判定为局部峰/谷; - 因此输出天然带 ``half_window`` 根确认延迟,可避免把未来 ``close`` 偷看进当前时点。 - """ - hw = max(1, int(half_window)) - - def _arg_local_extreme_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.arg_local_extreme(vals, hw, want_max=want_max) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform( - lambda x: _arg_local_extreme_accelerated(_series_from_group(x)) - ) - return _out_frame(ser, df) - - -def _ts_local_extreme_value_last( - df: pd.DataFrame, half_window: int, want_max: bool -) -> pd.DataFrame: - """最近一次已确认中心局部峰/谷的价格值。""" - hw = max(1, int(half_window)) - - def _local_extreme_value_accelerated(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.local_extreme_value(vals, hw, want_max=want_max) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform( - lambda x: 
_local_extreme_value_accelerated(_series_from_group(x)) - ) - return _out_frame(ser, df) - - -def _ts_maxamp_arg_local( - df: pd.DataFrame, half_window: int, want_max: bool -) -> pd.DataFrame: - """在已确认峰/谷中,选左右「峰到谷/谷到峰」价宽之和最大者,输出距今 bar 数。""" - hw = max(1, int(half_window)) - col = df.columns[0] - - def _f(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.maxamp_arg_local_extreme(vals, hw, want_max=want_max) - return pd.Series(out, index=s.index) - - ser = df[col].groupby(level="instrument", sort=False).transform(_f) - return _out_frame(ser, df) - - -def _ts_maxamp_value_local( - df: pd.DataFrame, half_window: int, want_max: bool -) -> pd.DataFrame: - """在已确认峰/谷中,选左右价宽和最大者,输出该极值价格。""" - hw = max(1, int(half_window)) - col = df.columns[0] - - def _f(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.maxamp_local_extreme_value(vals, hw, want_max=want_max) - return pd.Series(out, index=s.index) - - ser = df[col].groupby(level="instrument", sort=False).transform(_f) - return _out_frame(ser, df) - - -def _ts_arg_extreme_dynamic( - df: pd.DataFrame, win_df: pd.DataFrame, want_max: bool -) -> pd.DataFrame: - """与 _ts_arg_extreme 相同语义,窗口宽度为每行可变的正整数(来自另一列 DataFrame)。""" - return arg_extreme_dynamic(df, win_df, _dynamic_window_int_series, want_max) - - -def TS_ARGMAX(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """极大值距今 bar 数;window 同 TS_MIN。""" - if _is_dynamic_window(window): - return _ts_arg_extreme_dynamic(df, window, True) - return _ts_arg_extreme(df, _as_int_window(window), True) - - -def TS_ARGMIN(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """极小值距今 bar 数;window 同 TS_MIN。""" - if _is_dynamic_window(window): - return _ts_arg_extreme_dynamic(df, window, False) - return _ts_arg_extreme(df, _as_int_window(window), False) - - -def TS_ARGMEDIAN(df: pd.DataFrame, window: Window) -> pd.DataFrame: - """窗口内最接近中位数的 bar 距今数;window 同 TS_MIN。 - 若有多个值与中位数距离相同,取距当前 bar 最近(索引最大)的。""" - W = 
_as_int_window(window) - - def _f(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.arg_median_fixed(vals, W) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform(lambda x: _f(_series_from_group(x))) - return _out_frame(ser, df) - - -def TS_ARGNTH(df: pd.DataFrame, window: Window, n: int, ascending: bool = False, unique: bool = False) -> pd.DataFrame: - """窗口内第 n 大 (ascending=False) 或第 n 小 (ascending=True) 的 bar 距今数;window 同 TS_MIN。 - n >= 1;有效值不足 n 个时输出 NaN。 - - ascending: 排序方向,False=降序(大的在前找第n大),True=升序(小的在前找第n小) - - unique=False (默认): 有重复值时取距当前 bar 最近的 - - unique=True: 跳过重复值,找严格第 n 个不同的值""" - W = _as_int_window(window) - n = max(1, int(n)) - - def _f(s: pd.Series) -> pd.Series: - vals = s.to_numpy(dtype=float, copy=False) - out = _accel.arg_nth_fixed(vals, W, n, ascending=bool(ascending), unique=bool(unique)) - return pd.Series(out, index=s.index) - - ser = _gb_instrument(df).transform(lambda x: _f(_series_from_group(x))) - return _out_frame(ser, df) - - -def TS_LAST_ARGPEAK(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """最近一次已确认局部波峰距今 bar 数。 - - 波峰判定规则:某位置为中心、左右各 ``confirm_window`` 根组成的窗口内, - 该中心必须是最高价;只有当右侧 ``confirm_window`` 根都到齐后,该峰才被确认, - 因此输出天然带 ``confirm_window`` 根延迟。 - """ - return _ts_arg_local_extreme_last(df, int(confirm_window), True) - - -def TS_LAST_ARGTROUGH(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """最近一次已确认局部波谷距今 bar 数。 - - 波谷判定规则:某位置为中心、左右各 ``confirm_window`` 根组成的窗口内, - 该中心必须是最低价;只有当右侧 ``confirm_window`` 根都到齐后,该谷才被确认, - 因此输出天然带 ``confirm_window`` 根延迟。 - """ - return _ts_arg_local_extreme_last(df, int(confirm_window), False) - - -def TS_LAST_PEAK(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """最近一次已确认局部波峰的价格。 - - 波峰判定规则同 ``TS_LAST_ARGPEAK``;输出为该峰价格而非距离。 - """ - return _ts_local_extreme_value_last(df, int(confirm_window), True) - - -def TS_LAST_TROUGH(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - 
"""最近一次已确认局部波谷的价格。 - - 波谷判定规则同 ``TS_LAST_ARGTROUGH``;输出为该谷价格而非距离。 - """ - return _ts_local_extreme_value_last(df, int(confirm_window), False) - - -def TS_AMPARGPEAK(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """已确认波峰中,左右价宽之和最大者距今 bar 数;确认规则同 ``TS_LAST_ARGPEAK``。""" - return _ts_maxamp_arg_local(df, int(confirm_window), True) - - -def TS_AMPARGTROUGH(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """已确认波谷中,左右价宽之和最大者距今 bar 数;确认规则同 ``TS_LAST_ARGTROUGH``。""" - return _ts_maxamp_arg_local(df, int(confirm_window), False) - - -def TS_AMPPEAK(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """已确认波峰中,选左右价宽和最大者,输出该峰价格。""" - return _ts_maxamp_value_local(df, int(confirm_window), True) - - -def TS_AMPTROUGH(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """已确认波谷中,选左右价宽和最大者,输出该谷价格。""" - return _ts_maxamp_value_local(df, int(confirm_window), False) - - -def _ts_chan_fractal_3bar_bivariate( - df_high: pd.DataFrame, - df_low: pd.DataFrame, - *, - want_top: bool, - want_arg: bool, -) -> pd.DataFrame: - """三 K 线顶/底分型:``df_high`` / ``df_low`` 为同索引单列(例如 ``$adj_high@60m`` 与 ``$adj_low@60m``)。""" - if ( - df_high.shape != df_low.shape - or not df_high.index.equals(df_low.index) - or df_high.shape[1] != 1 - or df_low.shape[1] != 1 - ): - raise ValueError( - "分型算子要求 df_high 与 df_low 为同索引、同形的单列面板" - ) - result = np.full(len(df_high), np.nan, dtype=np.float32) - for _, sub_h in _gb_instrument(df_high): - idx = sub_h.index - sub_l = df_low.reindex(idx) - h = sub_h.iloc[:, 0].to_numpy(dtype=float, copy=False) - lo = sub_l.iloc[:, 0].to_numpy(dtype=float, copy=False) - out = _accel.fractal_chan_3bar_last( - h, lo, want_top_fractal=want_top, want_arg=want_arg - ) - pos = df_high.index.get_indexer(idx) - result[pos] = out - return pd.DataFrame(result, index=df_high.index, columns=df_high.columns[:1]) - - -def TS_LAST_ARGBOTTOMFRACTAL(df_high: pd.DataFrame, df_low: pd.DataFrame) -> pd.DataFrame: - """三 K 线底分型(严格不等):连续三根编号 1→3 
由旧到新须满足 ``h1>h2l2 pd.DataFrame: - """底分型确认规则同 ``TS_LAST_ARGBOTTOMFRACTAL``;输出为分型中心 K 的最低价。""" - return _ts_chan_fractal_3bar_bivariate( - df_high, df_low, want_top=False, want_arg=False - ) - - -def TS_LAST_ARGTOPFRACTAL(df_high: pd.DataFrame, df_low: pd.DataFrame) -> pd.DataFrame: - """三 K 线顶分型(严格不等):须满足 ``h1

h3`` 且 ``l1l3``;第三根收盘后确认,中心为第 2 根。""" - return _ts_chan_fractal_3bar_bivariate(df_high, df_low, want_top=True, want_arg=True) - - -def TS_LAST_TOPFRACTAL(df_high: pd.DataFrame, df_low: pd.DataFrame) -> pd.DataFrame: - """顶分型确认规则同 ``TS_LAST_ARGTOPFRACTAL``;输出为分型中心 K 的最高价。""" - return _ts_chan_fractal_3bar_bivariate(df_high, df_low, want_top=True, want_arg=False) - - -# 兼容旧名 -def TS_ARGPEAK(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """[兼容] 同 ``TS_LAST_ARGPEAK``。""" - return TS_LAST_ARGPEAK(df, confirm_window) - - -def TS_ARGTROUGH(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """[兼容] 同 ``TS_LAST_ARGTROUGH``。""" - return TS_LAST_ARGTROUGH(df, confirm_window) - - -def TS_PEAK(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """[兼容] 同 ``TS_LAST_PEAK``。""" - return TS_LAST_PEAK(df, confirm_window) - - -def TS_TROUGH(df: pd.DataFrame, confirm_window: int = 10) -> pd.DataFrame: - """[兼容] 同 ``TS_LAST_TROUGH``。""" - return TS_LAST_TROUGH(df, confirm_window) - - -# ----------------------------------------------------------------------------- -# 截面算子(per datetime) -# ----------------------------------------------------------------------------- - -_CS_MIN_PAIRS: int = 2 - - -def _validate_cs_panel(df: pd.DataFrame, *, name: str) -> None: - if not isinstance(df, pd.DataFrame): - raise TypeError(f"{name} 第一参数须为面板 DataFrame") - if df.shape[1] < 1: - raise ValueError(f"{name} 须至少一列") - if not isinstance(df.index, pd.MultiIndex): - raise ValueError(f"{name} 需要 MultiIndex 面板 (datetime, instrument)") - if "datetime" not in df.index.names: - raise ValueError(f"{name} 索引须含 datetime 层") - - -def RANK(df: pd.DataFrame) -> pd.DataFrame: - """每个 **datetime 截面**内的百分位秩 ∈ [0, 1](``rank(pct=True, method='average')``)。 - - 与 ``TS_RANK``(单 instrument 窗口内时序秩)不同。NaN 不参与排序;截面无有效值时为 NaN。""" - _validate_cs_panel(df, name="RANK") - - def _rank_cs(s: pd.Series) -> pd.Series: - finite = s.notna() - if not finite.any(): - return pd.Series(np.nan, 
index=s.index, dtype=np.float32) - out = pd.Series(np.nan, index=s.index, dtype=np.float32) - out.loc[finite] = s.loc[finite].rank(pct=True, method="average").astype(np.float32) - return out - - return _per_datetime_transform(df, _rank_cs) - - -def CS_ZSCORE(df: pd.DataFrame, ddof: int = 1) -> pd.DataFrame: - """截面标准化:``(x - mean) / std``(按 datetime 分组)。 - - 有效样本 < 2 或 std=0 时该截面输出 NaN;输入 NaN 保持 NaN。""" - _validate_cs_panel(df, name="CS_ZSCORE") - d = int(ddof) - - def _zscore_cs(s: pd.Series) -> pd.Series: - finite = s.notna() - n = int(finite.sum()) - if n < _CS_MIN_PAIRS: - return pd.Series(np.nan, index=s.index, dtype=np.float32) - vals = s.loc[finite].to_numpy(dtype=float, copy=False) - mu = float(np.mean(vals)) - std = float(np.std(vals, ddof=d)) - if not np.isfinite(std) or std == 0.0: - return pd.Series(np.nan, index=s.index, dtype=np.float32) - out = pd.Series(np.nan, index=s.index, dtype=np.float32) - out.loc[finite] = ((s.loc[finite] - mu) / std).astype(np.float32) - return out - - return _per_datetime_transform(df, _zscore_cs) - - -def CS_DEMEAN(df: pd.DataFrame) -> pd.DataFrame: - """截面去均值:``x - mean``(按 datetime 分组)。有效样本 < 1 时该截面为 NaN。""" - _validate_cs_panel(df, name="CS_DEMEAN") - - def _demean_cs(s: pd.Series) -> pd.Series: - finite = s.notna() - if not finite.any(): - return pd.Series(np.nan, index=s.index, dtype=np.float32) - mu = float(s.loc[finite].mean()) - out = pd.Series(np.nan, index=s.index, dtype=np.float32) - out.loc[finite] = (s.loc[finite] - mu).astype(np.float32) - return out - - return _per_datetime_transform(df, _demean_cs) - - -def CS_WINSORIZE( - df: pd.DataFrame, - lower_pct: float, - upper_pct: float, -) -> pd.DataFrame: - """截面分位裁剪:将每个 datetime 截面内的值限制在 ``[lower_pct, upper_pct]`` 分位之间。 - - 参数为 [0, 1] 分位(如 ``0.01, 0.99``)。截面无有效值时为 NaN。""" - _validate_cs_panel(df, name="CS_WINSORIZE") - lo = float(lower_pct) - hi = float(upper_pct) - if not (0.0 <= lo < hi <= 1.0): - raise ValueError("CS_WINSORIZE 要求 0 <= lower_pct < upper_pct <= 
1") - - def _winsor_cs(s: pd.Series) -> pd.Series: - finite = s.notna() - if not finite.any(): - return pd.Series(np.nan, index=s.index, dtype=np.float32) - valid = s.loc[finite] - q_lo = float(valid.quantile(lo)) - q_hi = float(valid.quantile(hi)) - out = pd.Series(np.nan, index=s.index, dtype=np.float32) - clipped = valid.clip(lower=q_lo, upper=q_hi).astype(np.float32) - out.loc[finite] = clipped - return out - - return _per_datetime_transform(df, _winsor_cs) - - -def CS_BUCKET(df: pd.DataFrame, n_bins: int) -> pd.DataFrame: - """截面等频分 N 组:每个 datetime 内对变量做 ``qcut``,输出组号 ``0…K-1``(``K≤N``)。 - - 可与 ``CS_NEUTRALIZE`` 配合做组内去均值,例如 - ``CS_NEUTRALIZE(raw, CS_BUCKET(LOG($float_cap), 10))``。 - 有效样本数 < ``n_bins`` 时该截面为 NaN;输入 NaN 保持 NaN。""" - _validate_cs_panel(df, name="CS_BUCKET") - n = int(n_bins) - if n < 2: - raise ValueError("CS_BUCKET 要求 n_bins >= 2") - - def _bucket_cs(s: pd.Series) -> pd.Series: - finite = s.notna() - if not finite.any(): - return pd.Series(np.nan, index=s.index, dtype=np.float32) - if int(finite.sum()) < n: - return pd.Series(np.nan, index=s.index, dtype=np.float32) - valid = s.loc[finite] - try: - codes = pd.qcut(valid, n, labels=False, duplicates="drop") - except ValueError: - codes = pd.qcut(valid.rank(method="first"), n, labels=False, duplicates="drop") - out = pd.Series(np.nan, index=s.index, dtype=np.float32) - out.loc[finite] = codes.astype(np.float32) - return out - - return _per_datetime_transform(df, _bucket_cs) - - -def CS_NEUTRALIZE(x: pd.DataFrame, group: pd.DataFrame) -> pd.DataFrame: - """截面组内去均值:每个 datetime 内按 ``group`` 分组,输出 ``x - group_mean(x)``。 - - 组内仅 1 个有效值时输出 **0**(已中性化);输入 NaN 保持 NaN。 - ``group`` 须为离散组号,可用 ``CS_BUCKET(var, N)`` 构造,例如 - ``CS_NEUTRALIZE(raw, CS_BUCKET(LOG($float_cap), 10))``。""" - _validate_cs_panel(x, name="CS_NEUTRALIZE") - _validate_cs_panel(group, name="CS_NEUTRALIZE") - if not x.index.equals(group.index): - raise ValueError("CS_NEUTRALIZE: x 与 group 须同索引") - - xs = 
_first_series(x).to_numpy(dtype=float, copy=False) - gs = _first_series(group).to_numpy(dtype=float, copy=False) - result = np.full(len(x), np.nan, dtype=np.float32) - - for _, sub in x.groupby(level="datetime", sort=False): - pos = x.index.get_indexer(sub.index) - x_day = xs[pos] - g_day = gs[pos] - uniq = np.unique(g_day[np.isfinite(g_day)]) - for g_val in uniq: - x_mask = np.isfinite(g_day) & (g_day == g_val) & np.isfinite(x_day) - n = int(x_mask.sum()) - if n == 0: - continue - if n == 1: - result[pos[x_mask]] = 0.0 - continue - mu = float(np.mean(x_day[x_mask])) - result[pos[x_mask]] = (x_day[x_mask] - mu).astype(np.float32) - - return pd.DataFrame(result, index=x.index, columns=x.columns[:1]) - - -# ----------------------------------------------------------------------------- -# 二元与逻辑 -# ----------------------------------------------------------------------------- - - -def ADD(df1, df2): - """逐元素加;双面板单列首列对齐。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.add) - return np.add(df1, df2) - - -def SUBTRACT(df1, df2): - """逐元素减;规则同 ADD。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.subtract) - return np.subtract(df1, df2) - - -def MULTIPLY(df1, df2): - """逐元素乘;规则同 ADD。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.multiply) - return np.multiply(df1, df2) - - -def DIVIDE(df1, df2): - """逐元素除;规则同 ADD。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.divide) - return np.divide(df1, df2) - - -def MAXIMUM(x: object, y: object, z: Optional[Any] = None) -> pd.DataFrame: - """两/三列逐元素 max(非 TS_MAX);任一参可为面板同索引广播用的数值标量。""" - if z is None: - return _binary_op_panel_mixed(x, y, np.maximum) - return MAXIMUM(MAXIMUM(x, y), z) - - -def MINIMUM(x: object, y: object, z: Optional[Any] = None) -> pd.DataFrame: - 
"""两/三列逐元素 min;同 MAXIMUM,支持对标量边界(如 clip)。""" - if z is None: - return _binary_op_panel_mixed(x, y, np.minimum) - return MINIMUM(MINIMUM(x, y), z) - - -def LT(df1, df2): - """双面板逐元素 ``<``;列名可不同,规则同 ``ADD``。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.less) - return np.less(df1, df2) - - -def GT(df1, df2): - """双面板逐元素 ``>``。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.greater) - return np.greater(df1, df2) - - -def LE(df1, df2): - """双面板逐元素 ``<=``。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.less_equal) - return np.less_equal(df1, df2) - - -def GE(df1, df2): - """双面板逐元素 ``>=``。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.greater_equal) - return np.greater_equal(df1, df2) - - -def EQ(df1, df2): - """双面板逐元素 ``==``。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.equal) - return np.equal(df1, df2) - - -def NE(df1, df2): - """双面板逐元素 ``!=``。""" - if isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame): - return _binary_op_panel_df(df1, df2, np.not_equal) - return np.not_equal(df1, df2) - - -def AND(df1, df2): - """按位与(先转 bool)。""" - return np.bitwise_and(df1.astype(np.bool_), df2.astype(np.bool_)) - - -def OR(df1, df2): - """按位或(先转 bool)。""" - return np.bitwise_or(df1.astype(np.bool_), df2.astype(np.bool_)) - - -# ----------------------------------------------------------------------------- -# 回归 / 斜率(时序滚动,无截面) -# ----------------------------------------------------------------------------- - - -def SEQUENCE(n: int) -> np.ndarray: - """长度 n 的 1…n 浮点向量,用于回归自变量形状。""" - n = int(n) - if n < 1: - raise ValueError("SEQUENCE(n) 需要 n>=1") - return np.linspace(1.0, float(n), n, dtype=np.float32) - - -def calculate_beta(y: 
np.ndarray, x: np.ndarray) -> float: - y = np.asarray(y, dtype=float).ravel() - x = np.asarray(x, dtype=float).ravel() - if y.shape[0] != x.shape[0]: - raise ValueError( - f"calculate_beta: y/x 长度须一致 (len(y)={y.shape[0]}, len(x)={x.shape[0]})。" - "REGBETA/REGRESI 请将 df2 配成 SEQUENCE(p),或使用 REGBETA(df,p) 的标量窗口写法。" - ) - mask = np.isfinite(y) & np.isfinite(x) - if mask.sum() < 2: - return float("nan") - yv, xv = y[mask], x[mask] - X = np.column_stack([xv, np.ones(len(xv))]) - beta, _, _, _ = np.linalg.lstsq(X, yv, rcond=None) - return float(beta[0]) - - -def rolling_beta(df1_group: pd.DataFrame, df2_vec: np.ndarray, p: int) -> pd.Series: - y = df1_group.iloc[:, 0] - x = df2_vec[:p] - n = len(y) - out = np.full(n, np.nan) - for i in range(p - 1, n): - yy = y.iloc[i - p + 1 : i + 1].to_numpy() - out[i] = calculate_beta(yy, x) - return pd.Series(out, index=df1_group.index) - - -def REGBETA( - df1: pd.DataFrame, df2: Union[pd.DataFrame, np.ndarray], p: int = 5, n_jobs: int = 1 -) -> pd.DataFrame: - """滚动 OLS 斜率 β;df2 为 ndarray 取前 p 元为自变量;DataFrame 时对齐索引且自变量为 SEQUENCE(p);零维数值则视为窗口长度。 - (``REGBETA(df, 20)`` 等价 ``REGBETA(df, SEQUENCE(20), 20)``。)""" - p = int(p) - if isinstance(df2, np.ndarray): - xvec = np.asarray(df2, dtype=float).ravel()[:p] - elif isinstance(df2, pd.DataFrame): - assert df1.index.equals(df2.index), "df1 与 df2 索引须对齐" - xvec = SEQUENCE(p) - elif np.ndim(df2) == 0: - p = int(np.asarray(df2).item()) - if p < 1: - raise ValueError("REGBETA: 窗口长度须 >= 1") - xvec = SEQUENCE(p) - else: - raise TypeError( - "REGBETA: df2 须为 DataFrame、ndarray,或窗口长度(如 int);" - "若第二参为 ndarray 且较短,勿短于滚动窗 p。" - ) - parts = Parallel(n_jobs=n_jobs)( - delayed(rolling_beta)(grp, xvec, p) for _, grp in _gb_instrument(df1) - ) - ser = pd.concat(parts).sort_index() - return _out_frame(ser, df1) - - -def SLOPE(df: pd.DataFrame, p: int = 5, n_jobs: int = 1) -> pd.DataFrame: - """对时间 1…p 的滚动斜率,等价 REGBETA(df, SEQUENCE(p), p)。""" - return REGBETA(df, SEQUENCE(int(p)), p, n_jobs=n_jobs) - - -def 
calculate_residuals(y: np.ndarray, x: np.ndarray) -> float: - y = np.asarray(y, dtype=float).ravel() - x = np.asarray(x, dtype=float).ravel() - if y.shape[0] != x.shape[0]: - raise ValueError( - f"calculate_residuals: y/x 长度须一致 (len(y)={y.shape[0]}, len(x)={x.shape[0]})。" - "若只要固定窗残差请用 RESI(df, p) 或 REGRESI(df, SEQUENCE(p), p)。" - ) - mask = np.isfinite(y) & np.isfinite(x) - if mask.sum() < 2: - return float("nan") - yv, xv = y[mask], x[mask] - X = np.column_stack([xv, np.ones(len(xv))]) - b, _, _, _ = np.linalg.lstsq(X, yv, rcond=None) - pred = b[0] * xv + b[1] - return float(yv[-1] - pred[-1]) - - -def rolling_residuals(df1_group: pd.DataFrame, x: np.ndarray, p: int) -> pd.Series: - y = df1_group.iloc[:, 0] - n = len(y) - out = np.full(n, np.nan) - for i in range(p - 1, n): - yy = y.iloc[i - p + 1 : i + 1].to_numpy() - out[i] = calculate_residuals(yy, x) - return pd.Series(out, index=df1_group.index) - - -def REGRESI( - df1: pd.DataFrame, df2: Union[pd.DataFrame, np.ndarray], p: int = 5, n_jobs: int = 1 -) -> pd.DataFrame: - """滚动 OLS 末点残差; ndarray 取自变量 ravel(df2)[:p];零维数值则视为窗口长度并按 SEQUENCE(p)(``REGRESI(df, 20)`` 等同 ``RESI(df,20)``)。""" - p_roll = int(p) - if isinstance(df2, pd.DataFrame): - assert df1.index.equals(df2.index), "df1 与 df2 索引须对齐" - xvec = SEQUENCE(p_roll) - elif isinstance(df2, np.ndarray): - xvec = np.asarray(df2, dtype=float).ravel()[:p_roll] - elif np.ndim(df2) == 0: - p_roll = int(np.asarray(df2).item()) - if p_roll < 1: - raise ValueError("REGRESI: 窗口长度须 >= 1") - xvec = SEQUENCE(p_roll) - else: - xvec = np.asarray(df2, dtype=float).ravel()[:p_roll] - parts = Parallel(n_jobs=n_jobs)( - delayed(rolling_residuals)(grp, xvec, p_roll) for _, grp in _gb_instrument(df1) - ) - ser = pd.concat(parts).sort_index() - return _out_frame(ser, df1) - - -def RESI(df1: pd.DataFrame, p: int = 5, n_jobs: int = 1) -> pd.DataFrame: - """对 SEQUENCE(p) 的滚动残差,等价 REGRESI(df1, SEQUENCE(p), p)。""" - return REGRESI(df1, SEQUENCE(int(p)), p, n_jobs=n_jobs) - - -# 
----------------------------------------------------------------------------- -# 广义拥挤度(CROWD_*):dimension 分桶 → 成交/属性是否扎堆 -# -# 与 CHIP_*(价轴筹码直方图)、VOLUME_CLOCK_VPIN(成交量时钟)勿混用。 -# ----------------------------------------------------------------------------- - - -def _parse_crowd_bucket_params( - side_or_nbuckets: str | int | float = "high", - split_or_bucket_idx: str | int | float = 0.5, -) -> dict[str, Any]: - """解析分桶参数:字符串 ``side`` + ``split``,或整数 ``n_buckets`` + ``bucket_idx``。""" - if isinstance(side_or_nbuckets, str): - side = side_or_nbuckets.strip().lower() - if side not in ("high", "low"): - raise ValueError( - f"CROWD_* side 须为 'high' 或 'low',收到: {side_or_nbuckets!r}" - ) - split = float(split_or_bucket_idx) - if not (0.0 < split < 1.0): - raise ValueError(f"CROWD_* split 须在 (0, 1),收到: {split}") - return { - "bucket_mode": "quantile", - "side": side, - "split": split, - "n_buckets": 2, - "bucket_idx": 1, - } - try: - nb = int(side_or_nbuckets) - bidx = int(split_or_bucket_idx) - except (TypeError, ValueError) as e: - raise ValueError( - "CROWD_* 分桶参数:第 4 参为 'high'/'low' + split," - "或整数 n_buckets + bucket_idx" - ) from e - if nb < 2 or bidx < 1 or bidx > nb: - raise ValueError( - f"CROWD_* 等频分桶须 2 <= n_buckets 且 1 <= bucket_idx <= n_buckets," - f"收到 n_buckets={nb}, bucket_idx={bidx}" - ) - return { - "bucket_mode": "equal_freq", - "side": "high", - "split": 0.5, - "n_buckets": nb, - "bucket_idx": bidx, - } - - -def _crowd_roll_panel( - dimension: pd.DataFrame, - attribute: pd.DataFrame, - weight: pd.DataFrame, - window: int, - op: str, - *, - bucket_mode: str = "quantile", - side: str = "high", - split: float = 0.5, - n_buckets: int = 2, - bucket_idx: int = 1, - min_valid: int = 0, - use_attr: bool = True, - use_weight: bool = True, -) -> pd.DataFrame: - w = max(1, int(window)) - result = np.full(len(dimension), np.nan, dtype=np.float32) - for _, sub_d in _gb_instrument(dimension): - idx = sub_d.index - sub_a = attribute.reindex(idx) - sub_w = 
weight.reindex(idx) - d_arr = sub_d.iloc[:, 0].to_numpy(dtype=float, copy=False) - a_arr = sub_a.iloc[:, 0].to_numpy(dtype=float, copy=False) - w_arr = sub_w.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = dimension.index.get_indexer(idx) - result[pos] = _accel.roll_crowd_fixed( - d_arr, - a_arr, - w_arr, - w, - op, - bucket_mode=bucket_mode, - side=side, - split=float(split), - n_buckets=int(n_buckets), - bucket_idx=int(bucket_idx), - min_valid=int(min_valid), - use_attr=use_attr, - use_weight=use_weight, - ) - return pd.DataFrame( - result, index=dimension.index, columns=dimension.columns[:1] - ) - - -def CROWD_SHARE( - dimension: pd.DataFrame, - weight: pd.DataFrame, - window: int, - side_or_nbuckets: str | int | float = "high", - split_or_bucket_idx: str | int | float = 0.5, -) -> pd.DataFrame: - """**量拥挤占比** ∈ [0,1]:窗口内 ``weight`` 有多少落在指定 **dimension 环境桶**。 - -**问什么**:成交(``weight``,通常 ``$volume``)是否**扎堆**在贵/便宜、开盘/尾盘、高波动、放量等环境(由 ``dimension`` 定义)。 - -**分桶**(第 4、5 位置参数,禁止关键字): -``'high'/'low'`` + ``split``(``high``→``dim>=``窗口分位;``low``→``dim<``分位;如 ``'high',0.9``≈最高十分位); -或 ``n_buckets, bucket_idx``(等频;``1``=最低档,``K``=最高档)。 - -``dimension`` 例:``$adj_vwap``、``TS_MONTH_POS($adj_close)``、``TS_STD($ret,5)``、``$float_cap``。 -目标桶无有效 weight → NaN。无前视。""" - cfg = _parse_crowd_bucket_params(side_or_nbuckets, split_or_bucket_idx) - dummy = pd.DataFrame(0.0, index=dimension.index, columns=dimension.columns[:1]) - return _crowd_roll_panel( - dimension, - dummy, - weight, - window, - "share", - bucket_mode=cfg["bucket_mode"], - side=cfg["side"], - split=cfg["split"], - n_buckets=cfg["n_buckets"], - bucket_idx=cfg["bucket_idx"], - use_attr=False, - use_weight=True, - ) - - -def CROWD_MEAN_RATIO( - dimension: pd.DataFrame, - attribute: pd.DataFrame, - window: int, - side_or_nbuckets: str | int | float = "high", - split_or_bucket_idx: str | int | float = 0.5, -) -> pd.DataFrame: - """**属性抬升倍数**:目标环境桶内 ``attribute`` 均值 / 全窗 ``attribute`` 均值。 - -**问什么**:在 dimension 
划出的环境桶里(如高成交量时段),``attribute``(如 ``$adj_vwap``)比全天平均水平高/低多少倍。 - -分桶同 ``CROWD_SHARE``(第 4、5 参)。全窗均值为 0 或目标桶为空 → NaN。 -例:``CROWD_MEAN_RATIO($volume, $adj_vwap, 48, 'high', 0.9)``。""" - cfg = _parse_crowd_bucket_params(side_or_nbuckets, split_or_bucket_idx) - dummy_w = pd.DataFrame(1.0, index=dimension.index, columns=dimension.columns[:1]) - return _crowd_roll_panel( - dimension, - attribute, - dummy_w, - window, - "mean_ratio", - bucket_mode=cfg["bucket_mode"], - side=cfg["side"], - split=cfg["split"], - n_buckets=cfg["n_buckets"], - bucket_idx=cfg["bucket_idx"], - use_attr=True, - use_weight=False, - ) - - -def CROWD_CONTRAST( - dimension: pd.DataFrame, - attribute: pd.DataFrame, - window: int, - split: float = 0.5, -) -> pd.DataFrame: - """**高低环境差**:高维区 ``attribute`` 均值 − 低维区 ``attribute`` 均值。 - -**问什么**:dimension 高端 vs 低端环境里,``attribute`` 水平差多少(默认 ``split=0.5`` 中位数二分)。 -``dimension`` 与 ``attribute`` 应为不同语义列(如 ``$adj_vwap`` 分桶、``$ret`` 作属性); - -第 4 参 ``split``(``(0,1)``)。任一侧无样本 → NaN。无前视。""" - sq = float(split) - if not (0.0 < sq < 1.0): - raise ValueError(f"CROWD_CONTRAST split 须在 (0, 1),收到: {split}") - dummy_w = pd.DataFrame(1.0, index=dimension.index, columns=dimension.columns[:1]) - return _crowd_roll_panel( - dimension, - attribute, - dummy_w, - window, - "contrast", - bucket_mode="quantile", - side="high", - split=sq, - use_attr=True, - use_weight=False, - ) - - -def CROWD_RANK_WEIGHTED( - dimension: pd.DataFrame, - attribute: pd.DataFrame, - window: int, - weight: pd.DataFrame, -) -> pd.DataFrame: - """**软倾斜加权**:``Σ rank_norm(dim)·attr·weight / Σ weight``(不硬切桶)。 - -**问什么**:dimension 越高的 bar,其 ``attribute`` 在 ``weight`` 加权平均里话语权越大(``rank_norm``∈[0,1] 为窗口内平均秩)。 - -``weight`` 通常 ``$volume``。有效样本 <2 或 ``Σweight<=0`` → NaN。 -价量**线性共动**用 ``TS_CORR``,勿与本算子混用。无前视。""" - return _crowd_roll_panel( - dimension, - attribute, - weight, - window, - "rank_weighted", - bucket_mode="quantile", - side="high", - split=0.5, - use_attr=True, - use_weight=True, - ) - - -# 
----------------------------------------------------------------------------- -# 兼容别名:旧表达式若写 MAX(A,B) 逐元素,映射到 MAXIMUM -# ----------------------------------------------------------------------------- - -MAX = MAXIMUM -MIN = MINIMUM - - -# ----------------------------------------------------------------------------- -# __main__ -# ----------------------------------------------------------------------------- - -if __name__ == "__main__": - idx = pd.MultiIndex.from_product( - [ - pd.date_range("2020-01-01", periods=5, freq="min"), - ["S1"], - ], - names=["datetime", "instrument"], - ) - demo = pd.DataFrame({"high": [1.0, 3.0, 2.0, 4.0, 1.0]}, index=idx) - th = TS_ARGMAX(demo, 3) - dyn = pd.DataFrame({"w": [1.0, 2.0, 2.0, 3.0, 2.0]}, index=idx) - m = TS_MIN(demo, dyn) - assert callable(DELTA) and callable(TS_MIN) - print("function_registry OK (futures time-series only):", th.iloc[-1, 0], m.iloc[-1, 0]) diff --git a/seekalpha/dsl/core/ops_kit.py b/seekalpha/dsl/core/ops_kit.py deleted file mode 100644 index d597aadc..00000000 --- a/seekalpha/dsl/core/ops_kit.py +++ /dev/null @@ -1,112 +0,0 @@ -"""DSL 算子公共工具:面板约定、按品种分组、Numba 友好的一维内核包装。 - -扩展算子(``aqra/dsl/extensions/`` 或用户模块)应只依赖本模块,勿直接改 ``operators.py``。 -""" -from __future__ import annotations - -from typing import Callable, Union - -import numpy as np -import pandas as pd - -Window = Union[int, float, pd.DataFrame] - -InstrumentKernel = Callable[[np.ndarray], np.ndarray] -InstrumentKernel2 = Callable[[np.ndarray, np.ndarray], np.ndarray] -DatetimeKernel = Callable[[np.ndarray], np.ndarray] -DatetimeTransform = Callable[[pd.Series], pd.Series] - - -def is_dynamic_window(w: Window) -> bool: - return isinstance(w, pd.DataFrame) - - -def as_int_window(w: Window) -> int: - if is_dynamic_window(w): - raise TypeError("此处需要整数窗口,收到 DataFrame(动态窗口请用对应重载)") - return int(w) - - -def first_series(df: pd.DataFrame) -> pd.Series: - return df.iloc[:, 0] - - -def series_from_group(x: Union[pd.DataFrame, pd.Series]) -> pd.Series: - if 
isinstance(x, pd.DataFrame): - return x.iloc[:, 0] - return x - - -def gb_instrument(df: pd.DataFrame): - return df.groupby(level="instrument", sort=False) - - -def gb_datetime(df: pd.DataFrame): - return df.groupby(level="datetime", sort=False) - - -def out_frame(values: pd.Series, template: pd.DataFrame) -> pd.DataFrame: - return pd.DataFrame(values, index=template.index, columns=template.columns[:1]) - - -def dynamic_window_int_series(win: pd.DataFrame, index: pd.Index) -> np.ndarray: - s = first_series(win.reindex(index)).to_numpy(dtype=float, copy=False) - out = np.nan_to_num(s, nan=1.0) - out = np.clip(np.round(out), 1, None) - return out.astype(int) - - -def lag_int_series(lag_df: pd.DataFrame, index: pd.Index) -> np.ndarray: - s = first_series(lag_df.reindex(index)).to_numpy(dtype=float, copy=False) - out = np.nan_to_num(s, nan=0.0) - out = np.clip(np.round(out), 0, None) - return out.astype(int) - - -def per_instrument_unary(df: pd.DataFrame, kernel: InstrumentKernel) -> pd.DataFrame: - """单列面板 → 按 instrument 切 1-D 数组 → kernel → 写回同索引单列。""" - result = np.full(len(df), np.nan, dtype=np.float32) - for _, sub in gb_instrument(df): - idx = sub.index - vals = sub.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = df.index.get_indexer(idx) - result[pos] = kernel(vals) - return pd.DataFrame(result, index=df.index, columns=df.columns[:1]) - - -def per_instrument_bivariate( - df1: pd.DataFrame, - df2: pd.DataFrame, - kernel: InstrumentKernel2, -) -> pd.DataFrame: - """双列面板对齐后按 instrument 运行二维 kernel。""" - result = np.full(len(df1), np.nan, dtype=np.float32) - for _, sub1 in gb_instrument(df1): - idx = sub1.index - sub2 = df2.reindex(idx) - x = sub1.iloc[:, 0].to_numpy(dtype=float, copy=False) - y = sub2.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = df1.index.get_indexer(idx) - result[pos] = kernel(x, y) - return pd.DataFrame(result, index=df1.index, columns=df1.columns[:1]) - - -def per_datetime_unary(df: pd.DataFrame, kernel: DatetimeKernel) -> 
pd.DataFrame: - """单列面板 → 按 datetime 切截面向量 → kernel → 写回同索引单列。""" - result = np.full(len(df), np.nan, dtype=np.float32) - for _, sub in gb_datetime(df): - idx = sub.index - vals = sub.iloc[:, 0].to_numpy(dtype=float, copy=False) - pos = df.index.get_indexer(idx) - result[pos] = kernel(vals) - return pd.DataFrame(result, index=df.index, columns=df.columns[:1]) - - -def per_datetime_transform( - df: pd.DataFrame, - transform: DatetimeTransform, -) -> pd.DataFrame: - """按 datetime 截面 ``groupby.transform``,适合 rank / zscore 等 pandas 操作。""" - ser = first_series(df) - out = ser.groupby(level="datetime", sort=False).transform(transform) - return out_frame(out.astype(np.float32), df) diff --git a/seekalpha/dsl/core/parser.py b/seekalpha/dsl/core/parser.py deleted file mode 100644 index 86d0d3a0..00000000 --- a/seekalpha/dsl/core/parser.py +++ /dev/null @@ -1,691 +0,0 @@ -"""因子表达式语法解析:使用 pyparsing 将 DSL 文本解析为可 exec/eval 的 Python 代码串;变量、函数与中缀经 parse_symbol 与 evaluator 绑定为 DataFrame 列。比较运算 ``> < >= <= == !=`` 在双非数字操作数时改写为 ``GT``/``LT``/… 以便与 ``ADD`` 一样支持列名不同之单列面板。 - -启用 packrat 与较高递归深度以减轻深层嵌套调用的解析开销。""" -from pyparsing import Word, alphas, alphanums, infix_notation, opAssoc, one_of, Optional, DelimitedList, Forward, Group -from pyparsing import ParseException -from pyparsing import Regex, Combine, Literal -import sys -import re -import numpy as np -import keyword - -# 引入pyparsing自带的cache功能 -# 加快function_call = var + '(' + Optional(DelimitedList(expr)) + ')'这种嵌套式的pyparsing解析器 -from pyparsing import ParserElement -ParserElement.enable_packrat() - -sys.setrecursionlimit(4000) # 设置更高的递归深度限制 - -# 定义基本元素 -# 变量支持可选的频率后缀:`$close@60m` / `$close@1h` / `$close@1d`;不带 `@` 时默认当前面板(通常为 1m)。 -var = ( - Combine( - Optional(Literal("$")) - + Word(alphas, alphanums + "_") - + Optional(Literal("@") + Word(alphanums + "_")) - ) -).set_name("variable") -# var = Word(alphas, alphanums + "_") - -# 定义数字的正则表达式 -# 正则表达式匹配整数和小数,可以有正负号,以及科学计数法 -number_pattern = r"[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?" 
-number = Regex(number_pattern) - -# 定义字符串字面量 -# 支持单引号和双引号,包括转义字符 -from pyparsing import QuotedString -string_literal = ( - QuotedString("'", esc_char='\\') | - QuotedString('"', esc_char='\\') -).set_parse_action(lambda t: f"'{t[0]}'") # 统一转换为单引号形式 - -# 定义操作符 -mul_div = one_of("* /", use_regex=True) -add_minus = one_of("+ -") -comparison_op = one_of("> < >= <= == !=") -logical_and = one_of("&& &") -logical_or = one_of("|| |") -conditional_op = ("?", ":") - - -def is_number(s): - try: - float(s) - return True - except ValueError: - return False - -# 展平嵌套的 ParseResults 为字符串 -def flatten_nested_tokens(tokens): - # import pdb; pdb.set_trace() - flattened = [] - for token in tokens: - if isinstance(token, str): - flattened.append(token) - elif isinstance(token, list): - flattened.extend(flatten_nested_tokens(token)) - else: # ParseResults - flattened.extend(flatten_nested_tokens(token.as_list())) - return flattened - - - - -def parse_arith_op(s, loc, tokens): - # tokens[0] 包含整个运算表达式的分解 - # 因为操作符定义为左结合,我们可以从左到右递归处理tokens列表 - def recursive_build_expression(tokens): - if len(tokens) == 3: - A, op, B = tokens - # 构建表达式 - return build_expression(A, op, B) - else: - left = tokens[:-2] - op = tokens[-2] - right = tokens[-1] - left_expr = recursive_build_expression(left) - return build_expression(left_expr, op, right) - - def build_expression(A, op, B): - A = ''.join(flatten_nested_tokens([A])) - B = ''.join(flatten_nested_tokens([B])) - A_is_number = is_number(A) - B_is_number = is_number(B) - - ## 任意一个操作数都是数字 - if A_is_number or B_is_number: - return f"{A}{op}{B}" - - ## 两个操作数都是pd变量 - else: - if op == '+': - return f'ADD({A}, {B})' - # return f'np.add({A}, {B})' - elif op == '-': - return f'SUBTRACT({A}, {B})' - # return f'np.subtract({A}, {B})' - elif op == '*': - return f'MULTIPLY({A}, {B})' - # return f'np.multiply({A}, {B})' - elif op == '/': - return f'DIVIDE({A}, {B})' - # return f'np.divide({A}, {B})' - else: - raise NotImplementedError(f'arith op \'{op}\' is not 
implemented') - # 操作数2是BENCHMARKINDEX (pd.Series),而操作数1不是BENCHMARKINDEX (pd.Series)的情况下,Series必须要放在第二操作数,否则会报错 - # if 'BENCHMARKINDEX' in A and 'BENCHMARKINDEX' not in B: - # if op == '+': - # return f'({B}).add({A}, axis=0)' - # elif op == '-': - # return f'(-1*{(B)}).add({A}, axis=0)' - # elif op == '*': - # return f'({B}).mul({A}, axis=0)' - # elif op == '/': - # return f'(1/{(B)}).mul({A}, axis=0)' - # else: - # raise NotImplementedError(f'arith op \'{op}\' is not implemented') - # else: - # if op == '+': - # return f'({A}).add({B}, axis=0)' - # elif op == '-': - # return f'({A}).sub({B}, axis=0)' - # elif op == '*': - # return f'({A}).mul({B}, axis=0)' - # elif op == '/': - # return f'({A}).div({B}, axis=0)' - # else: - # raise NotImplementedError(f'arith op \'{op}\' is not implemented') - - return recursive_build_expression(tokens[0]) - - -def parse_cmp_op(s, loc, tokens): - """将比较重写成 ``LT``/``GT``/…,使双列名单列面板可逐元比较(与 ``ADD`` 一致)。""" - - def recursive_build_expression(tokens): - if len(tokens) == 3: - A, op, B = tokens - return build_expression(A, op, B) - left = tokens[:-2] - op = tokens[-2] - right = tokens[-1] - left_expr = recursive_build_expression(left) - return build_expression(left_expr, op, right) - - def build_expression(A, op, B): - A = "".join(flatten_nested_tokens([A])) - B = "".join(flatten_nested_tokens([B])) - A_is_number = is_number(A) - B_is_number = is_number(B) - if A_is_number or B_is_number: - return f"{A}{op}{B}" - op_map = { - ">": "GT", - "<": "LT", - ">=": "GE", - "<=": "LE", - "==": "EQ", - "!=": "NE", - } - if op not in op_map: - raise NotImplementedError(f"cmp op {op!r} is not implemented") - return f"{op_map[op]}({A}, {B})" - - return recursive_build_expression(tokens[0]) - - -# def parse_arith_op(s, loc, tokens): -# A = ''.join(flatten_nested_tokens(tokens[0][0])) -# op = ''.join(flatten_nested_tokens(tokens[0][1])) -# B = ''.join(flatten_nested_tokens(tokens[0][2])) - -# # 检查操作数是否存在 -# if A == '' or B == '': -# raise 
ParseException(s, loc, f"运算符 '{op}' 缺少操作数") - -# # 检查操作数是否为数字 -# A_is_number = is_number(A) -# B_is_number = is_number(B) - -# # 根据操作数类型选择操作 - -# ## 任意一个操作数都是数字 -# if A_is_number or B_is_number: -# return f"{A}{op}{B}" - -# ## 两个操作数都是pd变量 -# else: -# # 操作数2是BENCHMARKINDEX (pd.Series),而操作数1不是BENCHMARKINDEX (pd.Series)的情况下,Series必须要放在第二操作数,否则会报错 -# if 'BENCHMARKINDEX' in A and 'BENCHMARKINDEX' not in B: -# if op == '+': -# return f'({B}).add({A}, axis=0)' -# elif op == '-': -# return f'(-1*{(B)}).add({A}, axis=0)' -# elif op == '*': -# return f'({B}).mul({A}, axis=0)' -# elif op == '/': -# return f'(1/{(B)}).mul({A}, axis=0)' -# else: -# raise NotImplementedError(f'arith op \'{op}\' is not implemented') -# else: -# if op == '+': -# return f'({A}).add({B}, axis=0)' -# elif op == '-': -# return f'({A}).sub({B}, axis=0)' -# elif op == '*': -# return f'({A}).mul({B}, axis=0)' -# elif op == '/': -# return f'({A}).div({B}, axis=0)' -# else: -# raise NotImplementedError(f'arith op \'{op}\' is not implemented') - - -# 定义条件表达式的解析函数 -def parse_conditional_expression(s, loc, tokens): - A, B, C = tokens[0][0], tokens[0][2], tokens[0][4] - # 将 A, B, C 转换为字符串 - A = ''.join(flatten_nested_tokens(A)) - B = ''.join(flatten_nested_tokens(B)) - C = ''.join(flatten_nested_tokens(C)) - - # 将结果转换为带有datetime和instrument双重索引的Series - return f"pd.Series(np.where({A}, {B}, {C}), index=($close).index)" - -# 定义逻辑运算符的解析函数 -def parse_logical_expression(s, loc, tokens): - # tokens[0] 包含整个表达式的分解,可能包括嵌套的列表 - # 由于操作符定义为左结合,我们可以递归地展开tokens列表 - def recursive_flatten(tokens): - if len(tokens) == 1: - return ''.join(flatten_nested_tokens([tokens[0]])) - else: - left = tokens[0] - operator = tokens[1] - # right = tokens[2] - left_str = ''.join(flatten_nested_tokens([left])) - right_str = recursive_flatten(tokens[2:]) - if operator in ["||", "|"]: - return f"OR({left_str}, {right_str})" - # return f"({left_str}) | ({right_str})" - elif operator in ["&&", "&"]: - return f"AND({left_str}, {right_str})" - # 
return f"({left_str}) & ({right_str})" - - return recursive_flatten(tokens[0]) - - -# 定义函数调用解析函数 -def parse_function_call(s, loc, tokens): - # unary_operator = tokens[0] - function_name = tokens[0] - arguments = tokens[2:-1] - # import pdb; pdb.set_trace() - - - # 处理参数列表中的每个参数 - arguments_flat = [] - # import pdb; pdb.set_trace() - for arg in arguments: - if isinstance(arg, str): - arguments_flat.append(arg) - else: - # 如果参数是嵌套的表达式或函数调用,递归处理 - flattened_arg = ''.join(flatten_nested_tokens(arg)) - arguments_flat.append(flattened_arg) - arguments_str = ','.join(arguments_flat) - return f"{function_name}({arguments_str})" - -# 先定义一个 Forward 对象以便在定义 function_call 时引用 -expr = Forward() - -# 定义函数调用 -## 定义可选的一元操作符,这里使用 one_of 选择器来匹配 "+" 或 "-" -unary_op = Optional(one_of("+ -")).set_parse_action(lambda t: t[0] if t else '') -function_call = var + '(' + Optional(DelimitedList(expr)) + ')' # 使用 expr -function_call.set_parse_action(parse_function_call) -nested_expr = Group('(' + expr + ')') -# sign_var = unary_op + var - -# 更新操作数,以包含函数调用和字符串字面量 -operand = Group(unary_op + (function_call | var | string_literal | number | nested_expr | expr)) - -# unary_operand = one_of("+ -") + operand -# unary_operand.set_parse_action(lambda tokens: ''.join(tokens)) -# operand = (unary_operand | function_call | var | number ) - -# 使用新的 flatten_nested_tokens 函数 -def parse_entire_expression(s, loc, tokens): - # import pdb; pdb.set_trace() - return ''.join(flatten_nested_tokens(tokens)) - - -def check_for_invalid_operators(expression): - valid_operators = {"(", ")", ",", "+", "-", "*", "/", "&&", "||", "&", "|", ">", "<", ">=", "<=", "==", "!=", "?", ":", ".", "\'", "\""} - # 使用正则表达式查找所有的运算符,但排除字符串内容 - # 先移除字符串字面量,避免误判 - import re - expr_without_strings = re.sub(r"'[^']*'", '', expression) # 移除单引号字符串 - expr_without_strings = re.sub(r'"[^"]*"', '', expr_without_strings) # 移除双引号字符串 - # 先移除形如 `$name@60m` 的频率后缀整体,避免 `@` 被误识别为非法运算符 - expr_without_strings = 
re.sub(r"\$?[A-Za-z_][A-Za-z0-9_]*@[A-Za-z0-9_]+", "", expr_without_strings) - - pattern = r'([+\-*/,><=!&|^`~@#%\\;{}[\]"\'\\]+)' # ([|&=]{3,})| - found_operators_tuples = re.findall(pattern, expr_without_strings) - found_operators = [operator for tup in found_operators_tuples for operator in tup if operator] - invalid_operators = set(found_operators) - valid_operators - - if invalid_operators: - raise Exception(f"无效的运算符: \"{''.join(invalid_operators)}\"") - - -# 现在更新 expr 的定义 -expr <<= infix_notation(operand, - [ - (mul_div, 2, opAssoc.LEFT, parse_arith_op), - (add_minus, 2, opAssoc.LEFT, parse_arith_op), - (comparison_op, 2, opAssoc.LEFT, parse_cmp_op), - (logical_and, 2, opAssoc.LEFT, parse_logical_expression), - (logical_or, 2, opAssoc.LEFT, parse_logical_expression), - (conditional_op, 3, opAssoc.RIGHT, parse_conditional_expression) - ]) - - -def check_parentheses_balance(expr): - if expr.count('(') != expr.count(')'): - raise ParseException(f"表达式括号未闭合") - -# 定义整个表达式的解析规则 -expr.set_parse_action(parse_entire_expression) # check_parentheses_balance, -# expr.setDebug() - -def parse_expression(factor_expression, verbose=False): - try: - check_parentheses_balance(factor_expression) - check_for_invalid_operators(factor_expression) - if verbose: - print("因子表达式: ", factor_expression) - - parsed_data_function = expr.parse_string(factor_expression)[0] - return parsed_data_function - except Exception as e: - raise Exception(f"表达式`{factor_expression}`解析失败: {e}") - - - -def dollar_ref_to_pyname(name: str) -> str: - """把 DSL 中 `$field[@freq]` 形式的变量引用转成合法的 Python 标识符。 - - 规则: - - `$close` -> `close` - - `$adj_close@60m` -> `adj_close__60m` - - 不带 `$` 也允许(某些内部再次规范化场景)。 - - 约定用双下划线作为频率分隔符;真实列名中出现 `__` 的极少见场景由调用方保证不冲突。 - """ - n = name.lstrip("$") - if "@" in n: - base, freq = n.split("@", 1) - freq = re.sub(r"[^A-Za-z0-9_]", "_", freq) - return f"{base}__{freq}" - return n - - -def parse_symbol(expr, columns): - keyword_map = { - "TRUE": "True", - "true": "True", - "FALSE": 
"False", - "false": "False", - "NAN": "np.nan", - "NaN": "np.nan", - "nan": "np.nan", - "NULL": "np.nan", - "null": "np.nan", - } - - # 先替换 `$列名[@freq]` -> 目标 Python 标识符;按长度降序处理,避免 `$x` 先匹配导致 `$x@60m` 破碎。 - # 列名以 `$` 前缀起头,不会与普通标识符冲突,可直接做字符串替换。 - col_items = [(col, dollar_ref_to_pyname(col)) for col in columns] - col_items.sort(key=lambda kv: -len(kv[0])) - for col, var_df in col_items: - expr = expr.replace(col, var_df) - - # 关键字替换必须带词边界:否则形如 `dominant` / `tenant` / `null_bar` 这类普通标识符 - # 里的子串 `nan` / `null` 会被错误改写成 `np.nan`,在后续 exec 阶段导致 invalid syntax。 - # 字符串字面量内的关键字不替换,避免破坏 CAST(..., 'float64') 之类的参数。 - def _replace_keyword_safe(s: str, kw: str, val: str) -> str: - pattern = re.compile(r"(? str: - """去掉从 `#` 到行尾的注释;字符串字面量(单/双引号,支持 `\\` 转义)内的 `#` 保留。""" - out: list[str] = [] - i = 0 - in_squote = False - in_dquote = False - escape = False - while i < len(line): - c = line[i] - if escape: - out.append(c) - escape = False - i += 1 - continue - if in_squote or in_dquote: - if c == "\\": - out.append(c) - escape = True - i += 1 - continue - if c == "'" and in_squote: - in_squote = False - elif c == '"' and in_dquote: - in_dquote = False - out.append(c) - i += 1 - continue - if c == "'": - in_squote = True - out.append(c) - i += 1 - continue - if c == '"': - in_dquote = True - out.append(c) - i += 1 - continue - if c == "#": - break - out.append(c) - i += 1 - return "".join(out).strip() - - -def parse_multi_line_expression(multi_line_expr, verbose=False): - """ - 解析多行表达式,支持中间变量赋值 - - 例如: - a=(RANK(SLOPE($amount/$volume, 5)) > 0.104) - b=(RANK(SLOPE($amount/$volume, 90)) > 0.104) - c=a&b?RANK(CS_NEUTRALIZE($ret, LOG($float_cap))) : nan - - 返回一个 Python 代码字符串,使用原生变量存储中间结果,避免重复计算 - - 中间变量命名要求: - 1. 基本规则: - - 必须以字母(a-z, A-Z)或下划线(_)开头 - - 后面可以跟字母、数字(0-9)或下划线 - - 正则表达式: [a-zA-Z_][a-zA-Z0-9_]* - - 2. 
应该避免的命名: - - Python关键字: if, for, and, or, True, False, None, def, class 等 - - 函数库函数名: RANK, SLOPE, ADD, SUBTRACT, MULTIPLY, DIVIDE, AND, OR 等 - - 数据列名(带$前缀): $amount, $volume 等(不会冲突,因为带$前缀) - - 3. 推荐命名风格: - - 小写字母: a, b, c, temp, result - - 下划线分隔: my_var, temp_result - - 避免使用大写(可能与函数名冲突) - - 参数: - multi_line_expr: 多行表达式字符串 - - 返回: - Python 代码字符串,可以直接 eval() 执行 - - 支持 ``#`` 行注释:从 ``#`` 到行尾(字符串字面量内除外);仅整行注释的行会被跳过。 - """ - lines = [] - for raw in multi_line_expr.strip().split("\n"): - cleaned = _strip_hash_comment_from_line(raw) - if cleaned: - lines.append(cleaned) - - if not lines: - raise Exception("表达式为空") - - # 识别变量赋值语句和最终表达式 - assignments = [] # 存储 (var_name, expression) 元组 - final_expr = None - - for i, line in enumerate(lines): - # 检查是否是赋值语句 (var=...) - # 使用正则表达式匹配 var=... 的模式 - # 注意:需要处理括号,因为表达式可能包含括号 - assignment_match = re.match(r'^([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.+)$', line) - - if assignment_match: - var_name = assignment_match.group(1) - expr_str = assignment_match.group(2).strip() - # 移除表达式两端的括号(如果存在且括号匹配) - # 注意:只有当整个表达式被括号包裹时才移除,例如 (A) 而不是 (A) & (B) - if expr_str.startswith('(') and expr_str.endswith(')'): - # 检查括号是否匹配,并且整个表达式被括号包裹 - # 从第二个字符开始查找,如果找到匹配的右括号在最后,说明整个表达式被括号包裹 - paren_count = 0 - should_remove = False - for j, char in enumerate(expr_str): - if char == '(': - paren_count += 1 - elif char == ')': - paren_count -= 1 - if paren_count == 0: - # 如果第一个左括号在位置0,匹配的右括号在最后,说明整个表达式被括号包裹 - if j == len(expr_str) - 1: - should_remove = True - break - if should_remove: - expr_str = expr_str[1:-1] - - # 如果是最后一行,且没有其他非赋值语句,则作为最终表达式 - if i == len(lines) - 1 and final_expr is None: - # 最后一个赋值语句作为最终表达式 - final_expr = line # 保留完整的赋值语句,后续会提取右侧表达式 - else: - assignments.append((var_name, expr_str)) - else: - # 如果不是赋值语句,则作为最终表达式 - if final_expr is None: - final_expr = line - else: - # 如果已经有最终表达式,则追加(可能是多行表达式) - final_expr += '\n' + line - - # 如果没有找到赋值语句,则整个表达式就是最终表达式 - if not assignments and final_expr: - return parse_expression(final_expr.strip(), 
verbose=verbose) - - # 如果最终表达式是赋值语句(如 c=...),提取右侧表达式 - if final_expr and '=' in final_expr: - final_expr_match = re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*\s*=\s*(.+)$', final_expr) - if final_expr_match: - final_expr = final_expr_match.group(1).strip() - # 移除表达式两端的括号(如果存在) - if final_expr.startswith('(') and final_expr.endswith(')'): - if final_expr.count('(') == final_expr.count(')'): - final_expr = final_expr[1:-1] - - # 如果没有最终表达式,则最后一个赋值的结果就是最终结果 - if not final_expr and assignments: - final_expr = assignments[-1][0] - assignments = assignments[:-1] - - if not final_expr: - raise Exception("未找到最终表达式") - - # 收集所有中间变量名 - intermediate_vars = {var_name for var_name, _ in assignments} - - # 验证中间变量名 - reserved_names = set(keyword.kwlist) # Python关键字 - # 添加一些常见的函数名(这些在函数库中定义) - reserved_names.update(['RANK', 'CS_ZSCORE', 'CS_DEMEAN', 'CS_WINSORIZE', 'CS_BUCKET', 'CS_NEUTRALIZE', - 'SLOPE', 'ADD', 'SUBTRACT', 'MULTIPLY', 'DIVIDE', - 'AND', 'OR', 'MAX', 'MIN', 'MEAN', 'STD', 'ABS', 'SIGN', 'CAST', - 'DELTA', 'DELAY', 'EMA', 'SMA', 'TS_MAX', 'TS_MIN', 'TS_MEAN', - 'np', 'pd', 'df']) - - for var_name in intermediate_vars: - if var_name in reserved_names: - raise Exception(f"中间变量名 '{var_name}' 是保留名称(Python关键字或函数名),请使用其他名称") - - # 解析每个赋值语句的表达式 - parsed_assignments = [] - for var_name, expr_str in assignments: - try: - parsed_expr = parse_expression(expr_str, verbose=verbose) - parsed_assignments.append((var_name, parsed_expr)) - except Exception as e: - raise Exception(f"解析变量 {var_name} 的表达式失败: {e}") - - # 解析最终表达式 - # 对于最终表达式,我们需要特殊处理中间变量名 - # 策略:先尝试解析,如果失败,则手动处理中间变量名 - - # 检查最终表达式中是否包含中间变量名 - has_intermediate_vars = any(re.search(r'\b' + re.escape(var_name) + r'\b', final_expr) for var_name in intermediate_vars) - - if has_intermediate_vars: - # 如果包含中间变量,我们需要手动处理 - # 将中间变量名替换为临时标识符,解析后再替换回来 - # 使用 $ 前缀的临时变量名,这样解析器会将其识别为变量 - var_temp_map = {} - temp_final_expr = final_expr - for var_name in intermediate_vars: - if re.search(r'\b' + re.escape(var_name) + r'\b', temp_final_expr): - # 
对于下划线开头的变量,需要特殊处理 - if var_name.startswith('_'): - temp_var = f"$TEMP{var_name}" - else: - temp_var = f"${var_name}_temp" - var_temp_map[temp_var] = var_name - temp_final_expr = re.sub(r'\b' + re.escape(var_name) + r'\b', temp_var, temp_final_expr) - - try: - parsed_final_expr = parse_expression(temp_final_expr) - # 将临时变量名替换回中间变量名 - for temp_var, var_name in var_temp_map.items(): - parsed_final_expr = parsed_final_expr.replace(temp_var, var_name) - except Exception as e: - # 如果解析失败,尝试直接使用中间变量名(不解析) - # 这种情况下,最终表达式可能包含未解析的部分 - raise Exception(f"解析最终表达式失败: {e}") - else: - # 如果不包含中间变量,正常解析 - try: - parsed_final_expr = parse_expression(final_expr) - except Exception as e: - raise Exception(f"解析最终表达式失败: {e}") - - # 构建 Python 代码 - # 使用原生变量存储中间结果,避免重复计算 - code_lines = [] - - # 按顺序执行赋值语句 - for var_name, parsed_expr in parsed_assignments: - code_lines.append(f"{var_name} = {parsed_expr}") - - # 最后返回最终表达式的结果 - code_lines.append(parsed_final_expr) - - # 将所有代码行合并,用换行符连接 - return '\n'.join(code_lines) - - -if __name__ == '__main__': - # 多行表达式示例(勿用变量名 expr,否则会覆盖模块顶层的 Forward 解析器) - multi_line_sample = """ - x = DELTA($open, 1) - y = RANK(x - $close) - y / (1e-8 + 1) - """ - print(parse_multi_line_expression(multi_line_sample)) \ No newline at end of file diff --git a/seekalpha/dsl/core/resample.py b/seekalpha/dsl/core/resample.py deleted file mode 100644 index 25d6ab87..00000000 --- a/seekalpha/dsl/core/resample.py +++ /dev/null @@ -1,344 +0,0 @@ -"""多周期变换层:支持把细粒度行情聚合到更粗 bar,并提供 60m 无前视广播工具。 - -设计要点 --------- -- 输入输出既支持 ``MultiIndex(datetime, instrument)`` 面板,也支持长表 ``datetime`` / ``dominant_id``。 -- 按**自然整点**切桶,不处理夜盘/午休边界(与仓库现状一致,后续可另起 session-aware 版本)。 -- 对离线合成更粗粒度行情时,可选 ``strict_complete_bars=True`` 要求每个桶内必须凑齐完整 bar 数。 -- 粗周期 ``adj_vwap`` 为桶内根 bar 上 ``adj_vwap`` 的成交量加权平均;未调整 ``vwap`` 仍为成交额/成交量。 -- 60m 面板的 ``datetime`` 索引使用桶起点时刻(``HH:00``),便于在 60m 上继续使用现有按 - 行滚动的算子。广播阶段独立完成“右闭/左开 + 向后移 1 小时”的无前视规则。 -""" -from __future__ import annotations - -from typing import Iterable, 
Optional - -import numpy as np -import pandas as pd - -from .intervals import bar_interval_to_minutes, normalize_bar_interval - - -# 不同字段的默认聚合规则;未列出的列使用 ``last``。 -_AGG_FIRST = {"open", "adj_open", "today_open"} -_AGG_MAX = {"high", "adj_high", "today_high"} -_AGG_MIN = {"low", "adj_low", "today_low"} -_AGG_LAST = {"close", "adj_close"} -_AGG_SUM = {"volume", "total_turnover"} - - -def _select_numeric_columns( - panel_1m: pd.DataFrame, - include: Optional[Iterable[str]] = None, -) -> list[str]: - if include is not None: - cols = [c for c in include if c in panel_1m.columns] - else: - cols = list(panel_1m.columns) - numeric = [c for c in cols if pd.api.types.is_numeric_dtype(panel_1m[c])] - return numeric - - -def _aggregation_rule_for(col: str) -> str: - if col in _AGG_FIRST: - return "first" - if col in _AGG_MAX: - return "max" - if col in _AGG_MIN: - return "min" - if col in _AGG_SUM: - return "sum" - if col in _AGG_LAST: - return "last" - # 默认 VWAP/价格型字段保留最后一笔;成交类字段已在上面覆盖。 - return "last" - - -def _safe_divide(num: pd.Series, den: pd.Series) -> pd.Series: - num_arr = pd.to_numeric(num, errors="coerce").to_numpy(dtype=float, copy=False) - den_arr = pd.to_numeric(den, errors="coerce").to_numpy(dtype=float, copy=False) - out = np.full(len(num_arr), np.nan, dtype=float) - mask = np.isfinite(num_arr) & np.isfinite(den_arr) & (den_arr != 0.0) - out[mask] = num_arr[mask] / den_arr[mask] - return pd.Series(out, index=num.index, dtype=float) - - -def _empty_panel() -> pd.DataFrame: - return pd.DataFrame( - index=pd.MultiIndex.from_arrays( - [pd.DatetimeIndex([], name="datetime"), pd.Index([], name="instrument")] - ) - ) - - -def _expected_rows_per_bucket(base_interval: str | int, target_interval: str | int) -> int: - base_minutes = bar_interval_to_minutes(base_interval) - target_minutes = bar_interval_to_minutes(target_interval) - if target_minutes % base_minutes != 0: - raise ValueError( - f"目标周期 {target_interval!r} 不是基础周期 {base_interval!r} 的整数倍" - ) - return 
int(target_minutes // base_minutes) - - -def _pandas_floor_rule(interval: str | int) -> str: - tag = normalize_bar_interval(interval) - if tag.endswith("m"): - return f"{tag[:-1]}min" - if tag.endswith("d"): - return f"{tag[:-1]}D" - return tag - - -def build_timeframe_panel( - panel: pd.DataFrame, - *, - target_interval: str = "60m", - base_interval: str = "1m", - columns: Optional[Iterable[str]] = None, - strict_complete_bars: bool = False, -) -> pd.DataFrame: - """把面板聚合为更粗周期(按自然边界切桶)。 - - Parameters - ---------- - panel: - ``MultiIndex(datetime, instrument)`` 面板;列为特征列。 - target_interval: - 目标周期,如 ``5m`` / ``60m``。 - base_interval: - 当前面板的基础周期;用于 ``strict_complete_bars`` 时判断桶是否完整。 - columns: - 若指定,则只聚合这些列(可用于减轻内存压力);非数值列会被跳过。 - strict_complete_bars: - 为 True 时,仅保留包含完整基础 bar 数的桶。 - - Notes - ----- - 粗周期 ``adj_vwap``:桶内分钟(根 bar)``adj_vwap`` 的**成交量加权平均** - ``sum(adj_vwap * volume) / sum(volume)``,仅计入 ``adj_vwap`` 与 ``volume`` 均有限且 - ``volume > 0`` 的行;若分母为 0 则为 NaN。未调整 ``vwap`` 仍为 ``sum(turnover)/sum(volume)``。 - - Returns - ------- - panel_target: - ``MultiIndex(datetime, instrument)`` 的聚合面板,``datetime`` 为桶起点。 - """ - if not isinstance(panel.index, pd.MultiIndex): - raise ValueError("panel 必须是 (datetime, instrument) MultiIndex 面板") - if panel.index.names[:2] != ["datetime", "instrument"]: - raise ValueError("panel 的索引层必须依次为 datetime、instrument") - - target_rule = normalize_bar_interval(target_interval) - floor_rule = _pandas_floor_rule(target_rule) - expected_rows = _expected_rows_per_bucket(base_interval, target_rule) - use_cols = _select_numeric_columns(panel, include=columns) - if not use_cols: - return _empty_panel() - requested_cols = list(use_cols) - needs_adj_close_for_ret = "ret" in use_cols and "adj_close" in panel.columns - if needs_adj_close_for_ret and "adj_close" not in use_cols: - use_cols.append("adj_close") - - df = panel[use_cols].reset_index() - df["__bucket__"] = df["datetime"].dt.floor(floor_rule) - - agg_map = {c: _aggregation_rule_for(c) for c 
in use_cols} - if strict_complete_bars: - counts = ( - df.groupby(["instrument", "__bucket__"], sort=True)["datetime"] - .size() - .rename("__n_rows__") - ) - valid = counts[counts >= expected_rows].index - if len(valid) == 0: - return _empty_panel() - df = ( - df.set_index(["instrument", "__bucket__"]) - .loc[valid] - .reset_index() - ) - grouped = df.groupby(["instrument", "__bucket__"], sort=True).agg(agg_map) - - # VWAP(未调整):桶内成交额 / 成交量。 - if {"volume", "total_turnover"}.issubset(df.columns): - vol_sum = grouped["volume"] if "volume" in grouped.columns else None - turnover_sum = grouped["total_turnover"] if "total_turnover" in grouped.columns else None - if vol_sum is not None and turnover_sum is not None: - vwap_agg = _safe_divide(turnover_sum, vol_sum) - if "vwap" in use_cols: - grouped["vwap"] = vwap_agg - - # 调整后 VWAP:桶内分钟 adj_vwap 的成交量加权平均(不依赖未调整 vwap 列)。 - if "adj_vwap" in use_cols and "volume" in df.columns: - av = pd.to_numeric(df["adj_vwap"], errors="coerce").to_numpy(dtype=float, copy=False) - vol = pd.to_numeric(df["volume"], errors="coerce").to_numpy(dtype=float, copy=False) - mask = np.isfinite(av) & np.isfinite(vol) & (vol > 0.0) - vw_num = np.where(mask, av * vol, 0.0) - vw_den = np.where(mask, vol, 0.0) - acc = df[["instrument", "__bucket__"]].copy() - acc["__adj_vw_n"] = vw_num - acc["__adj_vw_d"] = vw_den - vw_part = acc.groupby(["instrument", "__bucket__"], sort=True)[ - ["__adj_vw_n", "__adj_vw_d"] - ].sum() - grouped["adj_vwap"] = _safe_divide(vw_part["__adj_vw_n"], vw_part["__adj_vw_d"]) - if "ret" in grouped.columns and "adj_close" in grouped.columns: - grouped["ret"] = grouped["adj_close"].groupby(level="instrument").pct_change() - if needs_adj_close_for_ret: - grouped = grouped[requested_cols] - # grouped.index 是 (instrument, bucket);改回 (datetime, instrument) 并排序。 - grouped.index = grouped.index.set_names(["instrument", "datetime"]) - grouped = grouped.swaplevel("instrument", "datetime").sort_index() - grouped.index = 
grouped.index.set_names(["datetime", "instrument"]) - return grouped - - -def build_60m_panel( - panel: pd.DataFrame, - *, - columns: Optional[Iterable[str]] = None, - base_interval: str = "1m", - strict_complete_bars: bool = False, -) -> pd.DataFrame: - """把**主条行情**面板聚合为 60m 面板(按自然整点切桶)。 - - ``panel`` 的 bar 可以来自 1m、5m 等与数据集一致的任一格律;`datetime` 仍按根 bar - 起点对齐。聚合规则见 ``build_timeframe_panel``(OHLCV 等按桶 first/max/min/last/sum)。 - - 参数 ``base_interval`` 须与 ``panel`` 实际根周期一致。它**仅**在 - ``strict_complete_bars=True`` 时参与「每桶是否凑满预期根数」的过滤;**当前默认** - ``strict_complete_bars=False`` 时,无论写成 ``1m`` 还是 ``5m``,聚合路径都只对行做 - ``floor`` 到 60m 桶再 ``groupby``,**不因写错 base_interval 而改变数值**。 - - 若主数据是 5m 而不是 1m,60m K 线是由 5m bar 聚出来的;与从底层 1m 再聚 60m 相比, - 高低价等在理论上可能更粗(缺分钟内极值)。本仓库评估管线里应对 ``base_interval`` 与 - 数据集的 ``bar_interval`` 保持一致,以便日后若开启「整桶才保留」时语义正确。""" - return build_timeframe_panel( - panel, - target_interval="60m", - base_interval=base_interval, - columns=columns, - strict_complete_bars=strict_complete_bars, - ) - - -def resample_universe_long( - df: pd.DataFrame, - *, - target_interval: str, - base_interval: str = "1m", - strict_complete_bars: bool = False, - symbol_col: str = "dominant_id", -) -> pd.DataFrame: - """把长表行情聚合到更粗粒度。 - - 返回列仍为长表风格:至少包含 ``datetime`` 与 ``dominant_id``,便于直接写 parquet / sqlite。 - """ - if df is None or df.empty: - return pd.DataFrame(columns=["datetime", symbol_col]) - if "datetime" not in df.columns: - raise ValueError("行情长表需含 datetime 列") - if symbol_col not in df.columns: - raise ValueError(f"行情长表需含 {symbol_col} 列") - - frame = df.copy() - frame["instrument"] = frame[symbol_col].astype(str) - idx_cols = ["datetime", "instrument"] - feature_cols = [c for c in frame.columns if c not in idx_cols and c != symbol_col] - panel = frame.set_index(idx_cols)[feature_cols].sort_index() - out = build_timeframe_panel( - panel, - target_interval=target_interval, - base_interval=base_interval, - strict_complete_bars=strict_complete_bars, - ) - long = 
out.reset_index().rename(columns={"instrument": symbol_col}) - return long - - -def broadcast_timeframe_to_main_freq( - values: pd.DataFrame, - target_index: pd.MultiIndex, - target_interval: str, -) -> pd.DataFrame: - """把辅周期 ``values`` 无前视地广播到**主频率** ``target_index``(与 ``broadcast_60m_to_main_freq`` 同 - 一语义,但完成时间 = 桶起点 + 该辅周期整段长度)。 - - Parameters - ---------- - values: - ``MultiIndex(datetime, instrument)`` 面板,``datetime`` 为辅周期桶**起点**(与 - ``build_timeframe_panel`` 输出一致)。 - target_index: - 主面板的 ``MultiIndex(datetime, instrument)``。 - target_interval: - 辅周期,如 ``5m`` / ``10m`` / ``60m`` / ``1h``(经 ``normalize_bar_interval`` 归一)。 - """ - if not isinstance(target_index, pd.MultiIndex): - raise ValueError("target_index 必须是 (datetime, instrument) MultiIndex") - if target_index.names[:2] != ["datetime", "instrument"]: - raise ValueError("target_index 的索引层必须依次为 datetime、instrument") - - tag = normalize_bar_interval(target_interval) - bar_minutes = int(bar_interval_to_minutes(tag)) - completion = pd.Timedelta(minutes=bar_minutes) - - cols = list(values.columns) - if values.empty or not cols: - return pd.DataFrame( - np.nan, - index=target_index, - columns=cols, - ) - - # 右表:桶起点 + 整段 bar 长 = 该桶「完成时间」,作为 merge_asof 的 on 键(无前视)。 - src = values.reset_index() - src["datetime"] = src["datetime"] + completion - src = src.sort_values(["datetime", "instrument"], kind="mergesort").reset_index( - drop=True - ) - - # 左表:把 target_index 展开并附带原始行号,以便合并后恢复原顺序。 - tgt = pd.DataFrame( - {"__row__": np.arange(len(target_index), dtype=np.int64)}, - index=target_index, - ).reset_index() - tgt_sorted = tgt.sort_values( - ["datetime", "instrument"], kind="mergesort" - ).reset_index(drop=True) - - # 按 instrument 分组的 asof backward:在同一 instrument 内找最近一根"完成时间 <= t"的 bar。 - merged = pd.merge_asof( - tgt_sorted, - src, - on="datetime", - by="instrument", - direction="backward", - allow_exact_matches=True, - ) - merged = merged.sort_values("__row__", kind="mergesort") - - out = pd.DataFrame( - 
merged[cols].to_numpy(), - index=target_index, - columns=cols, - ) - return out - - -def broadcast_60m_to_main_freq( - values_60m: pd.DataFrame, - target_index: pd.MultiIndex, -) -> pd.DataFrame: - """兼容旧名:等价于 ``broadcast_timeframe_to_main_freq(..., target_interval=\"60m\")``。""" - return broadcast_timeframe_to_main_freq(values_60m, target_index, "60m") - - -__all__ = [ - "broadcast_60m_to_main_freq", - "broadcast_timeframe_to_main_freq", - "build_60m_panel", - "build_timeframe_panel", - "resample_universe_long", -] diff --git a/seekalpha/dsl/eval.py b/seekalpha/dsl/eval.py deleted file mode 100644 index 0aadba5f..00000000 --- a/seekalpha/dsl/eval.py +++ /dev/null @@ -1,656 +0,0 @@ -"""多行因子表达式求值:调用 dsl.parser 将用户文本解析为 Python 代码串,将 $列名 解析为注入的面板列,在 function_registry 命名空间中顺序 exec 赋值行并对最终表达式 eval。 - -失败时抛出 MultiLineFactorEvalError,附带生成代码行号与用户源码行映射。 - -多周期扩展:DSL 支持 ``$field@5m`` / ``@10m`` / ``@60m`` / ``@1h`` / ``@1d`` 等(与 ``data.datasets.normalize_bar_interval`` -一致)。每条赋值语句按引用列的频率分派: - -- **纯辅周期行**(本行只含某单一 ``@`` 列/中间变量)-> 在该辅周期面板上直接运算; -- **多辅频无主频行**(本行引用多种辅周期列/中间变量,且不含主频列)-> 与混合行相同:各辅频先广播到 - 主频索引再求值; -- **混合**(本行同时含主频列与一或多个辅周期列/中间变量,或仅主频列)-> 主频行上求值,辅周期先按 - 「最近一根已完成 bar」广播到主频索引(见 ``data.resample.broadcast_timeframe_to_main_freq``); -- 中间变量保留各自频率,被跨频引用时再按需广播。 - -``eval_multi_line_factor`` 可传 ``aux_panels={"5m": df5, "60m": df60}`` 预建辅表,缺省由主表 -``build_timeframe_panel`` 现算。兼容旧参 ``df_60m=...`` 等价于 ``aux_panels`` 中 ``"60m"`` 一项。 - -窗口语义:``TS_*`` 在**纯**辅周期行上时 ``N`` 为**该频**的 bar 数;**混合**行中若将 ``TS_*(...@f, N)`` -与主频列写在同一行,``@f`` 会先被广播,``N`` 会表现为主频 bar 数,需拆多行见提示词/文档。 -""" -from __future__ import annotations - -import re -import traceback -import warnings -from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Sequence, Tuple - -import numpy as np -import pandas as pd - -from seekalpha.dsl.core.errors import MultiLineFactorEvalError -from seekalpha.dsl.core.parser import ( - dollar_ref_to_pyname, - parse_multi_line_expression, - parse_symbol, -) -from 
seekalpha.dsl.stock.aux_cache import get_or_build_aux_panel -from seekalpha.dsl.stock.intervals import normalize_bar_interval -from seekalpha.dsl.stock.resample import broadcast_timeframe_to_main_freq -from seekalpha.dsl.registry import build_operator_namespace, resolve_extension_modules - -# 股票日频默认主周期 -DEFAULT_BASE_INTERVAL = "1d" - - -def _function_lib_namespace( - operator_modules: Sequence[str] | None = None, -) -> Dict[str, Any]: - return build_operator_namespace( - extension_modules=resolve_extension_modules(operator_modules), - ) - - -def _dollar_columns(df: pd.DataFrame) -> List[str]: - return ["$" + str(c) for c in df.columns] - - -def _dollar_columns_aux(panel: Optional[pd.DataFrame], tag: str) -> List[str]: - if panel is None or getattr(panel, "empty", True): - return [] - t = normalize_bar_interval(tag) - return [f"${c}@{t}" for c in panel.columns] - - -# ``$name@5m`` 等,捕获频率片段并归一化(跳过字面量后扫描) -_DOLLAR_AT_REF_RE = re.compile(r"\$[A-Za-z_][A-Za-z0-9_]*@([A-Za-z0-9_]+)\b") - - -def collect_aux_intervals_from_expr(multi_line_expr: str) -> List[str]: - """从 DSL 中收集 ``$x@`` 里的 ````,归一化、保序、去重。""" - s = _strip_string_literals(multi_line_expr) - seen: set[str] = set() - out: List[str] = [] - for m in _DOLLAR_AT_REF_RE.finditer(s): - raw = m.group(1) - try: - tag = normalize_bar_interval(raw) - except ValueError as exc: - raise ValueError( - f"表达式含不支持的辅频 @{raw!r}(股票仅支持 @1d / @1w)" - ) from exc - if tag not in seen: - seen.add(tag) - out.append(tag) - return out - - -def _column_bindings(df: pd.DataFrame) -> Dict[str, pd.DataFrame]: - return {str(c): df[[c]] for c in df.columns} - - -_IDENT_RE = re.compile(r"\b[A-Za-z_][A-Za-z0-9_]*\b") -_ASSIGN_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.+)$") - - -def _strip_string_literals(s: str) -> str: - out = re.sub(r"'[^']*'", "", s) - out = re.sub(r'"[^"]*"', "", out) - return out - - -def _extract_identifier_refs(rhs: str, known: set) -> List[str]: - cleaned = _strip_string_literals(rhs) - return [t for t in 
_IDENT_RE.findall(cleaned) if t in known] - - -def _split_assignment(line: str) -> Tuple[Optional[str], str]: - m = _ASSIGN_RE.match(line) - if m: - return m.group(1), m.group(2).strip() - return None, line - - -def _split_generated_lines(code: str) -> List[str]: - return [ln.strip() for ln in code.strip().split("\n") if ln.strip()] - - -def _nonblank_user_lines(multi_line_expr: str) -> List[Tuple[int, str]]: - out: List[Tuple[int, str]] = [] - for i, line in enumerate(multi_line_expr.splitlines(), start=1): - s = line.strip() - if s: - out.append((i, s)) - return out - - -def _align_user_to_generated( - generated_line_no: int, - n_generated: int, - multi_line_expr: str, -) -> Tuple[Optional[int], Optional[str]]: - user = _nonblank_user_lines(multi_line_expr) - if not user or n_generated < 1: - return None, None - if len(user) != n_generated: - return None, None - if not (1 <= generated_line_no <= len(user)): - return None, None - lineno, text = user[generated_line_no - 1] - return lineno, text - - -def _problem_from_exception(exc: BaseException) -> Tuple[str, str]: - tname = type(exc).__name__ - if isinstance(exc, NameError): - name = getattr(exc, "name", None) - if not name: - m = re.search(r"name ['\"](\w+)['\"] is not defined", str(exc)) - name = m.group(1) if m else None - if name: - return tname, f"未定义的名称 {name!r}(缺少函数/变量或列未注入)" - return tname, str(exc) or repr(exc) - if isinstance(exc, (TypeError, ValueError, ZeroDivisionError)): - return tname, str(exc) or repr(exc) - if isinstance(exc, KeyError): - return tname, f"键不存在或列缺失: {exc!r}" - if isinstance(exc, AssertionError): - return tname, str(exc) or "断言失败" - return tname, str(exc) or repr(exc) - - -def _infer_user_line_parse_phase(multi_line_expr: str, exc: Exception) -> Tuple[Optional[int], Optional[str]]: - msg = str(exc) - user_lines = _nonblank_user_lines(multi_line_expr) - if not user_lines: - return None, None - - m = re.search(r"解析变量\s+(\w+)\s+的表达式失败", msg) - if m: - var = m.group(1) - for lineno, 
text in user_lines: - if re.match(rf"^{re.escape(var)}\s*=", text): - return lineno, text - return None, None - - if "解析最终表达式失败" in msg or "未找到最终表达式" in msg: - lineno, text = user_lines[-1] - return lineno, text - - m = re.search(r"表达式`([^`]+)`解析失败", msg) - if m: - snippet = m.group(1).strip() - for lineno, text in user_lines: - if snippet in text.replace(" ", "") or snippet in text: - return lineno, text - for lineno, text in user_lines: - if snippet in multi_line_expr: - return lineno, text - - return None, None - - -def _build_eval_error( - phase: str, - exc: BaseException, - *, - generated_code: str, - generated_line_no: Optional[int], - generated_line_text: Optional[str], - multi_line_expr: str, -) -> MultiLineFactorEvalError: - et, problem = _problem_from_exception(exc) - tb = traceback.format_exc() - n_gen = len([ln for ln in generated_code.strip().split("\n") if ln.strip()]) - u_no, u_text = (None, None) - if generated_line_no is not None and n_gen > 0: - u_no, u_text = _align_user_to_generated( - generated_line_no, n_gen, multi_line_expr - ) - summary = f"{phase} 阶段失败: {problem}" - return MultiLineFactorEvalError( - summary, - phase=phase, - problem=problem, - exception_type=et, - generated_code=generated_code, - generated_line_no=generated_line_no, - generated_line_text=generated_line_text, - user_source=multi_line_expr, - user_line_no=u_no, - user_line_text=u_text, - traceback_text=tb, - ) - - -def compile_multi_line_factor( - multi_line_expr: str, - *, - columns: Optional[Sequence[str]] = None, - verbose: bool = False, -) -> str: - try: - raw = parse_multi_line_expression(multi_line_expr, verbose=verbose) - except Exception as e: - u_no, u_text = _infer_user_line_parse_phase(multi_line_expr, e) - et, problem = _problem_from_exception(e) - tb = traceback.format_exc() - raise MultiLineFactorEvalError( - f"parse 阶段失败: {problem}", - phase="parse", - problem=problem, - exception_type=et, - user_source=multi_line_expr, - user_line_no=u_no, - 
user_line_text=u_text, - traceback_text=tb, - ) from e - - if not columns: - return raw - - try: - return parse_symbol(raw, list(columns)) - except Exception as e: - et, problem = _problem_from_exception(e) - tb = traceback.format_exc() - raise MultiLineFactorEvalError( - f"symbol 阶段失败: {problem}", - phase="symbol", - problem=problem, - exception_type=et, - generated_code=raw, - user_source=multi_line_expr, - traceback_text=tb, - ) from e - - -SCOPE_MAIN = "main" - -# 混频下生成码里辅周期列名为 ``col__freq``;若同列出现 ``TS_*(...`` 且实参区含该形式,则 ``N`` 按主频计。 -_MIXED_LINE_TS_WITH_AUX_ARG_RE = re.compile( - r"TS_[A-Z][A-Z0-9_]*\s*\([^)]*__[A-Za-z0-9_]+" -) - - -def _warn_mixed_line_ts_on_broadcast_aux(*, rhs: str, has_main: bool) -> None: - if not has_main or not _MIXED_LINE_TS_WITH_AUX_ARG_RE.search(rhs): - return - warnings.warn( - "混频行:TS_*() 的实参含辅周期列(已广播到主频索引)," - "其窗口参数 N 按主频 bar 数计,不是辅周期根数;" - "需要「N 根辅周期」时请先在仅含 @<周期> 的一行上算好中间变量。", - UserWarning, - stacklevel=4, - ) - - -def _merge_build_aux_panels( - df: pd.DataFrame, - *, - required: List[str], - base_interval: str, - df_60m: Optional[pd.DataFrame], - aux_panels: Optional[Mapping[str, pd.DataFrame]], - aux_cache: Optional[MutableMapping[tuple[int, str, str], pd.DataFrame]] = None, -) -> Dict[str, pd.DataFrame]: - """合并用户传入的辅表与按 required 自动聚合的结果(带缓存)。""" - aux: Dict[str, pd.DataFrame] = {} - if aux_panels: - for k, v in aux_panels.items(): - if v is not None and not getattr(v, "empty", True): - aux[normalize_bar_interval(str(k))] = v - if df_60m is not None: - try: - aux.setdefault("60m", df_60m) - except ValueError: - pass - for tag in required: - if tag not in aux: - aux[tag] = get_or_build_aux_panel( - df, - tag, - base_interval=base_interval, - cache=aux_cache, - ) - return aux - - -def _compile_column_tokens(df: pd.DataFrame, aux: Dict[str, pd.DataFrame]) -> List[str]: - cols = _dollar_columns(df) - for tag in sorted(aux.keys()): - cols.extend(_dollar_columns_aux(aux[tag], tag)) - return cols - - -def eval_multi_line_factor( - 
multi_line_expr: str, - df: pd.DataFrame, - *, - df_60m: Optional[pd.DataFrame] = None, - aux_panels: Optional[Mapping[str, pd.DataFrame]] = None, - base_interval: str = DEFAULT_BASE_INTERVAL, - columns: Optional[Sequence[str]] = None, - operator_modules: Optional[Sequence[str]] = None, - extra_namespace: Optional[Mapping[str, Any]] = None, - function_lib_namespace: Optional[Mapping[str, Any]] = None, - aux_cache: Optional[MutableMapping[tuple[int, str, str], pd.DataFrame]] = None, - verbose: bool = False, -) -> Any: - """对多行因子 DSL 求值(股票主频默认 1d,辅频 @1d / @1w)。""" - required = collect_aux_intervals_from_expr(multi_line_expr) - use_multi = bool(required) or (df_60m is not None) - aux: Dict[str, pd.DataFrame] = {} - # 默认启用辅表缓存,避免重复聚合 1w - if aux_cache is None: - aux_cache = {} - if use_multi: - aux = _merge_build_aux_panels( - df, - required=required, - base_interval=base_interval, - df_60m=df_60m, - aux_panels=aux_panels, - aux_cache=aux_cache, - ) - - if columns is not None: - cols = list(columns) - else: - cols = _compile_column_tokens(df, aux) if use_multi else _dollar_columns(df) - code = compile_multi_line_factor( - multi_line_expr, columns=cols, verbose=verbose - ) - lines = _split_generated_lines(code) - - if not lines: - raise MultiLineFactorEvalError( - "生成代码为空(解析结果无任何可执行行)", - phase="eval", - problem="生成代码为空", - exception_type="ValueError", - generated_code=code, - user_source=multi_line_expr, - ) - - base_ns: Dict[str, Any] = { - "__builtins__": __builtins__, - "np": np, - "pd": pd, - } - if function_lib_namespace is not None: - base_ns.update(dict(function_lib_namespace)) - else: - base_ns.update(_function_lib_namespace(operator_modules)) - if extra_namespace: - base_ns.update(dict(extra_namespace)) - - if not use_multi: - return _eval_single_frequency( - lines=lines, - code=code, - df=df, - base_ns=base_ns, - multi_line_expr=multi_line_expr, - ) - return _eval_multi_frequency( - lines=lines, - code=code, - df_main=df, - aux=aux, - base_ns=base_ns, - 
multi_line_expr=multi_line_expr, - ) - - -def _eval_single_frequency( - *, - lines: List[str], - code: str, - df: pd.DataFrame, - base_ns: Dict[str, Any], - multi_line_expr: str, -) -> Any: - ctx: MutableMapping[str, Any] = dict(base_ns) - ctx.update(_column_bindings(df)) - - if len(lines) == 1: - line = lines[0] - try: - return eval(line, ctx, ctx) - except Exception as e: - raise _build_eval_error( - "eval", - e, - generated_code=code, - generated_line_no=1, - generated_line_text=line, - multi_line_expr=multi_line_expr, - ) from e - - for i, line in enumerate(lines[:-1], start=1): - try: - exec(line, ctx, ctx) - except Exception as e: - raise _build_eval_error( - "exec", - e, - generated_code=code, - generated_line_no=i, - generated_line_text=line, - multi_line_expr=multi_line_expr, - ) from e - - last = lines[-1] - last_no = len(lines) - try: - return eval(last, ctx, ctx) - except Exception as e: - raise _build_eval_error( - "eval", - e, - generated_code=code, - generated_line_no=last_no, - generated_line_text=last, - multi_line_expr=multi_line_expr, - ) from e - - -def _eval_multi_frequency( - *, - lines: List[str], - code: str, - df_main: pd.DataFrame, - aux: Dict[str, pd.DataFrame], - base_ns: Dict[str, Any], - multi_line_expr: str, -) -> Any: - """按频率作用域分派的多频求值:详见模块 docstring。""" - bindings_main = _column_bindings(df_main) - native: Dict[str, Dict[str, pd.DataFrame]] = {} - broadcast: Dict[str, Dict[str, pd.DataFrame]] = {} - for tag, panel in aux.items(): - if panel is None or getattr(panel, "empty", True): - continue - native[tag] = {} - broadcast[tag] = {} - for c in panel.columns: - py = dollar_ref_to_pyname(f"${c}@{tag}") - native[tag][py] = panel[[c]] - broadcast[tag][py] = broadcast_timeframe_to_main_freq( - panel[[c]], df_main.index, tag - ) - - panel_freq: Dict[str, str] = {} - for n in bindings_main: - panel_freq[n] = SCOPE_MAIN - for tag in native: - for py in native[tag]: - panel_freq[py] = tag - - ivar_value: Dict[str, Any] = {} - ivar_freq: 
Dict[str, str] = {} - - def _known_names() -> set: - return set(panel_freq.keys()) | set(ivar_freq.keys()) - - def _ref_freq(name: str) -> str: - if name in panel_freq: - return panel_freq[name] - return ivar_freq[name] - - def _ensure_main(value: Any, src_freq: str) -> Any: - if src_freq == SCOPE_MAIN: - return value - return broadcast_timeframe_to_main_freq(value, df_main.index, src_freq) - - def _eval_rhs(rhs: str, line_no: int, line_text: str, phase: str) -> Tuple[Any, str]: - refs = _extract_identifier_refs(rhs, _known_names()) - freqs = [_ref_freq(r) for r in refs] - has_main = any(f == SCOPE_MAIN for f in freqs) - aux_in_refs = {f for f in freqs if f != SCOPE_MAIN} - - if not has_main and len(aux_in_refs) == 1: - tag = next(iter(aux_in_refs)) - ctx = dict(base_ns) - for r in refs: - if r in native.get(tag, {}): - ctx[r] = native[tag][r] - elif r in ivar_value: - if ivar_freq[r] != tag: - raise MultiLineFactorEvalError( - f"纯 @{tag} 作用域中引用了其它频率中间变量 {r!r};请改为混合表达式或先升频", - phase=phase, - problem="跨频引用(辅频作用域)", - exception_type="ValueError", - generated_code=code, - generated_line_no=line_no, - generated_line_text=line_text, - user_source=multi_line_expr, - ) - ctx[r] = ivar_value[r] - try: - result = eval(rhs, ctx, ctx) - except Exception as e: - raise _build_eval_error( - phase, - e, - generated_code=code, - generated_line_no=line_no, - generated_line_text=line_text, - multi_line_expr=multi_line_expr, - ) from e - return result, tag - - ctx = dict(base_ns) - for r in refs: - if r in bindings_main: - ctx[r] = bindings_main[r] - else: - placed = False - for t in native: - if r in broadcast[t]: - ctx[r] = broadcast[t][r] - placed = True - break - if not placed and r in ivar_value: - ctx[r] = _ensure_main(ivar_value[r], ivar_freq[r]) - _warn_mixed_line_ts_on_broadcast_aux(rhs=rhs, has_main=has_main) - try: - result = eval(rhs, ctx, ctx) - except Exception as e: - raise _build_eval_error( - phase, - e, - generated_code=code, - generated_line_no=line_no, - 
generated_line_text=line_text, - multi_line_expr=multi_line_expr, - ) from e - return result, SCOPE_MAIN - - last_idx = len(lines) - 1 - final_value: Any = None - final_freq: str = SCOPE_MAIN - for i, line in enumerate(lines): - is_last = i == last_idx - lhs, rhs = _split_assignment(line) - phase = "eval" if is_last else "exec" - line_no = i + 1 - value, freq = _eval_rhs(rhs, line_no, line, phase) - if lhs is not None and not is_last: - ivar_value[lhs] = value - ivar_freq[lhs] = freq - if is_last: - final_value = value - final_freq = freq - if lhs is not None: - ivar_value[lhs] = value - ivar_freq[lhs] = freq - - if final_freq != SCOPE_MAIN: - return broadcast_timeframe_to_main_freq(final_value, df_main.index, final_freq) - return final_value - - -def _smoke_test_dataframe() -> pd.DataFrame: - idx = pd.MultiIndex.from_product( - [ - pd.date_range("2020-01-01", periods=8, freq="min", name="datetime"), - ["SYM1", "SYM2"], - ], - names=["datetime", "instrument"], - ) - n = len(idx) - rng = np.random.default_rng(0) - return pd.DataFrame( - { - "open": rng.random(n) + 1.0, - "close": rng.random(n) + 1.0, - }, - index=idx, - ) - - -def eval_factor( - expr: str, - panel: pd.DataFrame, - **kwargs: Any, -) -> pd.Series: - """对表达式求值,返回与 panel 对齐的单列 Series(股票默认日频)。""" - kwargs.setdefault("base_interval", DEFAULT_BASE_INTERVAL) - out = eval_multi_line_factor(expr, panel, **kwargs) - if isinstance(out, pd.DataFrame): - return out.iloc[:, 0] - if isinstance(out, pd.Series): - return out - raise TypeError(f"因子输出须为 DataFrame/Series,得到 {type(out)!r}") - - -if __name__ == "__main__": - df_test = _smoke_test_dataframe() - expr_ok = """ - x = TS_DELTA($open@5m, 1) - y = TS_RANK(SUBTRACT(x, $close@5m), 5) - DIVIDE(y, ADD(1e-8, 1)) - """ - print("示例输入1: expr_ok =") - print(expr_ok) - compiled = compile_multi_line_factor(expr_ok, columns=_dollar_columns(df_test)) - print("编译通过,生成代码:\n", compiled) - - out = eval_multi_line_factor(expr_ok, df_test) - print("求值 OK, shape:", getattr(out, 
"shape", out)) - - expr_bad = """ - x = TS_DELTA($open, 1) - y = TS_RANK(UNKNOWN_FUNC($close), 5) - y - """ - print("示例输入2: expr_bad =") - print(expr_bad) - try: - eval_multi_line_factor(expr_bad, df_test) - except MultiLineFactorEvalError as e: - print("预期失败 (结构化错误信息):\n", e) - \ No newline at end of file diff --git a/seekalpha/dsl/registry.py b/seekalpha/dsl/registry.py deleted file mode 100644 index 91b9beaf..00000000 --- a/seekalpha/dsl/registry.py +++ /dev/null @@ -1,230 +0,0 @@ -"""DSL 算子注册与命名空间合并:内置 + 扩展模块。""" -from __future__ import annotations - -import importlib -import importlib.util -import os -import re -from dataclasses import dataclass -from pathlib import Path -from types import ModuleType -from typing import Any, Callable, Iterator, Mapping, Sequence - -_BUILTIN_MODULE = "seekalpha.dsl.core.operators" -_OPERATOR_NAME_RE = re.compile(r"^[A-Z][A-Z0-9_]*$") - - -class OperatorConflictError(ValueError): - """扩展算子与内置或其他扩展同名。""" - - -@dataclass(frozen=True) -class OperatorMeta: - name: str - module: str - default_column: str = "close" - test_param: int = 20 - - -_REGISTRY: dict[str, OperatorMeta] = {} - - -def register_operator( - name: str | None = None, - *, - default_column: str = "close", - test_param: int = 20, -) -> Callable[[Callable[..., Any]], Callable[..., Any]]: - """装饰器:登记扩展算子元数据(供测试/文档发现)。""" - - def deco(fn: Callable[..., Any]) -> Callable[..., Any]: - op_name = name or fn.__name__ - if not _OPERATOR_NAME_RE.match(op_name): - raise ValueError(f"算子名须为大写下划线风格: {op_name!r}") - _REGISTRY[op_name] = OperatorMeta( - name=op_name, - module=fn.__module__, - default_column=default_column, - test_param=test_param, - ) - return fn - - return deco - - -def iter_registered_operators(*, module: str | None = None) -> Iterator[OperatorMeta]: - for meta in _REGISTRY.values(): - if module is None or meta.module == module: - yield meta - - -def is_operator_name(name: str) -> bool: - return bool(_OPERATOR_NAME_RE.match(name)) - - -def 
collect_module_operators(mod: ModuleType) -> dict[str, Callable[..., Any]]: - """收集模块内公开的大写 callable(DSL 算子命名约定)。""" - out: dict[str, Callable[..., Any]] = {} - for name in dir(mod): - if name.startswith("_") or not is_operator_name(name): - continue - obj = getattr(mod, name) - if callable(obj): - out[name] = obj - return out - - -def load_operator_module(path: str) -> ModuleType: - """加载 Python 模块:``pkg.mod`` 或 ``/abs/path/to/file.py``。""" - if path.endswith(".py") or "/" in path or "\\" in path: - file_path = Path(path).resolve() - if not file_path.is_file(): - raise FileNotFoundError(f"扩展算子文件不存在: {path}") - stem = file_path.stem - if "/workspace/sandbox/extensions/" in file_path.as_posix(): - mod_name = f"workspace.sandbox.extensions.{stem}" - elif "/workspace/dsl/extensions/" in file_path.as_posix(): - mod_name = f"workspace.dsl.extensions.{stem}" - elif "/sandbox/extensions/" in file_path.as_posix(): - mod_name = f"sandbox.extensions.{stem}" - else: - mod_name = f"seekalpha.dsl.extensions.{stem}" - spec = importlib.util.spec_from_file_location(mod_name, file_path) - if spec is None or spec.loader is None: - raise ImportError(f"无法加载扩展模块: {path}") - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - return mod - return importlib.import_module(path) - - -def extensions_from_env() -> list[str]: - raw = os.environ.get("ALPHAAGENT_DSL_EXTENSIONS", "").strip() - if not raw: - return [] - return [p.strip() for p in raw.split(",") if p.strip()] - - -def discover_extension_modules( - package: str = "seekalpha.dsl.extensions", - *, - skip_private: bool = True, -) -> list[str]: - """扫描包目录下 ``.py`` 模块(默认跳过 ``_`` 开头文件如 ``_example.py``)。""" - try: - mod = importlib.import_module(package) - except ImportError: - return [] - pkg_path = getattr(mod, "__path__", None) - if not pkg_path: - return [] - root = Path(pkg_path[0]) - if not root.is_dir(): - return [] - prefix = package + "." 
- out: list[str] = [] - for path in sorted(root.glob("*.py")): - stem = path.stem - if stem == "__init__": - continue - if skip_private and stem.startswith("_"): - continue - out.append(prefix + stem) - return out - - -def discover_sandbox_extensions( - sandbox_dir: str | Path | None = None, - *, - skip_private: bool = True, -) -> list[str]: - """扫描 ``workspace/sandbox/extensions/*.py``,返回绝对路径供 ``load_operator_module`` 使用。""" - raw = sandbox_dir if sandbox_dir is not None else os.environ.get( - "ALPHAAGENT_SANDBOX_EXTENSIONS_DIR", "workspace/sandbox/extensions" - ) - root = Path(raw) - if not root.is_absolute(): - root = Path.cwd() / root - if not root.is_dir(): - return [] - out: list[str] = [] - for path in sorted(root.glob("*.py")): - if path.stem == "__init__": - continue - if skip_private and path.stem.startswith("_"): - continue - out.append(str(path.resolve())) - return out - - -def resolve_extension_modules( - operator_modules: Sequence[str] | None = None, - *, - auto_discover: bool | None = None, -) -> list[str]: - """合并显式模块、环境变量、包内扩展、sandbox 扩展(后者可覆盖同名包内算子)。""" - if auto_discover is None: - auto_discover = os.environ.get("ALPHAAGENT_DSL_AUTO_DISCOVER", "0").strip().lower() not in ( - "0", - "false", - "no", - "off", - ) - seen: set[str] = set() - merged: list[str] = [] - for mod in list(operator_modules or ()) + extensions_from_env(): - if mod and mod not in seen: - seen.add(mod) - merged.append(mod) - if auto_discover: - for mod in discover_extension_modules(): - if mod not in seen: - seen.add(mod) - merged.append(mod) - for path in discover_sandbox_extensions(): - if path not in seen: - seen.add(path) - merged.append(path) - return merged - - -def _is_sandbox_module_path(mod_path: str) -> bool: - p = mod_path.replace("\\", "/") - return ( - "/workspace/sandbox/extensions/" in p - or "/workspace/dsl/extensions/" in p - or "/sandbox/extensions/" in p - ) - - -def build_operator_namespace( - *, - include_builtin: bool = True, - extension_modules: 
Sequence[str] = (), - extra: Mapping[str, Any] | None = None, - allow_override: bool = False, -) -> dict[str, Any]: - """合并内置与扩展算子为 evaluator 命名空间。""" - ns: dict[str, Any] = {} - - if include_builtin: - builtin = importlib.import_module(_BUILTIN_MODULE) - ns.update(collect_module_operators(builtin)) - - for mod_path in extension_modules: - mod = load_operator_module(mod_path) - sandbox = _is_sandbox_module_path(mod_path) - for name, fn in collect_module_operators(mod).items(): - if name in ns and not allow_override and not sandbox: - raise OperatorConflictError( - f"算子 {name!r} 已存在(模块 {mod_path!r} 与已有定义冲突)" - ) - ns[name] = fn - - if extra: - for name, obj in extra.items(): - if name in ns and not allow_override: - raise OperatorConflictError(f"算子 {name!r} 已存在,extra_namespace 冲突") - ns[name] = obj - - return ns diff --git a/seekalpha/dsl/stock/__init__.py b/seekalpha/dsl/stock/__init__.py deleted file mode 100644 index b6dbd08b..00000000 --- a/seekalpha/dsl/stock/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -"""股票日频 DSL 混频:@1d / @1w 日历合成与无前视广播。""" - -from seekalpha.dsl.stock.intervals import ( - bar_interval_to_timedelta, - normalize_bar_interval, -) -from seekalpha.dsl.stock.incremental import IncrementalWeekEngine, assert_incremental_matches_batch -from seekalpha.dsl.stock.resample import ( - broadcast_timeframe_to_main_freq, - build_timeframe_panel, -) - -__all__ = [ - "IncrementalWeekEngine", - "assert_incremental_matches_batch", - "bar_interval_to_timedelta", - "broadcast_timeframe_to_main_freq", - "build_timeframe_panel", - "normalize_bar_interval", -] diff --git a/seekalpha/dsl/stock/aux_cache.py b/seekalpha/dsl/stock/aux_cache.py deleted file mode 100644 index c037138a..00000000 --- a/seekalpha/dsl/stock/aux_cache.py +++ /dev/null @@ -1,39 +0,0 @@ -"""辅周期 panel 缓存:同一日频 panel 重复 eval 时复用 1w 聚合结果。""" - -from __future__ import annotations - -from typing import MutableMapping - -import pandas as pd - -from seekalpha.dsl.stock.intervals import 
normalize_bar_interval -from seekalpha.dsl.stock.resample import build_timeframe_panel - -# 键:(id(panel), base_interval, tag) → 辅频 DataFrame -AuxCache = MutableMapping[tuple[int, str, str], pd.DataFrame] - - -def get_or_build_aux_panel( - panel: pd.DataFrame, - tag: str, - *, - base_interval: str = "1d", - cache: AuxCache | None = None, -) -> pd.DataFrame: - """按 tag 返回辅频 panel;命中 cache 则直接复用。""" - norm_tag = normalize_bar_interval(tag) - norm_base = normalize_bar_interval(base_interval) - key = (id(panel), norm_base, norm_tag) - - if cache is not None and key in cache: - return cache[key] - - built = build_timeframe_panel( - panel, - target_interval=norm_tag, - base_interval=norm_base, - ) - - if cache is not None: - cache[key] = built - return built diff --git a/seekalpha/dsl/stock/incremental.py b/seekalpha/dsl/stock/incremental.py deleted file mode 100644 index 7b386dc9..00000000 --- a/seekalpha/dsl/stock/incremental.py +++ /dev/null @@ -1,297 +0,0 @@ -"""增量周线聚合 + 无前视 broadcast(与 batch resample 对拍)。""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any, Mapping, MutableMapping, Optional, Sequence - -import numpy as np -import pandas as pd - -from seekalpha.dsl.stock.intervals import bar_interval_to_timedelta -from seekalpha.dsl.stock.resample import ( - _aggregation_rule_for, - _bucket_datetime, - _safe_divide, - _select_numeric_columns, - broadcast_timeframe_to_main_freq, - build_timeframe_panel, -) - -# 派生列:partial 阶段不直接累加,finalize 时重算 -_DERIVED_COLS = frozenset({"vwap", "ret"}) - - -def _bucket_start_ts(dt: pd.Timestamp, interval: str = "1w") -> pd.Timestamp: - s = pd.Series([pd.Timestamp(dt)]) - return pd.Timestamp(_bucket_datetime(s, interval).iloc[0]) - - -def _finalize_partial( - partial: Mapping[str, float], - *, - cols: Sequence[str], - prev_adj_close: float | None, -) -> dict[str, float]: - """把桶内累加状态转为周线一行(含 vwap / ret)。""" - out = dict(partial) - if "vwap" in cols and "volume" in out and 
"amount" in out: - out["vwap"] = float( - _safe_divide( - pd.Series([out["amount"]]), - pd.Series([out["volume"]]), - ).iloc[0] - ) - if "ret" in cols and "adj_close" in out: - ac = out["adj_close"] - if prev_adj_close is not None and np.isfinite(prev_adj_close) and prev_adj_close != 0: - out["ret"] = (ac - prev_adj_close) / prev_adj_close - else: - out["ret"] = np.nan - return out - - -def _update_running( - acc: MutableMapping[str, float], - bar: Mapping[str, float], - cols: Sequence[str], - *, - first_bar: bool, -) -> None: - for c in cols: - if c in _DERIVED_COLS: - continue - v = float(bar[c]) if pd.notna(bar[c]) else np.nan - rule = _aggregation_rule_for(c) - if first_bar: - acc[c] = v - continue - cur = acc.get(c, np.nan) - if rule == "first": - continue - if rule == "max": - acc[c] = v if pd.isna(cur) else max(cur, v) if pd.notna(v) else cur - elif rule == "min": - acc[c] = v if pd.isna(cur) else min(cur, v) if pd.notna(v) else cur - elif rule == "sum": - if pd.isna(cur): - acc[c] = v - elif pd.notna(v): - acc[c] = cur + v - else: # last - acc[c] = v - - -@dataclass -class _InstrumentWeekState: - """单票周线增量状态。""" - - cols: tuple[str, ...] 
- partial_bucket: pd.Timestamp | None = None - partial_acc: dict[str, float] = field(default_factory=dict) - # bucket_start → 定稿周线(与 batch build_timeframe_panel 一致) - weekly_bars: dict[pd.Timestamp, dict[str, float]] = field(default_factory=dict) - # (complete_time, values) 已排序,供单点 backward broadcast - broadcast_ready: list[tuple[pd.Timestamp, dict[str, float]]] = field(default_factory=list) - last_finalized_adj_close: float | None = None - - def _finalize_current_bucket(self) -> None: - if self.partial_bucket is None or not self.partial_acc: - return - row = _finalize_partial( - self.partial_acc, - cols=self.cols, - prev_adj_close=self.last_finalized_adj_close, - ) - self.weekly_bars[self.partial_bucket] = row - complete = self.partial_bucket + bar_interval_to_timedelta("1w") - self.broadcast_ready.append((complete, dict(row))) - if "adj_close" in row and pd.notna(row["adj_close"]): - self.last_finalized_adj_close = float(row["adj_close"]) - self.partial_bucket = None - self.partial_acc = {} - - def append_bar(self, dt: pd.Timestamp, bar: Mapping[str, float]) -> None: - bucket = _bucket_start_ts(dt, "1w") - if self.partial_bucket is not None and bucket != self.partial_bucket: - self._finalize_current_bucket() - - first = self.partial_bucket != bucket or not self.partial_acc - if first: - self.partial_bucket = bucket - self.partial_acc = {} - - _update_running(self.partial_acc, bar, self.cols, first_bar=first) - - def weekly_rows(self) -> list[tuple[pd.Timestamp, dict[str, float]]]: - """定稿桶 + 当前未结束桶(与 batch 含 partial week 一致)。""" - rows = sorted(self.weekly_bars.items(), key=lambda x: x[0]) - if self.partial_bucket is not None and self.partial_acc: - partial_row = _finalize_partial( - self.partial_acc, - cols=self.cols, - prev_adj_close=self.last_finalized_adj_close, - ) - rows.append((self.partial_bucket, partial_row)) - return rows - - def broadcast_at(self, dt: pd.Timestamp, col: str) -> float: - """单点 backward broadcast(完成时刻 ≤ dt 的最近一根已完成周 bar)。""" - val = 
np.nan - for complete, data in self.broadcast_ready: - if complete <= dt: - val = data.get(col, np.nan) - else: - break - return float(val) if pd.notna(val) else np.nan - - -class IncrementalWeekEngine: - """日频逐 bar 增量更新 @1w 辅表,语义对齐 batch resample + merge_asof backward。""" - - def __init__(self, columns: Optional[Sequence[str]] = None) -> None: - self._columns: Optional[tuple[str, ...]] = ( - tuple(columns) if columns is not None else None - ) - self._states: dict[str, _InstrumentWeekState] = {} - - def _cols_for(self, bar: Mapping[str, Any]) -> tuple[str, ...]: - if self._columns is not None: - return self._columns - return tuple(_select_numeric_columns(pd.DataFrame([bar]))) - - def _state(self, instrument: str, cols: tuple[str, ...]) -> _InstrumentWeekState: - st = self._states.get(instrument) - if st is None: - st = _InstrumentWeekState(cols=cols) - self._states[instrument] = st - elif st.cols != cols: - raise ValueError(f"列集合不一致: {instrument} {st.cols} vs {cols}") - return st - - def append_bar( - self, - dt: pd.Timestamp, - instrument: str, - bar: Mapping[str, Any], - ) -> None: - """追加一根日 K(单票)。""" - cols = self._cols_for(bar) - st = self._state(instrument, cols) - st.append_bar(pd.Timestamp(dt), bar) - - def append_panel(self, panel: pd.DataFrame) -> None: - """按 instrument × datetime 顺序 replay 面板(可多次 append 续接)。""" - if not isinstance(panel.index, pd.MultiIndex): - raise ValueError("panel 必须是 (datetime, instrument) MultiIndex") - df = panel.reset_index().sort_values(["instrument", "datetime"], kind="mergesort") - cols = self._columns or tuple(_select_numeric_columns(panel)) - for row in df.itertuples(index=False): - inst = row.instrument - dt = pd.Timestamp(row.datetime) - bar = {c: getattr(row, c) for c in cols if hasattr(row, c)} - self._state(inst, cols).append_bar(dt, bar) - - def weekly_panel(self) -> pd.DataFrame: - """当前周线辅表(index = 桶起点 datetime, instrument)。""" - if not self._states: - return pd.DataFrame( - index=pd.MultiIndex.from_arrays( - [[], 
[]], names=["datetime", "instrument"] - ) - ) - cols = next(iter(self._states.values())).cols - records: list[dict[str, Any]] = [] - for inst, st in sorted(self._states.items()): - for bucket, data in st.weekly_rows(): - rec = {"datetime": bucket, "instrument": inst, **data} - records.append(rec) - if not records: - return pd.DataFrame( - index=pd.MultiIndex.from_arrays( - [[], []], names=["datetime", "instrument"] - ) - ) - df = pd.DataFrame(records) - df = df.set_index(["datetime", "instrument"]).sort_index() - # 列顺序与 batch 一致 - return df[[c for c in cols if c in df.columns]] - - def broadcast( - self, - target_index: pd.MultiIndex, - columns: Optional[Sequence[str]] = None, - ) -> pd.DataFrame: - """全量 broadcast(复用 batch merge_asof,weekly 来自增量状态)。""" - weekly = self.weekly_panel() - if weekly.empty: - cols = list(columns or self._columns or []) - return pd.DataFrame(np.nan, index=target_index, columns=cols) - use = weekly if columns is None else weekly[list(columns)] - return broadcast_timeframe_to_main_freq(use, target_index, "1w") - - def broadcast_at( - self, - dt: pd.Timestamp, - instrument: str, - col: str, - ) -> float: - """单点 broadcast(不跑 merge_asof,O(#已完成周))。""" - st = self._states.get(instrument) - if st is None: - return np.nan - return st.broadcast_at(pd.Timestamp(dt), col) - - def replay_panel(self, panel: pd.DataFrame) -> pd.DataFrame: - """清空后全量 replay,返回 weekly_panel(便于测试)。""" - self._states.clear() - self.append_panel(panel) - return self.weekly_panel() - - -def assert_incremental_matches_batch( - panel: pd.DataFrame, - *, - rtol: float = 1e-6, - atol: float = 1e-6, -) -> None: - """增量 replay 与 batch build + broadcast 全字段对拍。""" - batch_weekly = build_timeframe_panel(panel, target_interval="1w", base_interval="1d") - engine = IncrementalWeekEngine() - inc_weekly = engine.replay_panel(panel) - - pd.testing.assert_frame_equal( - inc_weekly.sort_index(), - batch_weekly.sort_index(), - check_exact=False, - rtol=rtol, - atol=atol, - ) - - if 
batch_weekly.empty: - return - - cols = list(batch_weekly.columns) - batch_bc = broadcast_timeframe_to_main_freq(batch_weekly[cols], panel.index, "1w") - inc_bc = engine.broadcast(panel.index, columns=cols) - pd.testing.assert_frame_equal( - inc_bc.sort_index(), - batch_bc.sort_index(), - check_exact=False, - rtol=rtol, - atol=atol, - ) - - # 单点 broadcast 与 merge 结果一致(抽样) - for i in (0, len(panel) // 2, len(panel) - 1): - key = panel.index[i] - dt, inst = key - for c in cols[: min(5, len(cols))]: - point = engine.broadcast_at(dt, inst, c) - merged = float(inc_bc.loc[key, c]) - assert np.isclose(point, merged, rtol=rtol, atol=atol, equal_nan=True), ( - key, - c, - point, - merged, - ) diff --git a/seekalpha/dsl/stock/intervals.py b/seekalpha/dsl/stock/intervals.py deleted file mode 100644 index 691d0e93..00000000 --- a/seekalpha/dsl/stock/intervals.py +++ /dev/null @@ -1,63 +0,0 @@ -"""股票 DSL 周期归一化:仅支持日频 @1d 与日历周 @1w。""" - -from __future__ import annotations - -import re - -import pandas as pd - -# 1d / 1w 及常见别名 -_STOCK_INTERVAL_RE = re.compile( - r"^\s*(\d+)\s*(d|w|day|week)(?:s)?\s*$", - re.IGNORECASE, -) - -# 股票模式允许的归一化周期 -STOCK_INTERVALS = frozenset({"1d", "1w"}) - - -def normalize_bar_interval(value: str | int) -> str: - """把 ``1d`` / ``1D`` / ``1week`` 等归一为 ``1d`` 或 ``1w``。""" - if isinstance(value, int): - if value <= 0: - raise ValueError(f"bar_interval 必须为正整数,收到: {value!r}") - # 整数仅用于分钟期货语义;股票模式不支持 - raise ValueError(f"股票 DSL 不支持整数周期 {value!r},请使用 1d 或 1w") - - s = str(value).strip().lower() - if not s: - raise ValueError("bar_interval 不能为空") - - # 精确匹配已归一形式 - if s in STOCK_INTERVALS: - return s - - m = _STOCK_INTERVAL_RE.match(s) - if m is None: - raise ValueError( - f"不支持的 bar_interval: {value!r}(股票仅支持 1d / 1w)" - ) - - num = int(m.group(1)) - unit = m.group(2).lower() - if unit in ("d", "day"): - tag = f"{num}d" - elif unit in ("w", "week"): - tag = f"{num}w" - else: - raise ValueError(f"不支持的 bar_interval: {value!r}") - - if tag not in STOCK_INTERVALS: - 
raise ValueError(f"股票 DSL 仅支持 1d / 1w,收到: {tag!r}") - return tag - - -def bar_interval_to_timedelta(value: str | int) -> pd.Timedelta: - """辅周期 bar 全长,用于无前视广播的「完成时刻 = 桶起点 + 全长」。""" - tag = normalize_bar_interval(value) - if tag == "1d": - return pd.Timedelta(days=1) - if tag == "1w": - # 日历周:W-FRI 桶起点 + 7 天 = 该周 bar 完成 - return pd.Timedelta(days=7) - raise ValueError(f"未知周期: {tag!r}") diff --git a/seekalpha/dsl/stock/resample.py b/seekalpha/dsl/stock/resample.py deleted file mode 100644 index fe5a26a3..00000000 --- a/seekalpha/dsl/stock/resample.py +++ /dev/null @@ -1,200 +0,0 @@ -"""股票日历混频:日频 panel 聚合为 @1d / @1w,并无前视广播回日频。 - -改编自 AQRA dsl_core/resample.py,适配 A 股日频 panel 与 W-FRI 周线。 -""" - -from __future__ import annotations - -from typing import Iterable, Optional - -import numpy as np -import pandas as pd - -from seekalpha.dsl.stock.intervals import bar_interval_to_timedelta, normalize_bar_interval - -# 字段聚合规则(OHLCV + 股票扩展列) -_AGG_FIRST = {"open", "adj_open"} -_AGG_MAX = {"high", "adj_high"} -_AGG_MIN = {"low", "adj_low"} -_AGG_LAST = { - "close", - "adj_close", - "is_trade", - "not_st", - "float_cap", - "tot_cap", - "label_1d_close_to_close", - "label_1d_open_to_open", - "label_10d_close_to_close", - "label_20d_close_to_close", -} -_AGG_SUM = {"volume", "amount", "total_turnover"} - - -def _select_numeric_columns( - panel: pd.DataFrame, - include: Optional[Iterable[str]] = None, -) -> list[str]: - if include is not None: - cols = [c for c in include if c in panel.columns] - else: - cols = list(panel.columns) - return [c for c in cols if pd.api.types.is_numeric_dtype(panel[c])] - - -def _aggregation_rule_for(col: str) -> str: - if col in _AGG_FIRST: - return "first" - if col in _AGG_MAX: - return "max" - if col in _AGG_MIN: - return "min" - if col in _AGG_SUM: - return "sum" - if col in _AGG_LAST: - return "last" - return "last" - - -def _safe_divide(num: pd.Series, den: pd.Series) -> pd.Series: - num_arr = pd.to_numeric(num, 
errors="coerce").to_numpy(dtype=float, copy=False) - den_arr = pd.to_numeric(den, errors="coerce").to_numpy(dtype=float, copy=False) - out = np.full(len(num_arr), np.nan, dtype=float) - mask = np.isfinite(num_arr) & np.isfinite(den_arr) & (den_arr != 0.0) - out[mask] = num_arr[mask] / den_arr[mask] - return pd.Series(out, index=num.index, dtype=float) - - -def _empty_panel() -> pd.DataFrame: - return pd.DataFrame( - index=pd.MultiIndex.from_arrays( - [pd.DatetimeIndex([], name="datetime"), pd.Index([], name="instrument")] - ) - ) - - -def _bucket_datetime(dt: pd.Series, interval: str) -> pd.Series: - """日历切桶;1w 用 to_period(pandas 3 不支持 dt.floor('W-FRI'))。""" - if not pd.api.types.is_datetime64_any_dtype(dt): - dt = pd.to_datetime(dt) - tag = normalize_bar_interval(interval) - if tag == "1d": - return dt.dt.floor("1D") - if tag == "1w": - # W-FRI:自然周,周五为周期锚点 - return dt.dt.to_period("W-FRI").dt.start_time - raise ValueError(f"不支持的 bucket 周期: {tag!r}") - - -def build_timeframe_panel( - panel: pd.DataFrame, - *, - target_interval: str = "1w", - base_interval: str = "1d", - columns: Optional[Iterable[str]] = None, - strict_complete_bars: bool = False, -) -> pd.DataFrame: - """把日频 panel 聚合为更粗日历周期(1d / 1w)。""" - if not isinstance(panel.index, pd.MultiIndex): - raise ValueError("panel 必须是 (datetime, instrument) MultiIndex 面板") - if panel.index.names[:2] != ["datetime", "instrument"]: - raise ValueError("panel 的索引层必须依次为 datetime、instrument") - - target_rule = normalize_bar_interval(target_interval) - normalize_bar_interval(base_interval) - - use_cols = _select_numeric_columns(panel, include=columns) - if not use_cols: - return _empty_panel() - requested_cols = list(use_cols) - - # ret 需在聚合后按 instrument 重算 - needs_adj_close_for_ret = "ret" in use_cols and "adj_close" in panel.columns - if needs_adj_close_for_ret and "adj_close" not in use_cols: - use_cols.append("adj_close") - - df = panel[use_cols].reset_index() - # 按日历边界切桶 - df["__bucket__"] = 
_bucket_datetime(df["datetime"], target_rule) - - agg_map = {c: _aggregation_rule_for(c) for c in use_cols} - - # 股票 1d→1w 不按固定交易日数过滤桶;strict 模式暂不启用 - if strict_complete_bars and target_rule != "1w": - raise ValueError("股票 resample 暂不支持 strict_complete_bars") - - grouped = df.groupby(["instrument", "__bucket__"], sort=True).agg(agg_map) - - # vwap = 桶内 sum(amount) / sum(volume) - if {"volume", "amount"}.issubset(df.columns) and "vwap" in use_cols: - vol_sum = grouped["volume"] if "volume" in grouped.columns else None - amt_sum = grouped["amount"] if "amount" in grouped.columns else None - if vol_sum is not None and amt_sum is not None: - grouped["vwap"] = _safe_divide(amt_sum, vol_sum) - - # 聚合后重算 ret - if "ret" in grouped.columns and "adj_close" in grouped.columns: - grouped["ret"] = grouped["adj_close"].groupby(level="instrument").pct_change( - fill_method=None - ) - - if needs_adj_close_for_ret: - grouped = grouped[requested_cols] - - # (instrument, bucket) → (datetime, instrument) - grouped.index = grouped.index.set_names(["instrument", "datetime"]) - grouped = grouped.swaplevel("instrument", "datetime").sort_index() - grouped.index = grouped.index.set_names(["datetime", "instrument"]) - return grouped - - -def broadcast_timeframe_to_main_freq( - values: pd.DataFrame, - target_index: pd.MultiIndex, - target_interval: str, -) -> pd.DataFrame: - """辅周期 values 无前视广播到主频 target_index(merge_asof backward)。""" - if not isinstance(target_index, pd.MultiIndex): - raise ValueError("target_index 必须是 (datetime, instrument) MultiIndex") - if target_index.names[:2] != ["datetime", "instrument"]: - raise ValueError("target_index 的索引层必须依次为 datetime、instrument") - - tag = normalize_bar_interval(target_interval) - # 完成时刻 = 桶起点 + 整段 bar 长(与 AQRA 60m 语义一致) - completion = bar_interval_to_timedelta(tag) - - cols = list(values.columns) - if values.empty or not cols: - return pd.DataFrame(np.nan, index=target_index, columns=cols) - - # 右表:桶起点 + bar 全长 → 完成时间 - src = 
values.reset_index() - src["datetime"] = src["datetime"] + completion - src = src.sort_values(["datetime", "instrument"], kind="mergesort").reset_index( - drop=True - ) - - # 左表带行号,合并后恢复原顺序 - tgt = pd.DataFrame( - {"__row__": np.arange(len(target_index), dtype=np.int64)}, - index=target_index, - ).reset_index() - tgt_sorted = tgt.sort_values(["datetime", "instrument"], kind="mergesort").reset_index( - drop=True - ) - - merged = pd.merge_asof( - tgt_sorted, - src, - on="datetime", - by="instrument", - direction="backward", - allow_exact_matches=True, - ) - merged = merged.sort_values("__row__", kind="mergesort") - - return pd.DataFrame( - merged[cols].to_numpy(), - index=target_index, - columns=cols, - ) diff --git a/seekalpha/factor/__init__.py b/seekalpha/factor/__init__.py deleted file mode 100644 index 18ec2b14..00000000 --- a/seekalpha/factor/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -"""因子研究:评估、入库、factorzoo 存储。""" - -from seekalpha.factor.eval import evaluate_factor -from seekalpha.factor.ingest import ingest_factor, load_panel_for_zoo -from seekalpha.factor.registry import list_factor_entries, load_registry -from seekalpha.factor.types import DEFAULT_INGEST_POLICY, IngestPolicy, IngestResult -from seekalpha.factor.zoo import ( - DEFAULT_FACTORLIB_ROOT, - FactorZoo, - init_library, -) - -__all__ = [ - "DEFAULT_FACTORLIB_ROOT", - "DEFAULT_INGEST_POLICY", - "FactorZoo", - "IngestPolicy", - "IngestResult", - "evaluate_factor", - "ingest_factor", - "init_library", - "list_factor_entries", - "load_panel_for_zoo", - "load_registry", -] diff --git a/seekalpha/factor/align.py b/seekalpha/factor/align.py deleted file mode 100644 index ac67cb63..00000000 --- a/seekalpha/factor/align.py +++ /dev/null @@ -1,74 +0,0 @@ -"""因子值对齐到 canonical row_id 顺序。""" - -from __future__ import annotations - -import numpy as np -import pandas as pd - -from seekalpha.factor.zoo.index import RowIndex - - -def series_to_long(values: pd.Series) -> pd.DataFrame: - """MultiIndex Series → 
datetime, instrument, value。""" - if not isinstance(values.index, pd.MultiIndex): - raise ValueError("因子值须为 (datetime, instrument) MultiIndex Series") - out = values.reset_index() - out.columns = ["datetime", "instrument", "value"] - out["datetime"] = pd.to_datetime(out["datetime"], errors="coerce") - out["instrument"] = out["instrument"].astype(str) - return out.dropna(subset=["datetime"]) - - -def canonical_align( - values: np.ndarray | pd.Series, - *, - factor_dt: pd.Series | None = None, - factor_inst: pd.Series | None = None, - row_index: RowIndex, - n_rows: int, -) -> np.ndarray: - """通过 merge 将因子值对齐到 canonical row_id 顺序(长度 n_rows)。""" - if isinstance(values, pd.Series): - long = series_to_long(values) - factor_dt = long["datetime"] - factor_inst = long["instrument"] - arr = long["value"].to_numpy(dtype=np.float32, copy=False) - else: - arr = np.asarray(values, dtype=np.float32) - if factor_dt is None or factor_inst is None: - raise ValueError("ndarray 输入须提供 factor_dt 与 factor_inst") - - if len(arr) == 0: - return np.full(n_rows, np.nan, dtype=np.float32) - - dt_series = pd.to_datetime(factor_dt, errors="coerce") - ref = row_index.rows[["row_id", "datetime", "instrument"]].copy() - ref["datetime"] = pd.to_datetime(ref["datetime"], errors="coerce") - ref["instrument"] = ref["instrument"].astype(str) - - tmp = pd.DataFrame( - { - "_dt": dt_series, - "_inst": factor_inst.astype(str), - "_val": np.asarray(arr, dtype=np.float32), - } - ) - merged = tmp.merge( - ref, - left_on=["_dt", "_inst"], - right_on=["datetime", "instrument"], - how="inner", - ) - out = np.full(n_rows, np.nan, dtype=np.float32) - if merged.empty: - return out - rid = merged["row_id"].to_numpy(dtype=np.int64, copy=False) - out[rid] = merged["_val"].to_numpy(dtype=np.float32, copy=False) - return out - - -def align_series_to_panel(values: pd.Series, panel: pd.DataFrame) -> np.ndarray: - """panel index 顺序对齐(panel 须已与 row_index 同序)。""" - panel = panel.sort_index() - aligned = 
values.reindex(panel.index) - return aligned.to_numpy(dtype=np.float32, copy=False) diff --git a/seekalpha/factor/eval.py b/seekalpha/factor/eval.py deleted file mode 100644 index 545155b8..00000000 --- a/seekalpha/factor/eval.py +++ /dev/null @@ -1,245 +0,0 @@ -"""因子评估:DSL 求值 + IC/MLS 指标。""" - -from __future__ import annotations - -import time -from typing import Any - -import numpy as np -import pandas as pd - -from seekalpha.dsl import eval_factor -from seekalpha.dsl.core.errors import MultiLineFactorEvalError -from seekalpha.factor.align import align_series_to_panel -from seekalpha.factor.metrics import ( - coverage, - cross_sectional_ic, - cross_sectional_lag1_pearson_autocorr, - cross_sectional_rank_ic, - cs_ic_summary, - decile_mean_label, - factor_skew_kurtosis, - label_quantile_buckets, - monthly_ic_robustness, - mls_fmb_summary, - pearson_ic, -) - -from seekalpha.data.panel import slice_panel -from seekalpha.factor.metrics import evaluate_on_panel -from seekalpha.factor.types import DEFAULT_LABEL_COL - - -def evaluate_factor( - expr: str, - panel: pd.DataFrame, - *, - label_col: str = DEFAULT_LABEL_COL, - start: str | None = None, - end: str | None = None, - min_pairs: int = 5, -) -> dict[str, Any]: - """在全量 panel 上求值 DSL,再按日期窗计算 IC/ICIR/RANKIC/MLS 等指标。""" - panel_full = panel.sort_index() - if label_col not in panel_full.columns: - raise KeyError(f"panel 缺少标签列: {label_col}") - out = eval_factor(expr, panel_full) - if not isinstance(out, pd.Series): - raise TypeError(f"因子输出须为 Series,得到 {type(out)!r}") - values = align_series_to_panel(out, panel_full) - eval_panel = slice_panel(panel_full, start=start, end=end) - if eval_panel.empty: - raise ValueError(f"评估切片为空: start={start!r} end={end!r}") - pos = panel_full.index.isin(eval_panel.index) - metrics = evaluate_on_panel( - values[pos], eval_panel, label_col=label_col, min_ic_pairs=min_pairs - ) - metrics["eval_start"] = start - metrics["eval_end"] = end - metrics["label_col"] = label_col - return metrics - - 
- - -def _build_summary( - factor_series: pd.Series, - label_series: pd.Series, - values: np.ndarray, - daily_ic: pd.Series, - daily_rank_ic: pd.Series, - *, - min_cs_autocorr_pairs: int = 30, -) -> dict[str, Any]: - cs = cs_ic_summary(daily_ic, daily_rank_ic) - n_inst = int(factor_series.index.get_level_values("instrument").nunique()) - skew, kurt = factor_skew_kurtosis(values) - fac = factor_series.to_numpy(dtype=float, copy=False) - lab = label_series.to_numpy(dtype=float, copy=False) - cs_autocorr_pairs = min(min_cs_autocorr_pairs, max(n_inst - 1, 2)) - mls_min_stocks = min(30, max(n_inst, 10)) - return { - "ic": cs["ic"], - "icir": cs["icir"], - "rank_ic": cs["rank_ic"], - "n_days": cs["n_days"], - "n_instruments": n_inst, - "factor_coverage": coverage(values), - "factor_skewness": skew, - "factor_kurtosis": kurt, - "cs_pearson_autocorr": cross_sectional_lag1_pearson_autocorr( - factor_series, - min_pairs=cs_autocorr_pairs, - ), - "decile_mean_label": decile_mean_label(fac, lab, n_deciles=10), - "mls_fmb": mls_fmb_summary( - factor_series, - label_series, - min_stocks=mls_min_stocks, - ), - } - - -def _detail_tables( - factor_series: pd.Series, - label_series: pd.Series, - daily_ic: pd.Series, - daily_rank_ic: pd.Series, -) -> dict[str, Any]: - by_month_rows: list[dict[str, Any]] = [] - if not daily_ic.empty: - s = daily_ic.copy() - s.index = pd.to_datetime(s.index, errors="coerce") - r = daily_rank_ic.copy() - r.index = pd.to_datetime(r.index, errors="coerce") - for month, grp in s.groupby(s.index.to_period("M"), sort=True): - r_grp = r.loc[grp.index] - by_month_rows.append( - { - "month": str(month), - "mean_ic": float(grp.mean(skipna=True)), - "mean_rank_ic": float(r_grp.mean(skipna=True)), - "n_days": int(grp.notna().sum()), - } - ) - - by_symbol_rows: list[dict[str, Any]] = [] - for inst, f_sub in factor_series.groupby(level="instrument", sort=False): - y_sub = label_series.xs(inst, level="instrument") - ts_ic = pearson_ic( - 
f_sub.to_numpy(dtype=np.float64, copy=False), - y_sub.to_numpy(dtype=np.float64, copy=False), - min_pairs=5, - ) - by_symbol_rows.append({"instrument": str(inst), "ts_ic": ts_ic}) - - return {"by_month": by_month_rows, "by_symbol": by_symbol_rows} - - -def evaluate_factor_on_split( - session, - *, - split: str, - multi_line_expr: str, - factor_name: str = "expr", - include_detail_tables: bool = False, - label_quantile_n: int = 10, -) -> dict[str, Any]: - """在 train/val 窗上评估多行 DSL,返回未格式化的原始结果 dict。""" - ctx = session.ctx - panel, start, end = session.get_split_panel(split) - date_range = {"start": start, "end": end} - - if panel.empty: - return { - "ok": False, - "error": "日期窗内无 panel 数据", - "error_type": "EmptyData", - "split": split, - "date_range": date_range, - } - - if ctx.label_col not in panel.columns: - return { - "ok": False, - "error": f"panel 缺少标签列: {ctx.label_col}", - "error_type": "MissingLabelColumn", - "split": split, - "date_range": date_range, - } - - timing: dict[str, float] = {} - t0 = time.perf_counter() - - try: - t_eval = time.perf_counter() - out = eval_factor(multi_line_expr, panel) - timing["eval_ms"] = (time.perf_counter() - t_eval) * 1000 - except MultiLineFactorEvalError as e: - return { - "ok": False, - "error": str(e), - "error_type": type(e).__name__, - "split": split, - "date_range": date_range, - } - except Exception as e: - return { - "ok": False, - "error": str(e), - "error_type": type(e).__name__, - "split": split, - "date_range": date_range, - } - - if not isinstance(out, pd.Series): - return { - "ok": False, - "error": f"因子输出须为 Series,得到 {type(out)!r}", - "error_type": "TypeError", - "split": split, - "date_range": date_range, - } - - t_align = time.perf_counter() - values = align_series_to_panel(out, panel) - factor_series = pd.Series(values, index=panel.index, name=factor_name, dtype=np.float32) - label_series = panel[ctx.label_col] - timing["align_ms"] = (time.perf_counter() - t_align) * 1000 - - t_metrics = 
time.perf_counter() - daily_ic = cross_sectional_ic(factor_series, label_series, min_pairs=5) - daily_rank_ic = cross_sectional_rank_ic(factor_series, label_series, min_pairs=5) - summary = _build_summary(factor_series, label_series, values, daily_ic, daily_rank_ic) - monthly_rob = monthly_ic_robustness(daily_ic) - - buckets: list[dict[str, Any]] = [] - if label_quantile_n >= 2: - buckets = label_quantile_buckets( - factor_series.to_numpy(dtype=float, copy=False), - label_series.to_numpy(dtype=float, copy=False), - n_quantiles=label_quantile_n, - ) - - detail: dict[str, Any] = {} - if include_detail_tables: - detail = _detail_tables(factor_series, label_series, daily_ic, daily_rank_ic) - - timing["metrics_ms"] = (time.perf_counter() - t_metrics) * 1000 - timing["total_ms"] = (time.perf_counter() - t0) * 1000 - - result: dict[str, Any] = { - "ok": True, - "split": split, - "date_range": date_range, - "eval_wall_seconds": timing["total_ms"] / 1000.0, - "timing_ms": timing, - "summary": summary, - "monthly_corr_robustness": monthly_rob, - "label_quantile_buckets": buckets, - "label_quantile_n": int(label_quantile_n), - "label_col": ctx.label_col, - "bar_interval": "1d", - } - result.update(detail) - return result diff --git a/seekalpha/factor/expr_store.py b/seekalpha/factor/expr_store.py deleted file mode 100644 index 8402bc89..00000000 --- a/seekalpha/factor/expr_store.py +++ /dev/null @@ -1,60 +0,0 @@ -"""因子 DSL 源文件读写(与 factorzoo 同目录:{lib}/expressions/)。""" - -from __future__ import annotations - -from pathlib import Path - -from seekalpha.core.paths import FACTOR_EXPR_DIR -from seekalpha.factor.zoo import FactorZoo - - -def factor_expr_path(factor_id: str, *, expr_dir: Path | None = None) -> Path: - root = Path(expr_dir or FACTOR_EXPR_DIR) - return root / f"{factor_id}.dsl" - - -def write_factor_expr(factor_id: str, expr: str, *, expr_dir: Path | None = None) -> Path: - path = factor_expr_path(factor_id, expr_dir=expr_dir) - path.parent.mkdir(parents=True, 
exist_ok=True) - path.write_text(expr.strip() + "\n", encoding="utf-8") - return path - - -def list_expr_dir_entries(expr_dir: Path | str) -> list[tuple[str, str, str]]: - """扫描目录下 ``*.dsl``,返回 (factor_id, name, expr) 列表。""" - root = Path(expr_dir) - if not root.is_dir(): - raise FileNotFoundError(f"表达式目录不存在: {root}") - entries: list[tuple[str, str, str]] = [] - for path in sorted(root.glob("*.dsl")): - factor_id = path.stem - expr = path.read_text(encoding="utf-8").strip() - if not expr: - continue - entries.append((factor_id, factor_id, expr)) - if not entries: - raise ValueError(f"目录下无有效 .dsl: {root}") - return entries - - -def export_zoo_expressions( - zoo: FactorZoo, - *, - expr_dir: Path | None = None, - overwrite: bool = True, -) -> list[tuple[str, Path]]: - """从 factorzoo catalog 导出全部因子 DSL 到 {lib}/expressions/。""" - out_dir = Path(expr_dir or zoo.paths.expressions_dir) - out_dir.mkdir(parents=True, exist_ok=True) - written: list[tuple[str, Path]] = [] - for factor_id in zoo.catalog.list_factor_ids(): - meta = zoo.catalog.get(factor_id) - if meta is None: - continue - path = factor_expr_path(factor_id, expr_dir=out_dir) - if path.is_file() and not overwrite: - written.append((factor_id, path)) - continue - path = write_factor_expr(factor_id, meta.expr, expr_dir=out_dir) - written.append((factor_id, path)) - return written diff --git a/seekalpha/factor/ingest.py b/seekalpha/factor/ingest.py deleted file mode 100644 index d4b12dee..00000000 --- a/seekalpha/factor/ingest.py +++ /dev/null @@ -1,341 +0,0 @@ -"""因子物化、评估与入库编排。""" - -from __future__ import annotations - -from pathlib import Path -from typing import Any - -import numpy as np -import pandas as pd - -from seekalpha.data.panel import load_panel, slice_panel -from seekalpha.dsl import eval_factor -from seekalpha.dsl.eval import collect_aux_intervals_from_expr -from seekalpha.factor.align import align_series_to_panel, canonical_align -from seekalpha.factor.metrics import coverage as metric_coverage -from 
seekalpha.factor.metrics import evaluate_on_panel, factor_skew_kurtosis -from seekalpha.factor.types import DEFAULT_INGEST_POLICY, IngestPolicy, IngestResult, MaterializeResult -from seekalpha.factor.zoo import FactorStatus, FactorZoo, SimilarityMatrix - - -def clip_values( - values: np.ndarray, - *, - lower_pct: float = 1.0, - upper_pct: float = 99.0, -) -> tuple[np.ndarray, dict[str, Any]]: - """按分位数 clip,返回 clip 后向量与 extra 元数据。""" - arr = np.asarray(values, dtype=np.float32) - finite = np.isfinite(arr) - if finite.sum() <= 1: - return arr.copy(), { - "clip_p01": None, - "clip_p99": None, - "clip_lower_pct": lower_pct, - "clip_upper_pct": upper_pct, - } - raw = arr[finite] - p01, p99 = np.nanpercentile(raw, [lower_pct, upper_pct]) - out = arr.copy() - out[finite] = np.clip(raw, p01, p99).astype(np.float32, copy=False) - return out, { - "clip_p01": float(p01) if np.isfinite(p01) else None, - "clip_p99": float(p99) if np.isfinite(p99) else None, - "clip_lower_pct": lower_pct, - "clip_upper_pct": upper_pct, - } - - - -def materialize_factor(expr: str, panel: pd.DataFrame) -> MaterializeResult: - """DSL 求值并对齐 panel 行序。""" - panel = panel.sort_index() - out = eval_factor(expr, panel) - if not isinstance(out, pd.Series): - raise TypeError(f"因子输出须为 Series,得到 {type(out)!r}") - values = align_series_to_panel(out, panel) - tags = collect_aux_intervals_from_expr(expr) - return MaterializeResult(values=values, expr=expr.strip(), aux_tags=tags) - - -def materialize_to_canonical( - expr: str, - panel: pd.DataFrame, - zoo: FactorZoo, -) -> MaterializeResult: - """求值并对齐到因子库 canonical row_id。""" - panel = panel.sort_index() - out = eval_factor(expr, panel) - if not isinstance(out, pd.Series): - raise TypeError(f"因子输出须为 Series,得到 {type(out)!r}") - aligned = canonical_align( - out, - row_index=zoo.index, - n_rows=zoo.manifest.n_rows, - ) - tags = collect_aux_intervals_from_expr(expr) - return MaterializeResult(values=aligned, expr=expr.strip(), aux_tags=tags) - - -def 
materialize_slice_to_canonical( - expr: str, - panel: pd.DataFrame, - zoo: FactorZoo, - *, - start: str, - end: str, -) -> MaterializeResult: - """在 panel 日期子集上求值,并对齐到完整 canonical index(窗口外为 NaN)。""" - panel = panel.sort_index() - panel_slice = slice_panel(panel, start=start, end=end) - if panel_slice.empty: - raise ValueError(f"panel 切片为空: start={start!r} end={end!r}") - out = eval_factor(expr, panel_slice) - if not isinstance(out, pd.Series): - raise TypeError(f"因子输出须为 Series,得到 {type(out)!r}") - aligned = canonical_align( - out, - row_index=zoo.index, - n_rows=zoo.manifest.n_rows, - ) - tags = collect_aux_intervals_from_expr(expr) - return MaterializeResult(values=aligned, expr=expr.strip(), aux_tags=tags) - - -def mask_values_before_start(values: np.ndarray, zoo: FactorZoo, start: str) -> np.ndarray: - """将 datetime < start 的行置为 NaN,从 start(含)起保留入库值。""" - out = np.array(values, dtype=np.float32, copy=True) - rows = zoo.index.rows - dt = pd.to_datetime(rows["datetime"], errors="coerce") - cutoff = pd.Timestamp(start) - out[dt < cutoff] = np.nan - return out - - -def compute_ingest_metrics( - stored_values: np.ndarray, - panel: pd.DataFrame, - policy: IngestPolicy, -) -> dict[str, Any]: - """按 policy 在 eval 区间计算入库指标;coverage 为 eval 区间口径。""" - panel_sorted = panel.sort_index() - eval_values, eval_panel = _slice_values_with_panel( - stored_values, - panel_sorted, - start=policy.eval_start, - end=policy.eval_end, - ) - metrics = evaluate_on_panel(eval_values, eval_panel, label_col=policy.label_col) - skew, kurt = factor_skew_kurtosis(eval_values) - metrics["skew"] = skew - metrics["kurt"] = kurt - metrics["eval_start"] = policy.eval_start - metrics["eval_end"] = policy.eval_end - metrics["finite_ratio"] = metric_coverage(stored_values) - return metrics - - -def prepare_stored_values( - expr: str, - panel: pd.DataFrame, - zoo: FactorZoo, - policy: IngestPolicy, -) -> tuple[np.ndarray, str, list[str], dict[str, Any]]: - """物化 → 可选 clip → mask → 返回 (stored_values, 
expr, aux_tags, clip_extra)。""" - mat = materialize_to_canonical(expr, panel, zoo) - if policy.clip_pct is not None: - stored_values, clip_extra = clip_values( - mat.values, lower_pct=policy.clip_pct[0], upper_pct=policy.clip_pct[1] - ) - else: - stored_values = mat.values - clip_extra = {"clip_lower_pct": None, "clip_upper_pct": None} - stored_values = mask_values_before_start(stored_values, zoo, policy.mask_before_start) - return stored_values, mat.expr, mat.aux_tags, clip_extra - - -def _slice_values_with_panel( - values: np.ndarray, - panel: pd.DataFrame, - *, - start: str | None = None, - end: str | None = None, -) -> tuple[np.ndarray, pd.DataFrame]: - """按日期切片 panel,并取对应位置的因子值。""" - panel = panel.sort_index() - sub = slice_panel(panel, start=start, end=end) - if sub.empty: - raise ValueError(f"panel 切片为空: start={start!r} end={end!r}") - pos = panel.index.isin(sub.index) - return values[pos], sub - - -def ingest_factor( - zoo: FactorZoo, - *, - factor_id: str, - name: str, - expr: str, - panel: pd.DataFrame, - policy: IngestPolicy | None = None, - label_col: str = DEFAULT_INGEST_POLICY.label_col, - clip_pct: tuple[float, float] | None = None, - mask_before_start: str | None = None, - eval_start: str | None = None, - eval_end: str | None = None, - max_cs_corr: float = DEFAULT_INGEST_POLICY.max_cs_corr, - similar_top_k: int = DEFAULT_INGEST_POLICY.similar_top_k, - similar_min_pairs: int = 10, - overwrite: bool = False, - dry_run: bool = False, - update_similarity: bool = True, -) -> IngestResult: - """单因子:物化 → [可选 clip] → mask → eval 指标 → 查重 → 入库。""" - if policy is not None: - pol = policy - else: - train = mask_before_start or eval_start or DEFAULT_INGEST_POLICY.train_start - pol = IngestPolicy( - train_start=train, - val_end=eval_end or DEFAULT_INGEST_POLICY.val_end, - label_col=label_col, - max_cs_corr=max_cs_corr, - similar_top_k=similar_top_k, - clip_pct=clip_pct, - ) - - stored_values, mat_expr, aux_tags, clip_extra = prepare_stored_values(expr, panel, 
zoo, pol) - metrics = compute_ingest_metrics(stored_values, panel, pol) - extra: dict[str, Any] = { - **clip_extra, - "aux_tags": aux_tags, - "metrics": metrics, - } - - sim: SimilarityMatrix | None = None - sim_info: dict[str, Any] | None = None - existing = zoo.catalog.get(factor_id) - - if existing is None: - max_corr = 0.0 - neighbor_report: dict[str, Any] | None = None - if zoo.n_factors > 0: - sim = SimilarityMatrix(zoo.paths, zoo.manifest.max_factors) - neighbor_report = sim.cross_sectional_neighbor_report( - zoo, stored_values, top_k=pol.similar_top_k, min_pairs=similar_min_pairs - ) - max_corr = float(neighbor_report["max_abs_corr"]) - if max_corr >= pol.max_cs_corr: - return IngestResult( - factor_id=factor_id, - col_idx=None, - stored=False, - skipped_reason=f"cs_corr={max_corr:.4f} >= {pol.max_cs_corr}", - metrics=metrics, - similarity=neighbor_report or {"max_abs_corr": 0.0, "top_neighbors": []}, - extra=extra, - ) - if dry_run: - return IngestResult( - factor_id=factor_id, - col_idx=None, - stored=False, - skipped_reason="dry_run", - metrics=metrics, - similarity={"max_abs_corr": max_corr}, - extra=extra, - ) - col_idx = zoo.append_factor( - factor_id=factor_id, - name=name, - expr=mat_expr, - values=stored_values, - status=FactorStatus.full, - extra=extra, - ) - if update_similarity: - sim = sim or SimilarityMatrix(zoo.paths, zoo.manifest.max_factors) - sim_info = sim.append_factor_correlations( - zoo, - factor_id=factor_id, - col_idx=col_idx, - values=stored_values, - top_k=pol.similar_top_k, - ) - return IngestResult( - factor_id=factor_id, - col_idx=col_idx, - stored=True, - skipped_reason=None, - metrics=metrics, - similarity=sim_info, - extra=extra, - ) - - if not overwrite: - return IngestResult( - factor_id=factor_id, - col_idx=existing.col_idx, - stored=False, - skipped_reason="already_exists", - metrics=metrics, - similarity=None, - extra=extra, - ) - - if dry_run: - return IngestResult( - factor_id=factor_id, - col_idx=existing.col_idx, - 
stored=False, - skipped_reason="dry_run", - metrics=metrics, - similarity=None, - extra=extra, - ) - - col_idx = zoo.overwrite_factor( - factor_id=factor_id, - name=name, - expr=mat_expr, - values=stored_values, - status=FactorStatus.full, - extra=extra, - ) - if update_similarity: - sim = SimilarityMatrix(zoo.paths, zoo.manifest.max_factors) - sim_info = sim.append_factor_correlations( - zoo, - factor_id=factor_id, - col_idx=col_idx, - values=stored_values, - top_k=pol.similar_top_k, - ) - return IngestResult( - factor_id=factor_id, - col_idx=col_idx, - stored=True, - skipped_reason=None, - metrics=metrics, - similarity=sim_info, - extra=extra, - ) - - -def load_panel_for_zoo( - zoo: FactorZoo, - *, - panel_path: Path | None = None, - start: str | None = None, - end: str | None = None, -) -> pd.DataFrame: - """加载与因子库对齐的 panel(可选日期切片)。""" - path = panel_path or Path(zoo.manifest.panel_path) - panel = load_panel(path) - panel = slice_panel(panel, start=start, end=end) - if len(panel) != zoo.manifest.n_rows and start is None and end is None: - raise ValueError( - f"panel 行数 {len(panel)} != 库 n_rows {zoo.manifest.n_rows};" - "请用相同 panel 初始化库,或仅用于调试切片" - ) - return panel.sort_index() diff --git a/seekalpha/factor/metrics.py b/seekalpha/factor/metrics.py deleted file mode 100644 index ad8aec2e..00000000 --- a/seekalpha/factor/metrics.py +++ /dev/null @@ -1,605 +0,0 @@ -"""股票因子评估指标。""" - -from __future__ import annotations - -import math -from typing import Any - -import numpy as np -import pandas as pd - -from seekalpha.factor.types import DEFAULT_LABEL_COL - - -def coverage(values: np.ndarray) -> float: - arr = np.asarray(values, dtype=np.float32) - if len(arr) == 0: - return 0.0 - return float(np.isfinite(arr).mean()) - - -def pearson_ic(factor: np.ndarray, label: np.ndarray, *, min_pairs: int = 30) -> float: - x = np.asarray(factor, dtype=np.float64) - y = np.asarray(label, dtype=np.float64) - mask = np.isfinite(x) & np.isfinite(y) - if int(mask.sum()) < min_pairs: - 
return float("nan") - xs = x[mask] - ys = y[mask] - xs = xs - xs.mean() - ys = ys - ys.mean() - denom = float(np.sqrt((xs * xs).sum() * (ys * ys).sum())) - if denom <= 0.0: - return float("nan") - return float((xs * ys).sum() / denom) - - -def spearman_ic(factor: np.ndarray, label: np.ndarray, *, min_pairs: int = 10) -> float: - """Spearman 秩相关(截面或任意配对样本)。""" - x = np.asarray(factor, dtype=np.float64) - y = np.asarray(label, dtype=np.float64) - mask = np.isfinite(x) & np.isfinite(y) - if int(mask.sum()) < min_pairs: - return float("nan") - xr = pd.Series(x[mask]).rank(method="average").to_numpy(dtype=np.float64) - yr = pd.Series(y[mask]).rank(method="average").to_numpy(dtype=np.float64) - return pearson_ic(xr, yr, min_pairs=min_pairs) - - -def cross_sectional_ic( - factor: pd.Series, - label: pd.Series, - *, - time_level: str = "datetime", - min_pairs: int = 10, -) -> pd.Series: - """按 datetime 截面 Pearson IC,返回逐日 IC 序列。""" - if not isinstance(factor.index, pd.MultiIndex): - raise ValueError("cross_sectional_ic 需要 MultiIndex 面板 (datetime, instrument)") - if time_level not in factor.index.names: - raise ValueError(f"索引缺少 level={time_level!r}") - - rows: list[float] = [] - idx: list[object] = [] - grouped = factor.groupby(level=time_level, sort=False) - for ts, f_sub in grouped: - y_sub = label.xs(ts, level=time_level) - ic = pearson_ic( - f_sub.to_numpy(dtype=np.float64, copy=False), - y_sub.to_numpy(dtype=np.float64, copy=False), - min_pairs=min_pairs, - ) - rows.append(ic) - idx.append(ts) - return pd.Series(rows, index=pd.Index(idx, name=time_level), dtype=float) - - -def cross_sectional_rank_ic( - factor: pd.Series, - label: pd.Series, - *, - time_level: str = "datetime", - min_pairs: int = 10, -) -> pd.Series: - """按 datetime 截面 Spearman Rank IC,返回逐日 RANKIC 序列。""" - if not isinstance(factor.index, pd.MultiIndex): - raise ValueError("cross_sectional_rank_ic 需要 MultiIndex 面板 (datetime, instrument)") - if time_level not in factor.index.names: - raise 
ValueError(f"索引缺少 level={time_level!r}") - - rows: list[float] = [] - idx: list[object] = [] - grouped = factor.groupby(level=time_level, sort=False) - for ts, f_sub in grouped: - y_sub = label.xs(ts, level=time_level) - ic = spearman_ic( - f_sub.to_numpy(dtype=np.float64, copy=False), - y_sub.to_numpy(dtype=np.float64, copy=False), - min_pairs=min_pairs, - ) - rows.append(ic) - idx.append(ts) - return pd.Series(rows, index=pd.Index(idx, name=time_level), dtype=float) - - -def cross_sectional_lag1_pearson_autocorr_series( - factor: pd.Series, - *, - time_level: str = "datetime", - instrument_level: str = "instrument", - min_pairs: int = 30, -) -> pd.Series: - """逐日横截面 lag-1 Pearson 自相关:corr_CS(f_t, f_{t-1})。""" - if not isinstance(factor.index, pd.MultiIndex): - raise ValueError("cross_sectional_lag1_pearson_autocorr_series 需要 MultiIndex 面板") - if time_level not in factor.index.names or instrument_level not in factor.index.names: - raise ValueError(f"索引缺少 level={time_level!r} 或 {instrument_level!r}") - - lag1 = factor.groupby(level=instrument_level, sort=False).shift(1) - rows: list[float] = [] - idx: list[object] = [] - for ts, cur in factor.groupby(level=time_level, sort=False): - prev = lag1.xs(ts, level=time_level) - corr = pearson_ic( - cur.to_numpy(dtype=np.float64, copy=False), - prev.to_numpy(dtype=np.float64, copy=False), - min_pairs=min_pairs, - ) - rows.append(corr) - idx.append(ts) - return pd.Series(rows, index=pd.Index(idx, name=time_level), dtype=float) - - -def cross_sectional_lag1_pearson_autocorr( - factor: pd.Series, - *, - min_pairs: int = 30, -) -> float: - """逐日横截面 lag-1 Pearson 自相关的均值。""" - daily = cross_sectional_lag1_pearson_autocorr_series(factor, min_pairs=min_pairs) - finite = daily[np.isfinite(daily.to_numpy(dtype=float, copy=False))] - return float(finite.mean()) if len(finite) else float("nan") - - -def cs_ic_summary( - daily_ic: pd.Series, - daily_rank_ic: pd.Series | None = None, -) -> dict[str, float | int]: - """由逐日截面 IC / RANKIC 
汇总 IC、ICIR、RANKIC。""" - ic_vals = daily_ic[np.isfinite(daily_ic.to_numpy(dtype=float, copy=False))] - n_days = int(len(ic_vals)) - if n_days == 0: - return {"ic": float("nan"), "icir": float("nan"), "rank_ic": float("nan"), "n_days": 0} - - ic_mean = float(ic_vals.mean()) - ic_std = float(ic_vals.std(ddof=1)) if n_days > 1 else float("nan") - icir = ic_mean / ic_std if ic_std and np.isfinite(ic_std) and ic_std > 0 else float("nan") - - rank_ic_mean = float("nan") - if daily_rank_ic is not None: - rv = daily_rank_ic[np.isfinite(daily_rank_ic.to_numpy(dtype=float, copy=False))] - rank_ic_mean = float(rv.mean()) if len(rv) else float("nan") - - return { - "ic": ic_mean, - "icir": icir, - "rank_ic": rank_ic_mean, - "n_days": n_days, - } - - -def evaluate_cs_on_panel( - values: np.ndarray, - panel: pd.DataFrame, - *, - label_col: str = DEFAULT_LABEL_COL, - min_pairs: int = 5, -) -> dict[str, Any]: - """在 panel 上计算截面 IC / ICIR / RANKIC 与 coverage。""" - panel = panel.sort_index() - if label_col not in panel.columns: - raise KeyError(f"panel 缺少标签列: {label_col}") - if len(values) != len(panel): - raise ValueError(f"values 长度 {len(values)} != panel 行数 {len(panel)}") - - factor_series = pd.Series(values, index=panel.index, dtype=np.float32) - label_series = panel[label_col] - daily_ic = cross_sectional_ic(factor_series, label_series, min_pairs=min_pairs) - daily_rank_ic = cross_sectional_rank_ic(factor_series, label_series, min_pairs=min_pairs) - cs = cs_ic_summary(daily_ic, daily_rank_ic) - cs_autocorr_min_pairs = min(30, max(int(panel.index.get_level_values("instrument").nunique()) - 1, min_pairs)) - mls_min_stocks = min(30, max(min_pairs * 6, 10)) - fac = factor_series.to_numpy(dtype=float, copy=False) - lab = label_series.to_numpy(dtype=float, copy=False) - - return { - "coverage": coverage(values), - "ic": cs["ic"], - "icir": cs["icir"], - "rank_ic": cs["rank_ic"], - "n_days": cs["n_days"], - "cs_pearson_autocorr": cross_sectional_lag1_pearson_autocorr( - factor_series, 
- min_pairs=cs_autocorr_min_pairs, - ), - "decile_mean_label": decile_mean_label(fac, lab, n_deciles=10), - "mls_fmb": mls_fmb_summary( - factor_series, - label_series, - min_stocks=mls_min_stocks, - ), - "label_col": label_col, - } - - -def evaluate_on_panel( - values: np.ndarray, - panel: pd.DataFrame, - *, - label_col: str = DEFAULT_LABEL_COL, - min_ic_pairs: int = 5, -) -> dict[str, Any]: - """在 panel 上计算 coverage 与截面 IC / ICIR / RANKIC。""" - return evaluate_cs_on_panel( - values, - panel, - label_col=label_col, - min_pairs=min_ic_pairs, - ) - - -def _round_label_mean(value: float) -> float: - return float(round(value, 6)) - - -def label_quantile_buckets( - factor: np.ndarray, - label: np.ndarray, - *, - n_quantiles: int = 10, -) -> list[dict[str, Any]]: - """按因子值等频分位分桶,每桶内 label 均值;Q1 = 因子最低组。""" - if n_quantiles < 2: - return [] - xf = np.asarray(factor, dtype=float) - yl = np.asarray(label, dtype=float) - m = np.isfinite(xf) & np.isfinite(yl) - xf, yl = xf[m], yl[m] - if xf.size < n_quantiles: - return [] - fac_s = pd.Series(xf) - try: - qbins = pd.qcut(fac_s, n_quantiles, duplicates="drop") - except ValueError: - qbins = pd.qcut(fac_s.rank(method="first"), n_quantiles, duplicates="drop") - n_q = int(qbins.cat.categories.size) - codes = qbins.cat.codes.to_numpy() - out: list[dict[str, Any]] = [] - for i in range(n_q): - mask = codes == i - cnt = int(mask.sum()) - if cnt == 0: - mean_v = None - else: - mv = float(np.mean(yl[mask])) - mean_v = _round_label_mean(mv) if np.isfinite(mv) else None - cat = qbins.cat.categories[i] - out.append( - { - "quantile": i + 1, - "n": cnt, - "mean_label": mean_v, - "factor_bin": str(cat), - } - ) - return out - - -def decile_mean_label( - factor: np.ndarray, - label: np.ndarray, - *, - n_deciles: int = 10, -) -> list[dict[str, Any]]: - """全样本等频十分组 label 均值;D1=因子最低组,D10=最高组(同 AQRA label_quantile_buckets)。""" - buckets = label_quantile_buckets(factor, label, n_quantiles=n_deciles) - return [{"decile": b["quantile"], 
"mean_label": b["mean_label"]} for b in buckets] - - -def _cross_section_decile_mean_labels( - factor: np.ndarray, - label: np.ndarray, - *, - n_deciles: int = 10, - min_stocks: int = 30, -) -> list[float] | None: - """单日截面上按因子等频分组的 label 均值;D1=因子最低组。""" - xf = np.asarray(factor, dtype=float) - yl = np.asarray(label, dtype=float) - mask = np.isfinite(xf) & np.isfinite(yl) - xf, yl = xf[mask], yl[mask] - if xf.size < min_stocks or xf.size < n_deciles: - return None - - fac_s = pd.Series(xf) - try: - qbins = pd.qcut(fac_s, n_deciles, duplicates="drop") - except ValueError: - qbins = pd.qcut(fac_s.rank(method="first"), n_deciles, duplicates="drop") - - n_q = int(qbins.cat.categories.size) - if n_q < 2: - return None - - codes = qbins.cat.codes.to_numpy() - means: list[float] = [] - for i in range(n_q): - bucket = codes == i - if not bucket.any(): - means.append(float("nan")) - else: - means.append(float(np.mean(yl[bucket]))) - return means - - -def _iter_daily_decile_mean_labels( - factor: pd.Series, - label: pd.Series, - *, - time_level: str = "datetime", - n_deciles: int = 10, - min_stocks: int = 30, -): - """逐日截面十分组 label 均值;样本不足日跳过。""" - if not isinstance(factor.index, pd.MultiIndex): - raise ValueError("_iter_daily_decile_mean_labels 需要 MultiIndex 面板 (datetime, instrument)") - if time_level not in factor.index.names: - raise ValueError(f"索引缺少 level={time_level!r}") - - for ts, f_sub in factor.groupby(level=time_level, sort=False): - y_sub = label.xs(ts, level=time_level) - means = _cross_section_decile_mean_labels( - f_sub.to_numpy(dtype=np.float64, copy=False), - y_sub.to_numpy(dtype=np.float64, copy=False), - n_deciles=n_deciles, - min_stocks=min_stocks, - ) - if means is not None: - yield ts, means - - -def daily_decile_monotonicity_series( - factor: pd.Series, - label: pd.Series, - *, - time_level: str = "datetime", - n_deciles: int = 10, - min_stocks: int = 30, - min_deciles_for_rho: int = 3, -) -> pd.Series: - """逐日截面单调性分量 ρ_t = Spearman({1..K}, 
{R_{1,t},..,R_{K,t}})。""" - rows: list[float] = [] - idx: list[object] = [] - ranks_template = np.arange(1, n_deciles + 1, dtype=np.float64) - - for ts, means in _iter_daily_decile_mean_labels( - factor, - label, - time_level=time_level, - n_deciles=n_deciles, - min_stocks=min_stocks, - ): - means_arr = np.asarray(means, dtype=np.float64) - valid = np.isfinite(means_arr) - n_valid = int(valid.sum()) - if n_valid < min_deciles_for_rho: - rho = float("nan") - else: - ranks = ranks_template[: len(means_arr)][valid] - rho = spearman_ic(ranks, means_arr[valid], min_pairs=min_deciles_for_rho) - rows.append(rho) - idx.append(ts) - - return pd.Series(rows, index=pd.Index(idx, name=time_level), dtype=float) - - -def daily_long_short_series( - factor: pd.Series, - label: pd.Series, - *, - time_level: str = "datetime", - n_deciles: int = 10, - min_stocks: int = 30, -) -> pd.Series: - """逐日截面多空分量 LS_t = R_{Q10,t} - R_{Q1,t}(最高组减最低组)。""" - rows: list[float] = [] - idx: list[object] = [] - - for ts, means in _iter_daily_decile_mean_labels( - factor, - label, - time_level=time_level, - n_deciles=n_deciles, - min_stocks=min_stocks, - ): - top = means[-1] - bottom = means[0] - if np.isfinite(top) and np.isfinite(bottom): - rows.append(float(top - bottom)) - else: - rows.append(float("nan")) - idx.append(ts) - - return pd.Series(rows, index=pd.Index(idx, name=time_level), dtype=float) - - -def newey_west_mean_tstat( - series: pd.Series | np.ndarray, - *, - lags: int | None = None, -) -> dict[str, float | int]: - """Newey–West t 统计量,检验序列均值是否显著异于 0(Bartlett 核)。""" - if isinstance(series, pd.Series): - x = series.to_numpy(dtype=np.float64, copy=False) - else: - x = np.asarray(series, dtype=np.float64) - x = x[np.isfinite(x)] - t_n = int(len(x)) - nan_out: dict[str, float | int] = { - "mean": float("nan"), - "se_nw": float("nan"), - "t_nw": float("nan"), - "n": t_n, - "lags": 0, - } - if t_n < 2: - return nan_out - - if lags is None: - lags = max(1, int(4.0 * (t_n / 100.0) ** (2.0 / 
9.0))) - lags = min(int(lags), t_n - 1) - - x_mean = float(x.mean()) - u = x - x_mean - gamma0 = float(np.dot(u, u) / t_n) - nw_var_mean = gamma0 - for k in range(1, lags + 1): - w = 1.0 - k / (lags + 1.0) - gamma_k = float(np.dot(u[k:], u[:-k]) / t_n) - nw_var_mean += 2.0 * w * gamma_k - nw_var_mean /= t_n - - if nw_var_mean <= 0.0 or not np.isfinite(nw_var_mean): - se_nw = float("nan") - t_nw = float("nan") - else: - se_nw = float(math.sqrt(nw_var_mean)) - t_nw = float(x_mean / se_nw) - - return { - "mean": x_mean, - "se_nw": se_nw, - "t_nw": t_nw, - "n": t_n, - "lags": lags, - } - - -def _resolve_mls_params( - factor: pd.Series, - *, - n_deciles: int, - min_stocks: int, - instrument_level: str = "instrument", -) -> tuple[int, int]: - """按截面股票数自适应分位数个数与最低样本门槛。""" - n_inst = int(factor.index.get_level_values(instrument_level).nunique()) - eff_deciles = min(int(n_deciles), n_inst) - eff_min = min(int(min_stocks), n_inst) - if eff_min < eff_deciles: - eff_min = eff_deciles - return eff_deciles, eff_min - - -def mls_fmb_summary( - factor: pd.Series, - label: pd.Series, - *, - n_deciles: int = 10, - min_stocks: int = 30, - annualization_factor: float = 252.0, - nw_lags: int | None = None, -) -> dict[str, Any]: - """MLS-FMB:逐日截面计算十分组单调性 ρ_t 与 Q10−Q1 多空 LS_t,再按 Fama–MacBeth 时序聚合并用 Newey–West t 检验显著性,综合得分 MLS = mean(ρ) × 年化 IR_LS。""" - eff_deciles, eff_min_stocks = _resolve_mls_params( - factor, - n_deciles=n_deciles, - min_stocks=min_stocks, - ) - rho_series = daily_decile_monotonicity_series( - factor, - label, - n_deciles=eff_deciles, - min_stocks=eff_min_stocks, - ) - ls_series = daily_long_short_series( - factor, - label, - n_deciles=eff_deciles, - min_stocks=eff_min_stocks, - ) - - rho_nw = newey_west_mean_tstat(rho_series, lags=nw_lags) - ls_nw = newey_west_mean_tstat(ls_series, lags=nw_lags) - - rho_vals = rho_series[np.isfinite(rho_series.to_numpy(dtype=float, copy=False))] - ls_vals = ls_series[np.isfinite(ls_series.to_numpy(dtype=float, copy=False))] - - 
mean_rho = float(rho_vals.mean()) if len(rho_vals) else float("nan") - mean_ls = float(ls_vals.mean()) if len(ls_vals) else float("nan") - - if len(ls_vals) > 1: - ls_std = float(ls_vals.std(ddof=1)) - elif len(ls_vals) == 1: - ls_std = float("nan") - else: - ls_std = float("nan") - - ir_ls = mean_ls / ls_std if ls_std and np.isfinite(ls_std) and ls_std > 0 else float("nan") - ir_ls_annual = ( - float(ir_ls * math.sqrt(annualization_factor)) - if np.isfinite(ir_ls) - else float("nan") - ) - mls = ( - float(mean_rho * ir_ls_annual) - if np.isfinite(mean_rho) and np.isfinite(ir_ls_annual) - else float("nan") - ) - - return { - "mean_rho": mean_rho, - "mean_ls": mean_ls, - "ir_ls": ir_ls, - "ir_ls_annual": ir_ls_annual, - "mls": mls, - "nw_t_rho": rho_nw["t_nw"], - "nw_t_ls": ls_nw["t_nw"], - "nw_se_rho": rho_nw["se_nw"], - "nw_se_ls": ls_nw["se_nw"], - "n_days_rho": int(rho_nw["n"]), - "n_days_ls": int(ls_nw["n"]), - "nw_lags": int(rho_nw["lags"]), - "n_deciles": int(eff_deciles), - "n_deciles_requested": int(n_deciles), - "min_stocks": int(eff_min_stocks), - "min_stocks_requested": int(min_stocks), - "annualization_factor": float(annualization_factor), - "note": ( - "MLS+FMB 非参数版:ρ_t=逐日十分组 Spearman 单调性,LS_t=Q10-Q1 多空;" - "IR_LS 为日频 LS 均值/标准差,ir_ls_annual=IR_LS×√252;MLS=mean(ρ)×ir_ls_annual。" - ), - } - - -def monthly_ic_robustness(daily_ic: pd.Series) -> dict[str, Any]: - """由逐日截面 IC 聚合月度稳健性(股票口径)。""" - if daily_ic.empty: - return {} - s = daily_ic.copy() - s.index = pd.to_datetime(s.index, errors="coerce") - s = s[np.isfinite(s.to_numpy(dtype=float, copy=False))] - if s.empty: - return {} - - monthly_means: list[float] = [] - months: list[str] = [] - for month, grp in s.groupby(s.index.to_period("M"), sort=True): - vals = grp.to_numpy(dtype=float, copy=False) - m = float(np.mean(vals)) if len(vals) else float("nan") - months.append(str(month)) - monthly_means.append(m) - - n_months = len(months) - finite_means = [x for x in monthly_means if np.isfinite(x)] - 
mean_monthly = float(np.mean(finite_means)) if finite_means else float("nan") - share_ic_pos = ( - float(sum(m > 0 for m in finite_means) / len(finite_means)) if finite_means else float("nan") - ) - - return { - "n_months": n_months, - "mean_monthly_ic": mean_monthly, - "share_months_ic_positive": share_ic_pos, - "note": "股票口径:逐日横截面 IC 按自然月取均值;share_months_ic_positive=月均 IC>0 的月份占比。", - } - - -def factor_skew_kurtosis(values: np.ndarray) -> tuple[float, float]: - """因子有限值偏度与超额峰度。""" - x = np.asarray(values, dtype=float) - x = x[np.isfinite(x)] - if len(x) < 3: - return float("nan"), float("nan") - s = pd.Series(x) - return float(s.skew()), float(s.kurtosis()) diff --git a/seekalpha/factor/mining/__init__.py b/seekalpha/factor/mining/__init__.py deleted file mode 100644 index e8dbf2ed..00000000 --- a/seekalpha/factor/mining/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -"""因子挖掘插件(可选依赖 mining extra)。""" - -from __future__ import annotations - -from typing import Any - - -def __getattr__(name: str) -> Any: - if name == "run_factor_mining": - from seekalpha.factor.mining.run import run_factor_mining - - return run_factor_mining - if name == "run_factor_mining_agentscope": - from seekalpha.factor.mining.agentscope_run import run_factor_mining_agentscope - - return run_factor_mining_agentscope - if name == "MiningConfig": - from seekalpha.factor.mining.config import MiningConfig - - return MiningConfig - if name == "build_system_prompt": - from seekalpha.factor.mining.prompts import build_system_prompt - - return build_system_prompt - if name == "FactorSubmitService": - from seekalpha.factor.mining.submit import FactorSubmitService - - return FactorSubmitService - if name == "default_factorlib_path": - from seekalpha.factor.mining.submit import default_factorlib_path - - return default_factorlib_path - if name == "FactorEvalTools": - from seekalpha.factor.mining.tools import FactorEvalTools - - return FactorEvalTools - raise AttributeError(f"module {__name__!r} has no 
attribute {name!r}") diff --git a/seekalpha/factor/mining/agentscope_run.py b/seekalpha/factor/mining/agentscope_run.py deleted file mode 100644 index 44b84228..00000000 --- a/seekalpha/factor/mining/agentscope_run.py +++ /dev/null @@ -1,289 +0,0 @@ -"""AgentScope 版因子挖掘编排:复用 prompt/工具上下文,终端流式输出。""" - -from __future__ import annotations - -import json -import sys -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - -from agentscope.agent import Agent, ContextConfig, ReActConfig -from agentscope.credential import OpenAICredential -from agentscope.message import UserMsg -from agentscope.model import OpenAIChatModel -from agentscope.permission import PermissionContext, PermissionMode -from agentscope.state import AgentState -from agentscope.workspace import LocalWorkspace - -from seekalpha.factor.mining.env_settings import resolve_max_parallel_eval -from seekalpha.factor.mining.schemas import SessionCreateRequest -from seekalpha.factor.mining.service import StockEvalService -from seekalpha.factor.mining.agentscope_tools import build_factor_eval_toolkit, context_to_openai_messages -from seekalpha.factor.mining.cli_stream import MiningStreamObserver, stream_to_cli -from seekalpha.factor.mining.config import MiningConfig -from seekalpha.factor.mining.console import ConsolePrinter -from seekalpha.factor.mining.loop import _NUDGE, _submit_record -from seekalpha.factor.mining.operators import list_operator_names -from seekalpha.factor.mining.prompts import build_system_prompt -from seekalpha.factor.mining.submit import FactorSubmitService, default_factorlib_path -from seekalpha.factor.mining.tools import FactorEvalTools - -_NUDGE_MSG = _NUDGE - - -def _repo_root() -> Path: - return Path(__file__).resolve().parents[3] - - -def _now() -> str: - return datetime.now(timezone.utc).isoformat() - - -def _build_model( - config: MiningConfig, - *, - api_key: str, - base_url: str | None, - extra_body: dict[str, Any] | None, -) -> 
OpenAIChatModel: - params: dict[str, Any] = { - "max_tokens": config.max_tokens, - "parallel_tool_calls": True, - } - if config.temperature is not None: - params["temperature"] = config.temperature - return OpenAIChatModel( - credential=OpenAICredential(api_key=api_key, base_url=base_url), - model=config.model, - parameters=OpenAIChatModel.Parameters(**params), - stream=True, - extra_body=extra_body, - ) - - -async def create_mining_agent( - *, - config: MiningConfig, - system_prompt: str, - factor_tools: FactorEvalTools, - workspace: LocalWorkspace, - api_key: str, - base_url: str | None, - extra_body: dict[str, Any] | None, -) -> Agent: - toolkit = build_factor_eval_toolkit(factor_tools, max_workers=config.max_tool_workers) - react_iters = max(config.max_turns * config.max_tool_calls_per_round, config.max_turns, 20) - return Agent( - name="FactorMiner", - system_prompt=system_prompt, - model=_build_model(config, api_key=api_key, base_url=base_url, extra_body=extra_body), - toolkit=toolkit, - offloader=workspace, - react_config=ReActConfig(max_iters=react_iters), - state=AgentState( - permission_context=PermissionContext(mode=PermissionMode.BYPASS), - ), - context_config=ContextConfig( - trigger_ratio=0.75, - reserve_ratio=0.15, - tool_result_limit=5000, - ), - ) - - -async def run_factor_mining_agentscope( - config: MiningConfig, - user_message: str, - *, - api_key: str, - base_url: str | None = None, - log_dir: str | Path = "logs/factor_mining", - include_operator_catalog: bool = True, - extra_instructions: str = "", - extra_body: dict[str, Any] | None = None, - service: StockEvalService | None = None, - verbose: bool = True, - repo_root: Path | None = None, -) -> dict[str, Any]: - """AgentScope 版挖掘入口:与 run_factor_mining 配置一致,CLI 流式输出。""" - service = service or StockEvalService( - max_parallel_eval=resolve_max_parallel_eval(config.max_parallel_eval), - ) - root = repo_root or _repo_root() - ctx = config.eval - session_resp = service.create_session( - 
SessionCreateRequest( - panel_path=str(ctx.panel_path), - train_start=ctx.train_start, - train_end=ctx.train_end, - val_start=ctx.val_start, - val_end=ctx.val_end, - label_col=ctx.label_col, - include_fundamentals=ctx.include_fundamentals, - ) - ) - - submit_service: FactorSubmitService | None = None - if config.enable_submit: - lib_path = config.factorlib_path or default_factorlib_path(root) - submit_service = FactorSubmitService( - service, - factorlib_path=lib_path, - registry_path=root / config.registry_path, - expr_dir=root / config.expr_dir, - repo_root=root, - max_cs_corr=config.max_cs_corr, - similar_top_k=config.similar_top_k, - overwrite=config.ingest_overwrite, - ) - - factor_tools = FactorEvalTools(service, session_resp.session_id, submit_service=submit_service) - system_prompt = build_system_prompt( - include_operator_catalog=include_operator_catalog, - enable_submit=config.enable_submit, - extra_instructions=extra_instructions, - label_col=ctx.label_col, - include_fundamentals=ctx.include_fundamentals, - ) - - log_dir = Path(log_dir) - log_dir.mkdir(parents=True, exist_ok=True) - stamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_jsonl = log_dir / f"run_{stamp}.jsonl" - workspace_dir = log_dir / f"agentscope_workspace_{stamp}" - workspace = LocalWorkspace(workdir=str(workspace_dir)) - await workspace.initialize() - - printer = ConsolePrinter(stream=sys.stderr) if verbose else None - if printer is not None: - printer.session_start(config.model, len(list_operator_names())) - - def _emit(event: str, payload: dict[str, Any]) -> None: - with log_jsonl.open("a", encoding="utf-8") as f: - f.write(json.dumps({"ts": _now(), "event": event, **payload}, ensure_ascii=False, default=str) + "\n") - f.flush() - - _emit("session_start", {"model": config.model, "max_turns": config.max_turns, "framework": "agentscope"}) - _emit("user_message", {"turn": 0, "content": user_message}) - - agent = await create_mining_agent( - config=config, - 
system_prompt=system_prompt, - factor_tools=factor_tools, - workspace=workspace, - api_key=api_key, - base_url=base_url, - extra_body=extra_body, - ) - - tool_call_rounds = 0 - outer_turn = 0 - tool_call_rows: list[dict[str, Any]] = [] - submit_records: list[dict[str, Any]] = [] - end_reason = "no_tool_calls" - pending = user_message - - while outer_turn < config.max_turns: - if printer is not None: - printer.turn(outer_turn) - - observer = MiningStreamObserver(printer=printer, emit=_emit, turn=outer_turn) - - def _on_stream_emit(event: str, payload: dict[str, Any]) -> None: - _emit(event, payload) - if event != "tool_results": - return - for row in payload.get("results") or []: - res = row.get("result") if isinstance(row.get("result"), dict) else {} - tool_call_rows.append( - { - "turn": outer_turn, - "name": row.get("name"), - "elapsed_seconds": row.get("elapsed_seconds"), - "ok": res.get("ok"), - } - ) - if row.get("name") == "submit_factor": - submit_records.append( - _submit_record( - turn=outer_turn, - arguments_raw=row.get("arguments_raw"), - result=res, - ) - ) - - observer.emit = _on_stream_emit - - if outer_turn > 0 or pending != user_message: - _emit("user_message", {"turn": outer_turn, "content": pending}) - - user_msg = UserMsg(name="user", content=pending) - had_tools = await stream_to_cli( - agent, - user_msg, - show_thinking=True, - auto_confirm=True, - observer=observer, - quiet=not verbose, - ) - - if had_tools: - tool_call_rounds += 1 - - if not had_tools: - if tool_call_rounds < config.min_tool_call_rounds_before_allow_stop: - _emit("nudge", {"turn": outer_turn, "tool_call_rounds": tool_call_rounds}) - pending = _NUDGE_MSG - outer_turn += 1 - continue - end_reason = "no_tool_calls" - break - - outer_turn += 1 - if outer_turn >= config.max_turns: - end_reason = "max_turns_reached" - break - pending = "请基于当前工具结果继续迭代;需要时并行 eval_on_train_set,达标后 submit_factor。" - else: - end_reason = "max_turns_reached" - - if printer is not None: - ok = sum(1 for r 
in tool_call_rows if r.get("ok")) - printer.session_end(end_reason, ok, len(tool_call_rows)) - - messages = context_to_openai_messages(agent.state.context) - snapshot = log_jsonl.with_suffix(".messages.json") - snapshot.write_text(json.dumps(messages, ensure_ascii=False, indent=2, default=str), encoding="utf-8") - - times = [r["elapsed_seconds"] for r in tool_call_rows if r.get("elapsed_seconds") is not None] - submitted_factors = [r for r in submit_records if r.get("stored")] - submit_failures = [r for r in submit_records if not r.get("stored")] - summary = { - "log_jsonl": str(log_jsonl), - "framework": "agentscope", - "agent_session_id": agent.state.session_id, - "tool_calls": { - "count": len(tool_call_rows), - "ok": sum(1 for r in tool_call_rows if r.get("ok")), - "elapsed_seconds_total": round(sum(times), 4) if times else 0.0, - }, - "submitted_factors": submitted_factors, - "submit_failures": submit_failures, - "messages_snapshot": str(snapshot), - } - log_jsonl.with_suffix(".summary.json").write_text( - json.dumps(summary, ensure_ascii=False, indent=2, default=str), encoding="utf-8" - ) - _emit("run_summary", summary) - _emit("session_end", {"turn": outer_turn, "reason": end_reason}) - - return { - "session_id": session_resp.session_id, - "agent_session_id": agent.state.session_id, - "n_messages": len(messages), - "log_jsonl": str(log_jsonl), - "messages_snapshot": str(snapshot), - "summary": str(log_jsonl.with_suffix(".summary.json")), - "framework": "agentscope", - } diff --git a/seekalpha/factor/mining/agentscope_tools.py b/seekalpha/factor/mining/agentscope_tools.py deleted file mode 100644 index d730ddf7..00000000 --- a/seekalpha/factor/mining/agentscope_tools.py +++ /dev/null @@ -1,129 +0,0 @@ -"""将 FactorEvalTools 包装为 AgentScope FunctionTool。""" - -from __future__ import annotations - -import time -from concurrent.futures import ThreadPoolExecutor -from typing import Any - -from agentscope.message import TextBlock -from agentscope.tool import 
FunctionTool, Toolkit, ToolChunk - -from seekalpha.factor.mining.tools import FactorEvalTools - -_EXECUTOR: ThreadPoolExecutor | None = None - - -def _executor(max_workers: int) -> ThreadPoolExecutor: - global _EXECUTOR - if _EXECUTOR is None: - _EXECUTOR = ThreadPoolExecutor(max_workers=max(1, max_workers)) - return _EXECUTOR - - -def _dispatch_sync(tools: FactorEvalTools, name: str, arguments: dict[str, Any]) -> tuple[dict[str, Any], float]: - t0 = time.perf_counter() - result = tools.dispatch(name, arguments) - elapsed = round(time.perf_counter() - t0, 4) - return result if isinstance(result, dict) else {"ok": False, "error": str(result)}, elapsed - - -def build_factor_eval_toolkit(tools: FactorEvalTools, *, max_workers: int = 4) -> Toolkit: - """构建与 OpenAI 版一致的 eval / submit 工具集。""" - - async def eval_on_train_set( - multi_line_expr: str, - factor_name: str = "expr", - include_detail_tables: bool = False, - label_quantile_n: int = 10, - ) -> ToolChunk: - """训练集评估多行因子表达式,返回 summary、monthly_corr_robustness、label_quantile_buckets。""" - loop = __import__("asyncio").get_running_loop() - result, _elapsed = await loop.run_in_executor( - _executor(max_workers), - _dispatch_sync, - tools, - "eval_on_train_set", - { - "multi_line_expr": multi_line_expr, - "factor_name": factor_name, - "include_detail_tables": include_detail_tables, - "label_quantile_n": label_quantile_n, - }, - ) - return ToolChunk(content=[TextBlock(text=tools.result_to_content(result))]) - - async def eval_on_val_set( - multi_line_expr: str, - factor_name: str = "expr", - include_detail_tables: bool = False, - label_quantile_n: int = 10, - expected_sign: int | None = None, - ) -> ToolChunk: - """验证集评估;须传 expected_sign(train IC 符号 1/-1),结果含 sign_check。""" - loop = __import__("asyncio").get_running_loop() - args: dict[str, Any] = { - "multi_line_expr": multi_line_expr, - "factor_name": factor_name, - "include_detail_tables": include_detail_tables, - "label_quantile_n": label_quantile_n, - } - if 
expected_sign is not None: - args["expected_sign"] = expected_sign - result, _elapsed = await loop.run_in_executor( - _executor(max_workers), - _dispatch_sync, - tools, - "eval_on_val_set", - args, - ) - return ToolChunk(content=[TextBlock(text=tools.result_to_content(result))]) - - func_tools: list[FunctionTool] = [ - FunctionTool(eval_on_train_set, name="eval_on_train_set", is_read_only=True), - FunctionTool(eval_on_val_set, name="eval_on_val_set", is_read_only=True), - ] - - if tools.submit_service is not None: - - async def submit_factor( - multi_line_expr: str, - factor_name: str, - comment: str, - ) -> ToolChunk: - """【正式交付】将保留级候选入库 factorzoo;train/val 达标后必须调用。""" - loop = __import__("asyncio").get_running_loop() - result, _elapsed = await loop.run_in_executor( - _executor(max_workers), - _dispatch_sync, - tools, - "submit_factor", - { - "multi_line_expr": multi_line_expr, - "factor_name": factor_name, - "comment": comment, - }, - ) - return ToolChunk(content=[TextBlock(text=tools.result_to_content(result))]) - - func_tools.append(FunctionTool(submit_factor, name="submit_factor")) - - return Toolkit(tools=func_tools) - - -def context_to_openai_messages(agent_context: Any) -> list[dict[str, Any]]: - """将 AgentScope context 快照为 OpenAI 风格 messages(便于与旧日志格式对齐)。""" - out: list[dict[str, Any]] = [] - for msg in agent_context: - role = getattr(msg, "role", None) or getattr(msg, "name", "unknown") - content = getattr(msg, "content", None) - if isinstance(content, list): - text_parts = [] - for block in content: - if hasattr(block, "text"): - text_parts.append(block.text) - elif isinstance(block, dict) and block.get("text"): - text_parts.append(str(block["text"])) - content = "\n".join(text_parts) - out.append({"role": str(role), "content": content}) - return out diff --git a/seekalpha/factor/mining/cli_stream.py b/seekalpha/factor/mining/cli_stream.py deleted file mode 100644 index ee99d291..00000000 --- a/seekalpha/factor/mining/cli_stream.py +++ /dev/null @@ -1,386 
+0,0 @@ -"""AgentScope 挖掘会话的终端流式输出(参考 AQRA examples/agentscope/first_agent.py)。""" - -from __future__ import annotations - -import json -import re -import sys -import time -from dataclasses import dataclass, field -from typing import Any, Callable, TextIO - -from agentscope.agent import Agent -from agentscope.event import ConfirmResult, EventType, UserConfirmResultEvent -from agentscope.message import UserMsg - -_INDENT = " " -_USE_COLOR = sys.stdout.isatty() -_ANSI_RE = re.compile(r"\033\[[0-9;]*m") - - -def _ansi(code: str, text: str) -> str: - if not _USE_COLOR: - return text - return f"\033[{code}m{text}\033[0m" - - -def _strip_ansi(text: str) -> str: - return _ANSI_RE.sub("", text) - - -class CliRunLogger: - """将 CLI 输出写入 run 目录的 cli.log。""" - - def __init__( - self, - *, - cli_log_path: TextIO | None = None, - on_reply_end: Callable[[], None] | None = None, - ) -> None: - self._cli_file = cli_log_path - self.on_reply_end = on_reply_end - - def write_plain(self, text: str) -> None: - if self._cli_file is not None: - self._cli_file.write(text) - self._cli_file.flush() - - def write_line(self, line: str = "") -> None: - self.write_plain(line + "\n") - - def close(self) -> None: - if self._cli_file is not None: - self._cli_file.close() - - -def _tag( - label: str, - *, - detail: str = "", - color: str = "1;36", - logger: CliRunLogger | None = None, - stream: TextIO | None = None, -) -> None: - line = f"[{label}]" - if detail: - line = f"{line} {detail}" - if logger: - logger.write_line(line) - out = stream or sys.stdout - print(_ansi(color, line), file=out, flush=True) - - -def _prefix_body_delta(delta: str, *, need_leading_indent: bool) -> tuple[str, bool]: - if not delta: - return "", need_leading_indent - out: list[str] = [] - parts = delta.split("\n") - for i, part in enumerate(parts): - if i > 0: - out.append(f"\n{_INDENT}") - if need_leading_indent or i > 0: - out.append(_INDENT) - need_leading_indent = False - out.append(part) - return _ansi("2", 
"".join(out)), need_leading_indent - - -def _print_body_delta( - delta: str, - *, - need_leading_indent: bool, - logger: CliRunLogger | None = None, - stream: TextIO | None = None, - use_color: bool = True, -) -> bool: - if use_color: - chunk, need_leading_indent = _prefix_body_delta(delta, need_leading_indent=need_leading_indent) - else: - chunk, need_leading_indent = delta, need_leading_indent - if logger and chunk: - logger.write_plain(_strip_ansi(chunk) if use_color else chunk) - out = stream or sys.stdout - print(chunk, end="", file=out, flush=True) - return need_leading_indent - - -@dataclass -class _PendingToolCall: - name: str - arguments: str = "" - result_parts: list[str] = field(default_factory=list) - started_at: float = field(default_factory=time.perf_counter) - - -class MiningStreamObserver: - """挖掘流式事件观察者:按 tool_call_id 跟踪并行工具调用,并流式落盘 agent 输出。""" - - def __init__( - self, - *, - printer: Any | None = None, - emit: Callable[[str, dict], None] | None = None, - turn: int = 0, - ) -> None: - self.printer = printer - self.emit = emit - self.turn = turn - self.had_tool_calls = False - self.tool_call_count = 0 - self._pending: dict[str, _PendingToolCall] = {} - self._thinking_parts: list[str] = [] - self._text_parts: list[str] = [] - self._logged_tool_calls: set[str] = set() - - def _emit_block(self, event: str, content: str) -> None: - if self.emit is None or not content: - return - self.emit(event, {"turn": self.turn, "content": content}) - - def on_thinking_start(self) -> None: - self._thinking_parts.clear() - - def on_thinking_delta(self, delta: str) -> None: - if delta: - self._thinking_parts.append(delta) - - def on_thinking_end(self) -> None: - self._emit_block("agent_thinking", "".join(self._thinking_parts)) - self._thinking_parts.clear() - - def on_text_delta(self, delta: str) -> None: - if delta: - self._text_parts.append(delta) - - def on_text_end(self) -> None: - self._emit_block("assistant_message", "".join(self._text_parts)) - 
self._text_parts.clear() - - def on_tool_call_start(self, tool_call_id: str, name: str) -> None: - self.had_tool_calls = True - self.tool_call_count += 1 - self._pending[tool_call_id] = _PendingToolCall(name=name) - - def on_tool_call_delta(self, tool_call_id: str, delta: str) -> None: - pending = self._pending.get(tool_call_id) - if pending is not None and delta: - pending.arguments += delta - - def on_tool_call_ready(self, tool_call_id: str) -> None: - """工具参数流式接收完毕、即将执行时落盘 assistant tool_call。""" - if self.emit is None or tool_call_id in self._logged_tool_calls: - return - pending = self._pending.get(tool_call_id) - if pending is None: - return - self._logged_tool_calls.add(tool_call_id) - self.emit( - "assistant_tool_call", - { - "turn": self.turn, - "tool_call_id": tool_call_id, - "name": pending.name, - "arguments_raw": pending.arguments, - }, - ) - - def on_tool_result_delta(self, tool_call_id: str, delta: str) -> None: - pending = self._pending.get(tool_call_id) - if pending is not None and delta: - pending.result_parts.append(delta) - - def on_tool_result_end(self, tool_call_id: str) -> None: - pending = self._pending.pop(tool_call_id, None) - if pending is None: - return - - elapsed = round(time.perf_counter() - pending.started_at, 4) - raw = "".join(pending.result_parts) - try: - result = json.loads(raw) if raw.strip() else {} - except json.JSONDecodeError: - result = {"ok": False, "error": raw[:500]} - - if self.printer is not None: - self.printer.tool_result( - pending.name, - pending.arguments, - result if isinstance(result, dict) else {}, - elapsed, - ) - - if self.emit is not None: - self.emit( - "tool_results", - { - "turn": self.turn, - "results": [ - { - "tool_call_id": tool_call_id, - "name": pending.name, - "arguments_raw": pending.arguments, - "result": result, - "elapsed_seconds": elapsed, - } - ], - }, - ) - - -async def stream_to_cli( - agent: Agent, - user_msg: UserMsg, - *, - show_thinking: bool = True, - auto_confirm: bool = True, - 
logger: CliRunLogger | None = None, - observer: MiningStreamObserver | None = None, - quiet: bool = False, - stream: TextIO | None = None, -) -> bool: - """将 agent.reply_stream 事件流式打印到终端。 - - Returns: - 本轮是否发生过至少一次工具调用。 - """ - pending: UserMsg | UserConfirmResultEvent = user_msg - inject_comment: str | None = None - body_needs_indent = False - had_tool_calls = False - out = stream or sys.stdout - use_printer = observer is not None and observer.printer is not None - - while True: - next_input: UserConfirmResultEvent | None = None - - async for event in agent.reply_stream(pending): - match event.type: - case EventType.THINKING_BLOCK_START: - if observer is not None: - observer.on_thinking_start() - if show_thinking and not quiet: - print(file=out, flush=True) - if logger: - logger.write_line() - _tag("思考", color="1;35", logger=logger, stream=out) - body_needs_indent = True - case EventType.THINKING_BLOCK_DELTA: - if observer is not None: - observer.on_thinking_delta(event.delta) - if show_thinking and not quiet: - body_needs_indent = _print_body_delta( - event.delta, - need_leading_indent=body_needs_indent, - logger=logger, - stream=out, - ) - case EventType.THINKING_BLOCK_END: - if observer is not None: - observer.on_thinking_end() - if show_thinking and not quiet: - print(file=out, flush=True) - if logger: - logger.write_line() - body_needs_indent = False - case EventType.TEXT_BLOCK_DELTA: - if observer is not None: - observer.on_text_delta(event.delta) - if not quiet: - if logger and event.delta: - logger.write_plain(event.delta) - print(event.delta, end="", file=out, flush=True) - case EventType.TEXT_BLOCK_END: - if observer is not None: - observer.on_text_end() - if not quiet: - print(file=out, flush=True) - if logger: - logger.write_line() - case EventType.TOOL_CALL_START: - had_tool_calls = True - if observer is not None: - observer.on_tool_call_start(event.tool_call_id, event.tool_call_name) - elif not quiet: - print(file=out, flush=True) - if logger: - 
logger.write_line() - _tag("工具", detail=event.tool_call_name, color="1;36", logger=logger, stream=out) - body_needs_indent = False - case EventType.TOOL_CALL_DELTA: - if observer is not None: - observer.on_tool_call_delta(event.tool_call_id, event.delta) - case EventType.TOOL_RESULT_START: - if observer is not None: - observer.on_tool_call_ready(event.tool_call_id) - if not quiet and not use_printer: - _tag("结果", detail=event.tool_call_name, color="1;33", logger=logger, stream=out) - body_needs_indent = True - case EventType.TOOL_RESULT_TEXT_DELTA: - if observer is not None: - observer.on_tool_result_delta(event.tool_call_id, event.delta) - if not quiet and not use_printer: - body_needs_indent = _print_body_delta( - event.delta, - need_leading_indent=body_needs_indent, - logger=logger, - stream=out, - ) - case EventType.TOOL_RESULT_END: - if observer is not None: - observer.on_tool_result_end(event.tool_call_id) - if not quiet and not use_printer: - print(file=out, flush=True) - if logger: - logger.write_line() - body_needs_indent = False - if inject_comment: - agent.state.context.append(UserMsg(name="user", content=inject_comment)) - inject_comment = None - case EventType.REQUIRE_USER_CONFIRM: - if auto_confirm: - next_input = UserConfirmResultEvent( - reply_id=event.reply_id, - confirm_results=[ - ConfirmResult(confirmed=True, tool_call=tc) - for tc in event.tool_calls - ], - ) - elif not quiet: - for tool_call in event.tool_calls: - print(file=out, flush=True) - if logger: - logger.write_line() - _tag( - "待确认", - detail=f"{tool_call.name}: {tool_call.input}", - color="1;31", - logger=logger, - stream=out, - ) - answer = input("确认执行? [y/N,可附加说明]: ").strip() - if logger: - logger.write_line(f"确认执行? 
[y/N,可附加说明]: {answer}") - confirmed = answer.lower() in ("y", "yes", "yes ") - inject_comment = None if confirmed or not answer else answer - next_input = UserConfirmResultEvent( - reply_id=event.reply_id, - confirm_results=[ - ConfirmResult(confirmed=confirmed, tool_call=tc) - for tc in event.tool_calls - ], - ) - case EventType.EXCEED_MAX_ITERS: - if not quiet: - print(file=out, flush=True) - _tag("警告", detail="达到最大迭代次数", color="1;31", logger=logger, stream=out) - case EventType.REPLY_END: - if logger and logger.on_reply_end: - logger.on_reply_end() - - if next_input is None: - break - pending = next_input - - if observer is not None: - observer.had_tool_calls = had_tool_calls - return had_tool_calls diff --git a/seekalpha/factor/mining/config.py b/seekalpha/factor/mining/config.py deleted file mode 100644 index ef79ad29..00000000 --- a/seekalpha/factor/mining/config.py +++ /dev/null @@ -1,32 +0,0 @@ -"""因子挖掘配置:评估上下文 + LLM/循环参数。""" - -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import Path - -from seekalpha.factor.mining.context import StockEvalContext - - -@dataclass -class MiningConfig: - """一次挖掘运行的全部配置。""" - - eval: StockEvalContext - model: str = "gpt-4o-mini" - temperature: float | None = None - max_tokens: int = 8192 - max_turns: int = 16 - max_tool_calls_per_round: int = 8 - max_tool_workers: int = 4 - max_parallel_eval: int | None = None - """同时进行的 train/val 评估上限;None 时读环境变量 MAX_PARALLEL_EVAL。""" - min_tool_call_rounds_before_allow_stop: int = 3 - factorlib_path: Path | None = None - enable_submit: bool = True - max_cs_corr: float = 0.8 - similar_top_k: int = 3 - ingest_overwrite: bool = False - auto_realign_panel: bool = True - registry_path: Path = Path("artifacts/factorzoo/stock_1d/mining_delivered_registry.json") - expr_dir: Path = Path("artifacts/factorzoo/stock_1d/expressions") diff --git a/seekalpha/factor/mining/console.py b/seekalpha/factor/mining/console.py deleted file mode 100644 index 
daaefd62..00000000 --- a/seekalpha/factor/mining/console.py +++ /dev/null @@ -1,213 +0,0 @@ -"""挖掘轨迹的整洁终端打印。""" - -from __future__ import annotations - -import json -import os -import sys -from typing import Any, TextIO - - -def _use_color(stream: TextIO) -> bool: - if os.environ.get("NO_COLOR"): - return False - return bool(getattr(stream, "isatty", lambda: False)()) - - -def _fmt_num(value: Any, digits: int = 4) -> str: - try: - return f"{float(value):.{digits}g}" - except (TypeError, ValueError): - return "—" - - -def _decile_d1_d10(deciles: Any) -> tuple[str, str]: - if not isinstance(deciles, list): - return "—", "—" - by: dict[int, float | None] = {} - for row in deciles: - if not isinstance(row, dict): - continue - d = row.get("decile") - ml = row.get("mean_label") - if d is None: - continue - try: - by[int(d)] = float(ml) if ml is not None else None - except (TypeError, ValueError): - by[int(d)] = None - d1 = by.get(1) - d10 = by.get(10) or by.get(max(by) if by else 0) - return (_fmt_num(d1) if d1 is not None else "—", _fmt_num(d10) if d10 is not None else "—") - - -def _normalize_neighbor(nb: Any) -> dict[str, Any]: - if isinstance(nb, dict): - return { - "factor_id": nb.get("factor_id") or "?", - "cs_corr": nb.get("cs_corr"), - "expr": nb.get("expr") or "", - } - if isinstance(nb, (list, tuple)) and len(nb) >= 2: - return {"factor_id": str(nb[0]), "cs_corr": nb[1], "expr": ""} - return {"factor_id": "?", "cs_corr": None, "expr": ""} - - -def _submit_failure_detail(result: dict[str, Any]) -> str: - detail = result.get("error") or result.get("skipped_reason") or "failed" - delivery = result.get("delivery_check") or {} - reasons = delivery.get("fail_reasons") - if reasons: - detail = f"{detail} ({','.join(str(r) for r in reasons)})" - return detail - - -def _mls_fmb_parts(mls: Any) -> list[str]: - if not isinstance(mls, dict) or not mls: - return [] - parts: list[str] = [] - for key, label in ( - ("mean_rho", "ρ"), - ("nw_t_rho", "NWρ"), - ("nw_t_ls", 
"NWls"), - ("mls", "MLS"), - ): - if key in mls and mls[key] is not None: - parts.append(f"{label}={_fmt_num(mls[key])}") - return parts - - -def _metrics_parts( - summ: dict[str, Any], - rob: dict[str, Any], - sign_check: dict[str, Any] | None, -) -> list[str]: - d1, d10 = _decile_d1_d10(summ.get("decile_mean_label")) - parts = [ - f"IC={_fmt_num(summ.get('ic'))}", - f"ICIR={_fmt_num(summ.get('icir'))}", - f"RANKIC={_fmt_num(summ.get('rank_ic'))}", - f"cov={_fmt_num(summ.get('factor_coverage'))}", - f"csAuto={_fmt_num(summ.get('cs_pearson_autocorr'))}", - f"days={summ.get('n_days') if summ.get('n_days') is not None else '—'}", - f"inst={summ.get('n_instruments') if summ.get('n_instruments') is not None else '—'}", - f"skew={_fmt_num(summ.get('factor_skewness'))}", - f"kurt={_fmt_num(summ.get('factor_kurtosis'))}", - f"月数={rob.get('n_months') if rob.get('n_months') is not None else '—'}", - f"月IC均={_fmt_num(rob.get('mean_monthly_ic'))}", - f"月IC+={_fmt_num(rob.get('share_months_ic_positive'), 3)}", - f"D1={d1}", - f"D10={d10}", - ] - parts.extend(_mls_fmb_parts(summ.get("mls_fmb"))) - if isinstance(sign_check, dict) and sign_check.get("matches_expected_sign") is not None: - parts.append("sign" + ("✓" if sign_check["matches_expected_sign"] else "✗")) - return parts - - -class ConsolePrinter: - """逐轮打印:轮次 → 模型文本/思考 → 因子表达式 → 关键指标。""" - - def __init__(self, *, stream: TextIO | None = None, show_thinking: bool = True) -> None: - self.stream = stream or sys.stderr - self.show_thinking = show_thinking - self._color = _use_color(self.stream) - - def _ansi(self, code: str, text: str) -> str: - return f"\033[{code}m{text}\033[0m" if self._color else text - - def _tag(self, code: str, label: str, detail: str = "") -> None: - line = self._ansi(code, f"[{label}]") - if detail: - line = f"{line} {detail}" - print(line, file=self.stream, flush=True) - - def _body(self, text: str | None) -> None: - if not text: - return - for raw in str(text).strip().splitlines(): - print(f" 
{raw}", file=self.stream, flush=True) - - def _blank(self) -> None: - print(file=self.stream, flush=True) - - def session_start(self, model: str, n_operators: int) -> None: - self._tag("1;32", "挖掘启动", f"model={model} 算子={n_operators}") - self._blank() - - def turn(self, turn: int) -> None: - self._blank() - self._tag("1;36", f"轮次 {turn}") - self._blank() - - def assistant(self, content: str | None, reasoning: str | None) -> None: - if self.show_thinking and reasoning: - self._tag("35", "思考") - self._body(reasoning) - self._blank() - if content and content.strip(): - self._tag("1", "助手") - self._body(content) - self._blank() - - def tool_result(self, name: str, arguments_raw: Any, result: dict[str, Any], elapsed: float | None) -> None: - args = arguments_raw - if isinstance(args, str): - try: - args = json.loads(args) - except json.JSONDecodeError: - args = {} - if not isinstance(args, dict): - args = {} - split = "train" if name == "eval_on_train_set" else ("val" if name == "eval_on_val_set" else "submit") - factor_name = args.get("factor_name") or "expr" - secs = f"{elapsed:.1f}s" if elapsed is not None else "—" - if name == "submit_factor": - self._tag("1;32", "交付", f"{factor_name} ({secs})") - self._body(args.get("comment")) - self._body(args.get("multi_line_expr")) - if not result.get("ok"): - detail = _submit_failure_detail(result) - self._tag("1;31", "结果", self._ansi("1;31", f"✗ {detail}")) - sim = result.get("similarity") or {} - for nb in sim.get("top_neighbors") or []: - norm = _normalize_neighbor(nb) - self._body( - f" ~{norm['factor_id']} cs_corr={_fmt_num(norm['cs_corr'])}: {norm['expr']}" - ) - self._blank() - return - summ = result.get("metrics") or {} - sim = result.get("similarity") or {} - max_corr = sim.get("max_abs_corr") - parts = [ - "stored✓", - f"IC={_fmt_num(summ.get('ic'))}", - f"ICIR={_fmt_num(summ.get('icir'))}", - f"RANKIC={_fmt_num(summ.get('rank_ic'))}", - f"cov={_fmt_num(summ.get('coverage'))}", - 
f"csAuto={_fmt_num(summ.get('cs_pearson_autocorr'))}", - ] - if max_corr is not None: - parts.append(f"max_cs_corr={_fmt_num(max_corr)}") - self._tag("1;33", "结果", " ".join(parts)) - self._blank() - return - - self._tag("1;36", f"因子 {split}", f"{factor_name} ({secs})") - self._body(args.get("multi_line_expr")) - - if not result.get("ok"): - self._tag("1;31", "指标", self._ansi("1;31", f"✗ {result.get('error_type')}: {result.get('error')}")) - self._blank() - return - summ = result.get("summary") or {} - rob = result.get("monthly_corr_robustness") or {} - parts = _metrics_parts(summ, rob, result.get("sign_check")) - self._tag("1;33", "指标", " ".join(parts)) - self._blank() - - def session_end(self, reason: str, ok: int, count: int) -> None: - self._blank() - self._tag("1;32", "会话结束", f"{reason} · 工具调用 {ok}/{count} 成功") - self._blank() diff --git a/seekalpha/factor/mining/context.py b/seekalpha/factor/mining/context.py deleted file mode 100644 index 1143b9e7..00000000 --- a/seekalpha/factor/mining/context.py +++ /dev/null @@ -1,36 +0,0 @@ -"""股票因子挖掘评估上下文:panel 路径与 train/val 日期切分。""" - -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import Path - -from seekalpha.factor.types import DEFAULT_LABEL_COL - - -@dataclass -class StockEvalContext: - """一次挖掘会话的数据与标签配置。""" - - panel_path: Path - train_start: str = "2019-01-01" - train_end: str = "2021-12-31" - val_start: str = "2022-01-01" - val_end: str = "2023-12-31" - label_col: str = DEFAULT_LABEL_COL - include_fundamentals: bool = True - """是否载入基本面列(``funda_*``)。挖价量因子时可关闭以省内存。""" - - def split_range(self, split: str) -> tuple[str, str]: - if split == "train": - return self.train_start, self.train_end - if split == "val": - return self.val_start, self.val_end - raise ValueError(f"未知 split: {split!r}") - - def coverage_range(self) -> tuple[str, str]: - """train ∪ val 日期并集。""" - return ( - min(self.train_start, self.val_start), - max(self.train_end, self.val_end), - ) diff --git 
a/seekalpha/factor/mining/env_settings.py b/seekalpha/factor/mining/env_settings.py deleted file mode 100644 index 63b77967..00000000 --- a/seekalpha/factor/mining/env_settings.py +++ /dev/null @@ -1,28 +0,0 @@ -"""挖掘评估相关的环境变量解析。""" - -from __future__ import annotations - -import os - -ENV_MAX_PARALLEL_EVAL = "MAX_PARALLEL_EVAL" -DEFAULT_MAX_PARALLEL_EVAL = 1 - - -def parse_max_parallel_eval(raw: str | None = None) -> int: - """解析最大并行 train/val 评估数;未设置或空字符串时返回 1。""" - if raw is None: - raw = os.environ.get(ENV_MAX_PARALLEL_EVAL, "") - text = str(raw).strip() - if not text: - return DEFAULT_MAX_PARALLEL_EVAL - value = int(text) - if value < 1: - raise ValueError(f"{ENV_MAX_PARALLEL_EVAL} 须为正整数,得到 {value!r}") - return value - - -def resolve_max_parallel_eval(override: int | None = None) -> int: - """MiningConfig 显式值优先,否则读环境变量。""" - if override is not None: - return parse_max_parallel_eval(str(override)) - return parse_max_parallel_eval() diff --git a/seekalpha/factor/mining/loop.py b/seekalpha/factor/mining/loop.py deleted file mode 100644 index dd531f63..00000000 --- a/seekalpha/factor/mining/loop.py +++ /dev/null @@ -1,230 +0,0 @@ -"""多轮因子挖掘主循环:OpenAI Chat Completions + 并行工具调用 + JSONL 轨迹。""" - -from __future__ import annotations - -import json -import time -from concurrent.futures import ThreadPoolExecutor -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - -from seekalpha.factor.mining.console import ConsolePrinter -from seekalpha.factor.mining.tools import FactorEvalTools - -_NUDGE = ( - "[local] 请继续推进,不要停在解释或征询下一步;" - "请直接基于当前上下文发起原生 tool_calls(建议并行多条 eval_on_train_set)。" -) - - -def _now() -> str: - return datetime.now(timezone.utc).isoformat() - - -def _chat_with_retry(client: Any, req: dict[str, Any], *, max_retries: int = 4, backoff: float = 2.0) -> Any: - last: BaseException | None = None - for attempt in range(max_retries + 1): - try: - return client.chat.completions.create(**req) - except BaseException as exc: # 
noqa: BLE001 - last = exc - status = getattr(exc, "status_code", None) - if status in (400, 401, 404, 422) or attempt == max_retries: - raise - time.sleep(min(60.0, backoff * (2**attempt))) - assert last is not None # pragma: no cover - raise last - - -def _tool_calls_payload(msg: Any) -> list[dict[str, Any]]: - if not getattr(msg, "tool_calls", None): - return [] - return [ - { - "id": tc.id, - "type": "function", - "function": {"name": tc.function.name, "arguments": tc.function.arguments}, - } - for tc in msg.tool_calls - ] - - -def _parse_tool_arguments(arguments_raw: Any) -> dict[str, Any]: - if isinstance(arguments_raw, str): - try: - arguments_raw = json.loads(arguments_raw) if arguments_raw.strip() else {} - except json.JSONDecodeError: - arguments_raw = {} - return arguments_raw if isinstance(arguments_raw, dict) else {} - - -def _submit_record(*, turn: int, arguments_raw: Any, result: dict[str, Any]) -> dict[str, Any]: - args = _parse_tool_arguments(arguments_raw) - return { - "turn": turn, - "ok": bool(result.get("ok")), - "stored": bool(result.get("stored")), - "factor_id": result.get("factor_id") or args.get("factor_name"), - "factor_name": result.get("factor_name") or args.get("factor_name"), - "comment": result.get("comment") or args.get("comment"), - "multi_line_expr": args.get("multi_line_expr"), - "metrics": result.get("metrics"), - "skipped_reason": result.get("skipped_reason"), - "error": result.get("error"), - "error_type": result.get("error_type"), - "registry_path": result.get("registry_path"), - "dsl_path": result.get("dsl_path"), - "similarity": result.get("similarity"), - } - - -def _dispatch_parallel(tools: FactorEvalTools, tool_calls: list[Any], *, max_workers: int) -> list[dict[str, Any]]: - def _one(tc: Any) -> dict[str, Any]: - t0 = time.perf_counter() - try: - out = tools.dispatch(tc.function.name, tc.function.arguments) - except Exception as e: # noqa: BLE001 - out = {"ok": False, "error": f"{type(e).__name__}: {e}", "error_type": 
type(e).__name__} - return { - "tool_call_id": tc.id, - "name": tc.function.name, - "arguments_raw": tc.function.arguments, - "result": out, - "elapsed_seconds": round(time.perf_counter() - t0, 4), - } - - if len(tool_calls) <= 1 or max_workers <= 1: - return [_one(tc) for tc in tool_calls] - - workers = min(max_workers, len(tool_calls)) - with ThreadPoolExecutor(max_workers=workers) as ex: - by_id = {row["tool_call_id"]: row for row in ex.map(_one, tool_calls)} - return [by_id[tc.id] for tc in tool_calls] - - -def run_trajectory( - *, - client: Any, - model: str, - system_prompt: str, - user_message: str, - tools: FactorEvalTools, - log_jsonl: Path, - max_turns: int = 16, - max_tool_calls_per_round: int = 8, - max_tool_workers: int = 4, - min_tool_call_rounds_before_allow_stop: int = 3, - temperature: float | None = None, - max_tokens: int = 8192, - extra_body: dict[str, Any] | None = None, - printer: ConsolePrinter | None = None, -) -> list[dict[str, Any]]: - schemas = tools.schemas() - messages: list[dict[str, Any]] = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_message}, - ] - - log_path = Path(log_jsonl) - log_path.parent.mkdir(parents=True, exist_ok=True) - - def _emit(event: str, payload: dict[str, Any]) -> None: - with log_path.open("a", encoding="utf-8") as f: - f.write(json.dumps({"ts": _now(), "event": event, **payload}, ensure_ascii=False, default=str) + "\n") - - _emit("session_start", {"model": model, "max_turns": max_turns}) - tool_call_rows: list[dict[str, Any]] = [] - submit_records: list[dict[str, Any]] = [] - tool_call_rounds = 0 - - for turn in range(max_turns): - if printer is not None: - printer.turn(turn) - req: dict[str, Any] = { - "model": model, - "messages": messages, - "tools": schemas, - "tool_choice": "auto", - "max_tokens": max_tokens, - } - if temperature is not None: - req["temperature"] = temperature - if extra_body: - req["extra_body"] = extra_body - - resp = _chat_with_retry(client, req) - 
msg = resp.choices[0].message - reasoning = getattr(msg, "reasoning_content", None) or getattr(msg, "reasoning", None) - tc_payload = _tool_calls_payload(msg) - _emit( - "assistant", - {"turn": turn, "content": msg.content, "reasoning": reasoning, "tool_calls": tc_payload}, - ) - - if printer is not None: - printer.assistant(msg.content, reasoning) - - assistant_msg: dict[str, Any] = {"role": "assistant", "content": msg.content} - if tc_payload: - assistant_msg["tool_calls"] = tc_payload - messages.append(assistant_msg) - - if not msg.tool_calls: - if tool_call_rounds < min_tool_call_rounds_before_allow_stop: - messages.append({"role": "user", "content": _NUDGE}) - _emit("nudge", {"turn": turn, "tool_call_rounds": tool_call_rounds}) - continue - _emit("session_end", {"turn": turn, "reason": "no_tool_calls"}) - if printer is not None: - printer.session_end("no_tool_calls", sum(1 for r in tool_call_rows if r.get("ok")), len(tool_call_rows)) - break - - tool_call_rounds += 1 - tcs = list(msg.tool_calls)[:max_tool_calls_per_round] - rows = _dispatch_parallel(tools, tcs, max_workers=max_tool_workers) - _emit("tool_results", {"turn": turn, "results": rows}) - - for r in rows: - res = r["result"] if isinstance(r.get("result"), dict) else {} - if printer is not None: - printer.tool_result(r["name"], r.get("arguments_raw"), res, r.get("elapsed_seconds")) - tool_call_rows.append( - {"turn": turn, "name": r["name"], "elapsed_seconds": r.get("elapsed_seconds"), "ok": res.get("ok")} - ) - if r["name"] == "submit_factor": - submit_records.append(_submit_record(turn=turn, arguments_raw=r.get("arguments_raw"), result=res)) - messages.append( - { - "role": "tool", - "tool_call_id": r["tool_call_id"], - "content": tools.result_to_content(res), - } - ) - else: - _emit("session_end", {"turn": max_turns - 1, "reason": "max_turns_reached"}) - if printer is not None: - printer.session_end("max_turns_reached", sum(1 for r in tool_call_rows if r.get("ok")), len(tool_call_rows)) - - 
snapshot = log_path.with_suffix(".messages.json") - snapshot.write_text(json.dumps(messages, ensure_ascii=False, indent=2, default=str), encoding="utf-8") - - times = [r["elapsed_seconds"] for r in tool_call_rows if r.get("elapsed_seconds") is not None] - submitted_factors = [r for r in submit_records if r.get("stored")] - submit_failures = [r for r in submit_records if not r.get("stored")] - summary = { - "log_jsonl": str(log_path), - "tool_calls": { - "count": len(tool_call_rows), - "ok": sum(1 for r in tool_call_rows if r.get("ok")), - "elapsed_seconds_total": round(sum(times), 4) if times else 0.0, - }, - "submitted_factors": submitted_factors, - "submit_failures": submit_failures, - "messages_snapshot": str(snapshot), - } - log_path.with_suffix(".summary.json").write_text( - json.dumps(summary, ensure_ascii=False, indent=2, default=str), encoding="utf-8" - ) - _emit("run_summary", summary) - return messages diff --git a/seekalpha/factor/mining/mls_thresholds.py b/seekalpha/factor/mining/mls_thresholds.py deleted file mode 100644 index d9831829..00000000 --- a/seekalpha/factor/mining/mls_thresholds.py +++ /dev/null @@ -1,75 +0,0 @@ -"""MLS-FMB 保留级门槛(由已入库因子 train/val 表现 25% 分位校准)。""" - -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any - -from seekalpha.core.paths import MLS_FMB_PERCENTILES_PATH - - -def _round_threshold(value: float) -> float: - x = float(value) - if x >= 10: - return round(x, 0) - if x >= 1: - return round(x, 1) - return round(x, 2) - - -def load_mls_fmb_percentiles(path: Path | None = None) -> dict[str, Any]: - p = Path(path or MLS_FMB_PERCENTILES_PATH) - if not p.is_file(): - return {} - return json.loads(p.read_text(encoding="utf-8")) - - -def mls_fmb_prompt_thresholds(path: Path | None = None) -> dict[str, dict[str, float]]: - """返回 train/val 各指标的 |·| 门槛(p25,两位有效舍入)。""" - raw = load_mls_fmb_percentiles(path) - abs_p25 = raw.get("abs_p25") or {} - out: dict[str, dict[str, float]] = {} - for 
split in ("train", "val"): - src = abs_p25.get(split) or {} - out[split] = { - key: _round_threshold(float(src[key])) - for key in ("mean_rho", "nw_t_rho", "nw_t_ls", "mls") - if key in src and src[key] is not None - } - return out - - -def mls_fmb_thresholds_markdown(path: Path | None = None, *, label_col: str | None = None) -> str: - """生成写入挖掘 prompt 的 MLS-FMB 门槛说明。""" - th = mls_fmb_prompt_thresholds(path) - if not th.get("train") or not th.get("val"): - return ( - "**MLS-FMB**(`summary.mls_fmb`):逐日十分组单调性 × 多空 IR;" - "门槛文件缺失时仅作参考,详见 `docs/factor_metrics.md`。" - ) - - tr = th["train"] - va = th["val"] - meta = load_mls_fmb_percentiles(path) - n = meta.get("n_factors", "?") - cal_label = str(meta.get("label_col") or "label_1d_open_to_open") - rel = MLS_FMB_PERCENTILES_PATH.as_posix() - session_label = label_col or cal_label - label_note = "" - if session_label != cal_label: - label_note = ( - f"\n\n> **注意**:本次会话 label 为 `{session_label}`,下表门槛按 `{cal_label}` 校准," - "仅作**相对参考**;长持有 label 下 MLS 数值尺度可能不同,请结合 IC/ICIR 与十分组判断。" - ) - - return f"""**MLS-FMB**(`summary.mls_fmb`,详见 `docs/factor_metrics.md`):逐日截面十分组单调性 ρ_t 与 Q10−Q1 多空 LS_t 的 FMB+NW 推断;**保留级候选**在 train/val 上还须(指标与 `ic` **同号**,负 IC 看绝对值): - -| 指标 | train(|·| ≥) | val(|·| ≥) | -|------|----------------|---------------| -| `mls_fmb.mean_rho` | {tr.get("mean_rho", "—")} | {va.get("mean_rho", "—")} | -| `mls_fmb.nw_t_rho` | {tr.get("nw_t_rho", "—")} | {va.get("nw_t_rho", "—")} | -| `mls_fmb.nw_t_ls` | {tr.get("nw_t_ls", "—")} | {va.get("nw_t_ls", "—")} | -| `mls_fmb.mls`(参考) | {tr.get("mls", "—")} | {va.get("mls", "—")} | - -门槛来源:当前 factorzoo **{n}** 个已入库因子在 train(2019-01-01~2021-12-31)/ val(2022-01-01~2024-12-31)上的 **25% 分位**(`|·|`,`{cal_label}`;`{rel}`)。{label_note}""" diff --git a/seekalpha/factor/mining/operators.py b/seekalpha/factor/mining/operators.py deleted file mode 100644 index 11e03dd3..00000000 --- a/seekalpha/factor/mining/operators.py +++ /dev/null @@ -1,7 +0,0 @@ -"""挖掘侧算子清单:复用 
seekalpha.dsl.catalog(算子唯一定义在 seekalpha/dsl)。""" - -from __future__ import annotations - -from seekalpha.dsl.catalog import list_operator_names, operator_catalog_markdown - -__all__ = ["list_operator_names", "operator_catalog_markdown"] diff --git a/seekalpha/factor/mining/prompts.py b/seekalpha/factor/mining/prompts.py deleted file mode 100644 index c50c249b..00000000 --- a/seekalpha/factor/mining/prompts.py +++ /dev/null @@ -1,429 +0,0 @@ -"""股票因子挖掘 system prompt。""" - -from __future__ import annotations - -import json - -from seekalpha.dsl.catalog import operator_catalog_markdown -from seekalpha.factor.mining.mls_thresholds import mls_fmb_thresholds_markdown -from seekalpha.factor.types import DEFAULT_LABEL_COL - -_LABEL_DESCRIPTIONS: dict[str, str] = { - "label_1d_open_to_open": "T+1 开盘 → T+2 开盘(短周期 alpha,默认)", - "label_1d_close_to_close": "T+1 收盘 → T+2 收盘(1 日持有,**适合价量/短周期因子**)", - "label_10d_close_to_close": "T+1 收盘 → T+11 收盘(10 日持有,**适合基本面/慢因子**)", - "label_20d_close_to_close": "T+1 收盘 → T+21 收盘(20 日持有,适合基本面/慢因子)", -} - -FACTOR_MINING_INTERFACE_PROMPT = """你是一名量化研究自主智能体,专注于**A 股日频** alpha 因子。请在多轮迭代中演化因子;**核心目标是在提升与前瞻 label 线性相关的同时,将因子鲁棒性视为与「够不够相关」同等重要**。**主战场在训练集(train)**:日常迭代以 train 的 `summary` 与 **`monthly_corr_robustness`** 联合判断;验证集(val)仅用于**极少量**泛化抽检。 - -# 因子构建接口 - -## 你的目标 - -**优化目标:(1)train 上达到可用的相关水平,且(2)鲁棒性达标。** 鲁棒性覆盖:`monthly_corr_robustness`、`factor_coverage`、因子分布(`factor_skewness`/`factor_kurtosis`)、**`summary.mls_fmb`**,以及少数 val 调用上与 train 不出现灾难性背离。 - -**【交付定义】** 仅当因子在 **train 与 val** 上均满足:`abs(summary.ic) ≥ 0.005`(且最好 `abs(summary.rank_ic) ≥ 0.005`)、`|summary.icir| > 0.1`、`summary.cs_pearson_autocorr > 0.6`(截面 lag-1 自相关,衡量日度排名稳定性)、`monthly_corr_robustness.share_months_ic_positive > 0.7`(负 IC 因子看 `< 0.3`)、`summary.factor_coverage > 0.9`,**十分组单调性**合理(见 `summary.decile_mean_label`:`ic>0` 时 D10.mean_label > D1;`ic<0` 时 D1 > D10),**MLS-FMB** 达标(见下节 `summary.mls_fmb`,`mean_rho`/`nw_t_rho`/`nw_t_ls` 与 `ic` 同号且 train/val 分别满足 |·| 门槛),且 **val `ic` 与 train 
同号**(`eval_on_val_set` 须传 `expected_sign=1` 或 `-1`),才视为 **保留级候选**。 - -**【会话完成条件】** 挖掘会话的**唯一正式交付方式**是调用 **`submit_factor`** 并成功入库(返回 `stored=true`)。仅完成 train/val 评估、口头总结或停在「建议入库」**不算交付**;每个保留级候选须各调用一次 `submit_factor`(不同 `factor_name`)。查重失败时根据 `similarity.top_neighbors` 改写后再提交。 - -- 会话已配置 train/val 日期与 label 列;工具结果中不再重复这些配置。 -- 每一轮:**并行调用 3~5 次 `eval_on_train_set`**,用不同 `multi_line_expr` 探多条假设;train 上相关与鲁棒性均可接受时,调用 `eval_on_val_set` 及时查看性能是否能够跨区间泛化。 -- 默认 **`include_detail_tables`: false**;需要按月/分品种明细时再设为 **true**。 - -请遵循: -- **相关性 + 鲁棒性双目标**;筛选用 **`abs(summary.ic)`** 与 **`abs(summary.rank_ic)`**;**负 IC 是有效负向 alpha**,不是错误。 -- **中间变量命名**:蛇形英文名(如 `ma_w_dev`),避免 `x`、`tmp`。 -- 若 `ok` 为 false,修正 DSL 或列名。 - ---- - -### 数据与评估口径 - -本仓库为**股票日频 panel**:索引 `(datetime, instrument)`,主频 **1d**。 - -- **时序算子**(`TS_*`、`DELTA`、`SLOPE` 等):在**每个 instrument 各自时间序列**上计算。 -- **截面算子**(`RANK`、`CS_ZSCORE`、`CS_DEMEAN`、`CS_WINSORIZE`、`CS_BUCKET`、`CS_NEUTRALIZE`):在**每个 datetime 截面上**跨 instrument 计算。 - -评估指标均为**截面**口径: - -| 指标 | 含义 | -|------|------| -| `summary.ic` | 逐日横截面 Pearson IC 的均值 | -| `summary.icir` | IC / std(逐日 IC),即 IC 信息比率 | -| `summary.rank_ic` | 逐日横截面 Spearman Rank IC 的均值 | -| `summary.cs_pearson_autocorr` | 逐日横截面 lag-1 Pearson 自相关均值:`corr_CS(f_t, f_{t-1})`,衡量因子排名日度延续性;**> 0.6** 方可入库 | -| `summary.mls_fmb` | 逐日十分组 MLS-FMB:`mean_rho`(单调性)、`mean_ls`/`ir_ls_annual`(多空 IR)、`mls`(综合)、`nw_t_rho`/`nw_t_ls`(NW t) | - -{{MLS_FMB_THRESHOLDS}} - -{{LABEL_SECTION}} - ---- - -### 可用行情变量 - -表达式引用列须 **`$` + 列名**: - -| 字段 | 说明 | -|------|------| -| `$open` / `$high` / `$low` / `$close` | 原始 OHLC | -| `$adj_open` / `$adj_high` / `$adj_low` / `$adj_close` | 复权 OHLC(**优先**) | -| `$volume` / `$amount` | 成交量 / 成交额 | -| `$float_cap` / `$tot_cap` | 流通 / 总市值 | -| `$vwap` | 成交量加权均价(与 `$close` 同单位尺度:amount/volume) | -| `$adj_vwap` | 后复权 VWAP(`$vwap × $adjfactor`,与 `$adj_close` 同复权口径) | -| `$ret` | 日 adj_close pct_change(按 instrument) | -| `$is_trade` / `$not_st` | 可交易 / 非 ST 标记 | -| `$industry_sw_l1` | 
申万一级行业**离散码**(严格 PIT,`--with-industry` 时才有);仅用于分组,不做数值运算 | - -> **行业中性化**:行业码是离散组号,直接 `CS_NEUTRALIZE(factor, $industry_sw_l1)` 即为行业内去均值;**勿**对它套 `CS_BUCKET`。 - ---- - -{{FUNDAMENTAL_SECTION}} - ---- - -### 多周期:`$field@<周期>` - -仅支持 **`@1d`** 与 **`@1w`**(W-FRI 周线,严格无前视 backward 广播)。 - -**行作用域规则:** - -| 当行引用 | 计算面板 | `TS_*(x, N)` 中 N | -|---|---|---| -| 仅同一种 `@周期` | 该辅频面板 | 该频 N 根 bar | -| 仅主频列 | 日频面板 | N 个交易日 | -| 主频 + `@周期` 混合 | 日频;`@` 列先广播 | N 个交易日 | - -**要「真正 N 根周线」的滚动统计**,须单独写纯 `@1w` 行得到中间变量,再与主频列组合: - -```text -ma_w = TS_MEAN($adj_close@1w, 4) -SUBTRACT($adj_close, ma_w) -``` - -混频 `TS_MEAN($col@1w, N)`:**在 broadcast 后的日频 index 上 rolling**,N = 日 bar 数。 - -**截面算子示例**(逐日跨股票): - -```text -# 市值中性动量(10 档等频分组后组内去均值) -raw = TS_MEAN($ret, 20) -CS_NEUTRALIZE(raw, CS_BUCKET(LOG($float_cap), 10)) - -# 截面秩 -RANK(CS_ZSCORE($amount)) -``` - -**日频筹码算子**(默认 CYQ 换手衰减;6 参即可,勿写 `method` / 旧两参写法): - -```text -# 标准写法(close, low, high, volume, window, float_cap) -peak = CHIP_PEAK_LOC($adj_close, $adj_low, $adj_high, $volume, 60, $float_cap) -entropy = CHIP_ENTROPY($adj_close, $adj_low, $adj_high, $volume, 30, $float_cap) -com_gap = CHIP_COM_W_GAP($adj_close, $adj_low, $adj_high, $volume, 40, $float_cap) - -# 可选:第 7 参 nbins(默认 64);第 8 参 method(仅 tri/uniform 时) -tri_gap = CHIP_COM_W_GAP($adj_close, $adj_low, $adj_high, $volume, 40, $vwap, 64, 'tri') -``` - ---- - -### 可用算子 - -算子均为**大写**(如 `TS_MEAN`、`DELTA`)。**仅支持位置参数**;禁止 `name=value` 关键字写法。 - -{{OPERATOR_CATALOG}} - ---- - -### 中性化使用指南 - -**1. 中性化的本质与判定。** 因子变量往往在多个维度上同时暴露——既押了你想要的 alpha,也搭便车押了若干你并不想要的风险维度(市值、行业、盈利质量等)。中性化的本质是剥离"你不想押注、但变量恰好暴露在上的维度",只留下真正的 alpha 残差。判定准则只有一条:**该变量在某维度的暴露,是不是我故意要押注的 alpha?** 是 → 不中性化;否 → 中性化。量价变量与市值的相关性分三档,紧迫性也分三档:原始量(`amount`/`volume`,A 股截面 Spearman ~0.55)几乎必须中性化,否则等于隐性押注大市值;比率类(`amt_to_cap`,~0.33)建议中性化但非必须,取决于 alpha 是否容忍"小市值高换手"暴露;波动率(~0.10)收益有限。已 `CS_ZSCORE` 且市值暴露本身就是 alpha 的(如 PEAD、低关注度)不要再中性化,否则徒劳甚至略降 IC。 - -**2. 
对中间变量做,还是最后做。** 通常的处理顺序是 `winsorize → 中性化 → 标准化 → 合成`:winsorize 在前是为了避免极值扭曲分组回归,标准化在中性化之后是为了把残差拉到可比尺度方便加权,合成在最后且通常不再整体中性化。合成路线上,"分信号各自中性化、最后合成"在历史数据上 IC 最稳最高,是首选;"多变量合成后整体中性化"会搅动已调好的尺度配比,慎用;"单变量叠加多重处理后直接出因子"几乎必洗光信号,应避免。同一变量稳妥起见最多中性化一次,需要剥多个维度时把第二维度交给辅助信号去隐式吸收。 - -**3. 按什么分组做。** 市值用 `CS_BUCKET(LOG($float_cap), 10)`(务必取 log、10 档、每组数百只),盈利质量用 `CS_BUCKET($funda_ROIC_TTM, 10)`,行业用申万一级且仅当不押注行业景气时使用。分组键必须**稳定低噪、与剥离维度同义**——高频变量需先 `TS_MEAN` 平滑或取 LOG,绝不能用成交额/换手率代理市值(会顺带洗掉 alpha)。 - ---- - -### 工具调用 - -| 工具 | 作用 | -|------|------| -| `eval_on_train_set` | 训练窗评估(**应占绝大多数调用**) | -| `eval_on_val_set` | 验证窗评估(**少用**);须传 `expected_sign` | -| `submit_factor` | **交付入库**:train-start~val-end 全区间求值、指标门槛、截面去重后写入 factorzoo | - -共用参数(eval):`multi_line_expr`(必填)、`factor_name`、`include_detail_tables`、`label_quantile_n`(默认 10,0 则不输出分位桶)。 - -**`submit_factor` 参数**:`multi_line_expr`、`factor_name`(蛇形英文名)、`comment`(必填,描述因子经济含义与结构)。 - -**`submit_factor` 返回字段**:`stored`、`factor_id`、`metrics`(eval 区间 IC/ICIR/RANKIC/coverage/**cs_pearson_autocorr** + `finite_ratio`/skew/kurt)、`delivery_check`、`similarity`(`max_abs_corr` 与 `top_neighbors`:查重失败时含最相似因子的 `factor_id`/`cs_corr`/`expr`,默认 top3)、`skipped_reason`、`registry_path`。 - -**工具返回 JSON 字段:** - -| 字段 | 含义 | -|------|------| -| `summary` | `ic`、`icir`、`rank_ic`、**`cs_pearson_autocorr`**、`n_days`、`n_instruments`、`factor_coverage`、`factor_skewness`、`factor_kurtosis`、**`decile_mean_label`**(固定 10 组,`decile` 1–10,`mean_label` 为组内前瞻 label 均值) | -| `monthly_corr_robustness` | `n_months`、`mean_monthly_ic`、**`share_months_ic_positive`**(月均 IC>0 的月份占比) | -| `label_quantile_buckets` | 与 `decile_mean_label` 同口径的可选分位桶(`label_quantile_n` 控制,默认 10) | -| `sign_check` | 仅 val 且传入 `expected_sign` | -| `by_month` / `by_symbol` | 仅 `include_detail_tables=true` | - ---- - -### IC 方向、月度稳健性与十分组 - -- 推荐强度看 **`abs(ic)`**、**|icir|** 与 **`abs(rank_ic)`**;负 IC 有效,val 传对应 `expected_sign`。 -- **`summary.cs_pearson_autocorr > 0.6`**:截面排名日度延续性门槛;过低则换手过高、不宜实盘,**不得**作保留级或入库。 -- **`ic 
> 0`**:`mean_monthly_ic` 宜为正;`share_months_ic_positive`(终端「月IC+」)须 **> 0.7**。 -- **`ic < 0`**:`mean_monthly_ic` 宜为负;`share_months_ic_positive` 须 **< 0.3**。 -- **十分组 `decile_mean_label`**(全样本等频,D1=因子最低): - - `ic > 0`:宜 **D10.mean_label > D1.mean_label**(因子越高、label 越高) - - `ic < 0`:宜 **D1.mean_label > D10.mean_label** - - D1≈D10 或顺序与 IC 符号相反 → 分位无区分,不宜作保留级 - ---- - -### 交付入库(**必须调用 `submit_factor`**) - -当因子在 **train 与 val** 上均达保留级候选后,**必须**调用 **`submit_factor`** 完成正式交付(勿手动改 registry,勿以文字总结代替): - -1. **最后一轮**:对确认保留的因子调用 `submit_factor`(可与收尾说明同轮,但不可省略该 tool_call) -2. 在 **train-start ~ val-end** 全区间求值并算 IC/ICIR/RANKIC/coverage/**cs_pearson_autocorr**(`coverage` 为 eval 区间;`finite_ratio` 为全行有限值占比) -3. 入库 delivery 门槛:全区间 `|ICIR| > 0.1`、**`cs_pearson_autocorr > 0.6`** -4. 自动截面去重(与库内因子 |cs_corr| ≥ 阈值则拒绝,并返回 top3 最相似因子的 `expr` 供改写后重试) -5. 须传 **`comment`** 说明因子含义(经济直觉、算子、窗口、IC 方向) -6. 成功后写入 factorzoo 与 `artifacts/factorzoo/stock_1d/mining_delivered_registry.json`;返回 `stored=true` 方可结束会话 - ---- - -### 行为准则 - -1. 每轮先归因上一轮结果,再设计下一代;避免仅改窗口长度的同质批次。 -2. 并行候选应跨越不同信息维度:价格动量/均值回归、波动、量价、市值、**基本面(`funda_*`)**、周线结构(`@1w`)等。 -3. 连续 2 轮无改善时,强制引入未用过的信息源或算子族。 -4. 发起 tool_calls 前完成思考;**不要**停在解释或征询用户下一步。 -5. 确认保留级候选后,**必须**调用 **`submit_factor`** 交付;`comment` 须清晰描述因子逻辑,勿空泛。 -6. **结束前检查**:若已有保留级候选但尚未 `submit_factor`,不得结束;先提交再收尾。 -7. 
**避免过度调参**:除非本轮已产出多个**两两截面相关较低**(机制差异明显)的保留级候选,通常 **`submit_factor` 成功交付一个因子后即可结束**,无需对同一机制反复微调窗口或参数。 -""" - - -def _tool_call_examples_section(*, include_submit: bool = True, include_fundamentals: bool = True) -> str: - examples = [ - { - "name": "eval_on_train_set", - "arguments": { - "multi_line_expr": "ma20 = TS_MEAN($adj_close, 20)\nSUBTRACT($adj_close, ma20)", - "factor_name": "ma20_dev", - }, - }, - { - "name": "eval_on_train_set", - "arguments": { - "multi_line_expr": "ma_w = TS_MEAN($adj_close@1w, 4)\nSUBTRACT($adj_close, ma_w)", - "factor_name": "ma_w_dev", - }, - }, - ] - if include_fundamentals: - examples.append( - { - "name": "eval_on_train_set", - "arguments": { - "multi_line_expr": "roe_z = CS_ZSCORE(CS_WINSORIZE($funda_roe, 0.01, 0.99))\ngro = CS_ZSCORE(CS_WINSORIZE($funda_netprofit_yoy, 0.01, 0.99))\nCS_NEUTRALIZE(MULTIPLY(roe_z, gro), CS_BUCKET(LOG($float_cap), 10))", - "factor_name": "funda_roe_growth_neutral", - }, - } - ) - examples.append( - { - "name": "eval_on_train_set", - "arguments": { - "multi_line_expr": "TS_RANK($ret, 20)", - "factor_name": "ret_rank20", - }, - } - ) - submit_note = "" - if include_submit: - examples.append( - { - "name": "submit_factor", - "arguments": { - "multi_line_expr": "ma20 = TS_MEAN($adj_close, 20)\nSUBTRACT($adj_close, ma20)", - "factor_name": "ma20_dev", - "comment": "20日均价偏离:价格相对短期均线的回归/动量;负IC表示均值回归。", - }, - } - ) - submit_note = ( - "\n\n**交付示例**:train/val 均达标后,须调用 `submit_factor`(上表第 4 条);" - "查重失败则读 `similarity.top_neighbors[].expr` 改写后重试。" - ) - body = json.dumps(examples, ensure_ascii=False, indent=2) - dims = "动量、周线偏离、基本面、收益秩" if include_fundamentals else "动量、周线偏离、收益秩" - note = ( - f"上表为同轮并行 `eval_on_train_set` 示例({dims})。" - "建议每轮 3~5 条并行;仅当 train 有满意候选时,偶尔对少数 factor 做 val 抽检。" - + submit_note - ) - return ( - "---\n\n## ``tool_calls`` 示例(**并行 train + 最终 submit**)\n\n" - + note - + "\n\n```json\n" - + body - + "\n```\n" - ) - - -_SUBMIT_DISABLED_NOTE = """ ---- - - -### 交付说明 - -本次会话**未启用** 
`submit_factor` 工具(`--no-submit`)。保留级候选仅能通过 train/val 评估确认,无法自动入库。 -""" - - -def _label_section_markdown(label_col: str, *, include_fundamentals: bool = True) -> str: - desc = _LABEL_DESCRIPTIONS.get(label_col, "panel 内预计算的前瞻收益列") - lines = [ - f"**本次会话 label 列:`{label_col}`** — {desc}。", - "所有 `summary.ic` / `rank_ic` / `decile_mean_label` / `mls_fmb` 均相对该列计算。", - "", - "panel 内常用 label(启动时可 `--label-col` 切换):", - "", - "| 列名 | 含义 |", - "|------|------|", - ] - for name, meaning in _LABEL_DESCRIPTIONS.items(): - mark = " **← 本次**" if name == label_col else "" - lines.append(f"| `{name}` | {meaning}{mark} |") - if label_col not in _LABEL_DESCRIPTIONS: - lines.append(f"| `{label_col}` | {desc} **← 本次** |") - lines.extend( - [ - "", - "**label 选用建议**(`eval_factor` / 挖掘 CLI 的 `--label-col`):", - "", - "| 因子类型 | 推荐 label |", - "|----------|------------|", - *( - ["| 基本面(主要用 `$funda_*`) | `label_10d_close_to_close` |"] - if include_fundamentals - else [] - ), - "| 价量(OHLC / `$ret` / `$volume` / 筹码等) | `label_1d_close_to_close` |", - "", - "本次会话已配置为上表「本次」行;勿在 tool 参数中切换 label。", - ] - ) - if label_col.startswith("label_") and "d_close_to_close" in label_col and label_col not in ( - "label_1d_close_to_close", - ): - try: - hold = int(label_col.split("_")[1].replace("d", "")) - if hold > 1: - lines.extend( - [ - "", - f"**长持有 label 提示**:持有约 **{hold} 个交易日**,因子宜偏基本面/低频结构;" - "月度 IC 稳健性与 `cs_pearson_autocorr` 仍适用,但 IC 绝对值通常低于短周期 label。", - ] - ) - except ValueError: - pass - return "\n".join(lines) - - -_FUNDAMENTAL_SECTION_MD = """### 基本面与披露日历(`build_panel --with-fundamentals` 并入) - -季频 `fina_indicator` 经**严格 PIT** 展开为日频:财报公告日 D **不可用**,**D 的下一交易日**起该期字段才可引用;两期之间 **ffill** 保持最近已披露值。披露前为 NaN 属正常,勿当缺失错误。 - -**财务指标**(`fina_indicator` → 日频,前缀 `funda_`): - -| 字段 | 说明 | -|------|------| -| `$funda_roe` / `$funda_roa` | 净资产收益率 / 总资产报酬率 | -| `$funda_debt_to_assets` | 资产负债率 | -| `$funda_eps` / `$funda_bps` | 每股收益 / 每股净资产 | -| `$funda_grossprofit_margin` / `$funda_netprofit_margin` | 
毛利率 / 净利率 | -| `$funda_profit_dedt` | 扣非净利润 | -| `$funda_ocfps` | 每股经营现金流 | -| `$funda_current_ratio` / `$funda_quick_ratio` | 流动比率 / 速动比率 | -| `$funda_netprofit_yoy` / `$funda_or_yoy` / `$funda_tr_yoy` | 归母净利 / 营收 / 营业总收入同比(%) | - -**财报科目**(前缀 `funda_fs_`,同为 PIT 日频;`--with-statements` 时含约 70 个三大表科目): - -| 字段 | 说明 | -|------|------| -| `$funda_fs_working_capital` / `$funda_fs_ebit` | 营运资本 / 息税前利润 | -| `$funda_fs_total_assets` / `$funda_fs_total_liabilities` / `$funda_fs_total_equity` | 资产 / 负债 / 权益(时点) | -| `$funda_fs_oper_revenue_ytd` / `$funda_fs_net_profit_parent_ytd` | 营收 / 归母净利(年初至今累计,`_ytd`) | -| `$funda_fs_ocf_net_ytd` | 经营现金流净额(累计) | - -> 三大表 `_ytd` 为**年初至今累计**(Q1=当季,中报/三季报/年报累计);资产负债表科目为时点值。完整清单见 `docs/panel_fundamental_fields.md` §3。 - -**披露日历特征**: - -| 字段 | 说明 | -|------|------| -| `$funda_days_since_disclose` | 距**上一期**财报披露**生效日**的交易日数(生效日=0);严格 PIT | -| `$funda_days_since_quarter_start` | 距当前季报区间首日(1/1、4/1、7/1、10/1)的交易日数 | - -**使用建议**(基本面/慢因子): - -- 基本面列在日频上**阶跃+持有**,`TS_PCTCHANGE($funda_roe, 20)` 等窗口单位为**交易日**;约 60 日 ≈ 一季。 -- 截面组合建议 `CS_NEUTRALIZE(..., CS_BUCKET(LOG($float_cap), 10))` 市值中性;比率类可先 `CS_WINSORIZE` 再 `RANK` 截面排序。 -- 事件窗示例:`TS_PCTCHANGE($xxx, $funda_days_since_disclose)`(披露生效后变量 xxx 的变化)。 - -> 行尾可写 `#` 注释;字符串内 `#` 保留。""" - -_FUNDAMENTAL_DISABLED_MD = ( - "### 基本面\n\n" - "**本次未载入基本面列**:请勿使用任何 `$funda_*` / `$funda_fs_*` 字段" - "(本会话仅提供价量/行情列,专注价量因子)。\n\n" - "> 行尾可写 `#` 注释;字符串内 `#` 保留。" -) - - -def build_system_prompt( - *, - include_operator_catalog: bool = True, - enable_submit: bool = True, - extra_instructions: str = "", - label_col: str = DEFAULT_LABEL_COL, - include_fundamentals: bool = True, -) -> str: - catalog = operator_catalog_markdown() if include_operator_catalog else "(本次未注入算子清单)" - mls_block = mls_fmb_thresholds_markdown(label_col=label_col) - label_block = _label_section_markdown(label_col, include_fundamentals=include_fundamentals) - funda_block = _FUNDAMENTAL_SECTION_MD if include_fundamentals else _FUNDAMENTAL_DISABLED_MD - 
body = ( - FACTOR_MINING_INTERFACE_PROMPT.replace("{{OPERATOR_CATALOG}}", catalog) - .replace("{{MLS_FMB_THRESHOLDS}}", mls_block) - .replace("{{LABEL_SECTION}}", label_block) - .replace("{{FUNDAMENTAL_SECTION}}", funda_block) - ) - if not include_fundamentals: - body = body.replace("、**基本面(`funda_*`)**、", "、") - if not enable_submit: - body = body.replace("**【会话完成条件】**", "**【会话完成条件(本次未启用 submit)】**") - parts = [ - body.strip(), - _tool_call_examples_section( - include_submit=enable_submit, - include_fundamentals=include_fundamentals, - ), - ] - if not enable_submit: - parts.append(_SUBMIT_DISABLED_NOTE.strip()) - if extra_instructions.strip(): - parts.append(extra_instructions.strip()) - return "\n\n".join(parts) diff --git a/seekalpha/factor/mining/registry_io.py b/seekalpha/factor/mining/registry_io.py deleted file mode 100644 index a77de3b2..00000000 --- a/seekalpha/factor/mining/registry_io.py +++ /dev/null @@ -1,159 +0,0 @@ -"""mining_delivered_registry.json 读写(submit / repair 共用)。""" - -from __future__ import annotations - -import json -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - -from seekalpha.factor.types import IngestPolicy -from seekalpha.factor.zoo import FactorZoo -from seekalpha.factor.zoo.similarity import SimilarityMatrix - - -def load_mining_registry(path: Path) -> dict[str, Any]: - path = Path(path) - if not path.is_file(): - return {} - return json.loads(path.read_text(encoding="utf-8")) - - -def save_mining_registry(path: Path, registry: dict[str, Any]) -> None: - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(registry, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") - - -def _trim_similarity(sim: dict[str, Any] | None, top_k: int = 3) -> dict[str, Any] | None: - if not isinstance(sim, dict) or not sim: - return None - out = dict(sim) - nb = out.get("top_neighbors") - if isinstance(nb, list) and top_k > 0: - out["top_neighbors"] = nb[:top_k] 
- return out - - -def upsert_mining_registry( - registry_path: Path, - *, - factor_id: str, - name: str, - expr: str, - expr_dir: Path, - repo_root: Path, - policy: IngestPolicy, - metrics: dict[str, Any] | None, - similarity: dict[str, Any] | None, - comment: str = "", - ingest_status: str = "stored", - source: str = "submit", - merge: bool = True, -) -> tuple[str, str]: - """写入或合并一条 registry 记录;返回 (registry_path, dsl_path)。""" - registry_path = Path(registry_path).expanduser().resolve() - expr_dir = Path(expr_dir).expanduser().resolve() - repo_root = Path(repo_root).resolve() - expr_dir.mkdir(parents=True, exist_ok=True) - - dsl_path = expr_dir / f"{factor_id}.dsl" - dsl_path.write_text(expr.strip() + "\n", encoding="utf-8") - rel_expr = dsl_path.relative_to(repo_root).as_posix() - - registry = load_mining_registry(registry_path) if merge else {} - prev = registry.get(factor_id, {}) if merge else {} - - entry: dict[str, Any] = { - "name": name, - "comment": comment or prev.get("comment") or name, - "expression_file": rel_expr, - "ingest_config": policy.ingest_config_dict(), - "ingested_at": datetime.now(timezone.utc).isoformat(), - "ingest_metrics": metrics, - "ingest_status": ingest_status, - } - sim = _trim_similarity(similarity) - if sim: - entry["similarity"] = sim - elif prev.get("similarity"): - entry["similarity"] = prev["similarity"] - if source == "submit" and "source_runs" not in prev: - entry["source"] = "submit" - for key in ("source_runs", "mining_metrics"): - if key in prev: - entry[key] = prev[key] - if prev.get("source") == "submit" and source != "submit": - entry["comment"] = prev.get("comment") or entry["comment"] - if "source" in prev: - entry["source"] = prev["source"] - - registry[factor_id] = entry - save_mining_registry(registry_path, registry) - return str(registry_path), str(dsl_path) - - -def sync_registry_from_zoo( - registry_path: Path, - zoo: FactorZoo, - *, - expr_dir: Path, - repo_root: Path, - policy: IngestPolicy, -) -> dict[str, 
Any]: - """将 zoo 中已有因子同步进 registry(补缺 + 刷新 ingest_metrics / similarity)。""" - registry = load_mining_registry(registry_path) - zoo_ids = set(zoo.catalog.list_factor_ids()) - added: list[str] = [] - refreshed: list[str] = [] - sim_mat = SimilarityMatrix(zoo.paths, zoo.manifest.max_factors) if zoo_ids else None - for fid in sorted(zoo_ids): - meta = zoo.catalog.get(fid) - if meta is None: - continue - prev = registry.get(fid, {}) - metrics = (meta.extra or {}).get("metrics") - if fid not in registry: - added.append(fid) - elif metrics and prev.get("ingest_metrics") != metrics: - refreshed.append(fid) - sim_info = None - if sim_mat is not None: - values = zoo.read_factor(fid) - report = sim_mat.cross_sectional_neighbor_report( - zoo, - values, - exclude_factor_id=fid, - top_k=policy.similar_top_k, - ) - sim_info = { - "col_idx": meta.col_idx, - "n_factors": zoo.n_factors, - "kind": report.get("kind"), - "max_abs_corr": report.get("max_abs_corr"), - "top_neighbors": report.get("top_neighbors"), - } - upsert_mining_registry( - registry_path, - factor_id=fid, - name=meta.name, - expr=meta.expr, - expr_dir=expr_dir, - repo_root=repo_root, - policy=policy, - metrics=metrics, - similarity=sim_info, - comment=str(prev.get("comment") or meta.name), - ingest_status=str(prev.get("ingest_status") or "stored"), - source=str(prev.get("source") or "zoo_sync"), - merge=True, - ) - registry_after = load_mining_registry(registry_path) - orphan = sorted(set(registry_after.keys()) - zoo_ids) - return { - "added": added, - "refreshed": refreshed, - "orphan_in_registry": orphan, - "n_registry": len(registry_after), - "n_zoo": len(zoo_ids), - } diff --git a/seekalpha/factor/mining/remove.py b/seekalpha/factor/mining/remove.py deleted file mode 100644 index bb1c8e7a..00000000 --- a/seekalpha/factor/mining/remove.py +++ /dev/null @@ -1,120 +0,0 @@ -"""因子交付物删除:factorzoo + registry + dsl 同步清理。""" - -from __future__ import annotations - -from pathlib import Path -from typing import Any - -from 
seekalpha.factor.zoo import FactorZoo -from seekalpha.factor.mining.registry_io import load_mining_registry, save_mining_registry - - -def _resolve_expr_path( - factor_id: str, - *, - registry_entry: dict[str, Any] | None, - expr_dir: Path, - repo_root: Path, -) -> Path | None: - if registry_entry: - rel = registry_entry.get("expression_file") - if rel: - candidate = (repo_root / str(rel)).resolve() - if candidate.is_file(): - return candidate - default = (expr_dir / f"{factor_id}.dsl").resolve() - return default if default.is_file() else None - - -def delete_factor_delivery( - factor_id: str, - *, - zoo: FactorZoo | None = None, - registry_path: Path | None = None, - expr_dir: Path | None = None, - repo_root: Path | None = None, - skip_zoo: bool = False, - skip_registry: bool = False, - skip_dsl: bool = False, -) -> dict[str, Any]: - """删除因子及其交付物;至少一项存在方可成功。""" - fid = str(factor_id).strip() - if not fid: - raise ValueError("factor_id 不能为空") - - reg_path = Path(registry_path).expanduser().resolve() if registry_path else None - expr_root = Path(expr_dir).expanduser().resolve() if expr_dir else None - root = Path(repo_root).expanduser().resolve() if repo_root else Path.cwd() - - registry = load_mining_registry(reg_path) if reg_path and not skip_registry else {} - entry = registry.get(fid) - - in_zoo = zoo is not None and not skip_zoo and zoo.catalog.get(fid) is not None - in_registry = bool(entry) and not skip_registry - dsl_path = None - if not skip_dsl and expr_root is not None: - dsl_path = _resolve_expr_path(fid, registry_entry=entry, expr_dir=expr_root, repo_root=root) - has_dsl = dsl_path is not None and dsl_path.is_file() - - if not in_zoo and not in_registry and not has_dsl: - raise KeyError(f"因子不存在于 zoo/registry/dsl: {fid}") - - result: dict[str, Any] = { - "factor_id": fid, - "zoo_deleted": False, - "registry_removed": False, - "dsl_deleted": False, - "dsl_path": None, - } - - if in_zoo: - assert zoo is not None - zoo.delete_factor(fid) - 
result["zoo_deleted"] = True - - if in_registry: - assert reg_path is not None - del registry[fid] - save_mining_registry(reg_path, registry) - result["registry_removed"] = True - - if has_dsl and dsl_path is not None: - dsl_path.unlink() - result["dsl_deleted"] = True - result["dsl_path"] = str(dsl_path) - - return result - - -def delete_factors_delivery( - factor_ids: list[str], - *, - zoo: FactorZoo | None = None, - registry_path: Path | None = None, - expr_dir: Path | None = None, - repo_root: Path | None = None, - skip_zoo: bool = False, - skip_registry: bool = False, - skip_dsl: bool = False, - ignore_missing: bool = True, -) -> list[dict[str, Any]]: - """批量删除;ignore_missing=True 时跳过不存在的 factor_id。""" - out: list[dict[str, Any]] = [] - for fid in factor_ids: - try: - out.append( - delete_factor_delivery( - fid, - zoo=zoo, - registry_path=registry_path, - expr_dir=expr_dir, - repo_root=repo_root, - skip_zoo=skip_zoo, - skip_registry=skip_registry, - skip_dsl=skip_dsl, - ) - ) - except KeyError: - if not ignore_missing: - raise - return out diff --git a/seekalpha/factor/mining/response.py b/seekalpha/factor/mining/response.py deleted file mode 100644 index 3ec14bbd..00000000 --- a/seekalpha/factor/mining/response.py +++ /dev/null @@ -1,155 +0,0 @@ -"""将 split 评估原始结果格式化为 LLM tool 兼容 JSON。""" - -from __future__ import annotations - -from typing import Any - -import numpy as np - - -def _round_float4(value: object) -> float | None: - if value is None: - return None - try: - if isinstance(value, (float, np.floating)) and not np.isfinite(value): - return None - except TypeError: - pass - try: - x = float(value) - except (TypeError, ValueError): - return None - return float(round(x, 4)) if np.isfinite(x) else None - - -def _format_decile_mean_label(rows: list[dict[str, Any]] | None) -> list[dict[str, Any]]: - if not rows: - return [] - out: list[dict[str, Any]] = [] - for row in rows: - ml = row.get("mean_label") - out.append( - { - "decile": int(row["decile"]), - 
"mean_label": _round_float4(ml) if ml is not None else None, - } - ) - return out - - -def _format_mls_fmb(raw: dict[str, Any] | None) -> dict[str, Any]: - if not raw: - return {} - float_keys = ( - "mean_rho", - "mean_ls", - "ir_ls", - "ir_ls_annual", - "mls", - "nw_t_rho", - "nw_t_ls", - "nw_se_rho", - "nw_se_ls", - "annualization_factor", - ) - int_keys = ("n_days_rho", "n_days_ls", "nw_lags", "n_deciles", "n_deciles_requested", "min_stocks", "min_stocks_requested") - out: dict[str, Any] = {} - for k in float_keys: - if k in raw: - out[k] = _round_float4(raw.get(k)) - for k in int_keys: - if k in raw: - v = raw.get(k) - out[k] = int(v) if v is not None else None - if raw.get("note"): - out["note"] = raw["note"] - return out - - -def _format_summary(summary: dict[str, Any]) -> dict[str, Any]: - return { - "ic": _round_float4(summary.get("ic")), - "icir": _round_float4(summary.get("icir")), - "rank_ic": _round_float4(summary.get("rank_ic")), - "n_days": int(summary["n_days"]) if summary.get("n_days") is not None else None, - "n_instruments": int(summary["n_instruments"]) if summary.get("n_instruments") is not None else None, - "factor_coverage": _round_float4(summary.get("factor_coverage")), - "factor_skewness": _round_float4(summary.get("factor_skewness")), - "factor_kurtosis": _round_float4(summary.get("factor_kurtosis")), - "cs_pearson_autocorr": _round_float4(summary.get("cs_pearson_autocorr")), - "decile_mean_label": _format_decile_mean_label(summary.get("decile_mean_label")), - "mls_fmb": _format_mls_fmb(summary.get("mls_fmb")), - } - - -def monthly_corr_robustness_json(raw: dict[str, Any]) -> dict[str, Any]: - if not raw: - return {} - out: dict[str, Any] = {} - for k, v in raw.items(): - if k == "note": - out[k] = v - elif isinstance(v, (float, np.floating)) and np.isfinite(v): - out[k] = _round_float4(v) - elif isinstance(v, (int, np.integer)): - out[k] = int(v) - else: - out[k] = v - return out - - -def format_eval_response( - raw: dict[str, Any], - *, - 
expected_sign: int | None = None, -) -> dict[str, Any]: - if not raw.get("ok", False): - return { - "ok": False, - "error": raw.get("error", "unknown"), - "error_type": raw.get("error_type", "Error"), - "split": raw.get("split"), - "date_range": raw.get("date_range"), - } - - summ = _format_summary(raw.get("summary") or {}) - rob_raw = raw.get("monthly_corr_robustness") or {} - - out: dict[str, Any] = { - "ok": True, - "split": raw["split"], - "date_range": raw["date_range"], - "eval_wall_seconds": _round_float4(raw.get("eval_wall_seconds")), - "summary": summ, - "label_col": raw.get("label_col"), - "bar_interval": raw.get("bar_interval", "1d"), - "label_quantile_buckets": [ - {k: v for k, v in row.items() if k != "n"} for row in raw.get("label_quantile_buckets", []) - ], - "label_quantile_n": raw.get("label_quantile_n"), - "monthly_corr_robustness": monthly_corr_robustness_json(rob_raw), - "timing_ms": {k: round(v, 2) for k, v in raw.get("timing_ms", {}).items()}, - } - - n_q = int(raw.get("label_quantile_n") or 0) - if n_q >= 2: - out["label_quantile_note"] = "D1/Q1=因子最低组; D10/Q10=因子最高组。summary.decile_mean_label 为固定十分组。" - - if raw.get("include_detail_tables") or raw.get("by_month") is not None: - if "by_month" in raw: - out["by_month"] = raw["by_month"] - if "by_symbol" in raw: - out["by_symbol"] = raw["by_symbol"] - - if raw.get("split") == "val" and expected_sign is not None: - ic_val = summ.get("ic") - matches = None - if ic_val is not None and np.isfinite(ic_val): - matches = (ic_val > 0 and expected_sign == 1) or (ic_val < 0 and expected_sign == -1) - out["expected_sign"] = int(expected_sign) - out["sign_check"] = { - "expected_sign": int(expected_sign), - "val_ic": ic_val, - "matches_expected_sign": matches, - } - return out diff --git a/seekalpha/factor/mining/run.py b/seekalpha/factor/mining/run.py deleted file mode 100644 index d9819e48..00000000 --- a/seekalpha/factor/mining/run.py +++ /dev/null @@ -1,111 +0,0 @@ -"""因子挖掘编排入口:建会话 → 拼 prompt → 跑轨迹循环。""" 
- -from __future__ import annotations - -from datetime import datetime -from pathlib import Path -from typing import Any - -from seekalpha.factor.mining.env_settings import resolve_max_parallel_eval -from seekalpha.factor.mining.schemas import SessionCreateRequest -from seekalpha.factor.mining.service import StockEvalService -from seekalpha.factor.mining.config import MiningConfig -from seekalpha.factor.mining.console import ConsolePrinter -from seekalpha.factor.mining.loop import run_trajectory -from seekalpha.factor.mining.operators import list_operator_names -from seekalpha.factor.mining.prompts import build_system_prompt -from seekalpha.factor.mining.submit import FactorSubmitService, default_factorlib_path -from seekalpha.factor.mining.tools import FactorEvalTools - - -def _repo_root() -> Path: - return Path(__file__).resolve().parents[3] - - -def run_factor_mining( - config: MiningConfig, - user_message: str, - *, - client: Any, - log_dir: str | Path = "logs/factor_mining", - include_operator_catalog: bool = True, - extra_instructions: str = "", - extra_body: dict[str, Any] | None = None, - service: StockEvalService | None = None, - verbose: bool = False, - repo_root: Path | None = None, -) -> dict[str, Any]: - service = service or StockEvalService( - max_parallel_eval=resolve_max_parallel_eval(config.max_parallel_eval), - ) - root = repo_root or _repo_root() - ctx = config.eval - session_resp = service.create_session( - SessionCreateRequest( - panel_path=str(ctx.panel_path), - train_start=ctx.train_start, - train_end=ctx.train_end, - val_start=ctx.val_start, - val_end=ctx.val_end, - label_col=ctx.label_col, - include_fundamentals=ctx.include_fundamentals, - ) - ) - - submit_service: FactorSubmitService | None = None - if config.enable_submit: - lib_path = config.factorlib_path or default_factorlib_path(root) - submit_service = FactorSubmitService( - service, - factorlib_path=lib_path, - registry_path=root / config.registry_path, - expr_dir=root / 
config.expr_dir, - repo_root=root, - max_cs_corr=config.max_cs_corr, - similar_top_k=config.similar_top_k, - overwrite=config.ingest_overwrite, - auto_realign_panel=config.auto_realign_panel, - ) - - tools = FactorEvalTools(service, session_resp.session_id, submit_service=submit_service) - system_prompt = build_system_prompt( - include_operator_catalog=include_operator_catalog, - enable_submit=config.enable_submit, - extra_instructions=extra_instructions, - label_col=ctx.label_col, - include_fundamentals=ctx.include_fundamentals, - ) - - printer = ConsolePrinter() if verbose else None - if printer is not None: - printer.session_start(config.model, len(list_operator_names())) - - log_dir = Path(log_dir) - log_dir.mkdir(parents=True, exist_ok=True) - stamp = datetime.now().strftime("%Y%m%d_%H%M%S") - log_jsonl = log_dir / f"run_{stamp}.jsonl" - - messages = run_trajectory( - client=client, - model=config.model, - system_prompt=system_prompt, - user_message=user_message, - tools=tools, - log_jsonl=log_jsonl, - max_turns=config.max_turns, - max_tool_calls_per_round=config.max_tool_calls_per_round, - max_tool_workers=config.max_tool_workers, - min_tool_call_rounds_before_allow_stop=config.min_tool_call_rounds_before_allow_stop, - temperature=config.temperature, - max_tokens=config.max_tokens, - extra_body=extra_body, - printer=printer, - ) - - return { - "session_id": session_resp.session_id, - "n_messages": len(messages), - "log_jsonl": str(log_jsonl), - "messages_snapshot": str(log_jsonl.with_suffix(".messages.json")), - "summary": str(log_jsonl.with_suffix(".summary.json")), - } diff --git a/seekalpha/factor/mining/schemas.py b/seekalpha/factor/mining/schemas.py deleted file mode 100644 index 5c3748ce..00000000 --- a/seekalpha/factor/mining/schemas.py +++ /dev/null @@ -1,45 +0,0 @@ -"""挖掘评估 API 请求/响应 schema。""" - -from __future__ import annotations - -from dataclasses import dataclass - -from seekalpha.factor.types import DEFAULT_LABEL_COL - - -@dataclass -class 
SessionCreateRequest: - panel_path: str - train_start: str = "2019-01-01" - train_end: str = "2021-12-31" - val_start: str = "2022-01-01" - val_end: str = "2023-12-31" - label_col: str = DEFAULT_LABEL_COL - include_fundamentals: bool = True - - -@dataclass -class SessionCreateResponse: - session_id: str - panel_rows: int - load_ms: float - columns_sample: list[str] - - -@dataclass -class EvalTrainRequest: - session_id: str - multi_line_expr: str - factor_name: str = "expr" - include_detail_tables: bool = False - label_quantile_n: int = 10 - - -@dataclass -class EvalValRequest: - session_id: str - multi_line_expr: str - factor_name: str = "expr" - include_detail_tables: bool = False - label_quantile_n: int = 10 - expected_sign: int | None = None diff --git a/seekalpha/factor/mining/seed_factors.py b/seekalpha/factor/mining/seed_factors.py deleted file mode 100644 index 75e0e419..00000000 --- a/seekalpha/factor/mining/seed_factors.py +++ /dev/null @@ -1,59 +0,0 @@ -"""从 .dsl 文件加载初始种子因子,拼入 user 消息。""" - -from __future__ import annotations - -from pathlib import Path - - -def resolve_seed_factor_paths(paths: list[Path], repo_root: Path) -> list[Path]: - """解析并校验种子因子路径(相对路径相对 repo_root)。""" - resolved: list[Path] = [] - for raw in paths: - p = raw if raw.is_absolute() else repo_root / raw - if not p.is_file(): - raise FileNotFoundError(f"seed factor not found: {p}") - resolved.append(p.resolve()) - return resolved - - -def build_user_message_with_seed_factors( - user_message: str, - seed_paths: list[Path], - *, - repo_root: Path, -) -> str: - """将种子因子 DSL 注入 user 消息,供 LLM 在既有表达式基础上迭代优化。""" - if not seed_paths: - return user_message - - resolved = resolve_seed_factor_paths(seed_paths, repo_root) - blocks: list[str] = [ - "以下 `.dsl` 文件为**初始种子因子**,请作为本轮挖掘起点:", - "1. **首轮**先用 `eval_on_train_set` 评估各种子因子的 train 基准表现(`multi_line_expr` 与文件内容一致);", - "2. 再在其基础上迭代优化(调整结构、窗口、中性化、温莎化等),避免仅做同质微调;", - "3. 
达标后 `submit_factor` 时使用**新的** `factor_name`(勿覆盖种子因子 id)。", - "", - "## 初始种子因子", - ] - - for path in resolved: - expr = path.read_text(encoding="utf-8").strip() - factor_name = path.stem - try: - rel = path.relative_to(repo_root.resolve()) - source = str(rel).replace("\\", "/") - except ValueError: - source = str(path) - blocks.extend( - [ - "", - f"### {factor_name}(`{source}`)", - "", - "```text", - expr, - "```", - ] - ) - - blocks.extend(["", "---", "", user_message.strip()]) - return "\n".join(blocks) diff --git a/seekalpha/factor/mining/service.py b/seekalpha/factor/mining/service.py deleted file mode 100644 index 1198fc6c..00000000 --- a/seekalpha/factor/mining/service.py +++ /dev/null @@ -1,99 +0,0 @@ -"""股票因子挖掘评估服务:会话管理与 train/val 评估。""" - -from __future__ import annotations - -import threading -from pathlib import Path -from typing import Any - -from seekalpha.factor.mining.context import StockEvalContext -from seekalpha.factor.mining.env_settings import resolve_max_parallel_eval -from seekalpha.factor.mining.response import format_eval_response -from seekalpha.factor.mining.schemas import ( - EvalTrainRequest, - EvalValRequest, - SessionCreateRequest, - SessionCreateResponse, -) -from seekalpha.factor.mining.session import SessionStore -from seekalpha.factor.eval import evaluate_factor_on_split - - -class StockEvalService: - """进程内评估服务,供 mining FactorEvalTools 调用。""" - - def __init__( - self, - *, - sessions: SessionStore | None = None, - max_parallel_eval: int | None = None, - ) -> None: - self.sessions = sessions or SessionStore() - self.max_parallel_eval = resolve_max_parallel_eval(max_parallel_eval) - self._eval_semaphore = threading.Semaphore(self.max_parallel_eval) - - def create_session(self, req: SessionCreateRequest) -> SessionCreateResponse: - ctx = StockEvalContext( - panel_path=Path(req.panel_path).expanduser().resolve(), - train_start=req.train_start, - train_end=req.train_end, - val_start=req.val_start, - val_end=req.val_end, - 
label_col=req.label_col, - include_fundamentals=req.include_fundamentals, - ) - session = self.sessions.create(ctx) - cols = list(session.panel.columns[:12]) - return SessionCreateResponse( - session_id=session.session_id, - panel_rows=len(session.panel), - load_ms=float(session.meta.get("load_ms", 0)), - columns_sample=cols, - ) - - def _run_one( - self, - session_id: str, - *, - split: str, - multi_line_expr: str, - factor_name: str, - include_detail_tables: bool, - label_quantile_n: int, - expected_sign: int | None = None, - ) -> dict[str, Any]: - session = self.sessions.get(session_id) - raw = evaluate_factor_on_split( - session, - split=split, - multi_line_expr=multi_line_expr, - factor_name=factor_name, - include_detail_tables=include_detail_tables, - label_quantile_n=label_quantile_n, - ) - if include_detail_tables and raw.get("ok"): - raw["include_detail_tables"] = True - return format_eval_response(raw, expected_sign=expected_sign) - - def eval_train(self, req: EvalTrainRequest) -> dict[str, Any]: - with self._eval_semaphore: - return self._run_one( - req.session_id, - split="train", - multi_line_expr=req.multi_line_expr, - factor_name=req.factor_name, - include_detail_tables=req.include_detail_tables, - label_quantile_n=req.label_quantile_n, - ) - - def eval_val(self, req: EvalValRequest) -> dict[str, Any]: - with self._eval_semaphore: - return self._run_one( - req.session_id, - split="val", - multi_line_expr=req.multi_line_expr, - factor_name=req.factor_name, - include_detail_tables=req.include_detail_tables, - label_quantile_n=req.label_quantile_n, - expected_sign=req.expected_sign, - ) diff --git a/seekalpha/factor/mining/session.py b/seekalpha/factor/mining/session.py deleted file mode 100644 index 786eda36..00000000 --- a/seekalpha/factor/mining/session.py +++ /dev/null @@ -1,78 +0,0 @@ -"""StockEvalSession:会话级 panel 驻内存与 split 懒缓存。""" - -from __future__ import annotations - -import threading -import time -import uuid -from dataclasses import 
dataclass, field -from typing import Any - -import pandas as pd - -from seekalpha.data.panel import load_panel, slice_panel -from seekalpha.factor.mining.context import StockEvalContext - - -@dataclass -class StockEvalSession: - session_id: str - ctx: StockEvalContext - panel: pd.DataFrame - meta: dict[str, Any] = field(default_factory=dict) - created_at: float = field(default_factory=time.time) - _split_cache: dict[str, pd.DataFrame] = field(default_factory=dict, repr=False) - _split_lock: threading.Lock = field(default_factory=threading.Lock, repr=False) - - def get_split_panel(self, split: str) -> tuple[pd.DataFrame, str, str]: - """返回 (panel_slice, start, end)。""" - start, end = self.ctx.split_range(split) - with self._split_lock: - cached = self._split_cache.get(split) - if cached is not None: - return cached, start, end - sliced = slice_panel(self.panel, start=start, end=end) - self._split_cache[split] = sliced - return sliced, start, end - - -class SessionStore: - def __init__(self) -> None: - self._lock = threading.RLock() - self._sessions: dict[str, StockEvalSession] = {} - - def create(self, ctx: StockEvalContext) -> StockEvalSession: - t0 = time.perf_counter() - cov_start, cov_end = ctx.coverage_range() - panel = load_panel(ctx.panel_path) - dropped_funda = 0 - if not ctx.include_fundamentals: - funda_cols = [c for c in panel.columns if str(c).startswith("funda_")] - if funda_cols: - panel = panel.drop(columns=funda_cols) - dropped_funda = len(funda_cols) - panel = slice_panel(panel, start=cov_start, end=cov_end) - load_ms = (time.perf_counter() - t0) * 1000 - session_id = uuid.uuid4().hex - session = StockEvalSession( - session_id=session_id, - ctx=ctx, - panel=panel, - meta={ - "load_ms": load_ms, - "coverage_start": cov_start, - "coverage_end": cov_end, - "include_fundamentals": ctx.include_fundamentals, - "dropped_fundamental_cols": dropped_funda, - }, - ) - with self._lock: - self._sessions[session_id] = session - return session - - def get(self, 
session_id: str) -> StockEvalSession: - with self._lock: - session = self._sessions.get(session_id) - if session is None: - raise KeyError(f"未知 session_id: {session_id!r}") - return session diff --git a/seekalpha/factor/mining/submit.py b/seekalpha/factor/mining/submit.py deleted file mode 100644 index fb97d664..00000000 --- a/seekalpha/factor/mining/submit.py +++ /dev/null @@ -1,256 +0,0 @@ -"""挖掘会话内因子交付入库。""" - -from __future__ import annotations - -import re -from pathlib import Path -from typing import Any - -from seekalpha.factor.types import IngestPolicy -from seekalpha.factor.ingest import ingest_factor, load_panel_for_zoo -from seekalpha.factor.types import IngestResult -from seekalpha.factor.mining.service import StockEvalService -from seekalpha.factor.zoo import DEFAULT_FACTORLIB_ROOT, FactorZoo -from seekalpha.factor.zoo.realign import panel_paths_match, realign_factorlib_to_panel -from seekalpha.factor.mining.registry_io import upsert_mining_registry - - -def slug_factor_id(name: str) -> str: - s = re.sub(r"[^a-zA-Z0-9_]+", "_", str(name).strip().lower()) - return re.sub(r"_+", "_", s).strip("_") or "factor" - - -CS_PEARSON_AUTOCORR_MIN = 0.6 - - -def check_delivery_metrics(metrics: dict[str, Any]) -> tuple[bool, list[str]]: - """全区间入库指标是否达到保留级候选门槛。""" - reasons: list[str] = [] - ic = metrics.get("ic") - if ic is None or abs(float(ic)) < 0.005: - reasons.append("ic") - icir = metrics.get("icir") - if icir is None or abs(float(icir)) <= 0.1: - reasons.append("icir") - rank_ic = metrics.get("rank_ic") - if rank_ic is None or abs(float(rank_ic)) < 0.005: - reasons.append("rank_ic") - cov = metrics.get("coverage") - if cov is None or float(cov) <= 0.9: - reasons.append("coverage") - cs_auto = metrics.get("cs_pearson_autocorr") - if cs_auto is None or float(cs_auto) <= CS_PEARSON_AUTOCORR_MIN: - reasons.append("cs_pearson_autocorr") - return len(reasons) == 0, reasons - - -class FactorSubmitService: - """将保留级候选提交至 factorzoo(train-start ~ val-end 
求值、指标、截面去重)。""" - - def __init__( - self, - service: StockEvalService, - *, - factorlib_path: Path, - registry_path: Path, - expr_dir: Path, - repo_root: Path, - max_cs_corr: float = 0.8, - similar_top_k: int = 3, - overwrite: bool = False, - auto_realign_panel: bool = True, - ) -> None: - self.service = service - self.factorlib_path = Path(factorlib_path).expanduser().resolve() - self.registry_path = Path(registry_path).expanduser().resolve() - self.expr_dir = Path(expr_dir).expanduser().resolve() - self.repo_root = Path(repo_root).resolve() - self.max_cs_corr = max_cs_corr - self.similar_top_k = similar_top_k - self.overwrite = overwrite - self.auto_realign_panel = auto_realign_panel - - def submit( - self, - session_id: str, - *, - multi_line_expr: str, - factor_name: str, - comment: str, - ) -> dict[str, Any]: - expr = multi_line_expr.strip() - if not expr: - return { - "ok": False, - "stored": False, - "error": "multi_line_expr_required_non_empty_string", - "error_type": "ToolArgumentsError", - } - if not str(comment).strip(): - return { - "ok": False, - "stored": False, - "error": "comment_required_non_empty_string", - "error_type": "ToolArgumentsError", - } - - factor_id = slug_factor_id(factor_name) - name = str(factor_name).strip() or factor_id - - try: - session = self.service.sessions.get(session_id) - except KeyError: - return { - "ok": False, - "stored": False, - "error": f"session_not_found: {session_id}", - "error_type": "SessionError", - } - - ctx = session.ctx - try: - zoo = FactorZoo.open(self.factorlib_path) - except FileNotFoundError as e: - return { - "ok": False, - "stored": False, - "error": f"factorlib_not_initialized: {self.factorlib_path}", - "error_type": "FactorLibError", - "detail": str(e), - } - - try: - panel = load_panel_for_zoo(zoo, panel_path=ctx.panel_path) - except ValueError as e: - if not self.auto_realign_panel: - return { - "ok": False, - "stored": False, - "error": str(e), - "error_type": "PanelMismatchError", - } - 
zoo_panel = Path(zoo.manifest.panel_path) - if not panel_paths_match(ctx.panel_path, zoo_panel): - return { - "ok": False, - "stored": False, - "error": ( - f"{e}; panel 路径不一致: session={ctx.panel_path} zoo={zoo_panel}," - "请用 --panel 与因子库相同文件,或重新 init_factorlib" - ), - "error_type": "PanelMismatchError", - } - try: - from seekalpha.data.panel import load_panel as _load_panel - - full_panel = _load_panel(ctx.panel_path).sort_index() - realign_info = realign_factorlib_to_panel( - self.factorlib_path, - panel=full_panel, - panel_path=ctx.panel_path, - ) - zoo = FactorZoo.open(self.factorlib_path) - panel = full_panel - except Exception as exc: # noqa: BLE001 - return { - "ok": False, - "stored": False, - "error": f"panel_realign_failed: {exc}", - "error_type": "PanelRealignError", - } - else: - realign_info = None - - if realign_info is None and len(panel) != zoo.manifest.n_rows: - return { - "ok": False, - "stored": False, - "error": ( - f"panel 行数 {len(panel)} != 库 n_rows {zoo.manifest.n_rows};" - "请用相同 panel 初始化库,或仅用于调试切片" - ), - "error_type": "PanelMismatchError", - } - - result = ingest_factor( - zoo, - factor_id=factor_id, - name=name, - expr=expr, - panel=panel, - policy=IngestPolicy.from_context(ctx, max_cs_corr=self.max_cs_corr, similar_top_k=self.similar_top_k), - overwrite=self.overwrite, - ) - - delivery_ok, delivery_reasons = check_delivery_metrics(result.metrics) - rolled_back = False - if result.stored and not delivery_ok: - zoo.delete_factor(factor_id) - rolled_back = True - result = IngestResult( - factor_id=result.factor_id, - col_idx=None, - stored=False, - skipped_reason=f"delivery_check_failed:{','.join(delivery_reasons)}", - metrics=result.metrics, - similarity=result.similarity, - extra=result.extra, - ) - - payload: dict[str, Any] = { - "ok": result.stored, - "stored": result.stored, - "factor_id": factor_id, - "factor_name": name, - "comment": comment.strip(), - "eval_range": {"start": ctx.train_start, "end": ctx.val_end}, - "metrics": 
result.metrics, - "delivery_check": {"passed": delivery_ok, "fail_reasons": delivery_reasons}, - "similarity": result.similarity, - "skipped_reason": result.skipped_reason, - "rolled_back": rolled_back, - } - if realign_info and realign_info.get("realigned"): - payload["panel_realigned"] = realign_info - - if result.stored: - policy = IngestPolicy.from_context(ctx, max_cs_corr=self.max_cs_corr, similar_top_k=self.similar_top_k) - reg_path, dsl_path = upsert_mining_registry( - self.registry_path, - factor_id=factor_id, - name=name, - comment=comment.strip(), - expr=expr, - expr_dir=self.expr_dir, - repo_root=self.repo_root, - policy=policy, - metrics=result.metrics, - similarity=result.similarity, - ingest_status="stored", - source="submit", - ) - payload["registry_path"] = reg_path - payload["dsl_path"] = dsl_path - payload["factorlib_path"] = str(self.factorlib_path) - elif result.skipped_reason: - payload["ok"] = False - if result.skipped_reason.startswith("cs_corr"): - payload["error_type"] = "DuplicateFactorError" - elif result.skipped_reason == "already_exists": - payload["error_type"] = "AlreadyExistsError" - elif result.skipped_reason.startswith("delivery_check_failed"): - payload["error_type"] = "DeliveryCheckError" - else: - payload["error_type"] = "IngestSkipped" - payload["error"] = result.skipped_reason - else: - payload["ok"] = False - payload["error_type"] = "IngestError" - payload["error"] = "ingest_failed" - - return payload - - -def default_factorlib_path(repo_root: Path) -> Path: - _ = repo_root # 兼容旧签名;SeekAlpha 使用固定 artifacts 路径 - return DEFAULT_FACTORLIB_ROOT diff --git a/seekalpha/factor/mining/tools.py b/seekalpha/factor/mining/tools.py deleted file mode 100644 index c5cc8a12..00000000 --- a/seekalpha/factor/mining/tools.py +++ /dev/null @@ -1,413 +0,0 @@ -"""LLM 评估与交付工具:train/val 评估 + submit_factor 入库。""" - - - -from __future__ import annotations - - - -import json - -from pathlib import Path - -from typing import Any - - - -from 
seekalpha.factor.mining.schemas import EvalTrainRequest, EvalValRequest - -from seekalpha.factor.mining.service import StockEvalService - -from seekalpha.factor.mining.submit import FactorSubmitService - - - -_EVAL_PARAMETERS: dict[str, Any] = { - - "type": "object", - - "properties": { - - "multi_line_expr": { - - "type": "string", - - "description": "多行因子 DSL:可含赋值行,最后一行为因子值;列用 $列名 引用,算子大写。", - - }, - - "factor_name": {"type": "string", "description": "因子列逻辑名,默认 expr。"}, - - "include_detail_tables": { - - "type": "boolean", - - "description": "true 时额外返回 by_month / by_symbol 明细;默认 false 仅返回 summary。", - - "default": False, - - }, - - "label_quantile_n": { - - "type": "integer", - - "description": "按因子值等频分位分桶,输出每桶 label 均值;0 表示不计算。默认 10。", - - "default": 10, - - }, - - }, - - "required": ["multi_line_expr"], - - "additionalProperties": False, - -} - - - -_VAL_PARAMETERS: dict[str, Any] = { - - "type": "object", - - "properties": { - - **_EVAL_PARAMETERS["properties"], - - "expected_sign": { - - "type": "integer", - - "description": "train summary.ic 的符号(1=正、-1=负);传入后返回 sign_check。", - - "enum": [1, -1], - - }, - - }, - - "required": ["multi_line_expr"], - - "additionalProperties": False, - -} - - - -_SUBMIT_PARAMETERS: dict[str, Any] = { - - "type": "object", - - "properties": { - - "multi_line_expr": { - - "type": "string", - - "description": "与 train/val 评估一致的多行因子 DSL。", - - }, - - "factor_name": { - - "type": "string", - - "description": "因子唯一逻辑名(蛇形英文),将写入因子库 factor_id。", - - }, - - "comment": { - - "type": "string", - - "description": "因子含义说明:经济直觉、关键算子与窗口、预期 IC 方向等,供后续查阅。", - - }, - - }, - - "required": ["multi_line_expr", "factor_name", "comment"], - - "additionalProperties": False, - -} - - - -TOOL_NAMES = ("eval_on_train_set", "eval_on_val_set", "submit_factor") - - - - - -class FactorEvalTools: - - """持有一个已建会话,向 LLM 暴露 eval_on_train_set / eval_on_val_set / submit_factor。""" - - - - def __init__( - - self, - - service: StockEvalService, - - session_id: str, 
- - *, - - submit_service: FactorSubmitService | None = None, - - ) -> None: - - self.service = service - - self.session_id = session_id - - self.submit_service = submit_service - - - - def schemas(self) -> list[dict[str, Any]]: - - out = [ - - { - - "type": "function", - - "function": { - - "name": "eval_on_train_set", - - "description": "训练集评估多行因子表达式,返回 summary、monthly_corr_robustness、label_quantile_buckets。", - - "parameters": _EVAL_PARAMETERS, - - }, - - }, - - { - - "type": "function", - - "function": { - - "name": "eval_on_val_set", - - "description": "验证集评估;须传 expected_sign(train IC 符号 1/-1),结果含 sign_check。", - - "parameters": _VAL_PARAMETERS, - - }, - - }, - - ] - - if self.submit_service is not None: - - out.append( - - { - - "type": "function", - - "function": { - - "name": "submit_factor", - - "description": ( - - "【正式交付】将保留级候选入库 factorzoo:train-start~val-end 全区间求值," - - "检查 IC/ICIR/coverage/cs_pearson_autocorr(>0.6)与截面去重;成功后 stored=true。" - - "train/val 达标后必须调用本工具完成交付,不可仅文字总结。" - - "查重失败时返回 top_neighbors 含相似因子 expr。" - - ), - - "parameters": _SUBMIT_PARAMETERS, - - }, - - } - - ) - - return out - - - - def dispatch(self, name: str, arguments: Any) -> dict[str, Any]: - - if isinstance(arguments, str): - - try: - - arguments = json.loads(arguments) if arguments.strip() else {} - - except json.JSONDecodeError as e: - - return {"ok": False, "error": f"invalid_tool_arguments_json: {e}", "error_type": "JSONDecodeError"} - - if not isinstance(arguments, dict): - - return {"ok": False, "error": "tool_arguments_must_be_object", "error_type": "ToolArgumentsError"} - - - - if name == "submit_factor": - - return self._dispatch_submit(arguments) - - - - expr = arguments.get("multi_line_expr") - - if not isinstance(expr, str) or not expr.strip(): - - return {"ok": False, "error": "multi_line_expr_required_non_empty_string", "error_type": "ToolArgumentsError"} - - - - factor_name = arguments.get("factor_name") or "expr" - - include_detail = 
bool(arguments.get("include_detail_tables", False)) - - label_quantile_n = arguments.get("label_quantile_n", 10) - - if label_quantile_n is None: - - label_quantile_n = 10 - - - - if name == "eval_on_train_set": - - return self.service.eval_train( - - EvalTrainRequest( - - session_id=self.session_id, - - multi_line_expr=expr, - - factor_name=factor_name, - - include_detail_tables=include_detail, - - label_quantile_n=int(label_quantile_n), - - ) - - ) - - if name == "eval_on_val_set": - - expected_sign = arguments.get("expected_sign") - - if expected_sign not in (None, 1, -1): - - return {"ok": False, "error": "expected_sign_must_be_1_or_-1", "error_type": "ToolArgumentsError"} - - return self.service.eval_val( - - EvalValRequest( - - session_id=self.session_id, - - multi_line_expr=expr, - - factor_name=factor_name, - - include_detail_tables=include_detail, - - label_quantile_n=int(label_quantile_n), - - expected_sign=expected_sign, - - ) - - ) - - return {"ok": False, "error": f"unknown_tool: {name}", "error_type": "UnknownTool"} - - - - def _dispatch_submit(self, arguments: dict[str, Any]) -> dict[str, Any]: - - if self.submit_service is None: - - return { - - "ok": False, - - "stored": False, - - "error": "submit_factor_disabled", - - "error_type": "SubmitDisabled", - - } - - expr = arguments.get("multi_line_expr") - - factor_name = arguments.get("factor_name") - - comment = arguments.get("comment") - - if not isinstance(factor_name, str) or not factor_name.strip(): - - return { - - "ok": False, - - "stored": False, - - "error": "factor_name_required_non_empty_string", - - "error_type": "ToolArgumentsError", - - } - - if not isinstance(comment, str) or not comment.strip(): - - return { - - "ok": False, - - "stored": False, - - "error": "comment_required_non_empty_string", - - "error_type": "ToolArgumentsError", - - } - - return self.submit_service.submit( - - self.session_id, - - multi_line_expr=str(expr or ""), - - factor_name=factor_name.strip(), - - 
comment=comment.strip(), - - ) - - - - @staticmethod - - def result_to_content(result: dict[str, Any]) -> str: - - return json.dumps(result, ensure_ascii=False, default=str) - - diff --git a/seekalpha/factor/registry.py b/seekalpha/factor/registry.py deleted file mode 100644 index 6af0d355..00000000 --- a/seekalpha/factor/registry.py +++ /dev/null @@ -1,46 +0,0 @@ -"""因子 registry 加载。""" - -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any - - -def _find_repo_root(start: Path) -> Path: - p = start.resolve() - for _ in range(10): - if (p / "pyproject.toml").is_file(): - return p - if p.parent == p: - break - p = p.parent - return start.resolve() - - -def load_registry(path: Path, *, repo_root: Path | None = None) -> dict[str, dict[str, Any]]: - """读取 registry.json,解析 expression / expression_file。""" - reg_path = Path(path).expanduser().resolve() - root = repo_root or _find_repo_root(reg_path.parent) - raw = json.loads(reg_path.read_text(encoding="utf-8")) - out: dict[str, dict[str, Any]] = {} - for factor_id, spec in raw.items(): - if not isinstance(spec, dict): - raise ValueError(f"registry[{factor_id!r}] 须为 object") - entry = dict(spec) - if "expression" not in entry and "expression_file" not in entry: - raise ValueError(f"registry[{factor_id!r}] 缺少 expression 或 expression_file") - if "expression_file" in entry and "expression" not in entry: - expr_path = Path(entry["expression_file"]) - if not expr_path.is_absolute(): - expr_path = root / expr_path - entry["expression"] = expr_path.read_text(encoding="utf-8").strip() - entry.setdefault("name", factor_id) - out[str(factor_id)] = entry - return out - - -def list_factor_entries(path: Path, *, repo_root: Path | None = None) -> list[tuple[str, str, str]]: - """返回 (factor_id, name, expression) 列表。""" - reg = load_registry(path, repo_root=repo_root) - return [(fid, str(v["name"]), str(v["expression"])) for fid, v in reg.items()] diff --git a/seekalpha/factor/report.py 
b/seekalpha/factor/report.py deleted file mode 100644 index c6142b79..00000000 --- a/seekalpha/factor/report.py +++ /dev/null @@ -1,133 +0,0 @@ -"""因子评估报告文本格式化(CLI / 调试输出)。""" - -from __future__ import annotations - -import json -import math -from typing import Any, TextIO - - -def _fmt_num(value: object, *, digits: int = 4, pct: bool = False) -> str: - if value is None: - return "—" - try: - x = float(value) - except (TypeError, ValueError): - return "—" - if not math.isfinite(x): - return "—" - if pct: - return f"{x * 100:.{max(digits - 2, 1)}f}%" - return f"{x:.{digits}f}" - - -def _section(title: str, width: int = 52) -> str: - line = "─" * max(width - len(title) - 2, 8) - return f"\n── {title} {line}" - - -def _row(label: str, value: str, *, width: int = 52) -> str: - return f" {label:<16} {value:>{width - 18}}" - - -def _decile_bars(rows: list[dict[str, Any]], *, bar_width: int = 24) -> list[str]: - if not rows: - return [" (无十分组数据)"] - vals = [r.get("mean_label") for r in rows] - finite = [float(v) for v in vals if v is not None and math.isfinite(float(v))] - if not finite: - return [" (十分组 label 均为 NaN)"] - lo, hi = min(finite), max(finite) - span = hi - lo if hi > lo else 1.0 - lines: list[str] = [] - for row in rows: - dec = int(row.get("decile", 0)) - ml = row.get("mean_label") - if ml is None or not math.isfinite(float(ml)): - bar = "" - shown = "—" - else: - x = float(ml) - shown = _fmt_num(x, digits=6) - norm = (x - lo) / span - n = max(1, int(round(norm * bar_width))) if x >= lo else 0 - bar = "█" * n - lines.append(f" D{dec:>2} {bar:<{bar_width}} {shown}") - return lines - - -def format_factor_report_text(metrics: dict[str, Any]) -> str: - """将 evaluate_factor 返回的 metrics 格式化为可读文本报告。""" - lines: list[str] = [ - "", - "═" * 52, - " 因子评估报告", - "═" * 52, - ] - - start = metrics.get("eval_start") - end = metrics.get("eval_end") - if start or end: - lines.append(_row("评估区间", f"{start or '…'} ~ {end or '…'}")) - lines.append(_row("标签列", 
str(metrics.get("label_col", "—")))) - lines.append(_row("有效 IC 天数", str(metrics.get("n_days", "—")))) - - lines.append(_section("截面 IC")) - lines.append(_row("IC", _fmt_num(metrics.get("ic")))) - lines.append(_row("ICIR", _fmt_num(metrics.get("icir")))) - lines.append(_row("Rank IC", _fmt_num(metrics.get("rank_ic")))) - lines.append(_row("Coverage", _fmt_num(metrics.get("coverage"), pct=True))) - lines.append(_row("CS lag-1 ρ", _fmt_num(metrics.get("cs_pearson_autocorr")))) - - mls = metrics.get("mls_fmb") - if isinstance(mls, dict) and mls: - lines.append(_section("MLS / FMB")) - key_labels = [ - ("mean_rho", "mean ρ"), - ("mean_ls", "mean LS"), - ("ir_ls", "IR_LS"), - ("ir_ls_annual", "IR_LS 年化"), - ("mls", "MLS"), - ("nw_t_rho", "NW-t(ρ)"), - ("nw_t_ls", "NW-t(LS)"), - ("n_days_rho", "样本天数"), - ] - for key, label in key_labels: - if key in mls: - val = mls[key] - if key.startswith("n_"): - lines.append(_row(label, str(int(val)) if val is not None else "—")) - else: - lines.append(_row(label, _fmt_num(val))) - - deciles = metrics.get("decile_mean_label") - if isinstance(deciles, list) and deciles: - lines.append(_section("十分组 label 均值 (D1=低因子 → D10=高因子)")) - lines.extend(_decile_bars(deciles)) - - lines.append("") - lines.append("─" * 52) - return "\n".join(lines) - - -def print_factor_report( - metrics: dict[str, Any], - *, - file: TextIO | None = None, -) -> None: - print(format_factor_report_text(metrics), file=file) - - -def format_factor_report_json(metrics: dict[str, Any]) -> str: - """JSON 报告(数值四舍五入)。""" - - def _clean(obj: Any) -> Any: - if isinstance(obj, dict): - return {k: _clean(v) for k, v in obj.items() if k != "note"} - if isinstance(obj, list): - return [_clean(x) for x in obj] - if isinstance(obj, float): - return round(obj, 6) if math.isfinite(obj) else None - return obj - - return json.dumps(_clean(metrics), ensure_ascii=False, indent=2) diff --git a/seekalpha/factor/types.py b/seekalpha/factor/types.py deleted file mode 100644 index 
a0877e85..00000000 --- a/seekalpha/factor/types.py +++ /dev/null @@ -1,112 +0,0 @@ -"""因子入库类型与默认策略。""" - -from __future__ import annotations - -from dataclasses import asdict, dataclass -from typing import TYPE_CHECKING, Any - -import numpy as np - -if TYPE_CHECKING: - from seekalpha.factor.mining.context import StockEvalContext - -DEFAULT_TRAIN_START = "2019-01-01" -DEFAULT_VAL_END = "2024-12-31" -DEFAULT_LABEL_COL = "label_1d_open_to_open" -DEFAULT_MAX_CS_CORR = 0.8 -DEFAULT_SIMILAR_TOP_K = 3 - - -@dataclass(frozen=True) -class IngestPolicy: - train_start: str = DEFAULT_TRAIN_START - val_end: str = DEFAULT_VAL_END - label_col: str = DEFAULT_LABEL_COL - max_cs_corr: float = DEFAULT_MAX_CS_CORR - similar_top_k: int = DEFAULT_SIMILAR_TOP_K - clip_pct: tuple[float, float] | None = None - - @property - def mask_before_start(self) -> str: - return self.train_start - - @property - def eval_start(self) -> str: - return self.train_start - - @property - def eval_end(self) -> str: - return self.val_end - - def to_ingest_kwargs(self, **overrides: Any) -> dict[str, Any]: - kw: dict[str, Any] = { - "label_col": self.label_col, - "clip_pct": self.clip_pct, - "mask_before_start": self.mask_before_start, - "eval_start": self.eval_start, - "eval_end": self.eval_end, - "max_cs_corr": self.max_cs_corr, - "similar_top_k": self.similar_top_k, - } - kw.update(overrides) - return kw - - def ingest_config_dict(self) -> dict[str, str]: - return { - "train_start": self.train_start, - "ingest_start": self.eval_start, - "ingest_end": self.eval_end, - "label_col": self.label_col, - } - - def to_dict(self) -> dict[str, Any]: - d = asdict(self) - if d.get("clip_pct") is not None: - d["clip_pct"] = list(d["clip_pct"]) - return d - - @classmethod - def from_context(cls, ctx: StockEvalContext, **overrides: Any) -> IngestPolicy: - return cls( - train_start=ctx.train_start, - val_end=ctx.val_end, - label_col=ctx.label_col, - **overrides, - ) - - @classmethod - def from_dict(cls, data: dict[str, 
Any]) -> IngestPolicy: - clip = data.get("clip_pct") - if isinstance(clip, list) and len(clip) == 2: - clip_pct: tuple[float, float] | None = (float(clip[0]), float(clip[1])) - else: - clip_pct = None - return cls( - train_start=str(data.get("train_start", DEFAULT_TRAIN_START)), - val_end=str(data.get("val_end", DEFAULT_VAL_END)), - label_col=str(data.get("label_col", DEFAULT_LABEL_COL)), - max_cs_corr=float(data.get("max_cs_corr", DEFAULT_MAX_CS_CORR)), - similar_top_k=int(data.get("similar_top_k", DEFAULT_SIMILAR_TOP_K)), - clip_pct=clip_pct, - ) - - -DEFAULT_INGEST_POLICY = IngestPolicy() - - -@dataclass -class MaterializeResult: - values: np.ndarray - expr: str - aux_tags: list[str] - - -@dataclass -class IngestResult: - factor_id: str - col_idx: int | None - stored: bool - skipped_reason: str | None - metrics: dict[str, Any] - similarity: dict[str, Any] | None - extra: dict[str, Any] diff --git a/seekalpha/factor/zoo/__init__.py b/seekalpha/factor/zoo/__init__.py deleted file mode 100644 index f1ec2fbb..00000000 --- a/seekalpha/factor/zoo/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -"""股票因子库:canonical index、memmap 存储、相似度。""" - -from pathlib import Path - -from seekalpha.factor.zoo.catalog import FactorCatalog -from seekalpha.factor.zoo.index import RowIndex, init_library, verify_index_hash -from seekalpha.factor.zoo.similarity import ( - SIMILARITY_KIND, - SimilarityMatrix, - cross_sectional_pearson_mean, - cross_sectional_pearson_series, -) -from seekalpha.factor.zoo.types import ( - DEFAULT_BAR_INTERVAL, - DEFAULT_DATASET, - FactorLibraryPaths, - FactorStatus, - LibraryManifest, - RowSlice, -) -from seekalpha.factor.zoo.zoo import FactorZoo - -from seekalpha.core.paths import FACTORZOO_DIR - -DEFAULT_FACTORLIB_ROOT = FACTORZOO_DIR - -__all__ = [ - "DEFAULT_BAR_INTERVAL", - "DEFAULT_DATASET", - "DEFAULT_FACTORLIB_ROOT", - "FactorCatalog", - "FactorLibraryPaths", - "FactorStatus", - "FactorZoo", - "LibraryManifest", - "RowIndex", - "RowSlice", - 
"SimilarityMatrix", - "SIMILARITY_KIND", - "cross_sectional_pearson_mean", - "cross_sectional_pearson_series", - "init_library", - "verify_index_hash", -] diff --git a/seekalpha/factor/zoo/catalog.py b/seekalpha/factor/zoo/catalog.py deleted file mode 100644 index af31db2e..00000000 --- a/seekalpha/factor/zoo/catalog.py +++ /dev/null @@ -1,161 +0,0 @@ -"""因子元数据 catalog(factors.parquet)。""" - -from __future__ import annotations - -import json -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - -import pandas as pd - -from seekalpha.factor.zoo.types import FactorMeta, FactorStatus - - -def _empty_factors_df() -> pd.DataFrame: - return pd.DataFrame( - columns=[ - "factor_id", - "name", - "expr", - "col_idx", - "status", - "finite_count", - "created_at", - "extra", - ] - ) - - -class FactorCatalog: - def __init__(self, path: Path) -> None: - self.path = Path(path) - if self.path.is_file(): - self._df = pd.read_parquet(self.path) - else: - self._df = _empty_factors_df() - - @property - def n_factors(self) -> int: - return len(self._df) - - def list_factor_ids(self) -> list[str]: - if self._df.empty: - return [] - return self._df["factor_id"].astype(str).tolist() - - def get(self, factor_id: str) -> FactorMeta | None: - if self._df.empty: - return None - hit = self._df[self._df["factor_id"] == factor_id] - if hit.empty: - return None - row = hit.iloc[0] - return FactorMeta( - factor_id=str(row["factor_id"]), - name=str(row["name"]), - expr=str(row["expr"]), - col_idx=int(row["col_idx"]), - status=FactorStatus(str(row["status"])), - finite_count=int(row.get("finite_count", 0)), - created_at=str(row.get("created_at", "")), - extra=_parse_extra(row.get("extra")), - ) - - def col_idx_for(self, factor_id: str) -> int | None: - meta = self.get(factor_id) - return None if meta is None else meta.col_idx - - def append( - self, - *, - factor_id: str, - name: str, - expr: str, - col_idx: int, - status: FactorStatus, - finite_count: int, - 
extra: dict[str, Any] | None = None, - ) -> FactorMeta: - if not self._df.empty and factor_id in set(self._df["factor_id"].astype(str)): - raise ValueError(f"factor_id 已存在: {factor_id}") - created_at = datetime.now(timezone.utc).isoformat() - row = { - "factor_id": factor_id, - "name": name, - "expr": expr, - "col_idx": int(col_idx), - "status": status.value, - "finite_count": int(finite_count), - "created_at": created_at, - "extra": json.dumps(extra, ensure_ascii=False) if extra else None, - } - self._df = pd.concat([self._df, pd.DataFrame([row])], ignore_index=True) - self.save() - return FactorMeta( - factor_id=factor_id, - name=name, - expr=expr, - col_idx=col_idx, - status=status, - finite_count=finite_count, - created_at=created_at, - extra=extra or {}, - ) - - def remove(self, factor_id: str) -> None: - if self._df.empty: - raise KeyError(f"factor_id 不存在: {factor_id}") - ids = set(self._df["factor_id"].astype(str)) - if factor_id not in ids: - raise KeyError(f"factor_id 不存在: {factor_id}") - self._df = self._df[self._df["factor_id"] != factor_id].reset_index(drop=True) - self.save() - - def update( - self, - factor_id: str, - *, - name: str, - expr: str, - finite_count: int, - status: FactorStatus, - extra: dict[str, Any] | None = None, - ) -> FactorMeta: - if self._df.empty or factor_id not in set(self._df["factor_id"].astype(str)): - raise KeyError(f"factor_id 不存在: {factor_id}") - idx = self._df.index[self._df["factor_id"] == factor_id][0] - row = self._df.loc[idx] - col_idx = int(row["col_idx"]) - created_at = str(row.get("created_at", "")) - self._df.at[idx, "name"] = name - self._df.at[idx, "expr"] = expr - self._df.at[idx, "finite_count"] = int(finite_count) - self._df.at[idx, "status"] = status.value - self._df.at[idx, "extra"] = json.dumps(extra, ensure_ascii=False) if extra else None - self.save() - return FactorMeta( - factor_id=factor_id, - name=name, - expr=expr, - col_idx=col_idx, - status=status, - finite_count=int(finite_count), - 
created_at=created_at, - extra=extra or {}, - ) - - def save(self) -> None: - self.path.parent.mkdir(parents=True, exist_ok=True) - self._df.to_parquet(self.path, index=False) - - def to_dataframe(self) -> pd.DataFrame: - return self._df.copy() - - -def _parse_extra(raw: Any) -> dict[str, Any]: - if raw is None or (isinstance(raw, float) and pd.isna(raw)): - return {} - if isinstance(raw, str): - return json.loads(raw) - return {} diff --git a/seekalpha/factor/zoo/index.py b/seekalpha/factor/zoo/index.py deleted file mode 100644 index 7864f108..00000000 --- a/seekalpha/factor/zoo/index.py +++ /dev/null @@ -1,348 +0,0 @@ -"""Canonical row index:panel (datetime, instrument) → row_id。""" - -from __future__ import annotations - -import hashlib -import json -from pathlib import Path - -import numpy as np -import pandas as pd - -from seekalpha.data.panel import load_panel -from seekalpha.factor.zoo.types import ( - DEFAULT_BAR_INTERVAL, - DEFAULT_DATASET, - FactorLibraryPaths, - LibraryManifest, - RowSlice, - TimeShard, -) - - -def _panel_to_index_frame(panel: pd.DataFrame) -> pd.DataFrame: - if not isinstance(panel.index, pd.MultiIndex): - raise ValueError("panel 必须是 (datetime, instrument) MultiIndex") - if panel.index.names[:2] != ["datetime", "instrument"]: - raise ValueError("panel 索引层须为 datetime, instrument") - panel = panel.sort_index() - return pd.DataFrame( - { - "datetime": pd.to_datetime(panel.index.get_level_values("datetime")), - "instrument": panel.index.get_level_values("instrument").astype(str), - } - ) - - -def _assign_quarter_shard_id(dt: pd.Series) -> pd.Series: - return dt.dt.to_period("Q").astype(str) - - -def build_row_index(df: pd.DataFrame) -> pd.DataFrame: - """排序并分配 row_id、shard_id。""" - d = df.sort_values(["datetime", "instrument"], kind="mergesort").reset_index(drop=True) - d["row_id"] = np.arange(len(d), dtype=np.int64) - d["shard_id"] = _assign_quarter_shard_id(d["datetime"]) - return d[["row_id", "datetime", "instrument", "shard_id"]] - - 
-def index_content_hash(rows: pd.DataFrame) -> str: - payload = rows[["row_id", "datetime", "instrument"]].astype( - {"datetime": str, "instrument": str, "row_id": int} - ) - digest = hashlib.sha256(payload.to_csv(index=False).encode("utf-8")).hexdigest() - return digest[:16] - - -def build_time_shards(rows: pd.DataFrame) -> list[TimeShard]: - shards: list[TimeShard] = [] - for shard_id, grp in rows.groupby("shard_id", sort=True): - start_row = int(grp["row_id"].min()) - stop_row = int(grp["row_id"].max()) + 1 - dt_min = grp["datetime"].min() - dt_max = grp["datetime"].max() - shards.append( - TimeShard( - shard_id=str(shard_id), - start_row=start_row, - stop_row=stop_row, - datetime_start=str(dt_min), - datetime_end=str(dt_max), - ) - ) - return shards - - -def build_sample_row_ids( - rows: pd.DataFrame, - *, - n_sample_rows: int, - seed: int = 42, -) -> np.ndarray: - """按 shard 比例分层抽样 row_id(固定种子,可复现)。""" - n_rows = len(rows) - if n_sample_rows <= 0: - raise ValueError("n_sample_rows 须为正整数") - if n_sample_rows >= n_rows: - return rows["row_id"].to_numpy(dtype=np.int64, copy=True) - - rng = np.random.default_rng(seed) - chosen: list[int] = [] - shard_ids = rows["shard_id"].unique() - counts = rows.groupby("shard_id", sort=True).size() - total = int(counts.sum()) - quotas = (counts / total * n_sample_rows).round().astype(int) - diff = int(n_sample_rows - quotas.sum()) - if diff != 0: - idx = int(np.argmax(counts.to_numpy())) - quotas.iloc[idx] = int(quotas.iloc[idx]) + diff - - for sid in shard_ids: - q = int(quotas.loc[sid]) - if q <= 0: - continue - pool = rows.loc[rows["shard_id"] == sid, "row_id"].to_numpy(dtype=np.int64) - if q >= len(pool): - chosen.extend(pool.tolist()) - else: - pick = rng.choice(pool, size=q, replace=False) - chosen.extend(pick.tolist()) - - out = np.array(sorted(set(chosen)), dtype=np.int64) - if len(out) > n_sample_rows: - out = np.sort(rng.choice(out, size=n_sample_rows, replace=False)) - elif len(out) < n_sample_rows: - remaining = 
np.setdiff1d(rows["row_id"].to_numpy(dtype=np.int64), out, assume_unique=False) - need = n_sample_rows - len(out) - if len(remaining) >= need: - extra = rng.choice(remaining, size=need, replace=False) - out = np.sort(np.concatenate([out, extra])) - return out - - -class RowIndex: - def __init__(self, rows: pd.DataFrame, shards: list[TimeShard], sample_row_ids: np.ndarray) -> None: - self.rows = rows - self.shards = shards - self.sample_row_ids = sample_row_ids.astype(np.int64, copy=False) - self._by_shard = {s.shard_id: s for s in shards} - - @property - def n_rows(self) -> int: - return len(self.rows) - - @property - def n_sample_rows(self) -> int: - return len(self.sample_row_ids) - - def shard_for_id(self, shard_id: str) -> TimeShard | None: - return self._by_shard.get(shard_id) - - def row_slice_for_dates(self, start: str | None, end: str | None) -> RowSlice: - dt = pd.to_datetime(self.rows["datetime"], errors="coerce") - mask = pd.Series(True, index=self.rows.index) - if start is not None: - mask &= dt >= pd.Timestamp(start) - if end is not None: - mask &= dt < pd.Timestamp(end) + pd.Timedelta(days=1) - idx = self.rows.index[mask] - if len(idx) == 0: - return RowSlice(0, 0) - rows_hit = self.rows.loc[idx, "row_id"] - return RowSlice(int(rows_hit.min()), int(rows_hit.max()) + 1) - - def save(self, paths: FactorLibraryPaths) -> None: - paths.index_dir.mkdir(parents=True, exist_ok=True) - self.rows.to_parquet(paths.rows_parquet, index=False) - self.sample_row_ids_df().to_parquet(paths.sample_row_ids, index=False) - shard_payload = [ - { - "shard_id": s.shard_id, - "start_row": s.start_row, - "stop_row": s.stop_row, - "datetime_start": s.datetime_start, - "datetime_end": s.datetime_end, - } - for s in self.shards - ] - paths.shards_json.write_text(json.dumps(shard_payload, indent=2), encoding="utf-8") - - def sample_row_ids_df(self) -> pd.DataFrame: - return pd.DataFrame({"row_id": self.sample_row_ids}) - - @classmethod - def load(cls, paths: FactorLibraryPaths) 
-> RowIndex: - rows = pd.read_parquet(paths.rows_parquet) - sample_ids = pd.read_parquet(paths.resolve_sample_row_ids())["row_id"].to_numpy( - dtype=np.int64 - ) - raw_shards = json.loads(paths.shards_json.read_text(encoding="utf-8")) - shards = [ - TimeShard( - shard_id=str(s["shard_id"]), - start_row=int(s["start_row"]), - stop_row=int(s["stop_row"]), - datetime_start=str(s["datetime_start"]), - datetime_end=str(s["datetime_end"]), - ) - for s in raw_shards - ] - return cls(rows=rows, shards=shards, sample_row_ids=sample_ids) - - @classmethod - def build_from_panel( - cls, - panel: pd.DataFrame, - *, - n_sample_rows: int = 200_000, - sample_seed: int = 42, - ) -> RowIndex: - frame = _panel_to_index_frame(panel) - rows = build_row_index(frame) - shards = build_time_shards(rows) - sample_ids = build_sample_row_ids(rows, n_sample_rows=n_sample_rows, seed=sample_seed) - return cls(rows=rows, shards=shards, sample_row_ids=sample_ids) - - @classmethod - def build_from_panel_path( - cls, - panel_path: Path, - *, - n_sample_rows: int = 200_000, - sample_seed: int = 42, - ) -> RowIndex: - return cls.build_from_panel( - load_panel(panel_path), - n_sample_rows=n_sample_rows, - sample_seed=sample_seed, - ) - - -def init_library( - root: Path, - *, - panel: pd.DataFrame | None = None, - panel_path: Path | None = None, - dataset: str = DEFAULT_DATASET, - bar_interval: str = DEFAULT_BAR_INTERVAL, - n_sample_rows: int = 200_000, - max_factors: int = 2048, - sample_seed: int = 42, -) -> tuple[FactorLibraryPaths, LibraryManifest, RowIndex]: - """初始化因子库目录、manifest、canonical index。""" - if panel is None and panel_path is None: - raise ValueError("必须提供 panel 或 panel_path") - paths = FactorLibraryPaths(root=Path(root).expanduser().resolve()) - paths.root.mkdir(parents=True, exist_ok=True) - paths.sample_dir.mkdir(parents=True, exist_ok=True) - paths.values_dir.mkdir(parents=True, exist_ok=True) - paths.meta_dir.mkdir(parents=True, exist_ok=True) - - if panel is None: - assert 
panel_path is not None - resolved_panel_path = Path(panel_path).expanduser().resolve() - index = RowIndex.build_from_panel_path( - resolved_panel_path, - n_sample_rows=n_sample_rows, - sample_seed=sample_seed, - ) - panel_path_str = str(resolved_panel_path) - else: - index = RowIndex.build_from_panel( - panel, - n_sample_rows=n_sample_rows, - sample_seed=sample_seed, - ) - panel_path_str = str(panel_path.resolve()) if panel_path else "" - - index.save(paths) - - manifest = LibraryManifest( - dataset=dataset, - bar_interval=bar_interval, - universe_path=panel_path_str, - n_rows=index.n_rows, - n_sample_rows=index.n_sample_rows, - max_factors=max_factors, - index_hash=index_content_hash(index.rows), - sample_seed=sample_seed, - extra={ - "panel_path": panel_path_str, - "base_interval": bar_interval, - }, - ) - paths.manifest.write_text( - json.dumps(manifest.to_dict(), indent=2, ensure_ascii=False), - encoding="utf-8", - ) - return paths, manifest, index - - -def verify_index_hash(manifest: LibraryManifest, index: RowIndex) -> None: - """校验 manifest 与当前 index 一致。""" - current = index_content_hash(index.rows) - if manifest.index_hash and manifest.index_hash != current: - raise ValueError( - f"index_hash 不匹配: manifest={manifest.index_hash!r} current={current!r}" - ) - - -def verify_index_prefix_stable( - old_rows: pd.DataFrame, - new_rows: pd.DataFrame, - old_n: int, -) -> bool: - """新 index 前 old_n 行是否与旧 index 完全一致(datetime, instrument 序)。""" - if len(new_rows) < old_n or len(old_rows) < old_n: - return False - old_prefix = old_rows.iloc[:old_n][["datetime", "instrument"]].copy() - new_prefix = new_rows.iloc[:old_n][["datetime", "instrument"]].copy() - old_prefix["datetime"] = pd.to_datetime(old_prefix["datetime"], errors="coerce") - new_prefix["datetime"] = pd.to_datetime(new_prefix["datetime"], errors="coerce") - old_prefix["instrument"] = old_prefix["instrument"].astype(str) - new_prefix["instrument"] = new_prefix["instrument"].astype(str) - return 
old_prefix.reset_index(drop=True).equals(new_prefix.reset_index(drop=True)) - - -def extend_library_index( - lib_root: Path, - *, - panel: pd.DataFrame, - panel_path: Path, -) -> RowIndex: - """panel 尾部追加且前缀稳定时:扩展 index/manifest,保留 sample_row_ids。""" - paths = FactorLibraryPaths(root=Path(lib_root).expanduser().resolve()) - if not paths.manifest.is_file(): - raise FileNotFoundError(f"因子库未初始化: {paths.manifest}") - - old_index = RowIndex.load(paths) - old_n = old_index.n_rows - panel = panel.sort_index() - - frame = _panel_to_index_frame(panel) - new_rows = build_row_index(frame) - if len(new_rows) <= old_n: - raise ValueError(f"panel 行数 {len(new_rows)} 未大于库 n_rows {old_n}") - - if not verify_index_prefix_stable(old_index.rows, new_rows, old_n): - raise ValueError("index 前缀不稳定,无法增量扩展") - - shards = build_time_shards(new_rows) - new_index = RowIndex( - rows=new_rows, - shards=shards, - sample_row_ids=old_index.sample_row_ids.copy(), - ) - new_index.save(paths) - - manifest_data = json.loads(paths.manifest.read_text(encoding="utf-8")) - panel_path_str = str(Path(panel_path).expanduser().resolve()) - manifest_data["n_rows"] = new_index.n_rows - manifest_data["index_hash"] = index_content_hash(new_rows) - manifest_data["panel_path"] = panel_path_str - manifest_data["universe_path"] = panel_path_str - paths.manifest.write_text( - json.dumps(manifest_data, indent=2, ensure_ascii=False), - encoding="utf-8", - ) - return new_index diff --git a/seekalpha/factor/zoo/realign.py b/seekalpha/factor/zoo/realign.py deleted file mode 100644 index 32602e87..00000000 --- a/seekalpha/factor/zoo/realign.py +++ /dev/null @@ -1,764 +0,0 @@ -"""panel 行数变化时重建 factorlib index 并重算库内因子值。""" - -from __future__ import annotations - -from pathlib import Path -from typing import Any - -import numpy as np -import pandas as pd - -from seekalpha.dsl.eval import collect_aux_intervals_from_expr -from seekalpha.factor.ingest import ( - compute_ingest_metrics, - mask_values_before_start, - 
materialize_slice_to_canonical, - prepare_stored_values, -) -from seekalpha.factor.types import DEFAULT_INGEST_POLICY, IngestPolicy -from seekalpha.factor.zoo import FactorZoo, init_library -from seekalpha.factor.zoo.index import ( - RowIndex, - build_row_index, - build_time_shards, - extend_library_index, - verify_index_prefix_stable, -) -from seekalpha.factor.zoo.similarity import SimilarityMatrix -from seekalpha.factor.zoo.types import FactorMeta, LibraryManifest - -AUX_WARMUP_CALENDAR_DAYS = 35 -DEFAULT_WARMUP_DAYS = 240 -DEFAULT_WARMUP_RETRY_DAYS = 480 -DEFAULT_OVERLAP_VERIFY_DAYS = 20 - - -def _resolve_panel_path(path: Path) -> Path: - return Path(path).expanduser().resolve() - - -def panel_paths_match(a: Path, b: Path) -> bool: - return _resolve_panel_path(a) == _resolve_panel_path(b) - - -def _reset_similarity_matrix(zoo: FactorZoo) -> None: - sim = SimilarityMatrix(zoo.paths, zoo.manifest.max_factors) - if sim.matrix_path.is_file(): - sim.matrix_path.unlink() - if sim.meta_path.is_file(): - sim.meta_path.unlink() - - -def _rebuild_similarity(zoo: FactorZoo) -> None: - _reset_similarity_matrix(zoo) - if zoo.n_factors == 0: - return - sim = SimilarityMatrix(zoo.paths, zoo.manifest.max_factors) - for fid in zoo.catalog.list_factor_ids(): - meta = zoo.catalog.get(fid) - if meta is None: - continue - values = zoo.read_factor(fid) - sim.append_factor_correlations( - zoo, - factor_id=fid, - col_idx=meta.col_idx, - values=values, - ) - - -def _build_extended_index(zoo: FactorZoo, panel: pd.DataFrame) -> RowIndex: - from seekalpha.factor.zoo.index import RowIndex, _panel_to_index_frame - - frame = _panel_to_index_frame(panel) - new_rows = build_row_index(frame) - if not verify_index_prefix_stable(zoo.index.rows, new_rows, zoo.manifest.n_rows): - raise ValueError("index 前缀不稳定,无法增量扩展") - shards = build_time_shards(new_rows) - return RowIndex( - rows=new_rows, - shards=shards, - sample_row_ids=zoo.index.sample_row_ids.copy(), - ) - - -def _zoo_for_eval(zoo: FactorZoo, 
extended_index: RowIndex) -> FactorZoo: - manifest = LibraryManifest( - dataset=zoo.manifest.dataset, - bar_interval=zoo.manifest.bar_interval, - universe_path=zoo.manifest.universe_path, - n_rows=extended_index.n_rows, - n_sample_rows=zoo.manifest.n_sample_rows, - max_factors=zoo.manifest.max_factors, - dtype=zoo.manifest.dtype, - index_hash=zoo.manifest.index_hash, - sample_seed=zoo.manifest.sample_seed, - version=zoo.manifest.version, - extra=dict(zoo.manifest.extra), - ) - return FactorZoo(zoo.paths, manifest, extended_index, zoo.catalog) - - -def _candidate_rows_from_panel(panel: pd.DataFrame) -> pd.DataFrame: - frame = panel.index.to_frame(index=False) - frame.columns = ["datetime", "instrument"] - return build_row_index( - pd.DataFrame( - { - "datetime": pd.to_datetime(frame["datetime"]), - "instrument": frame["instrument"].astype(str), - } - ) - ) - - -def _apply_stored_clip(values: np.ndarray, extra: dict[str, Any]) -> np.ndarray: - p01 = extra.get("clip_p01") - p99 = extra.get("clip_p99") - if p01 is None or p99 is None: - return np.asarray(values, dtype=np.float32) - out = np.asarray(values, dtype=np.float32).copy() - finite = np.isfinite(out) - if not finite.any(): - return out - out[finite] = np.clip(out[finite], float(p01), float(p99)).astype(np.float32, copy=False) - return out - - -def eval_window_dates( - index_rows: pd.DataFrame, - *, - old_n: int, - warmup_days: int, - expr: str, -) -> tuple[str, str, pd.Timestamp, pd.Timestamp]: - """返回 (slice_start_str, slice_end_str, warmup_start_ts, update_start_ts)。""" - if old_n >= len(index_rows): - raise ValueError(f"old_n={old_n} >= index 行数 {len(index_rows)}") - - dt_col = pd.to_datetime(index_rows["datetime"], errors="coerce") - update_start = pd.Timestamp(dt_col.iloc[old_n]) - slice_end = pd.Timestamp(dt_col.max()) - - trade_days = pd.Series(dt_col.unique()).sort_values().reset_index(drop=True) - pos = int(trade_days.searchsorted(update_start)) - warmup_idx = max(0, pos - int(warmup_days)) - 
warmup_start = pd.Timestamp(trade_days.iloc[warmup_idx]) - - if collect_aux_intervals_from_expr(expr): - warmup_start = warmup_start - pd.Timedelta(days=AUX_WARMUP_CALENDAR_DAYS) - - return ( - warmup_start.strftime("%Y-%m-%d"), - slice_end.strftime("%Y-%m-%d"), - warmup_start, - update_start, - ) - - -def overlap_row_ids( - index_rows: pd.DataFrame, - *, - old_n: int, - update_start: pd.Timestamp, - overlap_verify_days: int = DEFAULT_OVERLAP_VERIFY_DAYS, -) -> tuple[np.ndarray, pd.Timestamp]: - """update 前最后 overlap_verify_days 个交易日的旧库 row_id(不含 update_start 当日)。""" - dt = pd.to_datetime(index_rows["datetime"], errors="coerce") - trade_days = pd.Series(dt.unique()).sort_values().reset_index(drop=True) - pos = int(trade_days.searchsorted(update_start)) - start_idx = max(0, pos - int(overlap_verify_days)) - verify_start = pd.Timestamp(trade_days.iloc[start_idx]) if pos > 0 else update_start - - mask = ( - (index_rows["row_id"].to_numpy(dtype=np.int64) < old_n) - & (dt >= verify_start) - & (dt < update_start) - ) - return index_rows.loc[mask, "row_id"].to_numpy(dtype=np.int64), verify_start - - -def verify_overlap_exact( - stored: np.ndarray, - computed: np.ndarray, - overlap_ids: np.ndarray, - *, - index_rows: pd.DataFrame, - max_samples: int = 5, -) -> tuple[bool, dict[str, Any]]: - if len(overlap_ids) == 0: - return True, {"n_overlap": 0, "n_mismatch": 0, "samples": []} - - stored_v = np.asarray(stored, dtype=np.float32) - computed_v = np.asarray(computed, dtype=np.float32) - mismatches: list[dict[str, Any]] = [] - n_mismatch = 0 - - for rid in overlap_ids: - rid_int = int(rid) - s = stored_v[rid_int] - c = computed_v[rid_int] - s_fin = np.isfinite(s) - c_fin = np.isfinite(c) - ok = (s_fin and c_fin and s == c) or (not s_fin and not c_fin) - if not ok: - n_mismatch += 1 - if len(mismatches) < max_samples: - row = index_rows.loc[index_rows["row_id"] == rid_int].iloc[0] - mismatches.append( - { - "row_id": rid_int, - "datetime": str(row["datetime"]), - "instrument": 
str(row["instrument"]), - "stored": float(s) if s_fin else None, - "computed": float(c) if c_fin else None, - } - ) - - return n_mismatch == 0, { - "n_overlap": int(len(overlap_ids)), - "n_mismatch": n_mismatch, - "samples": mismatches, - } - - -def _rematerialize_factor( - zoo: FactorZoo, - meta: FactorMeta, - panel: pd.DataFrame, - policy: IngestPolicy, -) -> None: - values_path = zoo.paths.factor_values_path(meta.factor_id) - if values_path.is_file(): - values_path.unlink() - stored_values, expr, aux_tags, clip_extra = prepare_stored_values(meta.expr, panel, zoo, policy) - metrics = compute_ingest_metrics(stored_values, panel, policy) - extra = dict(meta.extra or {}) - extra.update({**clip_extra, "aux_tags": aux_tags, "metrics": metrics}) - zoo.overwrite_factor( - factor_id=meta.factor_id, - name=meta.name, - expr=expr, - values=stored_values, - status=meta.status, - extra=extra, - ) - - -def _prepare_computed_values( - meta: FactorMeta, - panel: pd.DataFrame, - zoo: FactorZoo, - policy: IngestPolicy, - *, - old_n: int, - warmup_days: int, - overlap_verify_days: int = DEFAULT_OVERLAP_VERIFY_DAYS, -) -> tuple[np.ndarray, dict[str, Any]]: - slice_start, slice_end, warmup_start, update_start = eval_window_dates( - zoo.index.rows, - old_n=old_n, - warmup_days=warmup_days, - expr=meta.expr, - ) - mat = materialize_slice_to_canonical( - meta.expr, - panel, - zoo, - start=slice_start, - end=slice_end, - ) - values = _apply_stored_clip(mat.values, meta.extra or {}) - values = mask_values_before_start(values, zoo, policy.mask_before_start) - overlap_ids, verify_start = overlap_row_ids( - zoo.index.rows, - old_n=old_n, - update_start=update_start, - overlap_verify_days=overlap_verify_days, - ) - return values, { - "slice_start": slice_start, - "slice_end": slice_end, - "warmup_start": str(warmup_start), - "update_start": str(update_start), - "warmup_days": warmup_days, - "overlap_verify_days": overlap_verify_days, - "overlap_verify_start": str(verify_start), - 
"n_overlap": int(len(overlap_ids)), - "_overlap_ids": overlap_ids, - } - - -def remask_factorlib( - lib_root: Path, - *, - panel: pd.DataFrame, - policy: IngestPolicy | None = None, - dry_run: bool = False, -) -> dict[str, Any]: - """对库内全部因子按 policy 重新物化、mask 并重算 extra.metrics。""" - pol = policy or DEFAULT_INGEST_POLICY - lib_root = Path(lib_root).expanduser().resolve() - panel = panel.sort_index() - zoo = FactorZoo.open(lib_root) - remasked: list[str] = [] - for fid in zoo.catalog.list_factor_ids(): - meta = zoo.catalog.get(fid) - if meta is None: - continue - remasked.append(fid) - if dry_run: - continue - _rematerialize_factor(zoo, meta, panel, pol) - if not dry_run and remasked: - _rebuild_similarity(zoo) - return { - "remasked_factors": remasked, - "n_factors": len(remasked), - "dry_run": dry_run, - "ingest_policy": pol.to_dict(), - } - - -def realign_factorlib_to_panel( - lib_root: Path, - *, - panel: pd.DataFrame, - panel_path: Path, - policy: IngestPolicy | None = None, -) -> dict[str, Any]: - """同一 panel 路径下行数变化:重建 manifest/index,并重算全部已有因子 memmap。""" - pol = policy or DEFAULT_INGEST_POLICY - lib_root = Path(lib_root).expanduser().resolve() - panel_path = _resolve_panel_path(panel_path) - panel = panel.sort_index() - - zoo = FactorZoo.open(lib_root, verify_hash=False) - old_n_rows = zoo.manifest.n_rows - if len(panel) == old_n_rows: - return {"realigned": False, "mode": "noop", "n_rows": old_n_rows} - - saved: list[FactorMeta] = [] - for fid in zoo.catalog.list_factor_ids(): - meta = zoo.catalog.get(fid) - if meta is not None: - saved.append(meta) - - init_library( - lib_root, - panel=panel, - panel_path=panel_path, - n_sample_rows=min(zoo.manifest.n_sample_rows, len(panel)), - max_factors=zoo.manifest.max_factors, - sample_seed=zoo.manifest.sample_seed, - ) - - zoo = FactorZoo.open(lib_root, verify_hash=True) - _reset_similarity_matrix(zoo) - - rematerialized: list[str] = [] - for meta in sorted(saved, key=lambda m: m.col_idx): - _rematerialize_factor(zoo, 
meta, panel, pol) - rematerialized.append(meta.factor_id) - - _rebuild_similarity(zoo) - - return { - "realigned": True, - "mode": "full", - "old_n_rows": old_n_rows, - "new_n_rows": len(panel), - "panel_path": str(panel_path), - "rematerialized_factors": rematerialized, - "n_factors": len(rematerialized), - "ingest_policy": pol.to_dict(), - } - - -def incremental_realign_factorlib_to_panel( - lib_root: Path, - *, - panel: pd.DataFrame, - panel_path: Path, - policy: IngestPolicy | None = None, - warmup_days: int = DEFAULT_WARMUP_DAYS, - warmup_retry_days: int = DEFAULT_WARMUP_RETRY_DAYS, - overlap_verify_days: int = DEFAULT_OVERLAP_VERIFY_DAYS, - dry_run: bool = False, -) -> dict[str, Any]: - """panel 尾部追加:T+N 窗口重算 + overlap 校验;失败则扩窗或全量 fallback。""" - pol = policy or DEFAULT_INGEST_POLICY - lib_root = Path(lib_root).expanduser().resolve() - panel_path = _resolve_panel_path(panel_path) - panel = panel.sort_index() - - zoo = FactorZoo.open(lib_root, verify_hash=False) - old_n = zoo.manifest.n_rows - new_n = len(panel) - - if new_n == old_n: - return { - "realigned": False, - "mode": "noop", - "n_rows": old_n, - "dry_run": dry_run, - } - - if not panel_paths_match(panel_path, Path(zoo.manifest.panel_path)): - return { - **realign_factorlib_to_panel( - lib_root, panel=panel, panel_path=panel_path, policy=pol - ), - "mode": "full", - "fallback_reason": "panel_path_mismatch", - } - - candidate_rows = _candidate_rows_from_panel(panel) - if not verify_index_prefix_stable(zoo.index.rows, candidate_rows, old_n): - return { - **realign_factorlib_to_panel( - lib_root, panel=panel, panel_path=panel_path, policy=pol - ), - "mode": "full", - "fallback_reason": "index_prefix_unstable", - } - - saved: list[FactorMeta] = [] - for fid in zoo.catalog.list_factor_ids(): - meta = zoo.catalog.get(fid) - if meta is not None: - saved.append(meta) - - old_values_map: dict[str, np.ndarray] = {} - for meta in saved: - old_values_map[meta.factor_id] = zoo.read_factor(meta.factor_id) - - 
extended_index = _build_extended_index(zoo, panel) - eval_zoo = _zoo_for_eval(zoo, extended_index) - - if not dry_run: - extend_library_index(lib_root, panel=panel, panel_path=panel_path) - zoo = FactorZoo.open(lib_root, verify_hash=True) - _reset_similarity_matrix(zoo) - eval_zoo = zoo - - factor_reports: dict[str, Any] = {} - incremental_factors: list[str] = [] - fallback_factors: list[str] = [] - - for meta in sorted(saved, key=lambda m: m.col_idx): - fid = meta.factor_id - stored_old = old_values_map[fid] - ok = False - used_warmup = warmup_days - last_report: dict[str, Any] = {} - - for attempt_warmup in (warmup_days, warmup_retry_days): - computed, win_info = _prepare_computed_values( - meta, - panel, - eval_zoo, - pol, - old_n=old_n, - warmup_days=attempt_warmup, - overlap_verify_days=overlap_verify_days, - ) - overlap_ids = win_info.pop("_overlap_ids") - passed, overlap_report = verify_overlap_exact( - stored_old, - computed, - overlap_ids, - index_rows=eval_zoo.index.rows, - ) - last_report = {**win_info, **overlap_report, "passed": passed} - if passed: - ok = True - used_warmup = attempt_warmup - break - - if ok: - factor_reports[fid] = { - "strategy": "incremental", - "warmup_days": used_warmup, - **last_report, - } - incremental_factors.append(fid) - if not dry_run: - tail = computed[old_n:new_n] - metrics = compute_ingest_metrics( - np.concatenate([stored_old, tail]), - panel, - pol, - ) - extra = dict(meta.extra or {}) - extra["metrics"] = metrics - extra["incremental_realign"] = { - "warmup_days": used_warmup, - "slice_start": last_report.get("slice_start"), - "slice_end": last_report.get("slice_end"), - } - zoo.extend_factor_values( - fid, - tail, - old_n=old_n, - extra=extra, - ) - continue - - factor_reports[fid] = { - "strategy": "full_fallback", - "warmup_days_tried": [warmup_days, warmup_retry_days], - **last_report, - } - fallback_factors.append(fid) - if not dry_run: - _rematerialize_factor(zoo, meta, panel, pol) - - if not dry_run and 
(incremental_factors or fallback_factors): - _rebuild_similarity(zoo) - - return { - "realigned": True, - "mode": "incremental" if not fallback_factors else "incremental_with_fallback", - "dry_run": dry_run, - "old_n_rows": old_n, - "new_n_rows": new_n, - "panel_path": str(panel_path), - "n_factors": len(saved), - "incremental_factors": incremental_factors, - "fallback_factors": fallback_factors, - "factor_reports": factor_reports, - "overlap_verify_days": overlap_verify_days, - "ingest_policy": pol.to_dict(), - } - - -def list_append_boundary_old_n( - index_rows: pd.DataFrame, - *, - append_trade_days: list[int] | None = None, -) -> list[dict[str, Any]]: - """按「尾部追加 K 个交易日」生成滚动测试点(old_n = 该日首行 row_id)。""" - append_trade_days = append_trade_days or [1, 2, 3, 5, 10, 20] - dt = pd.to_datetime(index_rows["datetime"], errors="coerce") - trade_days = pd.Series(dt.unique()).sort_values().tolist() - n_rows = len(index_rows) - points: list[dict[str, Any]] = [] - - for k in append_trade_days: - if k <= 0 or k >= len(trade_days): - continue - update_start = pd.Timestamp(trade_days[-k]) - hit = index_rows.loc[dt >= update_start, "row_id"] - if hit.empty: - continue - old_n = int(hit.min()) - if old_n <= 0 or old_n >= n_rows: - continue - points.append( - { - "old_n": old_n, - "new_n": n_rows, - "append_rows": n_rows - old_n, - "append_trade_days": k, - "update_start": update_start.strftime("%Y-%m-%d"), - } - ) - return points - - -def _probe_factors_at_old_n( - zoo: FactorZoo, - panel: pd.DataFrame, - saved: list[FactorMeta], - old_values_map: dict[str, np.ndarray], - *, - old_n: int, - pol: IngestPolicy, - warmup_days: int, - warmup_retry_days: int, - overlap_verify_days: int, -) -> tuple[list[str], list[str], dict[str, Any]]: - incremental_factors: list[str] = [] - fallback_factors: list[str] = [] - factor_reports: dict[str, Any] = {} - - for meta in sorted(saved, key=lambda m: m.col_idx): - fid = meta.factor_id - stored_old = old_values_map[fid] - if len(stored_old) < 
old_n: - factor_reports[fid] = {"strategy": "skip", "reason": "stored_shorter_than_old_n"} - continue - - ok = False - used_warmup = warmup_days - last_report: dict[str, Any] = {} - - for attempt_warmup in (warmup_days, warmup_retry_days): - computed, win_info = _prepare_computed_values( - meta, - panel, - zoo, - pol, - old_n=old_n, - warmup_days=attempt_warmup, - overlap_verify_days=overlap_verify_days, - ) - overlap_ids = win_info.pop("_overlap_ids") - passed, overlap_report = verify_overlap_exact( - stored_old[:old_n], - computed, - overlap_ids, - index_rows=zoo.index.rows, - ) - last_report = {**win_info, **overlap_report, "passed": passed} - if passed: - ok = True - used_warmup = attempt_warmup - break - - if ok: - factor_reports[fid] = { - "strategy": "incremental", - "warmup_days": used_warmup, - **last_report, - } - incremental_factors.append(fid) - else: - factor_reports[fid] = { - "strategy": "would_fallback", - "warmup_days_tried": [warmup_days, warmup_retry_days], - **last_report, - } - fallback_factors.append(fid) - - return incremental_factors, fallback_factors, factor_reports - - -def probe_incremental_realign_at_old_n( - zoo: FactorZoo, - panel: pd.DataFrame, - *, - old_n: int, - policy: IngestPolicy | None = None, - warmup_days: int = DEFAULT_WARMUP_DAYS, - warmup_retry_days: int = DEFAULT_WARMUP_RETRY_DAYS, - overlap_verify_days: int = DEFAULT_OVERLAP_VERIFY_DAYS, -) -> dict[str, Any]: - """模拟从 old_n 增至 len(panel) 的增量 realign(只读,不写盘)。""" - pol = policy or DEFAULT_INGEST_POLICY - panel = panel.sort_index() - new_n = len(panel) - if old_n <= 0 or old_n >= new_n: - raise ValueError(f"old_n 须满足 0 < old_n < new_n,得到 old_n={old_n} new_n={new_n}") - - if new_n != zoo.manifest.n_rows: - raise ValueError( - f"probe 要求 len(panel)={new_n} == manifest.n_rows={zoo.manifest.n_rows}" - ) - - saved: list[FactorMeta] = [] - for fid in zoo.catalog.list_factor_ids(): - meta = zoo.catalog.get(fid) - if meta is not None: - saved.append(meta) - - old_values_map = 
{m.factor_id: zoo.read_factor(m.factor_id) for m in saved} - inc, fb, reports = _probe_factors_at_old_n( - zoo, - panel, - saved, - old_values_map, - old_n=old_n, - pol=pol, - warmup_days=warmup_days, - warmup_retry_days=warmup_retry_days, - overlap_verify_days=overlap_verify_days, - ) - - dt_col = pd.to_datetime(zoo.index.rows["datetime"], errors="coerce") - update_start = str(dt_col.iloc[old_n]) - - return { - "simulated": True, - "old_n_rows": old_n, - "new_n_rows": new_n, - "append_rows": new_n - old_n, - "update_start": update_start, - "n_factors": len(saved), - "incremental_factors": inc, - "fallback_factors": fb, - "factor_reports": reports, - "overlap_verify_days": overlap_verify_days, - "warmup_days": warmup_days, - "warmup_retry_days": warmup_retry_days, - } - - -def rolling_probe_incremental_realign( - lib_root: Path, - *, - panel: pd.DataFrame, - append_trade_days: list[int] | None = None, - policy: IngestPolicy | None = None, - warmup_days: int = DEFAULT_WARMUP_DAYS, - warmup_retry_days: int = DEFAULT_WARMUP_RETRY_DAYS, - overlap_verify_days: int = DEFAULT_OVERLAP_VERIFY_DAYS, - factor_ids: list[str] | None = None, -) -> dict[str, Any]: - """在多个 append 边界上滚动 probe(只读)。""" - pol = policy or DEFAULT_INGEST_POLICY - panel = panel.sort_index() - zoo = FactorZoo.open(lib_root, verify_hash=False) - - if len(panel) != zoo.manifest.n_rows: - raise ValueError( - f"panel 行数 {len(panel)} != manifest.n_rows {zoo.manifest.n_rows};" - "滚动 probe 需在 panel 与库已对齐后进行" - ) - - if factor_ids: - keep = set(factor_ids) - saved_ids = [fid for fid in zoo.catalog.list_factor_ids() if fid in keep] - if not saved_ids: - raise ValueError(f"库中无指定因子: {factor_ids}") - else: - saved_ids = None - - points = list_append_boundary_old_n(zoo.index.rows, append_trade_days=append_trade_days) - windows: list[dict[str, Any]] = [] - - for pt in points: - result = probe_incremental_realign_at_old_n( - zoo, - panel, - old_n=int(pt["old_n"]), - policy=pol, - warmup_days=warmup_days, - 
warmup_retry_days=warmup_retry_days, - overlap_verify_days=overlap_verify_days, - ) - if saved_ids is not None: - result["incremental_factors"] = [ - f for f in result["incremental_factors"] if f in saved_ids - ] - result["fallback_factors"] = [ - f for f in result["fallback_factors"] if f in saved_ids - ] - result["factor_reports"] = { - k: v for k, v in result["factor_reports"].items() if k in saved_ids - } - result["n_factors"] = len(result["factor_reports"]) - - windows.append({**pt, **result}) - - return { - "lib": str(Path(lib_root).resolve()), - "n_rows": zoo.manifest.n_rows, - "n_factors": zoo.n_factors, - "append_trade_days": append_trade_days or [1, 2, 3, 5, 10, 20], - "overlap_verify_days": overlap_verify_days, - "warmup_days": warmup_days, - "warmup_retry_days": warmup_retry_days, - "windows": windows, - } diff --git a/seekalpha/factor/zoo/similarity.py b/seekalpha/factor/zoo/similarity.py deleted file mode 100644 index c42e5d9e..00000000 --- a/seekalpha/factor/zoo/similarity.py +++ /dev/null @@ -1,274 +0,0 @@ -"""因子两两截面 Pearson 相似度(逐日横截面相关均值)。""" - -from __future__ import annotations - -import json -from typing import Any - -import numpy as np -import pandas as pd - -from seekalpha.factor.zoo.index import RowIndex -from seekalpha.factor.zoo.types import FactorLibraryPaths -from seekalpha.factor.zoo.zoo import FactorZoo - -SIMILARITY_KIND = "cross_sectional_pearson_mean" - - -def _pearson_ic(a: np.ndarray, b: np.ndarray, *, min_pairs: int = 2) -> float: - x = np.asarray(a, dtype=np.float64) - y = np.asarray(b, dtype=np.float64) - mask = np.isfinite(x) & np.isfinite(y) - if int(mask.sum()) < min_pairs: - return float("nan") - xs = x[mask] - x[mask].mean() - ys = y[mask] - y[mask].mean() - denom = float(np.sqrt((xs * xs).sum() * (ys * ys).sum())) - if denom <= 0.0: - return float("nan") - return float((xs * ys).sum() / denom) - - -def panel_index_from_rows(rows: pd.DataFrame) -> pd.MultiIndex: - dt = pd.to_datetime(rows["datetime"], errors="coerce") - 
inst = rows["instrument"].astype(str) - return pd.MultiIndex.from_arrays([dt, inst], names=["datetime", "instrument"]) - - -def cross_sectional_pearson_series( - a: np.ndarray, - b: np.ndarray, - row_index: RowIndex, - *, - min_pairs: int = 10, -) -> pd.Series: - """逐日横截面 Pearson 相关,返回逐日序列。""" - if len(a) != len(b): - raise ValueError(f"因子长度不一致: {len(a)} vs {len(b)}") - index = panel_index_from_rows(row_index.rows) - fa = pd.Series(np.asarray(a, dtype=np.float32), index=index) - fb = pd.Series(np.asarray(b, dtype=np.float32), index=index) - - rows: list[float] = [] - idx: list[object] = [] - for ts, f_sub in fa.groupby(level="datetime", sort=False): - b_sub = fb.xs(ts, level="datetime") - ic = _pearson_ic( - f_sub.to_numpy(dtype=np.float64, copy=False), - b_sub.to_numpy(dtype=np.float64, copy=False), - min_pairs=min_pairs, - ) - rows.append(ic) - idx.append(ts) - return pd.Series(rows, index=pd.Index(idx, name="datetime"), dtype=float) - - -def cross_sectional_pearson_mean( - a: np.ndarray, - b: np.ndarray, - row_index: RowIndex, - *, - min_pairs: int = 10, -) -> float: - """逐日横截面 Pearson 相关的均值(截面相似度)。""" - daily = cross_sectional_pearson_series(a, b, row_index, min_pairs=min_pairs) - finite = daily[np.isfinite(daily.to_numpy(dtype=float, copy=False))] - if len(finite) == 0: - return float("nan") - return float(finite.mean()) - - -class SimilarityMatrix: - def __init__(self, paths: FactorLibraryPaths, max_factors: int) -> None: - self.paths = paths - self.max_factors = max_factors - self.matrix_path = paths.similarity_dir / "pearson.f32.memmap" - self.meta_path = paths.similarity_dir / "pearson.meta.json" - - def _ensure(self, n_factors: int) -> np.memmap: - self.matrix_path.parent.mkdir(parents=True, exist_ok=True) - if not self.matrix_path.is_file(): - arr = np.memmap( - self.matrix_path, - dtype=np.float32, - mode="w+", - shape=(self.max_factors, self.max_factors), - ) - arr[:] = np.nan - if n_factors > 0: - np.fill_diagonal(arr[:n_factors, :n_factors], 1.0) - 
arr.flush() - self._write_meta(n_factors) - return arr - return np.memmap( - self.matrix_path, - dtype=np.float32, - mode="r+", - shape=(self.max_factors, self.max_factors), - ) - - def _write_meta(self, n_factors: int) -> None: - meta = { - "n_factors": n_factors, - "max_factors": self.max_factors, - "kind": SIMILARITY_KIND, - } - self.meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8") - - def load_meta(self) -> dict: - if not self.meta_path.is_file(): - return {"n_factors": 0, "max_factors": self.max_factors, "kind": SIMILARITY_KIND} - meta = json.loads(self.meta_path.read_text(encoding="utf-8")) - meta.setdefault("kind", SIMILARITY_KIND) - return meta - - def get_matrix(self, n_factors: int) -> np.ndarray: - if not self.matrix_path.is_file(): - return np.zeros((0, 0), dtype=np.float32) - mmap = np.memmap( - self.matrix_path, - dtype=np.float32, - mode="r", - shape=(self.max_factors, self.max_factors), - ) - return np.array(mmap[:n_factors, :n_factors], copy=True) - - def max_cross_sectional_correlation( - self, - zoo: FactorZoo, - candidate_values: np.ndarray, - *, - exclude_factor_id: str | None = None, - min_pairs: int = 10, - ) -> float: - """候选因子与库内因子的最大截面 |corr|。""" - report = self.cross_sectional_neighbor_report( - zoo, - candidate_values, - exclude_factor_id=exclude_factor_id, - min_pairs=min_pairs, - top_k=0, - ) - return float(report["max_abs_corr"]) - - def cross_sectional_neighbor_report( - self, - zoo: FactorZoo, - candidate_values: np.ndarray, - *, - exclude_factor_id: str | None = None, - min_pairs: int = 10, - top_k: int = 3, - ) -> dict[str, Any]: - """候选因子与库内因子的截面相关报告:max |corr| 与 top_k 最相似因子(含 expr)。""" - order = zoo.catalog.list_factor_ids() - if exclude_factor_id is not None: - order = [fid for fid in order if fid != exclude_factor_id] - - corrs: list[tuple[str, float]] = [] - for fid in order: - other = zoo.read_factor(fid) - c = cross_sectional_pearson_mean( - candidate_values, other, zoo.index, min_pairs=min_pairs - ) - if 
np.isfinite(c): - corrs.append((fid, float(c))) - - corrs.sort(key=lambda x: abs(x[1]), reverse=True) - max_abs = max((abs(c) for _, c in corrs), default=0.0) - top_slice = corrs[:top_k] if top_k > 0 else [] - - enriched: list[dict[str, Any]] = [] - for fid, c in top_slice: - meta = zoo.catalog.get(fid) - enriched.append( - { - "factor_id": fid, - "name": meta.name if meta is not None else fid, - "cs_corr": c, - "expr": meta.expr if meta is not None else None, - } - ) - - return { - "kind": SIMILARITY_KIND, - "max_abs_corr": max_abs, - "top_neighbors": enriched, - } - - @staticmethod - def _max_abs_corr_from_neighbors(neighbors: list[dict[str, Any]]) -> float: - vals = [abs(float(nb["cs_corr"])) for nb in neighbors if nb.get("cs_corr") is not None] - return max(vals, default=0.0) - - @staticmethod - def _enrich_neighbors(zoo: FactorZoo, pairs: list[tuple[str, float]]) -> list[dict[str, Any]]: - enriched: list[dict[str, Any]] = [] - for fid, c in pairs: - meta = zoo.catalog.get(fid) - enriched.append( - { - "factor_id": fid, - "name": meta.name if meta is not None else fid, - "cs_corr": c, - "expr": meta.expr if meta is not None else None, - } - ) - return enriched - - def append_factor_correlations( - self, - zoo: FactorZoo, - *, - factor_id: str, - col_idx: int, - values: np.ndarray, - min_pairs: int = 10, - top_k: int = 3, - ) -> dict: - meta = zoo._load_sample_summary_meta() - col_map: dict[str, int] = { - str(k): int(v) for k, v in meta.get("factor_id_to_col", {}).items() - } - mat = self._ensure(max(col_idx + 1, int(meta.get("next_col_idx", col_idx + 1)))) - - existing_order = [fid for fid in zoo.catalog.list_factor_ids() if str(fid) != str(factor_id)] - corrs: list[tuple[str, float]] = [] - for fid in existing_order: - other = zoo.read_factor(fid) - c = cross_sectional_pearson_mean(values, other, zoo.index, min_pairs=min_pairs) - j = col_map.get(fid) - if j is not None and np.isfinite(c): - mat[col_idx, j] = c - mat[j, col_idx] = c - if np.isfinite(c): - 
corrs.append((fid, float(c))) - mat[col_idx, col_idx] = 1.0 - mat.flush() - - n_active = zoo.n_factors - self._write_meta(n_active) - - corrs.sort(key=lambda x: abs(x[1]), reverse=True) - top_neighbors = self._enrich_neighbors(zoo, corrs[:top_k]) - return { - "col_idx": col_idx, - "n_factors": n_active, - "kind": SIMILARITY_KIND, - "max_abs_corr": self._max_abs_corr_from_neighbors(top_neighbors), - "top_neighbors": top_neighbors, - } - - def remove_factor(self, col_idx: int, *, n_active: int) -> None: - if not self.matrix_path.is_file(): - return - mat = np.memmap( - self.matrix_path, - dtype=np.float32, - mode="r+", - shape=(self.max_factors, self.max_factors), - ) - mat[col_idx, :] = np.nan - mat[:, col_idx] = np.nan - mat.flush() - self._write_meta(max(n_active, 0)) diff --git a/seekalpha/factor/zoo/types.py b/seekalpha/factor/zoo/types.py deleted file mode 100644 index bb49db5c..00000000 --- a/seekalpha/factor/zoo/types.py +++ /dev/null @@ -1,188 +0,0 @@ -"""因子库核心类型(股票日频 stock_1d)。""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from enum import Enum -from pathlib import Path -from typing import Any - - -DEFAULT_DATASET = "stock" -DEFAULT_BAR_INTERVAL = "1d" - - -class FactorStatus(str, Enum): - partial = "partial" - full = "full" - materializing = "materializing" - - -@dataclass(frozen=True) -class RowSlice: - start: int - stop: int # exclusive - - def __post_init__(self) -> None: - if self.start < 0 or self.stop < self.start: - raise ValueError(f"非法 RowSlice: {self.start}:{self.stop}") - - -@dataclass(frozen=True) -class TimeShard: - shard_id: str - start_row: int - stop_row: int - datetime_start: str - datetime_end: str - - -@dataclass -class LibraryManifest: - dataset: str - bar_interval: str - universe_path: str - n_rows: int - n_sample_rows: int - max_factors: int - dtype: str = "float32" - index_hash: str = "" - sample_seed: int = 42 - version: int = 1 - extra: dict[str, Any] = field(default_factory=dict) - - def 
to_dict(self) -> dict[str, Any]: - return { - "dataset": self.dataset, - "bar_interval": self.bar_interval, - "universe_path": str(self.universe_path), - "n_rows": self.n_rows, - "n_sample_rows": self.n_sample_rows, - "max_factors": self.max_factors, - "dtype": self.dtype, - "index_hash": self.index_hash, - "sample_seed": self.sample_seed, - "version": self.version, - **self.extra, - } - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> LibraryManifest: - known = { - "dataset", - "bar_interval", - "universe_path", - "panel_path", - "n_rows", - "n_sample_rows", - "n_sketch", - "max_factors", - "dtype", - "index_hash", - "sample_seed", - "sketch_seed", - "version", - "base_interval", - } - extra = {k: v for k, v in data.items() if k not in known} - panel_path = data.get("panel_path", data.get("universe_path", "")) - return cls( - dataset=str(data.get("dataset", DEFAULT_DATASET)), - bar_interval=str(data.get("bar_interval", DEFAULT_BAR_INTERVAL)), - universe_path=str(panel_path), - n_rows=int(data["n_rows"]), - n_sample_rows=int( - data["n_sample_rows"] if "n_sample_rows" in data else data["n_sketch"] - ), - max_factors=int(data["max_factors"]), - dtype=str(data.get("dtype", "float32")), - index_hash=str(data.get("index_hash", "")), - sample_seed=int(data.get("sample_seed", data.get("sketch_seed", 42))), - version=int(data.get("version", 1)), - extra=extra, - ) - - @property - def panel_path(self) -> str: - return str(self.extra.get("panel_path", self.universe_path)) - - -@dataclass(frozen=True) -class FactorMeta: - factor_id: str - name: str - expr: str - col_idx: int - status: FactorStatus - finite_count: int = 0 - created_at: str = "" - extra: dict[str, Any] = field(default_factory=dict) - - -@dataclass(frozen=True) -class FactorLibraryPaths: - root: Path - - @property - def manifest(self) -> Path: - return self.root / "manifest.json" - - @property - def index_dir(self) -> Path: - return self.root / "index" - - @property - def rows_parquet(self) -> Path: 
- return self.index_dir / "rows.parquet" - - @property - def shards_json(self) -> Path: - return self.index_dir / "shards.json" - - @property - def sample_row_ids(self) -> Path: - return self.index_dir / "sample_row_ids.parquet" - - @property - def sample_dir(self) -> Path: - return self.root / "sample" - - @property - def sample_summary_memmap(self) -> Path: - return self.sample_dir / "factor_samples.f32.memmap" - - @property - def sample_summary_meta(self) -> Path: - return self.sample_dir / "factor_samples.meta.json" - - @property - def values_dir(self) -> Path: - return self.root / "values" - - @property - def meta_dir(self) -> Path: - return self.root / "meta" - - @property - def factors_parquet(self) -> Path: - return self.meta_dir / "factors.parquet" - - @property - def expressions_dir(self) -> Path: - return self.root / "expressions" - - @property - def similarity_dir(self) -> Path: - return self.root / "similarity" - - def factor_values_path(self, factor_id: str) -> Path: - return self.values_dir / f"f_{factor_id}.f32.memmap" - - def resolve_sample_row_ids(self) -> Path: - return self.sample_row_ids - - def resolve_sample_summary_memmap(self) -> Path: - return self.sample_summary_memmap - - def resolve_sample_summary_meta(self) -> Path: - return self.sample_summary_meta diff --git a/seekalpha/factor/zoo/zoo.py b/seekalpha/factor/zoo/zoo.py deleted file mode 100644 index 29230672..00000000 --- a/seekalpha/factor/zoo/zoo.py +++ /dev/null @@ -1,403 +0,0 @@ -"""因子库:全量 memmap + 抽样摘要。""" - -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any - -import numpy as np - -from seekalpha.factor.zoo.catalog import FactorCatalog -from seekalpha.factor.zoo.index import RowIndex, verify_index_hash -from seekalpha.factor.zoo.types import ( - FactorLibraryPaths, - FactorStatus, - LibraryManifest, - RowSlice, -) - - -def _load_manifest(path: Path) -> LibraryManifest: - data = json.loads(path.read_text(encoding="utf-8")) - return 
LibraryManifest.from_dict(data) - - -def _finite_count(values: np.ndarray) -> int: - v = np.asarray(values, dtype=np.float32) - return int(np.isfinite(v).sum()) - - -def _create_memmap_1d(path: Path, n_rows: int, values: np.ndarray | None = None) -> np.memmap: - path.parent.mkdir(parents=True, exist_ok=True) - arr = np.memmap(path, dtype=np.float32, mode="w+", shape=(n_rows,)) - arr[:] = np.nan - if values is not None: - if len(values) != n_rows: - raise ValueError(f"values 长度 {len(values)} != n_rows {n_rows}") - arr[:] = np.asarray(values, dtype=np.float32) - arr.flush() - return arr - - -def _open_memmap_1d(path: Path, n_rows: int, mode: str = "r") -> np.memmap: - return np.memmap(path, dtype=np.float32, mode=mode, shape=(n_rows,)) - - -def _create_sample_summary_memmap(path: Path, max_factors: int, n_sample_rows: int) -> np.memmap: - path.parent.mkdir(parents=True, exist_ok=True) - arr = np.memmap(path, dtype=np.float32, mode="w+", shape=(max_factors, n_sample_rows)) - arr[:] = np.nan - arr.flush() - return arr - - -def _open_sample_summary_memmap( - path: Path, max_factors: int, n_sample_rows: int, mode: str = "r" -) -> np.memmap: - return np.memmap(path, dtype=np.float32, mode=mode, shape=(max_factors, n_sample_rows)) - - -def _normalize_sample_meta(meta: dict, *, n_sample_rows: int, max_factors: int) -> dict: - out = dict(meta) - if "n_sample_rows" not in out and "n_sketch" in out: - out["n_sample_rows"] = out["n_sketch"] - out.setdefault("n_sample_rows", n_sample_rows) - out.setdefault("max_factors", max_factors) - out.setdefault("n_factors", 0) - out.setdefault("factor_id_to_col", {}) - return out - - -class FactorZoo: - def __init__( - self, - paths: FactorLibraryPaths, - manifest: LibraryManifest, - index: RowIndex, - catalog: FactorCatalog, - ) -> None: - self.paths = paths - self.manifest = manifest - self.index = index - self.catalog = catalog - - @classmethod - def open(cls, root: Path, *, verify_hash: bool = True) -> FactorZoo: - paths = 
FactorLibraryPaths(root=Path(root).expanduser().resolve()) - if not paths.manifest.is_file(): - raise FileNotFoundError(f"因子库未初始化: {paths.manifest}") - manifest = _load_manifest(paths.manifest) - index = RowIndex.load(paths) - if verify_hash: - verify_index_hash(manifest, index) - catalog = FactorCatalog(paths.factors_parquet) - return cls(paths, manifest, index, catalog) - - def _ensure_sample_summary_files(self) -> None: - memmap_path = self.paths.resolve_sample_summary_memmap() - if not memmap_path.is_file(): - _create_sample_summary_memmap( - self.paths.sample_summary_memmap, - self.manifest.max_factors, - self.index.n_sample_rows, - ) - meta = { - "max_factors": self.manifest.max_factors, - "n_sample_rows": self.index.n_sample_rows, - "n_factors": 0, - "factor_id_to_col": {}, - } - self.paths.sample_summary_meta.write_text(json.dumps(meta, indent=2), encoding="utf-8") - - def _load_sample_summary_meta(self) -> dict: - self._ensure_sample_summary_files() - meta_path = self.paths.resolve_sample_summary_meta() - if meta_path.is_file(): - raw = json.loads(meta_path.read_text(encoding="utf-8")) - return _normalize_sample_meta( - raw, - n_sample_rows=self.index.n_sample_rows, - max_factors=self.manifest.max_factors, - ) - return { - "max_factors": self.manifest.max_factors, - "n_sample_rows": self.index.n_sample_rows, - "n_factors": 0, - "factor_id_to_col": {}, - } - - def _save_sample_summary_meta(self, meta: dict) -> None: - self.paths.sample_summary_meta.write_text(json.dumps(meta, indent=2), encoding="utf-8") - - def append_factor( - self, - *, - factor_id: str, - name: str, - expr: str, - values: np.ndarray, - status: FactorStatus = FactorStatus.full, - extra: dict[str, Any] | None = None, - ) -> int: - n_rows = self.manifest.n_rows - values = np.asarray(values, dtype=np.float32) - if len(values) != n_rows: - raise ValueError(f"values 长度 {len(values)} != n_rows {n_rows}") - - meta = self._load_sample_summary_meta() - col_map: dict[str, int] = {str(k): int(v) 
for k, v in meta.get("factor_id_to_col", {}).items()} - if factor_id in col_map: - raise ValueError(f"factor_id 已存在: {factor_id}") - - col_idx = int(meta.get("next_col_idx", meta.get("n_factors", 0))) - if col_idx >= self.manifest.max_factors: - raise RuntimeError( - f"因子列数已达 max_factors={self.manifest.max_factors},需扩容或 compact" - ) - - out_path = self.paths.factor_values_path(factor_id) - if out_path.is_file(): - raise ValueError(f"全量存储文件已存在: {out_path}") - _create_memmap_1d(out_path, n_rows, values) - - summary = _open_sample_summary_memmap( - self.paths.resolve_sample_summary_memmap(), - self.manifest.max_factors, - self.index.n_sample_rows, - mode="r+", - ) - summary[col_idx, :] = values[self.index.sample_row_ids] - summary.flush() - - col_map[factor_id] = col_idx - meta["factor_id_to_col"] = col_map - meta["next_col_idx"] = col_idx + 1 - meta["n_factors"] = len(col_map) - self._save_sample_summary_meta(meta) - - self.catalog.append( - factor_id=factor_id, - name=name, - expr=expr, - col_idx=col_idx, - status=status, - finite_count=_finite_count(values), - extra=extra, - ) - return col_idx - - def overwrite_factor( - self, - *, - factor_id: str, - name: str, - expr: str, - values: np.ndarray, - status: FactorStatus = FactorStatus.full, - extra: dict[str, Any] | None = None, - ) -> int: - entry = self.catalog.get(factor_id) - if entry is None: - raise KeyError(f"因子不存在,无法覆盖: {factor_id}") - - n_rows = self.manifest.n_rows - values = np.asarray(values, dtype=np.float32) - if len(values) != n_rows: - raise ValueError(f"values 长度 {len(values)} != n_rows {n_rows}") - - col_idx = entry.col_idx - out_path = self.paths.factor_values_path(factor_id) - if out_path.is_file(): - mmap = _open_memmap_1d(out_path, n_rows, mode="r+") - mmap[:] = values - mmap.flush() - else: - _create_memmap_1d(out_path, n_rows, values) - - summary = _open_sample_summary_memmap( - self.paths.resolve_sample_summary_memmap(), - self.manifest.max_factors, - self.index.n_sample_rows, - mode="r+", - 
) - summary[col_idx, :] = values[self.index.sample_row_ids] - summary.flush() - - self.catalog.update( - factor_id, - name=name, - expr=expr, - status=status, - finite_count=_finite_count(values), - extra=extra, - ) - return col_idx - - def extend_factor_values( - self, - factor_id: str, - tail: np.ndarray, - *, - old_n: int, - extra: dict[str, Any] | None = None, - ) -> None: - """尾部追加因子值:保留 [0:old_n) 前缀,写入 tail 至新 manifest.n_rows。""" - entry = self.catalog.get(factor_id) - if entry is None: - raise KeyError(f"因子不存在: {factor_id}") - - tail = np.asarray(tail, dtype=np.float32) - new_n = self.manifest.n_rows - if old_n + len(tail) != new_n: - raise ValueError( - f"extend 长度不匹配: old_n={old_n} + tail={len(tail)} != n_rows={new_n}" - ) - - path = self.paths.factor_values_path(factor_id) - if path.is_file(): - file_n = path.stat().st_size // np.dtype(np.float32).itemsize - if file_n != old_n: - raise ValueError(f"memmap 文件长度 {file_n} != old_n {old_n}") - prefix = np.array( - np.memmap(path, dtype=np.float32, mode="r", shape=(old_n,)), - dtype=np.float32, - copy=True, - ) - else: - prefix = np.full(old_n, np.nan, dtype=np.float32) - - full = np.concatenate([prefix, tail.astype(np.float32, copy=False)]) - _create_memmap_1d(path, new_n, full) - - col_idx = entry.col_idx - summary = _open_sample_summary_memmap( - self.paths.resolve_sample_summary_memmap(), - self.manifest.max_factors, - self.index.n_sample_rows, - mode="r+", - ) - summary[col_idx, :] = full[self.index.sample_row_ids] - summary.flush() - - update_extra = dict(entry.extra or {}) - if extra is not None: - update_extra.update(extra) - self.catalog.update( - factor_id, - name=entry.name, - expr=entry.expr, - status=entry.status, - finite_count=_finite_count(full), - extra=update_extra, - ) - - @property - def n_factors(self) -> int: - return self.catalog.n_factors - - def read_factor( - self, - factor_id: str, - *, - row_slice: RowSlice | None = None, - stride: int = 1, - ) -> np.ndarray: - path = 
self.paths.factor_values_path(factor_id) - if not path.is_file(): - raise KeyError(f"因子不存在: {factor_id}") - mmap = _open_memmap_1d(path, self.manifest.n_rows, mode="r") - if row_slice is None: - data = mmap[::stride] - else: - if row_slice.stop <= row_slice.start: - return np.array([], dtype=np.float32) - data = mmap[row_slice.start : row_slice.stop : stride] - return np.array(data, dtype=np.float32, copy=True) - - def read_factors( - self, - factor_ids: list[str], - *, - row_slice: RowSlice | None = None, - stride: int = 1, - ) -> np.ndarray: - cols = [self.read_factor(fid, row_slice=row_slice, stride=stride) for fid in factor_ids] - if not cols: - return np.zeros((0, 0), dtype=np.float32) - return np.stack(cols, axis=0) - - def read_sample_summaries( - self, factor_ids: list[str] | None = None - ) -> tuple[np.ndarray, list[str]]: - meta = self._load_sample_summary_meta() - col_map: dict[str, int] = {str(k): int(v) for k, v in meta["factor_id_to_col"].items()} - if factor_ids is None: - factor_ids = sorted(col_map.keys(), key=lambda x: col_map[x]) - summary = _open_sample_summary_memmap( - self.paths.resolve_sample_summary_memmap(), - self.manifest.max_factors, - self.index.n_sample_rows, - mode="r", - ) - rows = [] - order: list[str] = [] - for fid in factor_ids: - if fid not in col_map: - raise KeyError(f"因子不在抽样摘要中: {fid}") - order.append(fid) - rows.append(np.array(summary[col_map[fid], :], dtype=np.float32, copy=True)) - if not rows: - return np.zeros((0, self.index.n_sample_rows), dtype=np.float32), [] - return np.stack(rows, axis=0), order - - def extract_sample_from_values(self, values: np.ndarray) -> np.ndarray: - values = np.asarray(values, dtype=np.float32) - if len(values) != self.manifest.n_rows: - raise ValueError(f"values 长度 {len(values)} != n_rows {self.manifest.n_rows}") - return values[self.index.sample_row_ids].copy() - - def delete_factor(self, factor_id: str) -> None: - entry = self.catalog.get(factor_id) - if entry is None: - raise 
KeyError(f"因子不存在: {factor_id}") - - col_idx = entry.col_idx - self.catalog.remove(factor_id) - - meta = self._load_sample_summary_meta() - col_map: dict[str, int] = {str(k): int(v) for k, v in meta.get("factor_id_to_col", {}).items()} - col_map.pop(factor_id, None) - meta["factor_id_to_col"] = col_map - meta["n_factors"] = len(col_map) - if "next_col_idx" not in meta: - meta["next_col_idx"] = max(col_map.values(), default=-1) + 1 - self._save_sample_summary_meta(meta) - - summary = _open_sample_summary_memmap( - self.paths.resolve_sample_summary_memmap(), - self.manifest.max_factors, - self.index.n_sample_rows, - mode="r+", - ) - summary[col_idx, :] = np.nan - summary.flush() - - full_path = self.paths.factor_values_path(factor_id) - if full_path.is_file(): - full_path.unlink() - - from seekalpha.factor.zoo.similarity import SimilarityMatrix - - sim = SimilarityMatrix(self.paths, self.manifest.max_factors) - sim.remove_factor(col_idx, n_active=self.catalog.n_factors) - - def delete_factors(self, factor_ids: list[str]) -> int: - deleted = 0 - for factor_id in factor_ids: - try: - self.delete_factor(factor_id) - deleted += 1 - except KeyError: - continue - return deleted diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index f66dfff4..00000000 --- a/tests/conftest.py +++ /dev/null @@ -1,44 +0,0 @@ -"""共享 pytest fixture。""" - -from __future__ import annotations - -import numpy as np -import pandas as pd -import pytest - - -@pytest.fixture -def mini_hq() -> pd.DataFrame: - """3 只股票 × 5 个交易日的原始 hq 宽表。""" - dates = pd.date_range("2024-01-02", periods=5, freq="B") - instruments = ["000001.SZ", "000002.SZ", "600000.SH"] - rows = [] - for dt in dates: - for i, inst in enumerate(instruments): - base = 10.0 + i - rows.append( - { - "datetime": dt, - "instrument": inst, - "open": base, - "high": base + 0.5, - "low": base - 0.5, - "close": base + 0.1, - "adjfactor": 1.0, - "volume": 1000.0, - "amount": base * 1000, - "float_cap": 1e9, - "tot_cap": 
2e9, - "is_trade": 1, - "not_st": 1, - } - ) - df = pd.DataFrame(rows).set_index(["datetime", "instrument"]) - return df.sort_index() - - -@pytest.fixture -def mini_panel(mini_hq: pd.DataFrame) -> pd.DataFrame: - from seekalpha.data.panel import build_panel_from_hq - - return build_panel_from_hq(mini_hq, universe_mask=False) diff --git a/tests/test_core/__init__.py b/tests/test_core/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/tests/test_core/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tests/test_core/test_core.py b/tests/test_core/test_core.py deleted file mode 100644 index 80778371..00000000 --- a/tests/test_core/test_core.py +++ /dev/null @@ -1,22 +0,0 @@ -"""core 模块测试。""" - -from seekalpha.core.config import load_yaml -from seekalpha.core.hash import panel_index_hash -from seekalpha.core.paths import ROOT - - -def test_root_exists(): - assert ROOT.is_dir() - - -def test_load_data_yaml(): - cfg = load_yaml("data.yaml") - assert "panel" in cfg - assert "tushare" in cfg - - -def test_panel_index_hash_stable(mini_panel): - h1 = panel_index_hash(mini_panel) - h2 = panel_index_hash(mini_panel) - assert h1 == h2 - assert len(h1) == 16 diff --git a/tests/test_data/__init__.py b/tests/test_data/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/tests/test_data/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tests/test_data/test_fundamental_fetch.py b/tests/test_data/test_fundamental_fetch.py deleted file mode 100644 index 0d02db19..00000000 --- a/tests/test_data/test_fundamental_fetch.py +++ /dev/null @@ -1,71 +0,0 @@ -"""fundamental_fetch 单元测试(不调用 Tushare API)。""" - -from __future__ import annotations - -import pandas as pd -import pytest - -from seekalpha.data.fundamental_fetch import ( - disclosure_events_to_wide, - merge_disclosure_wide, - merge_quarterly, - quarter_periods_between, - raw_fina_to_disclosure_events, - raw_fina_to_quarterly, -) - - -def test_quarter_periods_between(): - periods = 
quarter_periods_between("2020-01-01", "2020-12-31") - assert periods == ["20200331", "20200630", "20200930", "20201231"] - - partial = quarter_periods_between("2020-04-01", "2020-09-30") - assert partial == ["20200630", "20200930"] - - cross_year = quarter_periods_between("2023-07-01", "2024-03-31") - assert cross_year == ["20230930", "20231231", "20240331"] - - -def test_quarter_periods_between_invalid_range(): - with pytest.raises(ValueError, match="start 不能晚于 end"): - quarter_periods_between("2024-12-31", "2024-01-01") - - -def test_raw_fina_to_quarterly_and_disclosure(): - raw = pd.DataFrame( - { - "ts_code": ["000001.SZ", "000001.SZ"], - "ann_date": ["20240428", "20240830"], - "end_date": ["20240331", "20240630"], - "roe": [0.12, 0.15], - "debt_to_assets": [0.5, 0.48], - } - ) - q = raw_fina_to_quarterly(raw) - assert q.index.names == ["report_end", "instrument"] - assert "funda_roe" in q.columns - assert q.loc[(pd.Timestamp("2024-03-31"), "000001.SZ"), "funda_roe"] == 0.12 - - events = raw_fina_to_disclosure_events(raw) - wide = disclosure_events_to_wide(events) - assert wide.loc[pd.Timestamp("2024-03-31"), "000001.SZ"] == pd.Timestamp("2024-04-28") - - -def test_merge_quarterly_and_disclosure(): - idx1 = pd.MultiIndex.from_tuples( - [("2023-12-31", "000001.SZ")], - names=["report_end", "instrument"], - ) - idx2 = pd.MultiIndex.from_tuples( - [("2024-03-31", "000001.SZ")], - names=["report_end", "instrument"], - ) - a = pd.DataFrame({"funda_roe": [0.1]}, index=idx1) - b = pd.DataFrame({"funda_roe": [0.2]}, index=idx2) - merged = merge_quarterly(a, b) - assert len(merged) == 2 - - w1 = pd.DataFrame({"000001.SZ": [pd.Timestamp("2024-01-10")]}, index=[pd.Timestamp("2023-12-31")]) - w2 = pd.DataFrame({"000001.SZ": [pd.Timestamp("2024-04-28")]}, index=[pd.Timestamp("2024-03-31")]) - w = merge_disclosure_wide(w1, w2) - assert w.shape == (2, 1) diff --git a/tests/test_data/test_fundamental_pit.py b/tests/test_data/test_fundamental_pit.py deleted file mode 100644 
index e555f004..00000000 --- a/tests/test_data/test_fundamental_pit.py +++ /dev/null @@ -1,152 +0,0 @@ -"""基本面 PIT 展开单测(不调用 Tushare API)。""" - -from __future__ import annotations - -import numpy as np -import pandas as pd -import pytest - -from seekalpha.data.fundamental import ( - FUNDAMENTAL_STATEMENT_COLUMN_MAP, - _disclosure_effective_trade_positions, - append_disclosure_distance_features, - enrich_panel_fundamentals, - expand_quarterly_fundamentals_pit, - quarter_period_start, -) -from seekalpha.data.panel import build_panel_from_hq - - -def _make_hq_panel(tmp_path) -> pd.DataFrame: - """20 个交易日 × 2 只股票。""" - dates = pd.date_range("2024-01-02", periods=20, freq="B") - codes = ["000001.SZ", "000002.SZ"] - rows = [] - for dt in dates: - for i, inst in enumerate(codes): - base = 10.0 + i - rows.append( - { - "datetime": dt, - "instrument": inst, - "open": base, - "high": base + 0.5, - "low": base - 0.5, - "close": base + 0.1, - "adjfactor": 1.0, - "volume": 1000.0, - "amount": base * 1000, - "float_cap": 1e9, - "tot_cap": 2e9, - "is_trade": 1, - "not_st": 1, - } - ) - hq = pd.DataFrame(rows).set_index(["datetime", "instrument"]) - return build_panel_from_hq(hq, universe_mask=False) - - -def _make_disclosure_map_parquet(tmp_path): - report_ends = pd.to_datetime(["2023-12-31", "2024-03-31"]) - wide = pd.DataFrame( - { - "000001.SZ": ["2024-01-10", "2024-01-24"], - "000002.SZ": ["2024-01-12", None], - }, - index=report_ends, - ) - path = tmp_path / "disclosure_map.parquet" - wide.to_parquet(path) - return path - - -def _make_quarterly_fundamentals_parquet(tmp_path): - col = FUNDAMENTAL_STATEMENT_COLUMN_MAP["总资产"] - idx = pd.MultiIndex.from_tuples( - [ - ("2023-12-31", "000001.SZ"), - ("2024-03-31", "000001.SZ"), - ("2023-12-31", "000002.SZ"), - ], - names=["report_end", "instrument"], - ) - df = pd.DataFrame({col: [100.0, 120.0, 200.0]}, index=idx) - path = tmp_path / "fundamentals.parquet" - df.to_parquet(path) - return path - - -def 
test_disclosure_effective_next_trade_day(): - trade_dates = pd.bdate_range("2024-04-15", "2024-04-26") - pos = _disclosure_effective_trade_positions( - trade_dates, - np.array(["2024-04-20"], dtype="datetime64[ns]"), - ) - assert trade_dates[pos[0]] == pd.Timestamp("2024-04-22") - - pos2 = _disclosure_effective_trade_positions( - trade_dates, - np.array(["2024-04-24"], dtype="datetime64[ns]"), - ) - assert trade_dates[pos2[0]] == pd.Timestamp("2024-04-25") - - -def test_disclosure_distance_features(tmp_path): - panel = _make_hq_panel(tmp_path) - disc = _make_disclosure_map_parquet(tmp_path) - panel = append_disclosure_distance_features(panel, disc) - sub = panel.xs("000001.SZ", level="instrument").sort_index() - assert "funda_days_to_disclose" not in panel.columns - - jan10 = sub.loc[pd.Timestamp("2024-01-10")] - jan11 = sub.loc[pd.Timestamp("2024-01-11")] - jan12 = sub.loc[pd.Timestamp("2024-01-12")] - jan25 = sub.loc[pd.Timestamp("2024-01-25")] - - assert pd.isna(jan10["funda_days_since_disclose"]) - assert jan11["funda_days_since_disclose"] == 0 - assert jan12["funda_days_since_disclose"] == 1 - assert jan25["funda_days_since_disclose"] == 0 - - -def test_expand_quarterly_fundamentals_pit(tmp_path): - panel = _make_hq_panel(tmp_path) - disc = _make_disclosure_map_parquet(tmp_path) - funda = _make_quarterly_fundamentals_parquet(tmp_path) - panel = expand_quarterly_fundamentals_pit(panel, funda, disc) - - col = "funda_fs_total_assets" - s1 = panel.xs("000001.SZ", level="instrument").sort_index() - - assert pd.isna(s1.loc[pd.Timestamp("2024-01-10"), col]) - assert s1.loc[pd.Timestamp("2024-01-11"), col] == 100.0 - assert s1.loc[pd.Timestamp("2024-01-23"), col] == 100.0 - assert s1.loc[pd.Timestamp("2024-01-25"), col] == 120.0 - - s2 = panel.xs("000002.SZ", level="instrument").sort_index() - assert pd.isna(s2.loc[pd.Timestamp("2024-01-12"), col]) - assert s2.loc[pd.Timestamp("2024-01-15"), col] == 200.0 - - -def test_quarter_period_start_days(tmp_path): - panel = 
_make_hq_panel(tmp_path) - disc = _make_disclosure_map_parquet(tmp_path) - panel = enrich_panel_fundamentals( - panel, - quarterly_path=_make_quarterly_fundamentals_parquet(tmp_path), - disclosure_path=disc, - ) - sub = panel.xs("000001.SZ", level="instrument").sort_index() - - assert sub.loc[pd.Timestamp("2024-01-02"), "funda_days_since_quarter_start"] == 0 - assert sub.loc[pd.Timestamp("2024-01-03"), "funda_days_since_quarter_start"] == 1 - assert quarter_period_start(pd.Timestamp("2024-02-15")) == pd.Timestamp("2024-01-01") - - -def test_enrich_rejects_duplicate_funda_columns(tmp_path): - panel = _make_hq_panel(tmp_path) - disc = _make_disclosure_map_parquet(tmp_path) - funda = _make_quarterly_fundamentals_parquet(tmp_path) - panel = expand_quarterly_fundamentals_pit(panel, funda, disc) - with pytest.raises(ValueError, match="已含基本面列"): - expand_quarterly_fundamentals_pit(panel, funda, disc) diff --git a/tests/test_data/test_index_members.py b/tests/test_data/test_index_members.py deleted file mode 100644 index edab5ce0..00000000 --- a/tests/test_data/test_index_members.py +++ /dev/null @@ -1,83 +0,0 @@ -"""指数成分缓存(index_members)测试:不联网,用假 pro 覆盖抓取→落盘→解析。""" - -import pandas as pd - -from seekalpha.data import index_members as im - - -class _FakePro: - """按月返回固定成分快照的假 Tushare pro。""" - - def __init__(self, per_month: dict[str, list[str]]): - self.per_month = per_month - self.calls: list[str] = [] - - def index_weight(self, index_code, start_date, end_date): - self.calls.append(start_date) - cons = self.per_month.get(start_date, []) - return pd.DataFrame({"index_code": index_code, "con_code": cons, "trade_date": start_date}) - - -def test_index_members_path(): - assert im.index_members_path("zz1000").name == "000852_SH_members.parquet" - - -def test_members_union_and_covers(): - cache = pd.DataFrame( - { - "trade_date": pd.to_datetime(["2020-01-31", "2020-02-29", "2021-06-30"]), - "instrument": ["a.SZ", "b.SZ", "c.SZ"], - } - ) - assert im.members_union(cache, 
"2020-01-01", "2020-12-31") == ["a.SZ", "b.SZ"] - assert im.members_union(cache, "2019-01-01", "2019-12-31") == [] - assert im._cache_covers(cache, "2020-02-01", "2020-02-15") - assert not im._cache_covers(cache, "2019-01-01", "2019-06-30") - - -def test_merge_and_append(tmp_path): - path = tmp_path / "idx.parquet" - a = pd.DataFrame({"trade_date": pd.to_datetime(["2020-01-31"]), "instrument": ["a.SZ"]}) - im.save_index_members(a, path=path) - im.append_snapshot("zz1000", "2020-02-29", ["b.SZ", "a.SZ"], path=path) - got = im.load_index_members(path=path) - assert len(got) == 3 - assert set(got["instrument"]) == {"a.SZ", "b.SZ"} - - -def test_resolve_cached_fetches_then_reuses(tmp_path): - path = tmp_path / "idx.parquet" - pro = _FakePro( - { - "20200131": ["a.SZ", "b.SZ"], - "20200229": ["b.SZ", "c.SZ"], - } - ) - members = im.resolve_index_members_cached( - "zz1000", "2020-01-01", "2020-02-29", pro=pro, path=path, sleep_sec=0, verbose=False - ) - assert members == ["a.SZ", "b.SZ", "c.SZ"] - assert path.is_file() - first_calls = len(pro.calls) - assert first_calls == 2 - - # 相同区间再次解析:缓存已覆盖,不应再请求 Tushare - members2 = im.resolve_index_members_cached( - "zz1000", "2020-01-01", "2020-02-29", pro=pro, path=path, sleep_sec=0, verbose=False - ) - assert members2 == members - assert len(pro.calls) == first_calls - - -def test_resolve_cached_refresh_forces_fetch(tmp_path): - path = tmp_path / "idx.parquet" - pro = _FakePro({"20200131": ["a.SZ"]}) - im.resolve_index_members_cached( - "zz1000", "2020-01-01", "2020-01-31", pro=pro, path=path, sleep_sec=0, verbose=False - ) - calls_after_first = len(pro.calls) - im.resolve_index_members_cached( - "zz1000", "2020-01-01", "2020-01-31", pro=pro, path=path, sleep_sec=0, verbose=False, - refresh=True, - ) - assert len(pro.calls) > calls_after_first diff --git a/tests/test_data/test_panel.py b/tests/test_data/test_panel.py deleted file mode 100644 index c72e35f1..00000000 --- a/tests/test_data/test_panel.py +++ /dev/null @@ -1,508 
+0,0 @@ -"""data 模块测试。""" - - - -import numpy as np -import pandas as pd - -from seekalpha.core.types import OUTPUT_COLUMNS -from seekalpha.data.market_fetch import ( - _expand_update_dates, - _group_contiguous_trade_dates, - _merge_raw_daily, - _panel_missing_trade_dates, - _select_daily_basic, -) -from seekalpha.data.panel import ( - _rederive_since, - build_panel_from_hq, - count_suspect_adjfactor_rows, - find_adjfactor_jump_instruments, - find_suspect_adjfactor_instruments, - save_panel, - load_panel, - slice_panel, -) - -from seekalpha.data.universe import apply_is_st, mark_not_st - - - - - -def test_output_columns(mini_panel): - - assert list(mini_panel.columns) == OUTPUT_COLUMNS - - - - - -def test_find_suspect_adjfactor_instruments(mini_panel): - - suspects = find_suspect_adjfactor_instruments(mini_panel) - - assert suspects == [] - - - -def test_find_suspect_adjfactor_detects_bad_stock(): - - idx = pd.MultiIndex.from_product( - - [pd.date_range("2024-01-02", periods=3, freq="B"), ["X.SH"]], - - names=["datetime", "instrument"], - - ) - - panel = pd.DataFrame( - - { - - "open": [10.0, 10.0, 10.0], - - "high": [10.5, 10.5, 10.5], - - "low": [9.5, 9.5, 9.5], - - "close": [10.0, 10.0, 10.0], - - "adjfactor": [1.0, 1.0, 2.0], - - "volume": [1000.0, 1000.0, 1000.0], - - "amount": [10000.0, 10000.0, 10000.0], - - }, - - index=idx, - - ) - - assert find_suspect_adjfactor_instruments(panel) == ["X.SH"] - - assert count_suspect_adjfactor_rows(panel, ["X.SH"]) == 2 - - - - - -def test_find_adjfactor_jump_detects_regime_break(): - - idx = pd.MultiIndex.from_product( - - [pd.date_range("2024-01-02", periods=4, freq="B"), ["Y.SH"]], - - names=["datetime", "instrument"], - - ) - - panel = pd.DataFrame( - - { - - "close": [10.0, 10.1, 10.0, 10.2], - - "adjfactor": [1.0, 1.0, 5764.0, 5764.0], - - }, - - index=idx, - - ) - - assert find_adjfactor_jump_instruments(panel) == ["Y.SH"] - - - - - -def test_build_panel_from_hq_shape(mini_hq): - - panel = 
build_panel_from_hq(mini_hq, universe_mask=False) - - assert panel.shape[0] == mini_hq.shape[0] - - assert "adj_close" in panel.columns - - assert "adj_vwap" in panel.columns - - assert "ret" in panel.columns - - - - - -def test_adj_vwap_matches_vwap_times_adjfactor(mini_hq): - - panel = build_panel_from_hq(mini_hq, universe_mask=False) - - expected = panel["vwap"] * panel["adjfactor"] - - np.testing.assert_allclose(panel["adj_vwap"], expected, rtol=1e-6, equal_nan=True) - - # adj_vwap / adj_close 与 vwap / close 同比例(与 OHLC 复权一致) - - ratio_v = (panel["vwap"] / panel["close"]).replace([np.inf, -np.inf], np.nan) - - ratio_a = (panel["adj_vwap"] / panel["adj_close"]).replace([np.inf, -np.inf], np.nan) - - np.testing.assert_allclose(ratio_v, ratio_a, rtol=1e-5, equal_nan=True) - - - - - -def test_slice_panel(mini_panel): - - sliced = slice_panel(mini_panel, start="2024-01-03", end="2024-01-04") - - dt = sliced.index.get_level_values("datetime") - - assert dt.min() >= pd.Timestamp("2024-01-03") - - assert dt.max() <= pd.Timestamp("2024-01-04") - - - - - -def test_mark_not_st(): - - names = pd.Series(["平安银行", "ST某某", "*ST测试"]) - - flags = mark_not_st(names) - - assert flags.tolist() == [1, 0, 0] - - - - - -def test_apply_is_st(): - - df = pd.DataFrame( - - { - - "ts_code": ["000001.SZ", "000002.SZ", "600000.SH"], - - "trade_date": ["20240102", "20240102", "20240102"], - - "close": [1.0, 2.0, 3.0], - - } - - ) - - st_table = pd.DataFrame( - - {"ts_code": ["000002.SZ"], "trade_date": ["20240102"], "is_st": [1]} - - ) - - out = apply_is_st(df, st_table) - - assert out["is_st"].tolist() == [0, 1, 0] - - assert out["not_st"].tolist() == [1, 0, 1] - - - - - -def test_select_daily_basic_filters_codes(): - - basic = pd.DataFrame( - - { - - "ts_code": ["000001.SZ", "000002.SZ", "600000.SH"], - - "trade_date": ["20240102", "20240102", "20240102"], - - "circ_mv": [100.0, 200.0, 300.0], - - "total_mv": [110.0, 220.0, 330.0], - - } - - ) - - out = _select_daily_basic(basic, 
["000001.SZ", "600000.SH"]) - - assert len(out) == 2 - - assert set(out["ts_code"]) == {"000001.SZ", "600000.SH"} - - - - - -def test_merge_raw_daily_sets_float_cap_from_circ_mv(): - - daily = pd.DataFrame( - - { - - "ts_code": ["000001.SZ"], - - "trade_date": ["20240102"], - - "open": [10.0], - - "high": [11.0], - - "low": [9.0], - - "close": [10.5], - - "vol": [1000.0], - - "amount": [10500.0], - - } - - ) - - adj = pd.DataFrame( - - {"ts_code": ["000001.SZ"], "trade_date": ["20240102"], "adj_factor": [1.0]} - - ) - - basic = pd.DataFrame( - - { - - "ts_code": ["000001.SZ"], - - "trade_date": ["20240102"], - - "circ_mv": [123.45], - - "total_mv": [234.56], - - } - - ) - - st_table = pd.DataFrame(columns=["ts_code", "trade_date", "is_st"]) - - out = _merge_raw_daily(daily, adj, basic, st_table) - - assert out.loc[(pd.Timestamp("2024-01-02"), "000001.SZ"), "float_cap"] == 123.45 * 10000 - - - - - -def test_rederive_since_fills_ret_on_appended_day(mini_hq): - - first = build_panel_from_hq(mini_hq.iloc[:6], universe_mask=False) - - last_day = mini_hq.iloc[6:9] - - appended = build_panel_from_hq(last_day, universe_mask=False) - - # 模拟增量:追加日 ret 在孤立计算下会是 NaN - - assert pd.isna(appended["ret"]).all() - - - - merged = pd.concat([first, appended]).sort_index() - - merged = merged[~merged.index.duplicated(keep="last")] - - since = appended.index.get_level_values("datetime").min() - - fixed = _rederive_since(merged, since) - - - - new_dt = since - - inst = "000001.SZ" - - idx = (new_dt, inst) - - assert np.isfinite(fixed.loc[idx, "ret"]) - - - - - -def test_expand_update_dates_includes_prev_trade_day(): - - class FakePro: - - def trade_cal(self, **kwargs): - - return pd.DataFrame({"cal_date": ["20240102", "20240103", "20240104"]}) - - - - fetch, backfill = _expand_update_dates(FakePro(), ["2024-01-04"]) - - assert fetch == ["2024-01-03", "2024-01-04"] - - assert backfill == "2024-01-03" - - - - - -class _FakeTradeCalPro: - OPEN = ["20240102", "20240103", "20240104", 
"20240105"] - - def trade_cal(self, **kwargs): - start = kwargs["start_date"] - end = kwargs["end_date"] - days = [d for d in self.OPEN if start <= d <= end] - return pd.DataFrame({"cal_date": days}) - - - - - -def test_panel_missing_trade_dates_detects_internal_gap(): - pro = _FakeTradeCalPro() - idx = pd.MultiIndex.from_product( - [ - pd.to_datetime(["2024-01-02", "2024-01-03", "2024-01-05"]), - ["000001.SZ"], - ], - names=["datetime", "instrument"], - ) - panel = pd.DataFrame({"adj_close": [1.0, 1.1, 1.2]}, index=idx) - missing = _panel_missing_trade_dates(pro, panel, "2024-01-05") - assert missing == ["2024-01-04"] - - - - - -def test_group_contiguous_trade_dates(): - pro = _FakeTradeCalPro() - ranges = _group_contiguous_trade_dates( - pro, - ["2024-01-02", "2024-01-03", "2024-01-05"], - ) - assert ranges == [("2024-01-02", "2024-01-03"), ("2024-01-05", "2024-01-05")] - - - - - -def test_panel_gap_update_dates_fills_after_panel_max(): - pro = _FakeTradeCalPro() - idx = pd.MultiIndex.from_product( - [pd.to_datetime(["2024-01-03"]), ["000001.SZ"]], - names=["datetime", "instrument"], - ) - panel = pd.DataFrame({"adj_close": [1.0]}, index=idx) - missing = _panel_missing_trade_dates(pro, panel, "2024-01-05") - assert missing == ["2024-01-04", "2024-01-05"] - - - - - -def test_panel_missing_trade_dates_empty_when_already_latest(): - pro = _FakeTradeCalPro() - idx = pd.MultiIndex.from_product( - [ - pd.to_datetime(["2024-01-02", "2024-01-03", "2024-01-04", "2024-01-05"]), - ["000001.SZ"], - ], - names=["datetime", "instrument"], - ) - panel = pd.DataFrame({"adj_close": [1.0, 1.1, 1.2, 1.3]}, index=idx) - missing = _panel_missing_trade_dates(pro, panel, "2024-01-05") - assert missing == [] - - - - - -def test_label_nd_close_to_close_formula(): - dates = pd.date_range("2024-01-02", periods=15, freq="B") - closes = np.arange(10.0, 25.0) # 10, 11, ..., 24 - idx = pd.MultiIndex.from_product([dates, ["X.SH"]], names=["datetime", "instrument"]) - panel = 
pd.DataFrame({"adj_close": closes}, index=idx) - - from seekalpha.data.panel import _calc_label_nd_close_to_close - - label_1d = _calc_label_nd_close_to_close(panel["adj_close"], 1) - # T=day0: (close[T+2]-close[T+1])/(close[T+1]) = (12-11)/11 - assert np.isclose(label_1d.iloc[0], (12.0 - 11.0) / 11.0) - - label_10d = _calc_label_nd_close_to_close(panel["adj_close"], 10) - # T=day0: (close[T+11]-close[T+1])/(close[T+1]) = (21-11)/11 - assert np.isclose(label_10d.iloc[0], (21.0 - 11.0) / 11.0) - assert label_10d.iloc[-10:].isna().all() - - -def test_save_load_panel_roundtrip(mini_panel, tmp_path): - - path = tmp_path / "panel.parquet" - - save_panel(mini_panel, path) - - loaded = load_panel(path) - - assert loaded.shape == mini_panel.shape - - assert list(loaded.columns) == list(mini_panel.columns) - - -def test_build_panel_offline_from_cache(mini_hq, tmp_path): - """离线:save hq 缓存 → build_panel(market_path=...) 复现量价 panel。""" - from seekalpha.data.market_fetch import save_market_hq - from seekalpha.data.panel import build_panel - - hq_path = tmp_path / "daily_hq.parquet" - save_market_hq(mini_hq, hq_path) - - out_path = tmp_path / "panel.parquet" - panel = build_panel( - out_path=out_path, - market_path=hq_path, - universe_mask=False, - verbose=False, - ) - assert list(panel.columns) == OUTPUT_COLUMNS - assert panel.shape[0] == mini_hq.shape[0] - assert out_path.is_file() - - -def test_update_panel_from_hq_appends_and_rederives(mini_hq, tmp_path): - """离线增量:老 panel + 新一日 hq → 尾部 merge 且新日 ret 被重算为有限值。""" - from seekalpha.data.panel import build_panel_from_hq, save_panel, update_panel_from_hq - - # 老 panel:前 4 个交易日 - first_days = mini_hq.iloc[:12] - old_panel = build_panel_from_hq(first_days, universe_mask=False) - panel_path = tmp_path / "panel.parquet" - save_panel(old_panel, panel_path) - - # 增量:第 5 个交易日 - new_hq = mini_hq.iloc[12:15] - backfill_since = first_days.index.get_level_values("datetime").max() - - merged = update_panel_from_hq( - new_hq, - 
backfill_since, - out_path=panel_path, - universe_mask=False, - verbose=False, - ) - - new_dt = new_hq.index.get_level_values("datetime").min() - assert new_dt in merged.index.get_level_values("datetime") - # 新增交易日的 ret 应由 backfill 重算为有限值(衔接上一交易日) - assert np.isfinite(merged.loc[(new_dt, "000001.SZ"), "ret"]) - - diff --git a/tests/test_data/test_tushare_client.py b/tests/test_data/test_tushare_client.py deleted file mode 100644 index 5f65c003..00000000 --- a/tests/test_data/test_tushare_client.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Tushare 客户端测试(不调用 API)。""" - -import os - -import pytest -import requests - - -def test_token_from_env(monkeypatch): - monkeypatch.setenv("TUSHARE_TOKEN", "test_token_abc") - from seekalpha.data import tushare_client - - # 重新加载模块内逻辑 - assert tushare_client._read_token() == "test_token_abc" - - -def test_token_missing_raises(monkeypatch): - monkeypatch.delenv("TUSHARE_TOKEN", raising=False) - from seekalpha.data.tushare_client import _read_token, get_pro - - # 清空 .env 影响:若 .env 存在仍可能有 token,仅测 get_pro 无 token 分支 - monkeypatch.setattr( - "seekalpha.data.tushare_client._read_token", - lambda: "", - ) - with pytest.raises(ValueError, match="TUSHARE_TOKEN"): - get_pro() - - -def test_is_retryable_network_error(): - from seekalpha.data.tushare_client import _is_retryable - - assert _is_retryable(requests.exceptions.ConnectionError("timed out")) - assert _is_retryable(TimeoutError("timed out")) - assert _is_retryable(Exception("抱歉,您每分钟最多访问该接口500次")) - - -def test_is_not_retryable_auth_error(): - from seekalpha.data.tushare_client import _is_retryable - - assert not _is_retryable(Exception("token invalid")) - - -def test_call_with_retry_recovers(monkeypatch): - from seekalpha.data import tushare_client - - tushare_client.configure(max_retries=3, retry_base_delay=0.0, retry_max_delay=0.0) - calls = {"n": 0} - - def flaky(): - calls["n"] += 1 - if calls["n"] < 3: - raise requests.exceptions.ConnectionError("Read timed out.") - return "ok" - - 
assert tushare_client.call_with_retry(flaky, label="flaky") == "ok" - assert calls["n"] == 3 - - -def test_call_with_retry_exhausts(monkeypatch): - from seekalpha.data import tushare_client - - tushare_client.configure(max_retries=2, retry_base_delay=0.0, retry_max_delay=0.0) - - def always_fail(): - raise requests.exceptions.Timeout("timeout") - - with pytest.raises(requests.exceptions.Timeout): - tushare_client.call_with_retry(always_fail, label="always_fail") diff --git a/tests/test_data/test_universe_pool.py b/tests/test_data/test_universe_pool.py deleted file mode 100644 index efa0d930..00000000 --- a/tests/test_data/test_universe_pool.py +++ /dev/null @@ -1,205 +0,0 @@ -"""universe 与股票池拉取测试。""" - - - -from unittest.mock import MagicMock - - - -import pandas as pd - - - -from seekalpha.data.universe import ( - - _members_from_index_member, - - apply_is_st, - - fetch_index_members, - - fetch_index_members_for_dates, - - fetch_st_table, - - resolve_index_code, - -) - - - - - -def test_resolve_index_code(): - - assert resolve_index_code("zz1000") == "000852.SH" - - - - - -def test_members_from_index_member_overlap(): - - df = pd.DataFrame( - - { - - "con_code": ["A.SZ", "B.SZ", "C.SZ", "D.SZ"], - - "in_date": ["20140101", "20160101", "20200101", "20230101"], - - "out_date": ["20151231", None, "20201231", "99991231"], - - } - - ) - - # 2015-06 ~ 2022-12: A(2015末出), B(仍在), C(2020), D(2023才入→不含) - - got = _members_from_index_member(df, "2015-06-01", "2022-12-31") - - assert got == ["A.SZ", "B.SZ", "C.SZ"] - - - - - -def test_fetch_index_members_prefers_index_member(): - - pro = MagicMock() - - pro.index_member.return_value = pd.DataFrame( - - { - - "con_code": ["000001.SZ", "000002.SZ"], - - "in_date": ["20140101", "20140101"], - - "out_date": [None, None], - - } - - ) - - members = fetch_index_members(pro, "zz1000", "2020-01-01", "2024-12-31", verbose=False) - - assert members == ["000001.SZ", "000002.SZ"] - - pro.index_weight.assert_not_called() - - - - - -def 
test_fetch_index_members_fallback_weight(): - - pro = MagicMock() - - pro.index_member.return_value = pd.DataFrame() - - pro.index_weight.return_value = pd.DataFrame( - - { - - "con_code": ["600000.SH"], - - "trade_date": ["20240131"], - - "weight": [0.1], - - } - - ) - - members = fetch_index_members( - - pro, "zz1000", "2024-01-01", "2024-01-31", sleep_sec=0, verbose=False - - ) - - assert "600000.SH" in members - - - - - -def test_fetch_index_members_long_span_uses_monthly_union(): - - pro = MagicMock() - - pro.index_member.return_value = pd.DataFrame() - - pro.index_weight.return_value = pd.DataFrame( - - { - - "con_code": ["600000.SH", "600001.SH"], - - "trade_date": ["20201231", "20201231"], - - "weight": [0.1, 0.1], - - } - - ) - - members = fetch_index_members( - - pro, "zz1000", "2012-01-01", "2026-06-30", sleep_sec=0, verbose=False - - ) - - assert members == ["600000.SH", "600001.SH"] - - assert pro.index_weight.call_count >= 1 - - - - - -def test_fetch_index_members_for_dates(): - pro = MagicMock() - - def fake_weight(**kwargs): - end = kwargs.get("end_date", "") - if end == "20240628": - return pd.DataFrame( - {"con_code": ["000001.SZ", "000002.SZ"], "trade_date": ["20240628", "20240628"]} - ) - return pd.DataFrame() - - pro.index_weight.side_effect = fake_weight - pool = fetch_index_members_for_dates( - pro, "zz1000", ["2024-06-28"], sleep_sec=0, verbose=False - ) - assert pool == {"000001.SZ", "000002.SZ"} - - -def test_fetch_st_table_empty(): - - pro = MagicMock() - - pro.stock_st.return_value = pd.DataFrame() - - out = fetch_st_table(pro, trade_date="20240102") - - assert list(out.columns) == ["ts_code", "trade_date", "is_st"] - - assert out.empty - - - - - -def test_apply_is_st_no_st_records(): - - df = pd.DataFrame({"ts_code": ["A.SZ"], "trade_date": ["20240102"]}) - - out = apply_is_st(df, pd.DataFrame(columns=["ts_code", "trade_date", "is_st"])) - - assert out["is_st"].tolist() == [0] - - assert out["not_st"].tolist() == [1] - - diff --git 
a/tests/test_dsl/__init__.py b/tests/test_dsl/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/tests/test_dsl/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/tests/test_dsl/test_eval.py b/tests/test_dsl/test_eval.py deleted file mode 100644 index 21aedeae..00000000 --- a/tests/test_dsl/test_eval.py +++ /dev/null @@ -1,21 +0,0 @@ -"""DSL 求值测试。""" - -import numpy as np - -from seekalpha.dsl import eval_factor - - -def test_ts_mean(mini_panel): - out = eval_factor("TS_MEAN($adj_close, 3)", mini_panel) - assert len(out) == len(mini_panel) - assert out.notna().any() - - -def test_ret_expr(mini_panel): - out = eval_factor("$ret", mini_panel) - assert out.notna().sum() > 0 - - -def test_cs_zscore(mini_panel): - out = eval_factor("CS_ZSCORE($close)", mini_panel) - assert out.notna().any() diff --git a/tests/test_factor/__init__.py b/tests/test_factor/__init__.py deleted file mode 100644 index ea6273f6..00000000 --- a/tests/test_factor/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""factor 层测试。""" diff --git a/tests/test_factor/conftest.py b/tests/test_factor/conftest.py deleted file mode 100644 index 76cc43f3..00000000 --- a/tests/test_factor/conftest.py +++ /dev/null @@ -1,19 +0,0 @@ -"""factor 测试 fixtures。""" - -from __future__ import annotations - -import pytest - -from seekalpha.factor import FactorZoo, init_library - - -@pytest.fixture -def mini_factorlib(tmp_path, mini_panel): - lib_root = tmp_path / "factorzoo" - init_library( - lib_root, - panel=mini_panel, - n_sample_rows=min(10, len(mini_panel)), - max_factors=8, - ) - return FactorZoo.open(lib_root, verify_hash=True) diff --git a/tests/test_factor/rolling_probe_incremental_realign.py b/tests/test_factor/rolling_probe_incremental_realign.py deleted file mode 100644 index 52af876e..00000000 --- a/tests/test_factor/rolling_probe_incremental_realign.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -"""滚动 probe:模拟多个 append 窗口下的增量 realign overlap 校验(只读,不写盘)。 - -panel 
与因子库已对齐(manifest.n_rows == len(panel))时,用历史 memmap 前缀 -假装「刚 append 了 K 个交易日」,测试增量路径能否通过 overlap。 - -非 pytest 用例;在仓库根目录运行: - - uv run python tests/test_factor/rolling_probe_incremental_realign.py - uv run python tests/test_factor/rolling_probe_incremental_realign.py --windows 1,3,5,10,20 - uv run python tests/test_factor/rolling_probe_incremental_realign.py --factor-id idio_qspread_win_20 - uv run python tests/test_factor/rolling_probe_incremental_realign.py --json -""" - -from __future__ import annotations - -import argparse -import json -import sys -import time -from pathlib import Path - -ROOT = Path(__file__).resolve().parents[2] -sys.path.insert(0, str(ROOT)) - -from seekalpha.core.paths import FACTORZOO_DIR # noqa: E402 -from seekalpha.data.panel import load_panel # noqa: E402 -from seekalpha.factor.types import DEFAULT_INGEST_POLICY, IngestPolicy # noqa: E402 -from seekalpha.factor.zoo.realign import ( # noqa: E402 - DEFAULT_OVERLAP_VERIFY_DAYS, - DEFAULT_WARMUP_DAYS, - DEFAULT_WARMUP_RETRY_DAYS, - rolling_probe_incremental_realign, -) - - -def _parse_int_list(raw: str) -> list[int]: - return [int(x.strip()) for x in raw.split(",") if x.strip()] - - -def _print_summary(report: dict) -> None: - print("=" * 72) - print(f"lib: {report['lib']}") - print( - f"n_rows={report['n_rows']} n_factors={report['n_factors']} " - f"overlap_verify_days={report['overlap_verify_days']} " - f"warmup={report['warmup_days']}/{report['warmup_retry_days']}" - ) - print("-" * 72) - print( - f"{'K_days':>6} {'append_rows':>11} {'update_start':>12} " - f"{'inc':>4} {'fallback':>8} {'status':>12}" - ) - for w in report["windows"]: - n_inc = len(w.get("incremental_factors") or []) - n_fb = len(w.get("fallback_factors") or []) - n_f = w.get("n_factors") or 0 - if n_f == 0: - status = "empty" - elif n_fb == 0: - status = "ALL_OK" - elif n_inc == 0: - status = "ALL_FAIL" - else: - status = "PARTIAL" - print( - f"{w.get('append_trade_days', '?'):>6} " - f"{w.get('append_rows', '?'):>11} " - 
f"{w.get('update_start', '?'):>12} " - f"{n_inc:>4} {n_fb:>8} {status:>12}" - ) - - print("-" * 72) - print("因子明细(仅列出 fallback 窗口):") - for w in report["windows"]: - fb = w.get("fallback_factors") or [] - if not fb: - continue - print( - f" K={w.get('append_trade_days')} update={w.get('update_start')} " - f"fallback={fb}" - ) - for fid in fb[:3]: - fr = (w.get("factor_reports") or {}).get(fid, {}) - samples = fr.get("samples") or [] - if samples: - s0 = samples[0] - print( - f" ~{fid}: n_mismatch={fr.get('n_mismatch')} " - f"sample={s0.get('datetime')} {s0.get('instrument')} " - f"stored={s0.get('stored')} computed={s0.get('computed')}" - ) - print("=" * 72) - - -def main() -> None: - parser = argparse.ArgumentParser(description="滚动 probe 增量 realign(只读)") - parser.add_argument("--lib", type=Path, default=FACTORZOO_DIR) - parser.add_argument("--panel", type=Path, default=None) - parser.add_argument( - "--windows", - type=str, - default="1,2,3,5,10,20", - help="模拟尾部追加的交易日个数,逗号分隔", - ) - parser.add_argument("--warmup-days", type=int, default=DEFAULT_WARMUP_DAYS) - parser.add_argument("--warmup-retry-days", type=int, default=DEFAULT_WARMUP_RETRY_DAYS) - parser.add_argument("--overlap-verify-days", type=int, default=DEFAULT_OVERLAP_VERIFY_DAYS) - parser.add_argument("--train-start", type=str, default=DEFAULT_INGEST_POLICY.train_start) - parser.add_argument("--factor-id", type=str, default=None, help="仅测指定因子") - parser.add_argument("--json", action="store_true", help="输出完整 JSON") - args = parser.parse_args() - - from seekalpha.factor import FactorZoo - - zoo = FactorZoo.open(args.lib, verify_hash=False) - panel_path = args.panel or Path(zoo.manifest.panel_path) - panel = load_panel(panel_path).sort_index() - - policy = IngestPolicy(train_start=args.train_start) - factor_ids = [args.factor_id] if args.factor_id else None - - t0 = time.perf_counter() - report = rolling_probe_incremental_realign( - args.lib, - panel=panel, - append_trade_days=_parse_int_list(args.windows), 
- policy=policy, - warmup_days=args.warmup_days, - warmup_retry_days=args.warmup_retry_days, - overlap_verify_days=args.overlap_verify_days, - factor_ids=factor_ids, - ) - report["elapsed_sec"] = round(time.perf_counter() - t0, 1) - report["panel_path"] = str(panel_path) - - if args.json: - print(json.dumps(report, ensure_ascii=False, indent=2, default=str)) - else: - _print_summary(report) - print(f"elapsed={report['elapsed_sec']}s(只读 probe,未修改 factorzoo)") - - -if __name__ == "__main__": - main() diff --git a/tests/test_factor/test_eval_metrics.py b/tests/test_factor/test_eval_metrics.py deleted file mode 100644 index 837b2af0..00000000 --- a/tests/test_factor/test_eval_metrics.py +++ /dev/null @@ -1,10 +0,0 @@ -"""因子评估 metrics 测试。""" - -from seekalpha.factor import evaluate_factor - - -def test_evaluate_factor_on_mini_panel(mini_panel): - metrics = evaluate_factor("$ret", mini_panel) - assert "ic" in metrics - assert "coverage" in metrics - assert metrics["n_days"] >= 0 diff --git a/tests/test_factor/test_incremental_realign.py b/tests/test_factor/test_incremental_realign.py deleted file mode 100644 index d59f9655..00000000 --- a/tests/test_factor/test_incremental_realign.py +++ /dev/null @@ -1,196 +0,0 @@ -"""增量 realign 测试。""" - -from __future__ import annotations - -from pathlib import Path - -import numpy as np -import pytest - -from seekalpha.data.panel import slice_panel -from seekalpha.factor import FactorZoo, ingest_factor, init_library -from seekalpha.factor.zoo.index import verify_index_prefix_stable -from seekalpha.factor.zoo.realign import ( - incremental_realign_factorlib_to_panel, - list_append_boundary_old_n, - overlap_row_ids, - verify_overlap_exact, -) - - -def _init_lib(panel, tmp_path: Path) -> Path: - lib_root = tmp_path / "factorzoo" - init_library( - lib_root, - panel=panel, - panel_path=tmp_path / "panel.parquet", - n_sample_rows=min(10, len(panel)), - max_factors=8, - ) - return lib_root - - -def 
test_verify_index_prefix_stable_append(mini_panel, tmp_path): - old = slice_panel(mini_panel, start="2024-01-02", end="2024-01-04") - lib_root = _init_lib(old, tmp_path) - zoo = FactorZoo.open(lib_root) - from seekalpha.factor.zoo.realign import _candidate_rows_from_panel - - new_rows = _candidate_rows_from_panel(mini_panel) - assert verify_index_prefix_stable(zoo.index.rows, new_rows, len(old)) - - -def test_incremental_realign_matches_full_tail(mini_panel, tmp_path): - old = slice_panel(mini_panel, start="2024-01-02", end="2024-01-04") - lib_root = _init_lib(old, tmp_path) - zoo = FactorZoo.open(lib_root) - ingest_factor( - zoo, - factor_id="ret_factor", - name="ret", - expr="$ret", - panel=old.sort_index(), - ) - - info = incremental_realign_factorlib_to_panel( - lib_root, - panel=mini_panel.sort_index(), - panel_path=tmp_path / "panel.parquet", - warmup_days=240, - warmup_retry_days=480, - ) - assert info["mode"] == "incremental" - assert info["incremental_factors"] == ["ret_factor"] - assert info["fallback_factors"] == [] - - zoo_inc = FactorZoo.open(lib_root) - inc_vals = zoo_inc.read_factor("ret_factor") - - lib_full = tmp_path / "factorzoo_full" - init_library( - lib_full, - panel=mini_panel, - panel_path=tmp_path / "panel.parquet", - n_sample_rows=min(10, len(mini_panel)), - max_factors=8, - ) - zoo_full = FactorZoo.open(lib_full) - ingest_factor( - zoo_full, - factor_id="ret_factor", - name="ret", - expr="$ret", - panel=mini_panel.sort_index(), - ) - full_vals = zoo_full.read_factor("ret_factor") - - old_n = len(old) - assert np.allclose(inc_vals[:old_n], zoo.read_factor("ret_factor"), equal_nan=True) - assert np.allclose(inc_vals[old_n:], full_vals[old_n:], equal_nan=True) - - -def test_incremental_fallback_on_prefix_mismatch(mini_panel, tmp_path): - old = slice_panel(mini_panel, start="2024-01-02", end="2024-01-04") - lib_root = _init_lib(old, tmp_path) - zoo = FactorZoo.open(lib_root) - ingest_factor( - zoo, - factor_id="ret_factor", - name="ret", - 
expr="$ret", - panel=old.sort_index(), - ) - - # 在旧日期段插入新 instrument,破坏 index 前缀 - import pandas as pd - - insert = mini_panel.iloc[[0]].copy() - insert.index = pd.MultiIndex.from_tuples( - [(pd.Timestamp("2024-01-02"), "NEW.XX")], - names=["datetime", "instrument"], - ) - tail = slice_panel(mini_panel, start="2024-01-05", end="2024-01-08") - broken = pd.concat([old, insert, tail]).sort_index() - - info = incremental_realign_factorlib_to_panel( - lib_root, - panel=broken, - panel_path=tmp_path / "panel.parquet", - ) - assert info["mode"] == "full" - assert info.get("fallback_reason") == "index_prefix_unstable" - - -def test_list_append_boundary_old_n(mini_panel): - from seekalpha.factor.zoo.index import _panel_to_index_frame, build_row_index - from seekalpha.factor.zoo.realign import list_append_boundary_old_n - - rows = build_row_index(_panel_to_index_frame(mini_panel)) - points = list_append_boundary_old_n(rows, append_trade_days=[1, 2]) - assert points - assert all(0 < p["old_n"] < p["new_n"] for p in points) - - -def test_probe_incremental_at_old_n(mini_panel, tmp_path): - old = slice_panel(mini_panel, start="2024-01-02", end="2024-01-04") - lib_root = _init_lib(mini_panel, tmp_path) - zoo = FactorZoo.open(lib_root) - ingest_factor( - zoo, - factor_id="ret_factor", - name="ret", - expr="$ret", - panel=mini_panel.sort_index(), - ) - pt = list_append_boundary_old_n(zoo.index.rows, append_trade_days=[2])[0] - from seekalpha.factor.zoo.realign import probe_incremental_realign_at_old_n - - result = probe_incremental_realign_at_old_n( - zoo, - mini_panel.sort_index(), - old_n=int(pt["old_n"]), - ) - assert result["incremental_factors"] == ["ret_factor"] - assert result["fallback_factors"] == [] - - -def test_overlap_row_ids_last_k_days(mini_panel): - import pandas as pd - - from seekalpha.factor.zoo.index import _panel_to_index_frame, build_row_index - - rows = build_row_index(_panel_to_index_frame(mini_panel)) - old_n = len(slice_panel(mini_panel, 
start="2024-01-02", end="2024-01-04")) - update_start = pd.Timestamp(rows.iloc[old_n]["datetime"]) - - ids, verify_start = overlap_row_ids( - rows, - old_n=old_n, - update_start=update_start, - overlap_verify_days=2, - ) - dt = pd.to_datetime(rows.loc[rows["row_id"].isin(ids), "datetime"]) - unique_days = sorted(dt.unique()) - assert len(unique_days) <= 2 - assert all(pd.Timestamp(d) < update_start for d in unique_days) - assert pd.Timestamp(unique_days[0]) >= verify_start - - -def test_verify_overlap_exact_nan_and_finite(): - import pandas as pd - - stored = np.array([1.0, np.nan, 2.0], dtype=np.float32) - computed = np.array([1.0, np.nan, 3.0], dtype=np.float32) - rows = __import__("pandas").DataFrame( - { - "row_id": [0, 1, 2], - "datetime": ["2024-01-01", "2024-01-02", "2024-01-03"], - "instrument": ["A", "A", "A"], - } - ) - ok, rep = verify_overlap_exact(stored, computed, np.array([0, 1, 2]), index_rows=rows) - assert not ok - assert rep["n_mismatch"] == 1 - - ok2, _ = verify_overlap_exact(stored, computed, np.array([0, 1]), index_rows=rows) - assert ok2 diff --git a/tests/test_factor/test_ingest_smoke.py b/tests/test_factor/test_ingest_smoke.py deleted file mode 100644 index a92315c9..00000000 --- a/tests/test_factor/test_ingest_smoke.py +++ /dev/null @@ -1,29 +0,0 @@ -"""因子入库 smoke 测试。""" - -from pathlib import Path - -from seekalpha.factor import ingest_factor, load_registry - -ROOT = Path(__file__).resolve().parents[2] -REGISTRY = ROOT / "configs" / "factors" / "registry.example.json" - - -def test_registry_loads(mini_factorlib): - reg = load_registry(REGISTRY, repo_root=ROOT) - assert "ret_factor" in reg - assert reg["ret_factor"]["expression"] == "$ret" - - -def test_ingest_ret_factor(mini_factorlib, mini_panel): - zoo = mini_factorlib - reg = load_registry(REGISTRY, repo_root=ROOT) - spec = reg["ret_factor"] - result = ingest_factor( - zoo, - factor_id="ret_factor", - name=str(spec["name"]), - expr=str(spec["expression"]), - 
panel=mini_panel.sort_index(), - ) - assert result.stored, result.skipped_reason - assert zoo.catalog.get("ret_factor") is not None diff --git a/tests/test_factor/test_mining.py b/tests/test_factor/test_mining.py deleted file mode 100644 index bcce1de7..00000000 --- a/tests/test_factor/test_mining.py +++ /dev/null @@ -1,125 +0,0 @@ -"""mining 包最小单测:算子清单、prompt、种子因子。""" - -from __future__ import annotations - -from pathlib import Path - -from seekalpha.dsl.catalog import list_operator_names, operator_catalog_markdown -from seekalpha.factor.mining.console import _metrics_parts -from seekalpha.factor.mining.prompts import build_system_prompt -from seekalpha.factor.mining.seed_factors import build_user_message_with_seed_factors - - -def test_build_user_message_with_seed_factors(tmp_path: Path) -> None: - dsl = tmp_path / "my_factor.dsl" - dsl.write_text("raw = TS_MEAN($adj_close, 20)\nSUBTRACT($adj_close, raw)", encoding="utf-8") - msg = build_user_message_with_seed_factors( - "继续优化 IC", - [dsl], - repo_root=tmp_path, - ) - assert "初始种子因子" in msg - assert "my_factor" in msg - assert "TS_MEAN($adj_close, 20)" in msg - assert "继续优化 IC" in msg - assert "eval_on_train_set" in msg - - -def test_operator_catalog_non_empty() -> None: - names = list_operator_names() - assert names and all(n.isupper() or "_" in n for n in names) - md = operator_catalog_markdown() - assert "`TS_MEAN" in md - - -def test_system_prompt_sections() -> None: - prompt = build_system_prompt() - assert "因子构建接口" in prompt - assert "eval_on_train_set" in prompt - assert "submit_factor" in prompt - assert "会话完成条件" in prompt - assert "stored=true" in prompt - assert "cs_pearson_autocorr" in prompt - assert "mls_fmb" in prompt - assert "mean_rho" in prompt - assert "TS_MEAN" in prompt - assert "@1w" in prompt - assert "$funda_roe" in prompt - assert "funda_days_since_disclose" in prompt - assert "tool_calls" in prompt - bare = build_system_prompt(include_operator_catalog=False) - assert "未注入算子清单" in bare - 
no_submit = build_system_prompt(enable_submit=False) - assert "未启用" in no_submit and "submit_factor" in no_submit - - -def test_metrics_parts_includes_mls_fmb() -> None: - parts = _metrics_parts( - { - "ic": 0.0273, - "icir": 0.5112, - "rank_ic": 0.0285, - "factor_coverage": 0.9071, - "cs_pearson_autocorr": 0.9581, - "n_days": 670, - "n_instruments": 2405, - "factor_skewness": -0.002, - "factor_kurtosis": 0.4632, - "decile_mean_label": [ - {"decile": 1, "mean_label": -0.0067}, - {"decile": 10, "mean_label": 0.0029}, - ], - "mls_fmb": { - "mean_rho": -0.0173, - "nw_t_rho": -0.3873, - "nw_t_ls": 1.116, - "mls": -0.0265, - }, - }, - {"n_months": 33, "mean_monthly_ic": 0.0275, "share_months_ic_positive": 0.758}, - None, - ) - text = " ".join(parts) - assert "ρ=-0.0173" in text - assert "NWρ=-0.3873" in text - assert "NWls=1.116" in text - assert "MLS=-0.0265" in text - - -def test_system_prompt_includes_session_label() -> None: - prompt = build_system_prompt(label_col="label_10d_close_to_close", include_operator_catalog=False) - assert "本次会话 label 列:`label_10d_close_to_close`" in prompt - assert "10 日持有" in prompt - assert "长持有 label 提示" in prompt - assert "label 选用建议" in prompt - assert "label_1d_close_to_close" in prompt - assert "仅作**相对参考**" in prompt or "相对参考" in prompt - - -def test_mining_stream_observer_emits_agent_blocks() -> None: - from seekalpha.factor.mining.cli_stream import MiningStreamObserver - - events: list[tuple[str, dict]] = [] - - def _capture(event: str, payload: dict) -> None: - events.append((event, payload)) - - obs = MiningStreamObserver(emit=_capture, turn=2) - obs.on_thinking_start() - obs.on_thinking_delta("分析 IC ") - obs.on_thinking_delta("趋势") - obs.on_thinking_end() - obs.on_text_delta("继续迭代") - obs.on_text_end() - obs.on_tool_call_start("tc1", "eval_on_train_set") - obs.on_tool_call_delta("tc1", '{"factor_name": "x"}') - obs.on_tool_call_ready("tc1") - obs.on_tool_result_delta("tc1", '{"ok": true}') - obs.on_tool_result_end("tc1") - - 
assert events[0] == ("agent_thinking", {"turn": 2, "content": "分析 IC 趋势"}) - assert events[1] == ("assistant_message", {"turn": 2, "content": "继续迭代"}) - assert events[2][0] == "assistant_tool_call" - assert events[2][1]["name"] == "eval_on_train_set" - assert events[3][0] == "tool_results" - assert events[3][1]["results"][0]["result"]["ok"] is True diff --git a/tests/test_factor/test_mining_parallel_eval.py b/tests/test_factor/test_mining_parallel_eval.py deleted file mode 100644 index 1ddaba06..00000000 --- a/tests/test_factor/test_mining_parallel_eval.py +++ /dev/null @@ -1,43 +0,0 @@ -"""env_settings 与 StockEvalService 并行评估配置。""" - -from __future__ import annotations - -import pytest - -from seekalpha.factor.mining.env_settings import ( - DEFAULT_MAX_PARALLEL_EVAL, - ENV_MAX_PARALLEL_EVAL, - parse_max_parallel_eval, - resolve_max_parallel_eval, -) -from seekalpha.factor.mining.service import StockEvalService - - -def test_parse_max_parallel_eval_defaults(): - assert parse_max_parallel_eval("") == DEFAULT_MAX_PARALLEL_EVAL - assert parse_max_parallel_eval(None) == DEFAULT_MAX_PARALLEL_EVAL - assert parse_max_parallel_eval("4") == 4 - - -def test_parse_max_parallel_eval_invalid(): - with pytest.raises(ValueError, match=ENV_MAX_PARALLEL_EVAL): - parse_max_parallel_eval("0") - - -def test_resolve_max_parallel_eval_override(): - assert resolve_max_parallel_eval(3) == 3 - - -def test_resolve_max_parallel_eval_env(monkeypatch): - monkeypatch.setenv(ENV_MAX_PARALLEL_EVAL, "5") - assert resolve_max_parallel_eval() == 5 - assert resolve_max_parallel_eval(2) == 2 - - -def test_stock_eval_service_max_parallel_eval(monkeypatch): - monkeypatch.delenv(ENV_MAX_PARALLEL_EVAL, raising=False) - svc = StockEvalService(max_parallel_eval=2) - assert svc.max_parallel_eval == 2 - - svc_default = StockEvalService() - assert svc_default.max_parallel_eval == DEFAULT_MAX_PARALLEL_EVAL diff --git a/tests/test_factor/test_mls_thresholds.py b/tests/test_factor/test_mls_thresholds.py deleted 
file mode 100644 index a14b43e0..00000000 --- a/tests/test_factor/test_mls_thresholds.py +++ /dev/null @@ -1,20 +0,0 @@ -"""MLS-FMB prompt 门槛加载测试。""" - -from __future__ import annotations - -from seekalpha.factor.mining.mls_thresholds import mls_fmb_prompt_thresholds, mls_fmb_thresholds_markdown -from seekalpha.factor.mining.prompts import build_system_prompt - - -def test_mls_fmb_prompt_thresholds_loaded() -> None: - th = mls_fmb_prompt_thresholds() - assert th["train"]["mean_rho"] > 0 - assert th["val"]["nw_t_rho"] >= th["train"]["nw_t_rho"] * 0.8 - - -def test_system_prompt_includes_mls_fmb_thresholds() -> None: - prompt = build_system_prompt(include_operator_catalog=False) - assert "mls_fmb" in prompt - assert "mean_rho" in prompt - assert "nw_t_rho" in prompt - assert "25% 分位" in prompt or "25% 分位" in mls_fmb_thresholds_markdown() diff --git a/tests/test_factor/test_report.py b/tests/test_factor/test_report.py deleted file mode 100644 index ddfc413e..00000000 --- a/tests/test_factor/test_report.py +++ /dev/null @@ -1,29 +0,0 @@ -"""报告格式化测试。""" - -from seekalpha.factor.report import format_factor_report_text - - -def test_format_factor_report_text(): - text = format_factor_report_text( - { - "eval_start": "2024-06-01", - "eval_end": "2026-05-31", - "label_col": "label_1d_close_to_close", - "n_days": 388, - "ic": -0.003, - "icir": -0.0437, - "rank_ic": -0.0081, - "coverage": 0.9915, - "cs_pearson_autocorr": 0.931, - "mls_fmb": {"mean_rho": -0.0135, "mls": 0.012, "n_days_rho": 388}, - "decile_mean_label": [ - {"decile": 1, "mean_label": -0.001}, - {"decile": 10, "mean_label": 0.002}, - ], - } - ) - assert "因子评估报告" in text - assert "IC" in text - assert "MLS / FMB" in text - assert "D 1" in text - assert "D10" in text diff --git a/tests/test_factor/test_zoo_init.py b/tests/test_factor/test_zoo_init.py deleted file mode 100644 index d1c7f3c3..00000000 --- a/tests/test_factor/test_zoo_init.py +++ /dev/null @@ -1,18 +0,0 @@ -"""factorzoo 初始化测试。""" - -from 
seekalpha.factor import init_library -from seekalpha.factor.zoo import FactorZoo - - -def test_init_library(mini_panel, tmp_path): - lib = tmp_path / "lib" - _, manifest, index = init_library( - lib, - panel=mini_panel, - n_sample_rows=10, - max_factors=4, - ) - assert manifest.n_rows == len(mini_panel) - assert len(index.rows) == len(mini_panel) - zoo = FactorZoo.open(lib) - assert zoo.n_factors == 0 diff --git a/uv.lock b/uv.lock deleted file mode 100644 index b4352851..00000000 --- a/uv.lock +++ /dev/null @@ -1,2948 +0,0 @@ -version = 1 -revision = 1 -requires-python = ">=3.11" -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version < '3.12' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version < '3.12' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", - "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'", -] - -[[package]] -name = "agentscope" -version = "2.0.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "aiofiles" }, - { name = "aioitertools" }, - { name = "anthropic" }, - { name = "dashscope" }, - { name = "docstring-parser" }, - { name = "filetype" }, - { name = "httpx" }, - { name = "jinja2" }, - { name = "json-repair" }, - { name = "json5" }, - { name = "jsonschema" }, - { name = "mcp" }, - { name = "numpy" 
}, - { name = "openai" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp" }, - { name = "opentelemetry-sdk" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "python-datauri" }, - { name = "python-frontmatter" }, - { name = "python-socketio" }, - { name = "shortuuid" }, - { name = "tree-sitter" }, - { name = "tree-sitter-bash" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/12/4ed72815f9bb06fd869eaf3c22765421ec263364ca52efc506fcad5cc997/agentscope-2.0.3.tar.gz", hash = "sha256:6359380e2dd0995a025d824468c4ab1b0464f92077d38333b561ab635c2956ce", size = 521636 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/0b/9698adbd8b0887e8ce2162a356dc1be8f9f4d090be5255234ed8a2550edd/agentscope-2.0.3-py3-none-any.whl", hash = "sha256:070cc4cec54aa0fb0fc66afaaa41cde2c04b18af6c3755a075c4bc7853ab9766", size = 697856 }, -] - -[[package]] -name = "aiofiles" -version = "25.1.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668 }, -] - -[[package]] -name = "aiohappyeyeballs" -version = "2.6.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/c6/61a2d7b7572279226bb2e7f61d7a19ca7c90da0329c93fa0d560cbf288d8/aiohappyeyeballs-2.6.2.tar.gz", hash = "sha256:e202810ee718bd01fc6ef49e8ea53d023d5cb6b581076d7925aa499fa55dbe64", size = 22591 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/5f/fc/a7bf5b6e4e617b45f90f2d9d2a68519c249c81dd4fc2658c7a2a61c4f4b7/aiohappyeyeballs-2.6.2-py3-none-any.whl", hash = "sha256:4708045e2d7a6c6bdf8aafa8ed39649eaf926a4543b54560659129e3365953c4", size = 15062 }, -] - -[[package]] -name = "aiohttp" -version = "3.14.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "aiohappyeyeballs" }, - { name = "aiosignal" }, - { name = "attrs" }, - { name = "frozenlist" }, - { name = "multidict" }, - { name = "propcache" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, - { name = "yarl" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/78/8ea7308cac6934de8c74a14f3d5f65d1c89287426688be79538d0e5c013d/aiohttp-3.14.1.tar.gz", hash = "sha256:307f2cff90a764d329e77040603fa032db89c5c24fdad50c4c15334cba744035", size = 7955794 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/dd/bf526e6f0a1120dd6f2df2e97bacfe4d358f13d17a0ff5847301a1375a51/aiohttp-3.14.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:aa00140699487bd435fde4342d85c94cb256b7cd3a5b9c3396c67f19922afda2", size = 765225 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/e1/a2872aa55495a70f61310d411541c6ee23812d9a884e000c716e1bc3edbf/aiohttp-3.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c1af67559445498b502030c35c59db59966f47041ca9de5b4e707f86bd10b5f", size = 518743 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/e7/c60c7b209e509cc787de3cea0550a518538cfc08003e1c1e14c1c63fff71/aiohttp-3.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d44ec478e713ee7f29b439f7eb8dc2b9d4079e11ae114d2c2ac3d5daf30516c8", size = 514139 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/8d/614ace2f579702c9840ab1e1447fd8509e35b0b904f7196418fa2f57b25d/aiohttp-3.14.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:d3b1a184a9a8f548a6b73f1e26b96b052193e4b3175ed7342aaf1151a1f00a04", size = 1784088 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/e0/726e90f99542bf292f81a96a12cc4847deb86f3ccf62c6f4014a201f4d33/aiohttp-3.14.1-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:5f2504bc0322437c9a1ff6d3333ca56c7477b727c995f036b976ae17b98372c8", size = 1737835 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/4b/d176d5c4db9d33dacf0543102ea59503bc1d528af4cfd0b719949ca49389/aiohttp-3.14.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:73f05ea02013e02512c3bf42714f1208c57168c779cc6fe23516e4543089d0a6", size = 1842801 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/d6/5a99b563690ea0cbed912ae94a2ce33993a5709a651a3a4fe761e7dd973a/aiohttp-3.14.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:797457503c2d426bee06eef808d07b31ede30b65e054444e7de64cad0061b7af", size = 1929992 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/7f/a987b14a3859094b3cea3f4825219c3e5536242564af6e3f9c2f6c994eb2/aiohttp-3.14.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b821a1f7dedf7e37450654e620038ac3b2e81e8fa6ea269337e97101978ec730", size = 1786989 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/1a/420e5c85a3e73349372ed22ce0b6af86bfa6ce16a4b20a64a2e94608c781/aiohttp-3.14.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4cd96b5ba05d67ed0cf00b5b405c8cd99586d8e3481e8ee0a831057591af7621", size = 1640129 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/80/18a592ed3be0a402cc03670bd72ee1f8563ddbe1d8d5542dbf868f274136/aiohttp-3.14.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d459b98a932296c6f0e94f87511a0b1b90a8a02c30a50e60a297619cd5a58ee", size = 1756576 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ec/0b/8b3d5713373858ff71a617daf6e3b0e81ad63e79d09a3cf2f6b6b983939c/aiohttp-3.14.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:764457a7be60825fb770a644852ff717bcbb5042f189f2bd16df61a81b3f6573", size = 1754668 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/49/fd564575cf225821d7ba5a117cb8bc27213d8a7e1811162afb43ae077039/aiohttp-3.14.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f7a16ef45b081454ef844502d87a848876c490c4cb5c650c230f6ec79ed2c1e7", size = 1817019 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/1b/e850c9ae6fc91356552ae668bb6c51e93fa29c8aef13398a10b56678557f/aiohttp-3.14.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2fbc3ed048b3475b9f0cbcb9978e9d2d3511acd91ead203af26ed9f0056004cf", size = 1631638 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/94/3c337ba72451a89806ace6f75bddc92bafc5b8d53d90115a512858024b63/aiohttp-3.14.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bedb0cd073cc2dc035e30aeb99444389d3cd2113afe4ef9fcd23d439f5bade85", size = 1835660 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/9c/9c18cf367a0498212d9ba7daf990b504a5e8ae064cda4b504e2647c89c03/aiohttp-3.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b6feea921016eb3d4e04d65fc4e9ca402d1a3801f562aef94989f54694917af3", size = 1775698 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/63/a251a9d2a6cb45065b2ddc0bde2b3dd10108740a9a42f632c66405a761a2/aiohttp-3.14.1-cp311-cp311-win32.whl", hash = "sha256:313701e488100074ce99850404ee36e741abf6330179fec908a1944ecf570126", size = 458386 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/ca/69274c51dcd6e8947d77b2806cf47a4a15f2c846e2cbeb1882547d3da283/aiohttp-3.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:03ab4530fdcb3a543a122ba4b65ac9919da9fe9f78a03d328a6e38ff962f7aa5", size = 483406 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/2c/8a/c25904f77690c3688ec140f87591ef11a0cfe36bf3d5c0f1f38056fb62b3/aiohttp-3.14.1-cp311-cp311-win_arm64.whl", hash = "sha256:486f7d16ed54c39c2cbd7ca71fd8ba2b8bb7860df65bd7b6ed640bab96a38a8b", size = 452987 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1d/21/151624b51cd92553d95424daf4bf19f19ce9be9002d19253e7e7ce67197b/aiohttp-3.14.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d35143e27778b4bb0fb189562d7f275bff79c62ab8e98459717c0ea617ff2480", size = 757402 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/82/280619e0bd7bf2454987e19282616e84762255dd9c8468f62382e8c191f1/aiohttp-3.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bcfb80a2cc36fba2534e5e5b5264dc7ae6fcd9bf15256da3e53d2f499e6fa29d", size = 512310 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/b2/2aac325583aaa1353045f96dffa586d8a34e8322e14a7ba49cffeb103ab4/aiohttp-3.14.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27fd7c91e51729b4f7e1577865fa6d34c9adccbc39aabe9000285b48af9f0ec2", size = 512448 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/72/a60607cb849faa8af8a356c9329ea2eb6f395d49e82cc82ccba1fd8deb8f/aiohttp-3.14.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:64c567bf9eaf664280116a8688f63016e6b32db2505908e2bdaca1b6438142f2", size = 1766854 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/d3/d9fe1c9ec7557ab4d0d82bebaa728c6418f0b93295ec2f4ab015f7710cc7/aiohttp-3.14.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f5e6ff2bdbb8f4cd3fbe41f99e25bbcd58e3bf9f13d3dd31a11e7917251cc77a", size = 1740884 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/dc/f2cecfaf9337ba3e63f181500814ff502aa3d00d9c7ec93a9d23d10a27b2/aiohttp-3.14.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:2f73e01dc37122325caf079982621262f96d74823c179038a82fddfc50359264", size = 1810034 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/d7/2ff65c5e65c0d7476daf7e15c032e0805e36811185b9623e3238ad6c763e/aiohttp-3.14.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bb2c0c80d431c0d03f2c7dbf125150fedd4f0de17366a7ca33f7ccb822391842", size = 1904054 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/9c/d445818389df371f56d141d881153ba23183c4735a03f7356ffb43f7757d/aiohttp-3.14.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e6fc1a85fa7194a1a7d19f44e8609180f4a8eb5fa4c7ed8b4355f080fad235c", size = 1790278 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/aa/bf04cb4d865fc6101c2229a294ad744973b72e513fdc5a6b791e6983d72a/aiohttp-3.14.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:686b6c0d3911ec387b444ddf5dc62fb7f7c0a7d5186a7861626496a5ab4aff95", size = 1591795 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/b4/4dac0038960427ba832f6609dfb4ea5437d7fd80c72001b9e48f834f428b/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c6fa4dc7ad6f8109c70bb1499e589f76b0b792baf39f9b017eb92c8a81d0a199", size = 1728397 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/f9/7cd4e8ad7aa3b75f17d56bb5498dd604a93d4e6eece822ba0568c413fff0/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:87a5eea1b2a5e21e1ebdbb33ad4165359189327e63fc4e4894693e7f821ac817", size = 1766504 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/df/fc01d9fcad0f73fed3f3d361f1f94f975947b50dff82919f6dc2bf4316cc/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c1421eb01d4fd608d88cc8290211d177a58532b55ad94076fb349c5bf467f0a", size = 1777806 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/41/09/47e2d090bddcc8fb4ccb4c314aadc32d7c5d9bb55f50f6ad1c92fc15d501/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:34b257ec41345c1e8f2df68fa908a7952f5de932723871eb633ecbbff396c9a4", size = 1580707 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/36/f1a4ce904ae0b6930cfe9afc96d0896f7ec1a620c400405d63783bb95a9c/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:de538791a80e5d862addbc183f70f0158ac9b9bb872bb147f1fd2a683691e087", size = 1798121 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/0a/e0075ce9ca0279ee1d4f0c0b85f54fea02ebc83c3007651a72bece658fec/aiohttp-3.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f71173be42d3241d428f760122febb748de0623f44308a6f120d0dd9ec572e3", size = 1767580 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/61/a0c0a8f327a9c52095cdd8e312391b00d3ed64ab6c72bb5c33d8ec251cf7/aiohttp-3.14.1-cp312-cp312-win32.whl", hash = "sha256:ec8dc383ee57ea3e883477dcca3f11b65d58199f1080acaf4cd6ad9a99698be4", size = 452771 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/d9/ea367c75f16ac9c6cdc8febb25e8318fa21a2b1bc8d6514d4b2d890bface/aiohttp-3.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2aa92c87868cd13674989f9ee83e5f9f7ea4237589b728048e1f0c8f6caa3271", size = 479873 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/64/8d96784a7851156db8a4c6c3f6f91042fdf39fb15a4cc38c8b3c14833c45/aiohttp-3.14.1-cp312-cp312-win_arm64.whl", hash = "sha256:2c840c90759922cb5e6dda94596e079a30fb5a5ba548e7e0dc00574703940847", size = 448073 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/97/bd137012dd97e1649162b099135a80e1fd59aaa807b2430fc448d1029aff/aiohttp-3.14.1-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:b3a03285a7f9c7b016324574a6d92a1c895da6b978cb8f1deee3ac72bc6da178", size = 506882 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ef/79/e5cc690e9d922a66887ceeaca53a8ffd5a7b0be3816142b7abc433742d89/aiohttp-3.14.1-cp313-cp313-android_21_x86_64.whl", hash = "sha256:2a73f487ab8ef5abbb24b7aa9b73e98eaba9e9e031804ff2416f02eca315ccaf", size = 515270 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/22/a73ccbf9dbd6e26dda0b24d5fd5db7da92ee3383a79f47677ffb834c5c5b/aiohttp-3.14.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:915fbb7b41b115192259f8c9ae58f3ddc444d2b5579917270211858e606a4afd", size = 485841 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/b9/57ed8eaf596321c2ad747bd480fb1700dbd7177c60dfc9e4c187f629662e/aiohttp-3.14.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:7fb4bdf95b0561a79f259f9d28fbc109728c5ee7f27aff6391f0ca703a329abe", size = 492088 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/c0/5ebe5270a7c140d7c6f79dcb018640225f14d406c149e4eec04a7d82fe71/aiohttp-3.14.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1b9748363260121d2927704f5d4fc498150669ca3ae93625986ee89c8f80dcd4", size = 501564 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/7f/8cdaa24fc7983865e0915153b96a9ac5bcdd3548d64c5a27d17cecccad2d/aiohttp-3.14.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:86a6dab78b0e43e2897a3bbe15745aa60dc5423ca437b7b0b164c069bf91b876", size = 751998 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/f4/c4227aacfacc5cb0cc2d119b65301d177912a6842cd64e120c47af76064f/aiohttp-3.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4dfd6e47d3c44c2279907607f73a4240b88c69eb8b90da7e2441a8045dfd21da", size = 510918 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/01/a2d5f96cd4e74424864d30bc0a7e44d0a12dacdcfa91b5b2d1bd3dca6bf3/aiohttp-3.14.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:317acd9f8602858dc7d59679812c376c7f0b97bcbbf16e0d6237f54141d8a8a6", size = 508657 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e8/ed/3c0fb5c500fdd8e7ebc10d1889c04384fffa1a9163eac1356088ca9da1b1/aiohttp-3.14.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd869c427324e5cb15195793de951295710db28be7d818247f3097b4ab5d4b96", size = 1757907 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/ab/d4c924d9bd5be3050c226612413ce68cb54c70d2c31b661bfc8d9a5b6a70/aiohttp-3.14.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:93b032b5ec3255473c143627d21a69ac74ae12f7f33974cb587c564d11b1066f", size = 1737565 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/2a/37326821ff779084020cdc33224d20b19f42f4183a500ff92022a739eda7/aiohttp-3.14.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f234b4deb12f3ad59127e037bc57c40c21e45b45282df7d3a55a0f409f595296", size = 1799018 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/4f/6e947ba73e4ce09070761c05ed3a8ceb7c21f5e46798671d8b2aac0e4626/aiohttp-3.14.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9af6779bfb46abf124068327abcdf9ce95c9ef8287a3e8da76ccf2d0f16c28fa", size = 1894416 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/6e/dbf1d0625dc711fb2851f4f3c3055c39ed58bae92082d8c627dbe6013736/aiohttp-3.14.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:faccab372e66bc76d5731525e7f1143c922271725b9d38c9f97edcc66266b451", size = 1783881 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/c2/5e25098a67268ed369483ae7d1a58bd0a13d03aab860d2a0e4a6eb25b046/aiohttp-3.14.1-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f380468b09d2a81633ee863b0ec5648d364bd17bb8ecfb8c2f387f7ac1faf42c", size = 1587572 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/2a/bd/cf9cee17e140f942a3de73e658a543aa8fbf35a5fc67a9d2538d52d77f0b/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:97e704dcd26271f5bda3fa07c3ce0fb76d6d3f8659f4baa1a24442cc9ba177ca", size = 1722137 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/6d/5684f8c59045c96f81a18cefbc1fbbd79d25b88f1c622f2a5c5c08fcb632/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:269b76ac5394092b95bc4a098f4fc6c191c083c3bd12775d1e30e663132f6a09", size = 1755953 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/40/35caf3170f8359760740a7d9aa0fff2e344bef98e1d1186f5a0f6dec17e6/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c0b3e614340c889d575451696374c9d17affd54cd607ca0babed8f8c37b9397", size = 1766479 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/a1/b0c61e7a137f0d81de49a82023a6df73c3c16d6fefb0f8e4a93d21639002/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:5663ee9257cfa1add7253a7da3035a02f31b6600ec48261585e1800a81533080", size = 1580077 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/41/194ea4623693009fcefebef7aef63c141754f153e9cd0d39d3b9e36c175c/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:603a2c834142172ffddc054067f5ec0ca65d57a0aa98a71bc81952573208e345", size = 1791688 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/45/4de841f005cfe1fd63e2a2fe011262c515e2a62aa6994b15947e7d717ac9/aiohttp-3.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cb21957bb8aca671c1765e32f58164cf0c50e6bf41c0bbbd16da20732ecaf588", size = 1761094 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/ae/dbce10533d3896d544d5053939ed75b7dc31a1b0973d959b1b5ae21028d6/aiohttp-3.14.1-cp313-cp313-win32.whl", hash = "sha256:e509a55f681e6158c20f70f102f9cf61fb20fbc382272bc6d94b7343f2582780", size = 452662 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/7b/d9/0bf1a19362c32f06229da5e7ddfcec91f93474d6307f7a2d3135e9c674dc/aiohttp-3.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:1ac8531b638959718e18c2207fbfe297819875da46a740b29dfa29beba64355a", size = 479748 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/0a/62e7232dc9484fbec112ceb32efb6a624cc7994ec6e2b019286f17c4e8f2/aiohttp-3.14.1-cp313-cp313-win_arm64.whl", hash = "sha256:250d14af67f6b6a1a4a811049b1afa69d61d617fca6bf33149b3ab1a6dbcf7b8", size = 447723 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/a1/5fafa04e1ca91ddb47608699d60649c1c6db3cf41c99e78fc4056f9513db/aiohttp-3.14.1-cp314-cp314-android_24_arm64_v8a.whl", hash = "sha256:7c106c26852ca1c2047c6b80384f17100b4e439af276f21ef3d4e2f450ae7e15", size = 508531 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/2e/bfa02f699d87ffc86d5959270b28f1cb410add3ccaced8ed2e0b8a5238fc/aiohttp-3.14.1-cp314-cp314-android_24_x86_64.whl", hash = "sha256:20205f7f5ade7aaec9f4b500549bbc071b046453aed72f9c06dcab87896a83e8", size = 514718 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/a5/9594ad6289eebbc97d167c44213d557807f90e59115caad24de21ad2c3b1/aiohttp-3.14.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:62a759436b29e677181a9e76bab8b8f689a29cb9c535f45f7c48c9c830d3f8c3", size = 487918 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/61/16a32c36c3c49edec122a3dc811f2057df2f94d3b14aa107c8017d981618/aiohttp-3.14.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:2964cbf553df4d7a57348da44d961d871895fc1ee4e8c322b2a95612c7b17fba", size = 494014 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/89/3ebcf96ed99c05bec9c434aaac6963fd3cbab4a786ae739908a144d9ce44/aiohttp-3.14.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:237651caadc3a59badd39319c54642b5299e9cc98a3a194310e55d5bb9f5e397", size = 502398 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/fd/3d/b74870a0c2d40c355928cd5b96c7a11fa821b8a40fc41365e64479b151fb/aiohttp-3.14.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:896e12dfdbbab9d8f7e16d2b28c6769a60126fa92095d1ebf9473d02593a2448", size = 758018 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/66/f42f5c984d99e49c6cff5f26f590750f2e2f7ef1fcfb99966ab5be1b632e/aiohttp-3.14.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d03f281ed22579314ba00821ce20115a7c0ac430660b4cc05704a3f818b3e004", size = 512462 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/a7/248e1aebe0c7810b0271e021a0f2a5eb6e78a051885b3c9df49f42a5802d/aiohttp-3.14.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:07eabb979d236335fed927e137a928c9adfb7df3b9ec7aa31726f133a62be983", size = 512824 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/97/2aa0e5ba0727dc3bd5aaebb7ccbc510f7dfb7fb961ec87497cd496635ab1/aiohttp-3.14.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4fe1f1087cbadb280b5e1bb054a4f00d1423c74d6626c5e48400d871d34ecefe", size = 1749898 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/00/8d/e97f6c96c891d457c8479d92a514ba194d0412f981d72c70341ee18488ed/aiohttp-3.14.1-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:367a9314fdc79dab0fac96e216cb41dd73c85bdca85306ce8999118ba7e0f333", size = 1710114 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/e6/aa8d7e863048c8fceb5cd6ce74017311cec3ead07847387e12265fb4444e/aiohttp-3.14.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a24f677ebe83749039e7bdf862ff0bbb16818ae4193d4ef96505e269375bcce0", size = 1802541 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/a8/72193137de57fda4ebfae4563182d082c8856e3b6e9871d0b46f028fb369/aiohttp-3.14.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:c83afe0ba876be7e943d2e0ba645809ad441575d2840c895c21ee5de93b9377a", size = 1875776 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/18/938441025db6769a3464596b2410af3afde0b21eb2f204c6f766f68af4bd/aiohttp-3.14.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:634e385930fb6d2d479cf3aa66515955863b77a5e3c2b5894ca259a25b308602", size = 1760329 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/29/bf2496b4065e76e09fe48015aaffe5ce161d8f089b06ac6982070f653076/aiohttp-3.14.1-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eeea07c4397bbc57719c4eed8f9c284874d4f175f9b6d57f7a1546b976d455ca", size = 1587293 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/a2/2136674d52123b1354bd05dd5753c318db47dc0c927cc70b27bab3755456/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:335c0cc3e3545ce98dcb9cfcb836f40c3411f43fa03dab757597d80c89af8a35", size = 1714756 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/b9/e5fd2e6f915503081c0f9b1e8540947037929c70c191da2e4d54b31a21a1/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:ae6be797afdef264e8a84864a85b196ca06045586481b3df8a967322fd2fa844", size = 1721052 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/63/5a/2833e324a2263e104e31e2e91bc5bbee81bc499afd32203faee048a883f0/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:8560b4d712474335d08907db7973f71912d3a9a8f1dee992ec06b5d2fe359496", size = 1766888 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/fa/dea6511870913162f3b2e8c42a7614eb203a4540b8c2da43e0bfb0548f3c/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7edd08e0a5deb1e8564a2fcd8f4561014a3f05252334671bbf55ddd47db0e5", size = 1581679 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/bd/3cf0d55e71784b33534e9710a67d382d900598b4787fbce6cc7317f8c42a/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = 
"sha256:b6ff7fcee63287ae57b5df3e4f5957ce032122802509246dec1a5bcc55904c95", size = 1782021 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/af/14bb5843eccbe234f4dfb78ab73e549d99727247e62ae5d62cbd22eaf5b0/aiohttp-3.14.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6ffbb2f4ec1ceaff7e07d43922954da26b223d188bf30658e561b98e23089444", size = 1742574 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/1e/fbeb7af9210a67ac0f9c9bec0f8f4568497924e33137a3d5b48e1cf85f3f/aiohttp-3.14.1-cp314-cp314-win32.whl", hash = "sha256:a9875b46d910cff3ea2f5962f9d266b465459fe634e22556ab9bd6fc1192eea0", size = 457773 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/2b/13e8d741a9ec5db7d900c060554cf8352ab85e44e2a4469ebb9d377bda17/aiohttp-3.14.1-cp314-cp314-win_amd64.whl", hash = "sha256:af8b4b81a960eeaf1234971ac3cd0ba5901f3cd42eae42a46b4d089a8b492719", size = 485001 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/30/491acfa2c4d6c3ff59c49a14fc1b50be3241e25bbb0c84c09e2da4d11395/aiohttp-3.14.1-cp314-cp314-win_arm64.whl", hash = "sha256:cf4491381b1b57425c315a56a439251b1bdac07b2275f19a8c44bc57744532ec", size = 453809 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/e3/19dbe1a1f4cc6230eb9e314de7fe68053b0992f9302b27d12141a0b5db53/aiohttp-3.14.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:819c054312f1af92947e6a55883d1b66feefab11531a7fc45e0fb9b63880b5c2", size = 793320 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/20/1b7182219ba1b108430d6e4dc53d25ae02dcfcf5a045b33af4e8c5167527/aiohttp-3.14.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:10ee9c1753a8f706345b22496c79fbddb5be0599e0823f3738b1534058e25340", size = 529077 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/c8/14ce60ec31a2e5f5274bb17d383a6f7a3aabca31ac04eee05585bbadab16/aiohttp-3.14.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1601cc37baf5750ccacae618ec2daf020769581695550e3b654a911f859c563d", size = 532476 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/7e/02/9ac85e081e53da2e061b02fa7758fe0a12d17b8ce2d1f5e6c7cb76730328/aiohttp-3.14.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4d6e0ac9da31c9c04c84e1c0182ad8d6df35965a85cae29cd71d089621b3ae94", size = 1922347 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/3e/d3ba07a0ab38b5389e10bec4362d21e10a4f667cba2d79ba30837b3a5059/aiohttp-3.14.1-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9e8f2d660c350b3d0e259c7a7e3d9b7fc8b41210cbcc3d4a7076ff0a5e5c2fdc", size = 1786465 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/cb/e2ee978a00cfb2df829704a69528b18154eba5939f45bc1efa8f33aee4c5/aiohttp-3.14.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4691802dda97be727f79d86818acaad7eb8e9252626a1d6b519fedbb92d5e251", size = 1909423 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/73/5d/1430334858b1022b58ae50399a918f0bd6fe8fa7fa183598d657ff61e040/aiohttp-3.14.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c389c482a7e9b9dc3ee2701ac46c4125297a3818875b9c305ddb603c04828fd1", size = 2001906 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/4e/560c7472d3d198a23aa5c8b19a5115bf6a9b77b7d3e4bb363da320430ad2/aiohttp-3.14.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc0cacab7ba4e56f0f81c82a98c09bed2f39c940107b03a34b168bdf7597edd3", size = 1877095 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/f1/4745806578d447db4a784a8591e2dae3afdfc2bcb96f8f81271b13df6543/aiohttp-3.14.1-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:979ed4717f59b8bb12e3963378fa285d93d367e15bcd66c721311826d3c44a6c", size = 1676222 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/6a/c9/48255813cca749a229ef0ab476004ec623728ad79a9c0840616f6c076325/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:38e1e7daaea81df51c952e18483f323d878499a1e2bfe564790e0f9701d6f203", size = 1842922 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/c0/bbd054e2bee909f529523a5af3891052606af5143c09f5f183ec3b234676/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:4132e72c608fe9fecb8f409113567605915b83e9bdd3ea56538d2f9cd35002f1", size = 1825035 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/ae/90395d4376deceb74e09ec26b6adf7d2015a6f8802d6d84446af860fef04/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:eefd9cc9b6d4a2db5f00a26bc3e4f9acf71926a6ec557cd56c9c6f27c290b665", size = 1849512 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/bd/fb25f3049957553d4ce0ba6ae480aa2f592a6985497fca590837d16c1be0/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:b165790117eea512d7f3fb22f1f6dad3d55a7189571993eb015591c1401276d1", size = 1668571 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/22/7f73303d64dd567ff3addca90b556690ed1233a47b8f55d242fb90af3681/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:ed09c7eb1c391271c2ed0314a51903e72a3acb653d5ccfc264cdf3ef11f8269d", size = 1881159 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/be/0474c5a8b5640e1e4aa1923430a91f4151be82e511373fe764189b89aef5/aiohttp-3.14.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:99abd37084b82f5830c635fddd0b4993b9742a66eb746dacf433c8590e8f9e3c", size = 1841409 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/3c/bb4a7cba26956cb3da4553cc2056cf67be5b5ff6e6d8fa4fbdff73bfb7ae/aiohttp-3.14.1-cp314-cp314t-win32.whl", hash = "sha256:47ddf841cdecc810749921d25606dee45857d12d2ad5ddb7b5bd7eab12e4b365", size = 494166 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/8a/84/ec80c2c1f66a952555a9f86df6b33af65108a6febfa0471b69013a12f807/aiohttp-3.14.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5e78b522b7a6e27e0b25d19b247b75039ac4c94f99823e3c9e53ae1603a9f7e9", size = 530255 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/71/6e22be134a4061ada85a92951b842f2657f17d926b727f3f94c56ae963d6/aiohttp-3.14.1-cp314-cp314t-win_arm64.whl", hash = "sha256:90d53f1609c29ccc2193945ef732428382a28f78d0456ae4d3daf0d48b74f0f6", size = 469640 }, -] - -[[package]] -name = "aioitertools" -version = "0.13.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/3c/53c4a17a05fb9ea2313ee1777ff53f5e001aefd5cc85aa2f4c2d982e1e38/aioitertools-0.13.0.tar.gz", hash = "sha256:620bd241acc0bbb9ec819f1ab215866871b4bbd1f73836a55f799200ee86950c", size = 19322 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/10/a1/510b0a7fadc6f43a6ce50152e69dbd86415240835868bb0bd9b5b88b1e06/aioitertools-0.13.0-py3-none-any.whl", hash = "sha256:0be0292b856f08dfac90e31f4739432f4cb6d7520ab9eb73e143f4f2fa5259be", size = 24182 }, -] - -[[package]] -name = "aiosignal" -version = "1.4.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "frozenlist" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490 }, -] - -[[package]] -name = "annotated-doc" -version = "0.0.4" 
-source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303 }, -] - -[[package]] -name = "annotated-types" -version = "0.7.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, -] - -[[package]] -name = "anthropic" -version = "0.112.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "anyio" }, - { name = "distro" }, - { name = "docstring-parser" }, - { name = "httpx" }, - { name = "jiter" }, - { name = "pydantic" }, - { name = "sniffio" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/dd/808c144d4a883fcfd12fe0d7689b1d86bbbea6666c1cc957ad19f1017c22/anthropic-0.112.0.tar.gz", hash = "sha256:e180cd91aa5b9b32e4007fe69892ab128d8a86b9f90825103b1903fbc977d0af", size = 937460 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a9/26/ea71185027956325be1903d4fcaf7461d5ef40ca8f0e64f992e24ea9db0e/anthropic-0.112.0-py3-none-any.whl", hash = "sha256:bcc6268612c716dbb77133dd60fc41d26016d1b81dee9a52314d210193638751", size = 931954 }, -] - -[[package]] -name = "anyio" -version = "4.14.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "idna" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/72/5562aabb8dd7181e8e860622a38bea08d17842b99ecd4c91f84ac95251b0/anyio-4.14.1.tar.gz", hash = "sha256:8d648a3544c1a700e3ff78615cd679e4c5c3f149904287e73687b2596963629e", size = 254831 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/7b/90df4a0a816d98d6ea26f559d87836d494a2cf1fcf063be67df50a7bcc30/anyio-4.14.1-py3-none-any.whl", hash = "sha256:4e5533c5b8ff0a24f5d7a176cbe6877129cd183893f66b537f8f227d10527d72", size = 124875 }, -] - -[[package]] -name = "attrs" -version = "26.1.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/8e/82a0fe20a541c03148528be8cac2408564a6c9a0cc7e9171802bc1d26985/attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32", size = 952055 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548 }, -] - -[[package]] -name = "beautifulsoup4" -version = "4.15.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "soupsieve" }, - { name = "typing-extensions" }, -] -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/43/65/318323f98dbee45d42dff61d8f047181bc6f2268a9068cfad035a46be5af/beautifulsoup4-4.15.0.tar.gz", hash = "sha256:288e3ca7d54b06f2ac191970bc275c1939cb46d450b255bf6718b04aa37ab4f7", size = 632571 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/c6/92fcd42f1ba33e1184263f25bfabf3d27c383410470f169e4b8163bf9c17/beautifulsoup4-4.15.0-py3-none-any.whl", hash = "sha256:d6f88de62e1d4e38ecb1077eb9724cd0eff29d2a08ca16a401e9b9e93f117cf9", size = 109924 }, -] - -[[package]] -name = "bidict" -version = "0.23.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/6e/026678aa5a830e07cd9498a05d3e7e650a4f56a42f267a53d22bcda1bdc9/bidict-0.23.1.tar.gz", hash = "sha256:03069d763bc387bbd20e7d49914e75fc4132a41937fa3405417e1a5a2d006d71", size = 29093 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/99/37/e8730c3587a65eb5645d4aba2d27aae48e8003614d6aaf15dda67f702f1f/bidict-0.23.1-py3-none-any.whl", hash = "sha256:5dae8d4d79b552a71cbabc7deb25dfe8ce710b17ff41711e13010ead2abfc3e5", size = 32764 }, -] - -[[package]] -name = "bs4" -version = "0.0.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "beautifulsoup4" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/aa/4acaf814ff901145da37332e05bb510452ebed97bc9602695059dd46ef39/bs4-0.0.2.tar.gz", hash = "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", size = 698 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/bb/bf7aab772a159614954d84aa832c129624ba6c32faa559dfb200a534e50b/bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc", size = 1189 }, -] - -[[package]] -name = "cached-property" -version = "2.0.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/76/4b/3d870836119dbe9a5e3c9a61af8cc1a8b69d75aea564572e385882d5aefb/cached_property-2.0.1.tar.gz", hash = "sha256:484d617105e3ee0e4f1f58725e72a8ef9e93deee462222dbd51cd91230897641", size = 10574 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/0e/7d8225aab3bc1a0f5811f8e1b557aa034ac04bdf641925b30d3caf586b28/cached_property-2.0.1-py3-none-any.whl", hash = "sha256:f617d70ab1100b7bcf6e42228f9ddcb78c676ffa167278d9f730d1c2fba69ccb", size = 7428 }, -] - -[[package]] -name = "certifi" -version = "2026.6.17" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/c7/424b75da314c1045981bd9777432fad05a9e0c69daa4ed7e308bbaffe405/certifi-2026.6.17.tar.gz", hash = "sha256:024c88eeec92ca068db80f02b8b07c9cef7b9fe261d1d535abfd5abd6f6af432", size = 134594 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/2f/c5464532e965badff2f4c4c1a3a83f5697f0d7c407ed0cda44aaa99bb451/certifi-2026.6.17-py3-none-any.whl", hash = "sha256:2227dcbaafe0d2f59279d1762ddddc37783ed4354594f194ffc31d20f41fc3db", size = 133289 }, -] - -[[package]] -name = "cffi" -version = "2.0.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/56/6033f5e86e8cc9bb629f0077ba71679508bdf54a9a5e112a3c0b91870332/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", size = 216476 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/7f/55fecd70f7ece178db2f26128ec41430d8720f2d12ca97bf8f0a628207d5/cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", size = 203374 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/84/ef/a7b77c8bdc0f77adc3b46888f1ad54be8f3b7821697a7b89126e829e676a/cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", size = 202597 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/91/500d892b2bf36529a75b77958edfcd5ad8e2ce4064ce2ecfeab2125d72d1/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", size = 215574 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/64/58f6255b62b101093d5df22dcb752596066c7e89dd725e0afaed242a61be/cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", size = 218971 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/49/fa72cebe2fd8a55fbe14956f9970fe8eb1ac59e5df042f603ef7c8ba0adc/cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", size = 211972 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/28/dd0967a76aab36731b6ebfe64dec4e981aff7e0608f60c2d46b46982607d/cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", size = 217078 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195 }, -] - -[[package]] -name = "charset-normalizer" -version = "3.4.7" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/d7/b5b7020a0565c2e9fa8c09f4b5fa6232feb326b8c20081ccded47ea368fd/charset_normalizer-3.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7", size = 309705 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/53/58c29116c340e5456724ecd2fff4196d236b98f3da97b404bc5e51ac3493/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7", size = 206419 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/02/e8146dc6591a37a00e5144c63f29fb7c97a734ea8a111190783c0e60ab63/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e", size = 227901 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/fb/73/77486c4cd58f1267bf17db420e930c9afa1b3be3fe8c8b8ebbebc9624359/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c", size = 222742 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/fa/f74eb381a7d94ded44739e9d94de18dc5edc9c17fb8c11f0a6890696c0a9/charset_normalizer-3.4.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df", size = 214061 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/92/42bd3cefcf7687253fb86694b45f37b733c97f59af3724f356fa92b8c344/charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265", size = 199239 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/3d/069e7184e2aa3b3cddc700e3dd267413dc259854adc3380421c805c6a17d/charset_normalizer-3.4.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4", size = 210173 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/51/9d56feb5f2e7074c46f93e0ebdbe61f0848ee246e2f0d89f8e20b89ebb8f/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e", size = 209841 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/59/893d8f99cc4c837dda1fe2f1139079703deb9f321aabcb032355de13b6c7/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38", size = 200304 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7d/1d/ee6f3be3464247578d1ed5c46de545ccc3d3ff933695395c402c21fa6b77/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = 
"sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c", size = 229455 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/bb/8fb0a946296ea96a488928bdce8ef99023998c48e4713af533e9bb98ef07/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b", size = 210036 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/bc/015b2387f913749f82afd4fcba07846d05b6d784dd16123cb66860e0237d/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c", size = 224739 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/ab/63133691f56baae417493cba6b7c641571a2130eb7bceba6773367ab9ec5/charset_normalizer-3.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d", size = 216277 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/6d/3be70e827977f20db77c12a97e6a9f973631a45b8d186c084527e53e77a4/charset_normalizer-3.4.7-cp311-cp311-win32.whl", hash = "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad", size = 147819 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/d9/5f67790f06b735d7c7637171bbfd89882ad67201891b7275e51116ed8207/charset_normalizer-3.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00", size = 159281 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/83/6413f36c5a34afead88ce6f66684d943d91f233d76dd083798f9602b75ae/charset_normalizer-3.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1", size = 147843 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/eb/4fc8d0a7110eb5fc9cc161723a34a8a6c200ce3b4fbf681bc86feee22308/charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", size = 311328 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/e3/0fadc706008ac9d7b9b5be6dc767c05f9d3e5df51744ce4cc9605de7b9f4/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", size = 208061 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/42/f0/3dd1045c47f4a4604df85ec18ad093912ae1344ac706993aff91d38773a2/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", size = 229031 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/67/675a46eb016118a2fbde5a277a5d15f4f69d5f3f5f338e5ee2f8948fcf43/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", size = 225239 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/f8/d0118a2f5f23b02cd166fa385c60f9b0d4f9194f574e2b31cef350ad7223/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", size = 216589 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/f1/6d2b0b261b6c4ceef0fcb0d17a01cc5bc53586c2d4796fa04b5c540bc13d/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", size = 202733 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/c0/7b1f943f7e87cc3db9626ba17807d042c38645f0a1d4415c7a14afb5591f/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", size = 212652 
}, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/dd/5a9ab159fe45c6e72079398f277b7d2b523e7f716acc489726115a910097/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", size = 211229 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/ff/531a1cad5ca855d1c1a8b69cb71abfd6d85c0291580146fda7c82857caa1/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", size = 203552 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/4c/a5fb52d528a8ca41f7598cb619409ece30a169fbdf9cdce592e53b46c3a6/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", size = 230806 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/7a/071feed8124111a32b316b33ae4de83d36923039ef8cf48120266844285b/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", size = 212316 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/35/f7dba3994312d7ba508e041eaac39a36b120f32d4c8662b8814dab876431/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464", size = 227274 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/2d/a572df5c9204ab7688ec1edc895a73ebded3b023bb07364710b05dd1c9be/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", size = 218468 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/eb/890922a8b03a568ca2f336c36585a4713c55d4d67bf0f0c78924be6315ca/charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", size = 148460 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/35/d9/0e7dffa06c5ab081f75b1b786f0aefc88365825dfcd0ac544bdb7b2b6853/charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", size = 159330 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/5d/481bcc2a7c88ea6b0878c299547843b2521ccbc40980cb406267088bc701/charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", size = 147828 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", size = 228303 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = 
"sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", size = 148085 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/c8/c67cb8c70e19ef1960b97b22ed2a1567711de46c4ddf19799923adc836c2/charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", size = 309234 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/99/85/c091fdee33f20de70d6c8b522743b6f831a2f1cd3ff86de4c6a827c48a76/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", size = 208042 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/1c/ab2ce611b984d2fd5d86a5a8a19c1ae26acac6bad967da4967562c75114d/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", size = 228706 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/29/2b1d2cb00bf085f59d29eb773ce58ec2d325430f8c216804a0a5cd83cbca/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", size = 224727 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/5c/032c2d5a07fe4d4855fea851209cca2b6f03ebeb6d4e3afdb3358386a684/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", size = 215882 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/c2/356065d5a8b78ed04499cae5f339f091946a6a74f91e03476c33f0ab7100/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", size = 200860 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/cd/a32a84217ced5039f53b29f460962abb2d4420def55afabe45b1c3c7483d/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", size = 211564 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/86/58e6f13ce26cc3b8f4a36b94a0f22ae2f00a72534520f4ae6857c4b81f89/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", size = 211276 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/8f/fe/d17c32dc72e17e155e06883efa84514ca375f8a528ba2546bee73fc4df81/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", size = 201238 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/29/f33daa50b06525a237451cdb6c69da366c381a3dadcd833fa5676bc468b3/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", size = 230189 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/6e/52c84015394a6a0bdcd435210a7e944c5f94ea1055f5cc5d56c5fe368e7b/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", size = 211352 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/d7/4353be581b373033fb9198bf1da3cf8f09c1082561e8e922aa7b39bf9fe8/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", size = 227024 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/45/99d18aa925bd1740098ccd3060e238e21115fffbfdcb8f3ece837d0ace6c/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", size = 217869 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/05/5ee478aa53f4bb7996482153d4bfe1b89e0f087f0ab6b294fcf92d595873/charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", size = 148541 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/77/72dcb0921b2ce86420b2d79d454c7022bf5be40202a2a07906b9f2a35c97/charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", size = 159634 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/c6/a3/c2369911cd72f02386e4e340770f6e158c7980267da16af8f668217abaa0/charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", size = 148384 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/09/7e8a7f73d24dba1f0035fbbf014d2c36828fc1bf9c88f84093e57d315935/charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", size = 330133 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/da/96975ddb11f8e977f706f45cddd8540fd8242f71ecdb5d18a80723dcf62c/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", size = 216257 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/e8/1d63bf8ef2d388e95c64b2098f45f84758f6d102a087552da1485912637b/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", size = 234851 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/40/e5ff04233e70da2681fa43969ad6f66ca5611d7e669be0246c4c7aaf6dc8/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", size = 233393 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/c1/06c6c49d5a5450f76899992f1ee40b41d076aee9279b49cf9974d2f313d5/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", size = 223251 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/2b/9f/f2ff16fb050946169e3e1f82134d107e5d4ae72647ec8a1b1446c148480f/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", size = 206609 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/d5/a527c0cd8d64d2eab7459784fb4169a0ac76e5a6fc5237337982fd61347e/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", size = 220014 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/80/8a7b8104a3e203074dc9aa2c613d4b726c0e136bad1cc734594b02867972/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", size = 218979 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/9a/b759b503d507f375b2b5c153e4d2ee0a75aa215b7f2489cf314f4541f2c0/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", size = 209238 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/4e/0f3f5d47b86bdb79256e7290b26ac847a2832d9a4033f7eb2cd4bcf4bb5b/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", size = 236110 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/23/bce28734eb3ed2c91dcf93abeb8a5cf393a7b2749725030bb630e554fdd8/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", size = 219824 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/6f/6e897c6984cc4d41af319b077f2f600fc8214eb2fe2d6bcb79141b882400/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", 
size = 233103 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/22/ef7bd0fe480a0ae9b656189ec00744b60933f68b4f42a7bb06589f6f576a/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", size = 225194 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/a7/0e0ab3e0b5bc1219bd80a6a0d4d72ca74d9250cb2382b7c699c147e06017/charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", size = 159827 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/1d/29d32e0fb40864b1f878c7f5a0b343ae676c6e2b271a2d55cc3a152391da/charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", size = 174168 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/32/d92444ad05c7a6e41fb2036749777c163baf7a0301a040cb672d6b2b1ae9/charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", size = 153018 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958 }, -] - -[[package]] -name = "click" -version = "8.4.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/d4/81420972a676e8ffea40450d8c8c92943e7218a78fe9b64359836cc9876b/click-8.4.2.tar.gz", hash = "sha256:9a6cea6e60b17ebe0a44c5cc636d94f09bd66142c1cd7d8b4cd731c4917a15f6", size = 338000 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/e2/79c688af8b210d232694e31e59da9f6ec747bae31c3f5946e4e9b98860d5/click-8.4.2-py3-none-any.whl", 
hash = "sha256:e6f9f66136c816745b9d65817da91d61d957fb16e02e4dcd0552553c5a197b76", size = 119243 }, -] - -[[package]] -name = "colorama" -version = "0.4.6" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, -] - -[[package]] -name = "cryptography" -version = "49.0.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/99/d1c90d6041656cc6ee229dc99cd67fd0cd5aec3c5f7d72fffc27cc750054/cryptography-49.0.0.tar.gz", hash = "sha256:f89660a348f4f78a92366240a61404e337586ef7f5909a2fef59ca88ef505493", size = 854345 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/22/adf66990e63584a68dfb50c24f48a125c07b1699899381c8151e63ed458c/cryptography-49.0.0-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:966fe0e9c67490071f14c0d2b1cb2dfb3023c5ce39457343931415f08382f2db", size = 4032100 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/41/3797cfaf69cae04a13ee78ebd83f0678d9c02b4779d21ce24445326f1a69/cryptography-49.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:36d1709f992593689b45bda411498d62c6e365f2ca00b84657d4dadd24de16db", size = 4692978 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e6/8b/43011f7ebe515a8aa20d61f290a326cd890c2e738e16e59eaff8d9c3a412/cryptography-49.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0e959b578856a3924bc0cbb710fc12c387b9412a951389f3ca61704a9e25f325", size = 4716422 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/91/01ce7303a4579e6d3a6abef01bd322848e9ea7a219adcabc5048b9033571/cryptography-49.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:53ecee2e23f7169b6117e99fc8a944e5e50f79e69758a83b52a00cb98ab2b2d2", size = 4700503 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/99/a2c95cf8293f07491e9e27c20cc4dcd18176d944e674679adeb1d0173fd6/cryptography-49.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:2eda353d8a27bcbcaa4cbed18994a74ab4d19a2ca897db188ea269ab9b71419b", size = 5309779 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/2c/0622f20ff02b2ef32558733443805dc82fd4c275be01b2d19d14676f3a1b/cryptography-49.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2afe9051da7ae7bd5905da5a949280c7d2bb75682e188f650a9d0f2756b834c6", size = 4749683 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/5b/c5246635d5fd3b64e0d45ae10e99fd32fe9676a79915ccfe5a61ba9af1a5/cryptography-49.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:0b82e28ee398a386f0807bba7884d30f25218855690f45115831bcce5d90822c", size = 4337874 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/88/05563c7fe2e914e87d1a536d06fe83e66b4e1d95cb593e05aea375531da8/cryptography-49.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:ccac2bfebc306b862133e3bb71f3f6ee8bb525240089b2d952e4144b3a6d5da7", size = 4700283 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/b6/d7696e4e890d6ae1469935164c9e5215c557671cb78d6e3f458ccceaa632/cryptography-49.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d0527ce944105f257f605a827d6ebead966c752038b6e8656abb9c5edee6fc68", size = 5265844 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a9/3c/f3ad17eecc1a57b0ba236dc01f90e783c51f4a2f35f64777cc4f47a184b2/cryptography-49.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:cbc77da8c523d5abd028635ba850a6966fcee2c82e2bf65a41d1d8afe0f98be9", size = 4749290 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/01/339573cf1023163a400b0b5d16f6d507de413b9f60be6fd1b77feeaf6737/cryptography-49.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b87e65d263b3e5d3bb92a57e2a6638e2f31110fa7aa890c7b2dbba42248d0a3f", size = 4834612 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/fd/577302e213a1be9468f92d1afef66fcf1ef83d516819d9992ca547f592bd/cryptography-49.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:66ec79c3904820572d7e987abdf304281f141d37ad9a489b8e97066e7b9b6459", size = 4980804 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/09/f42b1d190c5ba75f72062a387f8030d1d75f6ab035788f1d9c4b01de6525/cryptography-49.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:e5dfc1e64de5677cec922ffa8da89c546d0415bf6efdf081842e5d44c84e1f0e", size = 3810026 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/9e/db72b3ae7fc9cfad53e630e56c6ae83b9b6ff0bf3718ffb8012d20b3aabf/cryptography-49.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:73a205dce83953d131a4aa1e0fd917a2fd1c5b1eef251e9d7152efefcbf5caf7", size = 4013892 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/12/c48a424f38db03027be9f7ed5c7dc5de9933dbee992865f98b13727a009d/cryptography-49.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:196ecd6a36e4e9aa10270393bb98d8df88fccee0bf1e5128b91ae4eb4375896d", size = 4678835 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/28/8a3ad4653662c93fc44dc4e5d8fd374c25c42e07b34bbfbadf49cf57a5a8/cryptography-49.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7abcee80084cda3f7691f3eb1ce480d8df49cec637b429aa35986c1de71738aa", size = 4697239 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a8/b2/2193fc74f81aee4f9b62733133b73b5176718932ed8f2e4b03fa040480a6/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:4ae387c9cb68ea569ca17e490d66d8142b81c3cc814bf179974b7d146e490bbb", size = 4685593 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/f1/1d3eaa243bfc5de4a187b22aa8c048b3e4980bfbe830ac46e6bac2e66947/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:f37d847238971164fdbc68ade6f6574aecc9c0af714190e2083429ff68f4ce9d", size = 5289961 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/39/2d51306721330c486495853eda1c567880ff036de15a14c4b74f399934af/cryptography-49.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:c2bc30226390d60ea19d9f82b19db005fe0452154a23c1c410c12ea801e43561", size = 4731145 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/50/983e838c7fd0d87fd8c969bcdd328edaf5f756e38df5281637424c155873/cryptography-49.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:07cab27cc7b7e0fd28e5e26bb9eeedde5c135c868b46de4a27845abe94af6122", size = 4321719 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/f5/8f571d7e27c55bce9f76f026143bcb1e040a4233149ecca0bea5fa5dd5f7/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:b20133d204d2bb56ba047642199603876c872026ca53e79c35b83772ab2cc505", size = 4685209 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/84/0e27016a6fc5a0886f797018b26aa42f40c09a82332bff77822a451deaaa/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b970c6da94d5bb18629db453d14f2a1300f6bf59b61e9b82377931ef95504866", size = 5246285 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/2d/5e1fb307cb5931881516b464c98774b3f2c36b5d4bb9a2830253cf553cad/cryptography-49.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d8ecde755e2e91bf773fc94e8c9d730cd7f2007004cb492263a794ec3899a1c8", size = 4730441 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e4/c0/bff5a02ee731d207d6a1ed51732549d8c53d2bc8da1d10ec6f2844201d68/cryptography-49.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e3fb64c420688e5319ae25113a354015abbd8dffbfbc41781a1ea66fc7622ac3", size = 4815869 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/26/814681d14248d95d73d5c3eea0c39a94eb8302df966f670a2c60de90974b/cryptography-49.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32703d93296f5c1f4b53349ad3a250c2cae0fdecd3a3dd5d47e616d8d616af27", size = 4960948 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/fe/93ecac273d3738939d023612ad12cca9a3740a5345d69fda04134c43fd96/cryptography-49.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:33cd0565932807baddb67b96dbee92f2c374b5c89dee09fd74079aeb8c8dba61", size = 3799153 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/2a/5bb823f5bedcf80718cea7fbc95ec5515cca3769633c4b01a32be7f30e7c/cryptography-49.0.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ec5e529fb80935c94fe7b729f9972b50e351a0e6b50aa294fd5cabb109fcc29a", size = 4025947 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/df/40577043ca124e17012f408ddddaeb213b856336ac82ddb3bc915f39e29f/cryptography-49.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f78ff2c9ed8dc2d036b0f4d640e22522213d047c1b14e61205a7e55c80a494d4", size = 4692429 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/99/2d13299eb3dd27b02dcfaafcc91d6b5cb3329f7cbd6d8f51921acd566c1a/cryptography-49.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:35b151772baff2c74cba7fa290ceaff4c3b11c0c881eb93eb5dbc05a7cfbba18", size = 4700968 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/4d/9c0cd02f95e2602dd5e563da149ee0830abef3537be8b34dc56281ebe27a/cryptography-49.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0f21641cf4b30fca7aee061ced0ec7ad7b073518088b7c9969a297c0ae796c69", size = 4697758 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/24/01/186c825898477d77e2324d5360fefe622ff1d8d1963ec0554e2cada8ec77/cryptography-49.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:9e82dcc8e56052715fb18b2429e3bca4823b1629136a2084fc45a9a5cecb9b64", size = 5298863 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/7b/62cbbab75d0659865bf0273790031544a0b16c8072d258f9428dcd8190dc/cryptography-49.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6f2debedf9ca60cf1d5bd466475638af5130f89965605cd818484d19987d3a21", size = 4735983 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6c/72/3e798c064bc39e471008075d0f9bc9daf77a80879c092e4a8e170c585ed4/cryptography-49.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:8c25ceb16df5b9435f3f6a9829204985b0e0cbee3b48aacd432c7d2c850b44d9", size = 4334173 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/ee/6fca21d1ac73e06f8bef71940abfd4d2f6472b4bca284d770f32bd4086f6/cryptography-49.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:28d8b15e6275f12c8a207dc309dfa957903c927d08d0cc937ee3f63f200693cc", size = 4697298 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/d0/a5fcd3515f0bae49a7b6d0413cc1bdccdcc1fc0047037a0d480642cdc5d6/cryptography-49.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:6fc361c34fb6aac015ce19435876635e5c6d21db31998b0920f675f131e043b8", size = 5254338 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/84/84fe36f19caf857d61cb7fc9c63035a47ffabd84ea12d1d393148efa3615/cryptography-49.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:2400ef9c9e2299a25614eb1dea3db54a69b1349efd043bfac9c67630d136df36", size = 4735650 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6c/a0/db537264e234f7273a73ec020873d6d6b39dfd8a53db78b550ca8320440e/cryptography-49.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:67e1d20ad9ef3a563c59ef22e7a8a0b8210bd26604369ea4a30a7c66aefe504e", size = 4834820 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/93/77/8df9eb486495979bccecd1062e2eaf435250e84437040295b57d09048b0b/cryptography-49.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:42b0684e0e40cf26122427802486f6d93aea593612603a94fbf260c7eb1e9c1b", size = 4967968 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/e6/f60198ea8d9dfa15fff9ed4ca02ce362f6eadd9ba757dcc50634c4257b63/cryptography-49.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:026ac7423e6fa66872d3bf889be5974507da3944f866f704fa200eadacd00001", size = 3785547 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/63/d3/4a83af35d65e3fad632c926fad684c193ea4398569ccb0bbbc7fe8f5dc9a/cryptography-49.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:fc1e275c2f1d97b1a6450b8b0ea3ebfa6e087a611c2b26cb2404d48588abab7b", size = 3993685 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/a7/f9dac0ab7f80368c56993a7bf638ef9935f825c91902798481fac0898138/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83782480a4a9da4d0feb51950131ba32e12e70813848b3343f6e18c28a66838", size = 4676239 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/70/2ba3769dd0ae167e2f33dfa9592d45db6ff9a61d62ca1a5b3d1bdd09068f/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b39efa323140595abd3ecca8529d321ae50f55f3aa3ba9cc81ea56a6011953d5", size = 4715584 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/64/2923570ac1c0bd3a737aa366ac3abbbbde273042308b8cde95e2364a6e6a/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:b47db11c2c3525083296069b98ac5221907455e989ae0c2e3008bde851921615", size = 4675885 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/f8/614dc7e051418cfe53d55173c1e24c6b0085e89996fe90508c2fdf769aef/cryptography-49.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:084ef1af862eb07ec46d25f68689f2102a9fc0e05ce7b80f14f5fe51e4eef0f6", size = 4715449 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/aa/50/a9caea39ad19c431c1a3f8a31114df65b260cdfe67786b6c7e7c040c4c44/cryptography-49.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be9fcb48a55f023493482827d4f459bd263cc20efde64f204b97c123201850c6", size = 3783731 }, -] - -[[package]] -name = "dashscope" -version = "1.26.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "certifi" }, - { name = "cryptography" }, - { name = "requests" }, - { name = "rich" }, - { name = "typer" }, - { name = "websocket-client" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/b6/17b6b08ceb08f9ec2102db5a9c0afe0b5d61b63a972a4615c50252669c1e/dashscope-1.26.0.tar.gz", hash = "sha256:2c806c6e735a47f1f31814e700f5cc073cd101a860302562bd8eaa210bdef991", size = 1390453 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fc/ec/6895fb7b8977014f8e0a009a5298ab4e6a31f06aaa07373890fee84348a3/dashscope-1.26.0-py3-none-any.whl", hash = "sha256:ddfbda55a0196ec4c1a3cfc6be610b53310dfdb95ed4a293f6cc699809500ba4", size = 1481554 }, -] - -[[package]] -name = "distro" -version = "1.9.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, -] - -[[package]] -name = "docstring-parser" -version = "0.18.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e0/4d/f332313098c1de1b2d2ff91cf2674415cc7cddab2ca1b01ae29774bd5fdf/docstring_parser-0.18.0.tar.gz", hash = "sha256:292510982205c12b1248696f44959db3cdd1740237a968ea1e2e7a900eeb2015", size = 29341 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/5f/ed01f9a3cdffbd5a008556fc7b2a08ddb1cc6ace7effa7340604b1d16699/docstring_parser-0.18.0-py3-none-any.whl", hash = "sha256:b3fcbed555c47d8479be0796ef7e19c2670d428d72e96da63f3a40122860374b", size = 22484 }, -] - -[[package]] -name = "filetype" -version = "1.2.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bb/29/745f7d30d47fe0f251d3ad3dc2978a23141917661998763bebb6da007eb1/filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb", size = 998020 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970 }, -] - -[[package]] -name = "frozenlist" -version = "1.8.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/03/077f869d540370db12165c0aa51640a873fb661d8b315d1d4d67b284d7ac/frozenlist-1.8.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:09474e9831bc2b2199fad6da3c14c7b0fbdd377cce9d3d77131be28906cb7d84", size = 86912 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/b5/7610b6bd13e4ae77b96ba85abea1c8cb249683217ef09ac9e0ae93f25a91/frozenlist-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", 
hash = "sha256:17c883ab0ab67200b5f964d2b9ed6b00971917d5d8a92df149dc2c9779208ee9", size = 50046 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/ef/0e8f1fe32f8a53dd26bdd1f9347efe0778b0fddf62789ea683f4cc7d787d/frozenlist-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa47e444b8ba08fffd1c18e8cdb9a75db1b6a27f17507522834ad13ed5922b93", size = 50119 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/b1/71a477adc7c36e5fb628245dfbdea2166feae310757dea848d02bd0689fd/frozenlist-1.8.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2552f44204b744fba866e573be4c1f9048d6a324dfe14475103fd51613eb1d1f", size = 231067 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/7e/afe40eca3a2dc19b9904c0f5d7edfe82b5304cb831391edec0ac04af94c2/frozenlist-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:957e7c38f250991e48a9a73e6423db1bb9dd14e722a10f6b8bb8e16a0f55f695", size = 233160 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/aa/7416eac95603ce428679d273255ffc7c998d4132cfae200103f164b108aa/frozenlist-1.8.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:8585e3bb2cdea02fc88ffa245069c36555557ad3609e83be0ec71f54fd4abb52", size = 228544 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/3d/2a2d1f683d55ac7e3875e4263d28410063e738384d3adc294f5ff3d7105e/frozenlist-1.8.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:edee74874ce20a373d62dc28b0b18b93f645633c2943fd90ee9d898550770581", size = 243797 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/1e/2d5565b589e580c296d3bb54da08d206e797d941a83a6fdea42af23be79c/frozenlist-1.8.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c9a63152fe95756b85f31186bddf42e4c02c6321207fd6601a1c89ebac4fe567", size = 247923 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/aa/c3/65872fcf1d326a7f101ad4d86285c403c87be7d832b7470b77f6d2ed5ddc/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b6db2185db9be0a04fecf2f241c70b63b1a242e2805be291855078f2b404dd6b", size = 230886 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/76/ac9ced601d62f6956f03cc794f9e04c81719509f85255abf96e2510f4265/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:f4be2e3d8bc8aabd566f8d5b8ba7ecc09249d74ba3c9ed52e54dc23a293f0b92", size = 245731 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/49/ecccb5f2598daf0b4a1415497eba4c33c1e8ce07495eb07d2860c731b8d5/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c8d1634419f39ea6f5c427ea2f90ca85126b54b50837f31497f3bf38266e853d", size = 241544 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/4b/ddf24113323c0bbcc54cb38c8b8916f1da7165e07b8e24a717b4a12cbf10/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:1a7fa382a4a223773ed64242dbe1c9c326ec09457e6b8428efb4118c685c3dfd", size = 241806 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/fb/9b9a084d73c67175484ba2789a59f8eebebd0827d186a8102005ce41e1ba/frozenlist-1.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:11847b53d722050808926e785df837353bd4d75f1d494377e59b23594d834967", size = 229382 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/a3/c8fb25aac55bf5e12dae5c5aa6a98f85d436c1dc658f21c3ac73f9fa95e5/frozenlist-1.8.0-cp311-cp311-win32.whl", hash = "sha256:27c6e8077956cf73eadd514be8fb04d77fc946a7fe9f7fe167648b0b9085cc25", size = 39647 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/f5/603d0d6a02cfd4c8f2a095a54672b3cf967ad688a60fb9faf04fc4887f65/frozenlist-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac913f8403b36a2c8610bbfd25b8013488533e71e62b4b4adce9c86c8cea905b", size = 44064 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/5d/16/c2c9ab44e181f043a86f9a8f84d5124b62dbcb3a02c0977ec72b9ac1d3e0/frozenlist-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:d4d3214a0f8394edfa3e303136d0575eece0745ff2b47bd2cb2e66dd92d4351a", size = 39937 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/29/948b9aa87e75820a38650af445d2ef2b6b8a6fab1a23b6bb9e4ef0be2d59/frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1", size = 87782 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/80/4f6e318ee2a7c0750ed724fa33a4bdf1eacdc5a39a7a24e818a773cd91af/frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b", size = 50594 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/94/5c8a2b50a496b11dd519f4a24cb5496cf125681dd99e94c604ccdea9419a/frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4", size = 50448 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/bd/d91c5e39f490a49df14320f4e8c80161cfcce09f1e2cde1edd16a551abb3/frozenlist-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383", size = 242411 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/83/f61505a05109ef3293dfb1ff594d13d64a2324ac3482be2cedc2be818256/frozenlist-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4", size = 243014 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/cb/cb6c7b0f7d4023ddda30cf56b8b17494eb3a79e3fda666bf735f63118b35/frozenlist-1.8.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = 
"sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8", size = 234909 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/c5/cd7a1f3b8b34af009fb17d4123c5a778b44ae2804e3ad6b86204255f9ec5/frozenlist-1.8.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b", size = 250049 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/01/2f95d3b416c584a1e7f0e1d6d31998c4a795f7544069ee2e0962a4b60740/frozenlist-1.8.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52", size = 256485 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/03/024bf7720b3abaebcff6d0793d73c154237b85bdf67b7ed55e5e9596dc9a/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29", size = 237619 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/fa/f8abdfe7d76b731f5d8bd217827cf6764d4f1d9763407e42717b4bed50a0/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3", size = 250320 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/3c/b051329f718b463b22613e269ad72138cc256c540f78a6de89452803a47d/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143", size = 246820 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/ae/58282e8f98e444b3f4dd42448ff36fa38bef29e40d40f330b22e7108f565/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608", size = 250518 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/8f/96/007e5944694d66123183845a106547a15944fbbb7154788cbf7272789536/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa", size = 239096 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/bb/852b9d6db2fa40be96f29c0d1205c306288f0684df8fd26ca1951d461a56/frozenlist-1.8.0-cp312-cp312-win32.whl", hash = "sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf", size = 39985 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/af/38e51a553dd66eb064cdf193841f16f077585d4d28394c2fa6235cb41765/frozenlist-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746", size = 44591 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/06/1dc65480ab147339fecc70797e9c2f69d9cea9cf38934ce08df070fdb9cb/frozenlist-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd", size = 40102 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2d/40/0832c31a37d60f60ed79e9dfb5a92e1e2af4f40a16a29abcc7992af9edff/frozenlist-1.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a", size = 85717 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/ba/b0b3de23f40bc55a7057bd38434e25c34fa48e17f20ee273bbde5e0650f3/frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96153e77a591c8adc2ee805756c61f59fef4cf4073a9275ee86fe8cba41241f7", size = 49651 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/ab/6e5080ee374f875296c4243c381bbdef97a9ac39c6e3ce1d5f7d42cb78d6/frozenlist-1.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40", size = 49417 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d5/4e/e4691508f9477ce67da2015d8c00acd751e6287739123113a9fca6f1604e/frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027", size = 234391 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/76/c202df58e3acdf12969a7895fd6f3bc016c642e6726aa63bd3025e0fc71c/frozenlist-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822", size = 233048 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/c0/8746afb90f17b73ca5979c7a3958116e105ff796e718575175319b5bb4ce/frozenlist-1.8.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121", size = 226549 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/eb/4c7eefc718ff72f9b6c4893291abaae5fbc0c82226a32dcd8ef4f7a5dbef/frozenlist-1.8.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5", size = 239833 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/4e/e5c02187cf704224f8b21bee886f3d713ca379535f16893233b9d672ea71/frozenlist-1.8.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e", size = 245363 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/96/cb85ec608464472e82ad37a17f844889c36100eed57bea094518bf270692/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11", size = 229314 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/5d/6f/4ae69c550e4cee66b57887daeebe006fe985917c01d0fff9caab9883f6d0/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1", size = 243365 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/58/afd56de246cf11780a40a2c28dc7cbabbf06337cc8ddb1c780a2d97e88d8/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1", size = 237763 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/36/cdfaf6ed42e2644740d4a10452d8e97fa1c062e2a8006e4b09f1b5fd7d63/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8", size = 240110 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/a8/9ea226fbefad669f11b52e864c55f0bd57d3c8d7eb07e9f2e9a0b39502e1/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed", size = 233717 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/0b/1b5531611e83ba7d13ccc9988967ea1b51186af64c42b7a7af465dcc9568/frozenlist-1.8.0-cp313-cp313-win32.whl", hash = "sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496", size = 39628 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/cf/174c91dbc9cc49bc7b7aab74d8b734e974d1faa8f191c74af9b7e80848e6/frozenlist-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231", size = 43882 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/17/502cd212cbfa96eb1388614fe39a3fc9ab87dbbe042b66f97acb57474834/frozenlist-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62", size = 39676 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d2/5c/3bbfaa920dfab09e76946a5d2833a7cbdf7b9b4a91c714666ac4855b88b4/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94", size = 89235 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/d6/f03961ef72166cec1687e84e8925838442b615bd0b8854b54923ce5b7b8a/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c", size = 50742 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/bb/a6d12b7ba4c3337667d0e421f7181c82dda448ce4e7ad7ecd249a16fa806/frozenlist-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52", size = 51725 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/71/d1fed0ffe2c2ccd70b43714c6cab0f4188f09f8a67a7914a6b46ee30f274/frozenlist-1.8.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51", size = 284533 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/1f/fb1685a7b009d89f9bf78a42d94461bc06581f6e718c39344754a5d9bada/frozenlist-1.8.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65", size = 292506 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/3b/b991fe1612703f7e0d05c0cf734c1b77aaf7c7d321df4572e8d36e7048c8/frozenlist-1.8.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82", size = 274161 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ca/ec/c5c618767bcdf66e88945ec0157d7f6c4a1322f1473392319b7a2501ded7/frozenlist-1.8.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714", size = 294676 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/ce/3934758637d8f8a88d11f0585d6495ef54b2044ed6ec84492a91fa3b27aa/frozenlist-1.8.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d", size = 300638 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fc/4f/a7e4d0d467298f42de4b41cbc7ddaf19d3cfeabaf9ff97c20c6c7ee409f9/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506", size = 283067 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/48/c7b163063d55a83772b268e6d1affb960771b0e203b632cfe09522d67ea5/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51", size = 292101 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/d0/2366d3c4ecdc2fd391e0afa6e11500bfba0ea772764d631bbf82f0136c9d/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e", size = 289901 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/94/daff920e82c1b70e3618a2ac39fbc01ae3e2ff6124e80739ce5d71c9b920/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0", size = 289395 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/20/bba307ab4235a09fdcd3cc5508dbabd17c4634a1af4b96e0f69bfe551ebd/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41", size = 283659 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/00/04ca1c3a7a124b6de4f8a9a17cc2fcad138b4608e7a3fc5877804b8715d7/frozenlist-1.8.0-cp313-cp313t-win32.whl", hash = "sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b", size = 43492 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/5e/c69f733a86a94ab10f68e496dc6b7e8bc078ebb415281d5698313e3af3a1/frozenlist-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888", size = 48034 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/6c/be9d79775d8abe79b05fa6d23da99ad6e7763a1d080fbae7290b286093fd/frozenlist-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042", size = 41749 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/c8/85da824b7e7b9b6e7f7705b2ecaf9591ba6f79c1177f324c2735e41d36a2/frozenlist-1.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cee686f1f4cadeb2136007ddedd0aaf928ab95216e7691c63e50a8ec066336d0", size = 86127 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/e8/a1185e236ec66c20afd72399522f142c3724c785789255202d27ae992818/frozenlist-1.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:119fb2a1bd47307e899c2fac7f28e85b9a543864df47aa7ec9d3c1b4545f096f", size = 49698 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/93/72b1736d68f03fda5fdf0f2180fb6caaae3894f1b854d006ac61ecc727ee/frozenlist-1.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4970ece02dbc8c3a92fcc5228e36a3e933a01a999f7094ff7c23fbd2beeaa67c", size = 49749 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/b2/fabede9fafd976b991e9f1b9c8c873ed86f202889b864756f240ce6dd855/frozenlist-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:cba69cb73723c3f329622e34bdbf5ce1f80c21c290ff04256cff1cd3c2036ed2", size = 231298 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/3b/d9b1e0b0eed36e70477ffb8360c49c85c8ca8ef9700a4e6711f39a6e8b45/frozenlist-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:778a11b15673f6f1df23d9586f83c4846c471a8af693a22e066508b77d201ec8", size = 232015 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/94/be719d2766c1138148564a3960fc2c06eb688da592bdc25adcf856101be7/frozenlist-1.8.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0325024fe97f94c41c08872db482cf8ac4800d80e79222c6b0b7b162d5b13686", size = 225038 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/09/6712b6c5465f083f52f50cf74167b92d4ea2f50e46a9eea0523d658454ae/frozenlist-1.8.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:97260ff46b207a82a7567b581ab4190bd4dfa09f4db8a8b49d1a958f6aa4940e", size = 240130 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/d4/cd065cdcf21550b54f3ce6a22e143ac9e4836ca42a0de1022da8498eac89/frozenlist-1.8.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54b2077180eb7f83dd52c40b2750d0a9f175e06a42e3213ce047219de902717a", size = 242845 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/c3/f57a5c8c70cd1ead3d5d5f776f89d33110b1addae0ab010ad774d9a44fb9/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f05983daecab868a31e1da44462873306d3cbfd76d1f0b5b69c473d21dbb128", size = 229131 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6c/52/232476fe9cb64f0742f3fde2b7d26c1dac18b6d62071c74d4ded55e0ef94/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:33f48f51a446114bc5d251fb2954ab0164d5be02ad3382abcbfe07e2531d650f", size = 240542 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/5f/85/07bf3f5d0fb5414aee5f47d33c6f5c77bfe49aac680bfece33d4fdf6a246/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:154e55ec0655291b5dd1b8731c637ecdb50975a2ae70c606d100750a540082f7", size = 237308 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/99/ae3a33d5befd41ac0ca2cc7fd3aa707c9c324de2e89db0e0f45db9a64c26/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:4314debad13beb564b708b4a496020e5306c7333fa9a3ab90374169a20ffab30", size = 238210 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/60/b1d2da22f4970e7a155f0adde9b1435712ece01b3cd45ba63702aea33938/frozenlist-1.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:073f8bf8becba60aa931eb3bc420b217bb7d5b8f4750e6f8b3be7f3da85d38b7", size = 231972 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/ab/945b2f32de889993b9c9133216c068b7fcf257d8595a0ac420ac8677cab0/frozenlist-1.8.0-cp314-cp314-win32.whl", hash = "sha256:bac9c42ba2ac65ddc115d930c78d24ab8d4f465fd3fc473cdedfccadb9429806", size = 40536 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/ad/9caa9b9c836d9ad6f067157a531ac48b7d36499f5036d4141ce78c230b1b/frozenlist-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:3e0761f4d1a44f1d1a47996511752cf3dcec5bbdd9cc2b4fe595caf97754b7a0", size = 44330 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/13/e6950121764f2676f43534c555249f57030150260aee9dcf7d64efda11dd/frozenlist-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:d1eaff1d00c7751b7c6662e9c5ba6eb2c17a2306ba5e2a37f24ddf3cc953402b", size = 40627 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/c7/43200656ecc4e02d3f8bc248df68256cd9572b3f0017f0a0c4e93440ae23/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d3bb933317c52d7ea5004a1c442eef86f426886fba134ef8cf4226ea6ee1821d", size = 89238 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d1/29/55c5f0689b9c0fb765055629f472c0de484dcaf0acee2f7707266ae3583c/frozenlist-1.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8009897cdef112072f93a0efdce29cd819e717fd2f649ee3016efd3cd885a7ed", size = 50738 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/7d/b7282a445956506fa11da8c2db7d276adcbf2b17d8bb8407a47685263f90/frozenlist-1.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c5dcbbc55383e5883246d11fd179782a9d07a986c40f49abe89ddf865913930", size = 51739 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/1c/3d8622e60d0b767a5510d1d3cf21065b9db874696a51ea6d7a43180a259c/frozenlist-1.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:39ecbc32f1390387d2aa4f5a995e465e9e2f79ba3adcac92d68e3e0afae6657c", size = 284186 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2d/14/aa36d5f85a89679a85a1d44cd7a6657e0b1c75f61e7cad987b203d2daca8/frozenlist-1.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92db2bf818d5cc8d9c1f1fc56b897662e24ea5adb36ad1f1d82875bd64e03c24", size = 292196 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/23/6bde59eb55abd407d34f77d39a5126fb7b4f109a3f611d3929f14b700c66/frozenlist-1.8.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2dc43a022e555de94c3b68a4ef0b11c4f747d12c024a520c7101709a2144fb37", size = 273830 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/3f/22cff331bfad7a8afa616289000ba793347fcd7bc275f3b28ecea2a27909/frozenlist-1.8.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb89a7f2de3602cfed448095bab3f178399646ab7c61454315089787df07733a", size = 294289 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a4/89/5b057c799de4838b6c69aa82b79705f2027615e01be996d2486a69ca99c4/frozenlist-1.8.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:33139dc858c580ea50e7e60a1b0ea003efa1fd42e6ec7fdbad78fff65fad2fd2", size = 300318 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/de/2c22ab3eb2a8af6d69dc799e48455813bab3690c760de58e1bf43b36da3e/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:168c0969a329b416119507ba30b9ea13688fafffac1b7822802537569a1cb0ef", size = 282814 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/f7/970141a6a8dbd7f556d94977858cfb36fa9b66e0892c6dd780d2219d8cd8/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:28bd570e8e189d7f7b001966435f9dac6718324b5be2990ac496cf1ea9ddb7fe", size = 291762 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/15/ca1adae83a719f82df9116d66f5bb28bb95557b3951903d39135620ef157/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b2a095d45c5d46e5e79ba1e5b9cb787f541a8dee0433836cea4b96a2c439dcd8", size = 289470 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/83/dca6dc53bf657d371fbc88ddeb21b79891e747189c5de990b9dfff2ccba1/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:eab8145831a0d56ec9c4139b6c3e594c7a83c2c8be25d5bcf2d86136a532287a", size = 289042 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/52/abddd34ca99be142f354398700536c5bd315880ed0a213812bc491cff5e4/frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:974b28cf63cc99dfb2188d8d222bc6843656188164848c4f679e63dae4b0708e", size = 283148 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/af/d3/76bd4ed4317e7119c2b7f57c3f6934aba26d277acc6309f873341640e21f/frozenlist-1.8.0-cp314-cp314t-win32.whl", hash = "sha256:342c97bf697ac5480c0a7ec73cd700ecfa5a8a40ac923bd035484616efecc2df", size = 44676 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/89/76/c615883b7b521ead2944bb3480398cbb07e12b7b4e4d073d3752eb721558/frozenlist-1.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:06be8f67f39c8b1dc671f5d83aaefd3358ae5cdcf8314552c57e7ed3e6475bdd", size = 49451 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/a3/5982da14e113d07b325230f95060e2169f5311b1017ea8af2a29b374c289/frozenlist-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:102e6314ca4da683dca92e3b1355490fed5f313b768500084fbe6371fddfdb79", size = 42507 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409 }, -] - -[[package]] -name = "googleapis-common-protos" -version = "1.75.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "protobuf" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/c8/f439cffde755cffa462bfbb156278fa6f9d09119719af9814b858fd4f81f/googleapis_common_protos-1.75.0.tar.gz", hash = "sha256:53a062ff3c32552fbd62c11fe23768b78e4ddf0494d5e5fd97d3f4689c75fbbd", size = 151035 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/c8/e2645aa8ed02fd4c7a2f59d68783b65b1f3cbdfe39a6308e156509d1fee8/googleapis_common_protos-1.75.0-py3-none-any.whl", hash = "sha256:961ed60399c457ceb0ee8f285a84c870aabc9c6a832b9d37bb281b5bebde43ed", size = 300631 }, -] - -[[package]] -name = "grpcio" -version = "1.81.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/b5/1ff353970a87eda4c98251e34d2dfd214abd4982dc89119c9252a2a482d2/grpcio-1.81.1.tar.gz", hash = "sha256:6fa10a767143a5e82e8eaab53918af0cd8909a57a27f8cb2288b80a613ac671b", size = 13026582 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/52/ea/1c2fa386b718ff493225e61cfc052ef400b4d6ffc54cbe261026432624b5/grpcio-1.81.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:d71d30f2d92f67d944631c523713934fee37292469e182ebcd2c1dd8a64ce53f", size = 6093112 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/18/acf45fa8bd1bc5d7b0c2fd3dc4c209379fbd5bb396b440b68a83342226b7/grpcio-1.81.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b137f4bf3ada9dc44d411478decc6ff09a79ed30b306cd2abaa98408c3588137", size = 12074277 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/d7/ee86a60699b7db039f772a2c4a7e4facc7138984ff42c0130933a0063884/grpcio-1.81.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a3acb384427816dd5d470f47e62137b87f74da694faa8a50147012cf40df276a", size = 6640348 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/ee/d2de5e47378ffc207d476c230fea3be4d2601edbce9995f4fe45535d4896/grpcio-1.81.1-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f9a0ebbe45c29b5e5866593c12b78bd9035f0f0f0d4bc8361680cd580d99db49", size = 7331842 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/d6/abeda5c2b896a0b341584fe5ac411bbf72e197a9a374c355fb90965e08d2/grpcio-1.81.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a37165cc80b1a368384b383e63a4c38116a10467ae44c904d2d7468c4470ec2", size = 6842229 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/10/1c/1f0da7d590b4aeee006826ba568d0e419ca14b23e18f901a3da3e9fba613/grpcio-1.81.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6282caffb41ec326d4cb67ca9cf53b739d1b2f975a2acb498c7418e9f7d9a416", size = 7446096 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/81/5c505d508f7c887aa7982d21443a4126597c80d34b0bcf40f9cec576d7f3/grpcio-1.81.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a35009284d0d3d5c2c9601c164a911b8b4331608d98a9a66d47d97bb2f522b70", size = 8445238 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/f7/b2/524847365122ee509ca17bcc4e092198b700e94af7bfd5bb5e6dd9f3ee66/grpcio-1.81.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1b22c80559854b789a01fd89e8929b3798a156c0829b5282a8939f33ad4115ad", size = 7873989 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/fa/07c037c50b006909d1d13a5848774f8aa7b242f70dc03a035c64eea0e6db/grpcio-1.81.1-cp311-cp311-win32.whl", hash = "sha256:428bec0161b48d8cf583c068591bc0016d0d9cfff52462b72b3884861ea768c5", size = 4202223 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/ed/6bff15376920942fac6b95b9802752b837437172c9e8fc2d3170546b89cc/grpcio-1.81.1-cp311-cp311-win_amd64.whl", hash = "sha256:30e825f6848d9f18bba350ed6c75c1b02a0b5184474a31db9a32b1fa66fd8c79", size = 4941303 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/07/9a979c81738863a738dc23d65177056e71fbb2db817740ed870b33434e7a/grpcio-1.81.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:8b39472beafc0bdcafc4c8c73ad082ebfdb449d566897a61e7acb4fa88089115", size = 6053264 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/95/539706ca0d3bd40dbad583dc56fd883da941f37556b629132da5762781b9/grpcio-1.81.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:12b7524c88d4026d3dcb7b0ebe16b6714f3b4af402ddd0f0639ab064a00c87c3", size = 12052560 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/44/f257b7e0bd69c93b06c6cb8ac8d1b901ccb42bedabd83c1a4c77a71f8810/grpcio-1.81.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1e123f9b37edb8375fd74130d1f69c944bbf0a7b06761ae7211154b8759e94d2", size = 6595983 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/f3/19782aa04c960968bef8c5539329d8e3bbc3364e2e46d19eb5e5cc5e43b7/grpcio-1.81.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:2c2e2ae6867c2966b8daccc836d54a13218e0007e9a490aeb81dd05be64d22d7", size = 7303455 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/eb/8c/dea020b6d91508cd84463917a63149ec196ee7db505d032ae43fcb3303b9/grpcio-1.81.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:766bc7c9a9c340342f4c864ccbda8e78111e4751f13b895812b9c148fb79e9d0", size = 6809167 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/c7/3030dd940408083bd32cd95d634777a71605ade4887154d93e8a89244946/grpcio-1.81.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b259a04a737cb3496be0901328eb8b7552ed8df4865d8c8f1cf1bffcfc0776a3", size = 7412536 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/dd/1172a9e42b168edcafefad6115346ef619a3fc02158bb170e66ced24bcdd/grpcio-1.81.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:85b10a45b8993d195c4f3ff57025b8d1e11834909ee475c403bfa60cb4caefaf", size = 8408276 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/25/7a/71437c7f3596e5246155c515852795a85a1a8d228190212432b13b97a95d/grpcio-1.81.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8ea1936c26b99999b27479853039a7f34713f56c49375ad52b38535ec93a796c", size = 7849660 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/40/7debc0da45d2efebafb82da75644be347497fe4ee250514b8cd3b86ae8bf/grpcio-1.81.1-cp312-cp312-win32.whl", hash = "sha256:a185a04039df6cae8648bc8ab6d6fde7bf94f7188ecf7828e76ac52eef1e41d6", size = 4185819 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2e/b9/8fe3ba5ed462067774ebc1f9c7f26aa7ebcc280ddd476be107153de1339e/grpcio-1.81.1-cp312-cp312-win_amd64.whl", hash = "sha256:3ad74f8bb1a18963914c5452d289422830b39459e8776ebbcd207be1fbfb1d94", size = 4930461 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/42/dcc2e4b600538ef18327c0839d56b7d3c3812337c5d710df5877dbb39b1e/grpcio-1.81.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:b10e1ff4756ed27d5a29d7fc79cfce7ef1ff56ad20025b89bac7cf79e09abbbe", size = 6054466 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/7b/4a/a36e03210183a8a7d4c80c3936acee679f4bd77d5861f369db47b2cc5f05/grpcio-1.81.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:819edbdcb42ab8598b494bcf0222684bbb7a3c772bd1b1f0be7e029a6063c28e", size = 12048795 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/d5/d68e30b29098f63beab6fe501100fe82674ff142b32c672532da86a99b3a/grpcio-1.81.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c5bf2dc311127d91230cc79b92188c082634a06cf66c5234db49a43b910183b0", size = 6599094 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/b3/e837954d279754f638a11cca5dcf6b24a005efb398984cefaf7735945a54/grpcio-1.81.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:e8ca6a1fcdb2943c9cbc1804a1baf3acb6071d72a471591678ded84218006e14", size = 7307182 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/1e/b47957057e729adc6cdf519a47f8be2562b7140e280f1418443eb4022192/grpcio-1.81.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e64dd101d380a115cc5a0c7856788adb535f1a4e21fc543775602f8be95180ae", size = 6810962 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/26/569868e364e05b19ec8f969da53d230bcd89c962cd198f7c29943155c4d3/grpcio-1.81.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:98a07f9bf591e3a8919797bee1c53f026ba4acd587e5a4404c8e57c9ec36b2a5", size = 7415698 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/0c/5440a0582cb5653fc42a6e262eeb22700943313f8076f9dc927491b20a59/grpcio-1.81.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c261d74b1a945cf895a9d6eccd1685a8e837531beaab782da4d630a8d12deffb", size = 8407779 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/aa/66fe9f39871d766987d869a03ee0842a026f499c7b1e62decb9e78a8088e/grpcio-1.81.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58ad1131c300d3c9b933802b3cc4dc69d380822935ba50b28703156ea826fbf7", size = 7844521 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/f0/9e/69bb7194861bcd28fb3193261d4f9c3831b4446993f002cf59068943e7ab/grpcio-1.81.1-cp313-cp313-win32.whl", hash = "sha256:78e29211f26da2fdd0e9c6d2b79f489476140cf7029b6a64808ade7ca4156a42", size = 4182786 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/20/3da8bb0d637feccdc3e1e419bb511ce93651ce7d54164f95de22cc0b8b34/grpcio-1.81.1-cp313-cp313-win_amd64.whl", hash = "sha256:edb59506291b647a30884b1d51a599d605f40b20af4a7dc3d33786a47a31de60", size = 4928648 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/58/19414622b1bf6981bc9c05a365bd548e71876c89000083b3af489251e9c0/grpcio-1.81.1-cp314-cp314-linux_armv7l.whl", hash = "sha256:506f48f2f9c29b143fca3dad7b0d518c188b6c9648c75a2ae6e2d9f2c13a060b", size = 6055336 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/f1/2ec88adb92b0eba970dd0e0e7dd086341daa3c75eba4f735f9e44bf684b0/grpcio-1.81.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d865db4a6318e1c1bea83292e0ed231090538fc4ca45425b0f0480eb338bbc6e", size = 12056279 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/36/e8c5f8c6ec71de73733695ebc809e98b178b534ec6d8eaa31a7ebab4ad4c/grpcio-1.81.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2aa72e3ce1770317ef534f63d397b55e130725f5149bd36077c3b539019db27", size = 6608225 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/22/96fc577a845ab093326d9ab1adb874bd4936c8cf98ac8ed2f3db13a0a2fb/grpcio-1.81.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0490c30c261eded63f3f354979f9dc4502a9fb944cccb60cd9dc85f5a7349854", size = 7306576 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/7b/61dab5d5969f28d97fb1009cead1df0a5cd987d3315e1b37f18a4449f8bc/grpcio-1.81.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:410482da976329fe5f4067270401b12cf2bd552ff8020f054ecfaddb5475f9d6", size = 6812165 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/82/78/6e501929d4f5f96462fd82fd9f0f06e5f9612207582b862868d68757b27d/grpcio-1.81.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e3657301562ac3cb8018d30d0d3ebfa39932239f7b5703422057ef14b69949f5", size = 7422962 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/7e/f2157589e66daa78ebb3165942d05a08bdea93b9d11c2bc1e172aef89685/grpcio-1.81.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:24c8e57504c8f45b237e40b99262d181071e5099a07053695b75d97bb53053a0", size = 8408176 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/df/c6717fef716e00d235ffb96123baf6dce76d6004f6233fa767c502861460/grpcio-1.81.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b427c19380991a4eaab2f6144b64b99b412043314c6bf4ab544f97bb31ee4190", size = 7846681 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/84/3502e9f210a6a5c4438c8aca3f88edd2e04f6a27f3d41b26cf0a0024b096/grpcio-1.81.1-cp314-cp314-win32.whl", hash = "sha256:61233fe8951e5c85dff81c2458b6528624760166946b5b47ea150a589168411f", size = 4264615 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/b0/4af731ff7492c68a96e4c71bfd0f4590acde92b31c6fe4894e6465c10ff6/grpcio-1.81.1-cp314-cp314-win_amd64.whl", hash = "sha256:3768a5ff1b2125e6f552e561b6b2dca0e64982d8949689b4df145cf8b98d7821", size = 5070275 }, -] - -[[package]] -name = "h11" -version = "0.16.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, -] - -[[package]] -name = "httpcore" 
-version = "1.0.9" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "certifi" }, - { name = "h11" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, -] - -[[package]] -name = "httpx" -version = "0.28.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "anyio" }, - { name = "certifi" }, - { name = "httpcore" }, - { name = "idna" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, -] - -[[package]] -name = "httpx-sse" -version = "0.4.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = 
"sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960 }, -] - -[[package]] -name = "idna" -version = "3.18" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cd/63/9496c57188a2ee585e0f1db071d75089a11e98aa86eb99d9d7618fc1edce/idna-3.18.tar.gz", hash = "sha256:ffb385a7e039654cef1ab9ef32c6fafe283c0c0467bba1d9029738ce4a14a848", size = 196711 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/5e/d4e9f1a599fb8e573b7b87160658329fbf28d19eac2718f51fc3def3aa5a/idna-3.18-py3-none-any.whl", hash = "sha256:7f952cbe720b688055e3f87de14f5c3e5fdaa8bc3928985c4077ca689de849a2", size = 65455 }, -] - -[[package]] -name = "iniconfig" -version = "2.3.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 }, -] - -[[package]] -name = "jinja2" -version = "3.1.6" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = 
"sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 }, -] - -[[package]] -name = "jiter" -version = "0.15.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/b5/55f06bb281d92fb3cc86d14e1def2bd908bb77693183e7cb1f5a3c388b0c/jiter-0.15.0.tar.gz", hash = "sha256:4251acc80e2b7c9b7b8823456ea0fceeb0734dac2df7636d3c711b38476b5a76", size = 166640 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/13/daa722f5765c393576f466378f9dfd29d77c9bed939e0688f96afa3601ea/jiter-0.15.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0f862193b8696249d22ec433e85fd2ab0ad9596bc3e45e6c0bc55e8aeba97be2", size = 310899 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/82/2d2551829b082f4b6d82b9f939b031fb808a10aab1ec0664f82e150bb9a2/jiter-0.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1303d4d68a9b051ea90502402063ecf3807da00ad2affa19ca1ae3b90b3c5f67", size = 314963 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/0a/8b1a51466f7fe9f31dbe4bc7e0ca848674f9825e0f737b929b97e8c60aa7/jiter-0.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:392b8ab019e5502d08aff85c6272209c24bc2cbe706ea82a56368f524236614a", size = 341730 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/2a/e71dea19822e2e404e83992a08c1d6b9b617bb944f28c9c2fbd85d02c91e/jiter-0.15.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:773b6eb282ce11ee19f05f6b2d4404fa308e5bbd353b0b80a0262caad6db2cd7", size = 366214 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/59/97e1fa539d124a509a00ab7f669289d1c1d236ecabf12948a18f16c91082/jiter-0.15.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d2c0c44d569ce0f2850f5c926f8caeb5f245fbc84475aeb36efccc2103e6dbd", size = 459527 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d1/7a/4a68d331aef8cf2e2393c14a3aacb635c62aa86071b0229899fb5baaa907/jiter-0.15.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:032396229564bca02440396bd327710719f724f5e7b7e9f7a8eb3faa4a2c2281", size = 375451 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/7e/1c445c2b6f0e30a274dc8082e0c3c7825411cce80d726bccd697c98cc8d3/jiter-0.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d37768fce7f88dd2a8c6091f2325dea27d30d30d5c6e7a1c0f0af77723b708", size = 349428 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/00/94/e20d38984fc17a636371bffd2ae0f698124fdc8e75ef969cd2da6ba7cea7/jiter-0.15.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:2c9cb907439d20bd0c7d7565ca01ee52234203208433749bae5b516907526928", size = 355405 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/fa/4d09f814779d0ea80a28ed8e4c6662ec9a4a8ecef0ac52190ebac6262d14/jiter-0.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9100ddbec09741cc66feb0fc6773f8bdbd0e3c345689368f260082ff85dcc0cd", size = 393688 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/9d/8eb5d4fb8bf7e93a75964a5da71a75c67c864baf7fa3f98598187b3c7e57/jiter-0.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ae1b0d82ac2d987f9ea512b1c9adfcc71a28de3dea3a6039b54d76cffda9901e", size = 520853 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/2c/5e07874e59e623a943a0acf1552a80d05b70f31b402287a8fc6d7ec634c7/jiter-0.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8020c99ec13a7db2b6f96cbe82ef4721c88b426a4892f27478044af0284615ef", size = 551016 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/ed/d2d34422143474cadc15b60d482b1c35683dbc5c63c24346ddd0df09bcaf/jiter-0.15.0-cp311-cp311-win32.whl", hash = "sha256:42bfb257930800cf43e7c62c832402c704ab60797c992faf88d20e903eac8f32", size = 209518 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/1d/7d/52778b930e5cc3e52a37d950b1c10494244308b4329b25a0ff0d88303a81/jiter-0.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:860a74063284a2ae9bfedd694f299cc2c68e2696c5f3d440cc9d18bb81b9dd04", size = 200565 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/4f/d9b4067feb69b3fa6eb0488e1b59e2ad5b463fe39f59e527eab2aca00bb0/jiter-0.15.0-cp311-cp311-win_arm64.whl", hash = "sha256:37a10c377ce3a4a85f4a67f28b7afe093154cde77eaf248a72e856aa08b4d865", size = 195488 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/53/4f6bddbcde3c71e56d0aa1337ec95950f3d27dd4153e25aadf0feac71751/jiter-0.15.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0e90a1c315a0226ec822d973817967f9223b7701546c8c2a7913e7ab0926294d", size = 308793 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/84/c01099b59a285a1ebba64ae93f62bfa036675340fd1b0045ae65890a0442/jiter-0.15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c9004af7c8d67cce7f1aae1026fb55607f4aa600710d08ede3a3ce4aeefe7e0", size = 309570 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/64/8fb7f9d45bb98190355454cd04dad8d8f27223d6bd52f83af07f637168a6/jiter-0.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c210f8b35dc6f30aafd4b4365ca89b9d1189f21ab49b8e68fa6322a847aef138", size = 336783 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c3/b6/f5739011d009b3a30f6a53c5240979030ba29ae46a8c67e3a15759f7c37d/jiter-0.15.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f30bae8bc1c2d613e28e5af3e8cceb09b742f1c8a8a5f839fb67afaffc03b61", size = 363555 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/12/98a9d9f766665e8a3b6252454e17cb0c464606a28cf2fa09399b003345fa/jiter-0.15.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c60e71b6d10cfc284c9bf36bd885e8d44c46f688ce50aa91b5edd90181dea687", size = 452255 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e8/d5/60f972840f79c5e7544fce567c56f1e4e50468f996baba3e78d823dd62a6/jiter-0.15.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ab068bce62a45aa3e7367eceaffb5dde60b7eb853be8dece45132e3d0ff4879", size = 373559 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/cf/d46ef1234ba335aabc2f013210db8e0821a22f5e644a2e9449df199ecc23/jiter-0.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa248c9eb220197d363f688818dac2fd4b2f0cd7d843ca7105d652034823427d", size = 346055 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/63/4d2749d8d54d230bad9b3a6b0d00cc28c6ff6b2fdffc26a8ccf76cc5a974/jiter-0.15.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2a77aadd57cac1682e4401a72724d2796d89a4ba129b1a5812aa94ee480826eb", size = 351406 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/b9/9965b990035d8773328e0a8c8b457a87bf2b19f6c4126d9d99296be5d16a/jiter-0.15.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ae901f3a55bfafdde31d289590fa25e3245735a2b1e8c7cc15871710a002871", size = 389357 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2d/55/9ddf903deda1413e87fed792f416b7123daee5b8efbad6a202a7421c36a5/jiter-0.15.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f0b271b462769543716f92d3a4f90527df6ef5ed05ee95ec4137f513e21e1b77", size = 517263 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e8/76/a0c40ad064d3a20a4fde231e35d56e9a01ce82164278180e82d5daf85469/jiter-0.15.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2fb6a5d26af81fc0f00f9360a891e05cf755e149bba391c4d563adc54812973d", size = 548646 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/4f/eca9b954942916ba2f453891b8593ab444cd872396fe66a3936616f236f3/jiter-0.15.0-cp312-cp312-win32.whl", hash = "sha256:c2f6bb8b5216ab9e7873bc08b5d7bef2b8abbb578a3069bf1cd14a45d71d771d", size = 206427 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/95/bf/8ead82a87495149542748e828d153fd232a512a22c83b02c4815c1a9c7d8/jiter-0.15.0-cp312-cp312-win_amd64.whl", hash = "sha256:40b2c7e92c44a84d748d21706c68dc6ff8161d80b59c99d774721a0d2317d7c7", size = 197300 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/e4/9b8a78fb2d894471bc344e37f1949bdd784bd914d031dba0ba3a40c71dd7/jiter-0.15.0-cp312-cp312-win_arm64.whl", hash = "sha256:cc0bc345cf2df9d1c00ac443f50d543c1ccfa8b0422cb85b1ab70d681c0b255b", size = 192702 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/f4/f708c900ecee41b2025ef8413d5351e5649eb2125c506f6720cc69b06f5c/jiter-0.15.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1c11465f97e2abf45a014b83b730222f8f1c5335e802c7055a67d50de6f1f4e3", size = 307829 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/59/db537c0949e83668c38481d426b9f2fd5ab758c4ee53a811dd0a510626a0/jiter-0.15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d1e7b1776f0797956c509e123d0952d10d293a9492dea9f288ab9570ec01d1a5", size = 308445 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/38/ea0e13b18c30ef951da0d47d39e7fa9edb82a93a62990ffbd7cea9b622d4/jiter-0.15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:351a341c2105aa430b7047e30f1bf7975f6313b00165d3fc07be2edaf741f279", size = 336181 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/fc/2303901b16c4ba05865588990a420c0b4156270b44379c20931544a1d962/jiter-0.15.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4ab395feec8d249ec4044e228e98a7033f043426a265df439dc3698823f0a4e4", size = 362985 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/6f/11bace093c52e7d4d26c8e606ccd7ae8c972189622469ec0d9e28161e28b/jiter-0.15.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2a438005b6f22d0273413484d6094d7c2c5d10ec1b3a3bf128e0d1d3ba53258", size = 453292 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/22/db/987f2f086ca4d7a6582eb4ccd513f9b26b42d9e4243a087609a3137a8fc7/jiter-0.15.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f18f85e4218d1b40f000f42a92239a7a61a902cd42c65e6c360dbd17dcb20894", size = 373501 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/7c/89fbcabb2739b7a5b8dc959a1b6c5761f6484f5fed3486854b3c789bb1de/jiter-0.15.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1aa62e277fc1cbd80e6deacae6f4d983b41b3d7728e0645c5d741a6149bba45", size = 344683 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/6f/6cca7692e7dddfec6d8d76c54dc97f2af2a41df4ac0674b999df1f09a5f3/jiter-0.15.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:6550fa135c7deb8ead6af49ed7ff648532ea8334a1447fe34a36315ef79c5c29", size = 350892 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/14/0338d6190cb8e6d22e677ab1d4eabd4117f67cca70c54cd04b82ff64e068/jiter-0.15.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:066f8f33f18b2419cd8213b2436fa7fbc9c499f315971cfa3ce1f9820c001b1b", size = 388723 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/31/cc19f4a1bdb6afb09ce6a2f2615aa8d44d994eba0d8e6105ed1af920e736/jiter-0.15.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:75e8a04e91432dde9f1838373cf93d23726c79d3e908d319acf0e796f85592e7", size = 516648 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/9f/833c541512cd091b63c10c0381973dfe11bc7a503a818c16384417e0c81e/jiter-0.15.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a97261f1fccb8e50ecd2890a96e46efdc3f57c80a197324c6777827231eca712", size = 547382 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/11/e7b70e91f90bc4477e8eee9e8a5f7cf3cb41b4525d6394dc98a714eb8f7f/jiter-0.15.0-cp313-cp313-win32.whl", hash = "sha256:c77496cb10bd7549690fbbab3e5ec05857b83e49276f4a9423a766ddd2afcd4c", size = 205845 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/4b/23/5c20d9ad6f02c493e4023e5d2d09e1c1f15fe2753c9102c544aff068a88e/jiter-0.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:b15741f501469009ae0ae90b7147958a664a7dede40aa7ff174a8a4645f546d0", size = 196842 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/11/1eb400ef248e8c925fd883fbe325daf5e42cd1b0d308539dd332bd4f7ffc/jiter-0.15.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d6a60072b44c3c2b797a7ddcbcbbf2b34ea3cfd4721580fbfd2a09d9d9b84ba", size = 192212 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/60/2fd8d7c79da8acf9b7b277c7616847773779356b92acfc9bb158452174da/jiter-0.15.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ef1fd24d9413f6209e00d3d5a453e67acfe004a25cc6c8e8484faed4311ab9e8", size = 315065 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/f4/008fb7d65e8ac2abf00811651a661e025c4ba80bbc6f378450384ddd3aed/jiter-0.15.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:144f8e72cb53dab146347b91cceac01f5481237f2b93b4a339a1ee8f8878b67c", size = 339444 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/00/55/90b0c7b9c6896c0f2a591dd36d36b71d22e09674bfef178fa03ba3f81499/jiter-0.15.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553fcac2ef2cb990877f9fc0833b8b629a3e6a5670b6b5fd58219b41a653ddc4", size = 347779 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/6b/69666cec5000fd57734c118437394516c749ae8dbeea9fb66d6fef9c4775/jiter-0.15.0-cp313-cp313t-win_amd64.whl", hash = "sha256:774f93f65031856bf14ad9f59bdcab8b8cad501e5ceabd51ba3525f76937a25b", size = 200395 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/04/a6aa62cd27e8149b0d28df5561f10f6cceaf7935a9ccf3f1c5a05f9a0cd8/jiter-0.15.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f1e1754960f38ec40613a07e5e372df67acb3b890fb383b6fb3de3e49ddbf3c7", size = 190516 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/eb/d2/079f350ebf7859d081de30aa890f9e3be68516f754f3ba32366ffff4dcee/jiter-0.15.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:ac0d9ddea4350974be7a221fc25895f251a8fee748c889bdced2141c0fec1a49", size = 308884 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/4e/a2c30a7f69b48c03b20935d647479106fe932f6e63f75faf53937197e05d/jiter-0.15.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:01a8222cf05ab1128e239421156c207949808acaaea2bdfd33130ae666786e86", size = 310028 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/90/2e7cdfd3cf8ca967be38c48f5cf474d79f089efaf559a40f15984a77ae69/jiter-0.15.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:182226cbc930c9fab81bc2e41a4da672f89539906dadb05e75670ac07b94f71f", size = 337485 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/11/15a1aa28b120b8ee5b4f1fb894c125046225f09847738bd64233d3b84883/jiter-0.15.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:71683c38c825452999b5717fcae07ea708e8c93003e808be4319c1b02e3d176e", size = 364223 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/25/f442e8af5f3d0dcf47b39e83a0efd9ee45ea946aa6d04625dc3181eae3b6/jiter-0.15.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30f2218e6a9e5c18bc10fe6d41ac189c442c88eacf11bad9f28ef95a9bef00e6", size = 456387 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/f4/37f2d2c9f64f49af7da652ed7532bb5a2372e588e6927c3fdd76f911db65/jiter-0.15.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5157de9f76eb4bc5ea74a1219366a25f945ad305641d74e04f59c54087091aa9", size = 374461 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/28/edcfbbbf0cb15436f36664a8908a0df47ab9006298d4cd937dc08ea932d6/jiter-0.15.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c5db5527c221249a876160663ab891ace358c17f7b9c93ec1478b7f0550e5c", size = 345924 
}, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/13/89fba6398dab7f202b7278c4b4aac122399d2c0183971c4a57a3b7088df5/jiter-0.15.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:3e4540b8e74e4268811ac05db226a6a128ff572e7e0ce3f1163b693cadb184cd", size = 352283 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/da/0f6af8cef2c565a1ab44d970f268c43ccaa72707386ea6388e6fe2b6cd26/jiter-0.15.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:62ebd14e47e9aed9df4472afcb2663668ce4d74891cd54f86bf6e44029d6dc89", size = 389985 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/ec/b9cb7d6d29e24ee14910266157d2a279d7a8f60ee0df7fa840882976ba64/jiter-0.15.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0be6f5ad41a809f303f416d17cec92a7a725902fb9b4f3de3d19362ac0ef8554", size = 517695 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/5e/6d1bda880723aae0ad86b4b763f044362448efe31e3e819635d41cb03451/jiter-0.15.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:813dfbb17d65328bf86e5f0905dd277ba2265d3ca20556e86c0c7035b7182e5a", size = 548868 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/72/7de501cf38dcacaf35098796f3a50e0f2e338baba18a58946c618544b809/jiter-0.15.0-cp314-cp314-win32.whl", hash = "sha256:50e51156192722a9c58db112837d3f8ef96fb3c5ecc14e95f409134b08b158ec", size = 206380 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/a9/e19addf4b0c1bdce52c6da12351e6bc42c340c45e7c09e2158e46d293ccc/jiter-0.15.0-cp314-cp314-win_amd64.whl", hash = "sha256:30ce1a5d16b5641dc935d50ef775af6a0871e3d14ab05d6fc54dff371b78e558", size = 197687 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/c9/776b1db01db25fc6c1d58d1979a37b0a9fe787e5f5b1d062d2eaacb77923/jiter-0.15.0-cp314-cp314-win_arm64.whl", hash = "sha256:510c8b3c17a0ed9ac69850c0438dada3c9b82d9c4d589fcb62002a5a9cf3a866", size = 192571 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a0/f6/45bb4670bacf300fd2c7abadbfb3af376e5f1b6ae75fd9bc069891d15870/jiter-0.15.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7553333dd0930c104a5a0db8df72bf7219fe663d731383b576bb6ed6351c984d", size = 317151 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/68/ed635ad5acd7b73e454283083bbb7c8205ad10e88b0d9d7d793b09fe8226/jiter-0.15.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2143ab06181d2b029eedcb6af3cebe95f11bbac62441781860f98ee9330a6a6", size = 341243 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/db/3ff4176b817b8ea33879e71e13d8bc2b0d481a7ed3fe9e080f333d415c16/jiter-0.15.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6eac374c5c975709b69c10f09afd199df74150172156ad10c8d4fd785b7da995", size = 363629 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/24/5f8270e0ba9c883582f96f722f8a0b58015c7ce1f8c6d4571cf394e99b6b/jiter-0.15.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3b3b775e33d3bfaec9899edc526ae97b0da0bf9d071a46124ba419149a414f8", size = 456198 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/5b/76fc02b0b5c54c3d18c60653156e2f76fde1816f9b4722db68d6ee2c897e/jiter-0.15.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3071db3346334beae1360b46da4606da57bf3528c167b3c38533afaf9f2c5", size = 373710 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/52/4310821b0ea9277994d3e1f49fc6a4b34e4800caebacb2c0af81da59a454/jiter-0.15.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6694a173ecabc12eb60efbc0b474464ead1951ff65cd8b1e72100715c64512b", size = 349901 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/fe/67648c35b3594fba8854ac64cc8a826d8bcd18324bbdb53d77697c60b6ef/jiter-0.15.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:a254e10b593624d230c365b6d616b22ca0ad65e63a16e6631c2b3466022e6ba8", size 
= 352438 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/28/0a1879d07ad6b3e025a2750027363452ced93c2d16d1c9d4b153ffd51c91/jiter-0.15.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d8d2955167274e15d79a7a020afdd9b39c990eb80b2d89fca695d92dcfdd38ec", size = 388152 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/78/46c6f6b56ba85c90021f4afd72ed42f691f8f84daacb5fe27277070e3858/jiter-0.15.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:acf4ee4d1fc55917239fe72972fb292dd773055d05eb040d36f4326e02cc2c0e", size = 517707 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/cb/720662d4c88fcad606e826fef5424365527ba43ce4868a479aed8f8c507e/jiter-0.15.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:e7196e56f1cd69af1dbb07dff02dcfb260a50b45a82d409d92a06fedb32473b5", size = 548241 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/e3/935b8034fd143f21125c87d51404a9e0e1449186a494405721ff5d1d695e/jiter-0.15.0-cp314-cp314t-win32.whl", hash = "sha256:7f6163c0f10b055245f814dcc59f4818da60dfe72f3e72ab89fc24b6bd5e9c52", size = 207950 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/59/984fd9ece895953dad3e0880a650e766f5a2da2c5514f0eafdaaabbeb5f9/jiter-0.15.0-cp314-cp314t-win_amd64.whl", hash = "sha256:980c256edb05b78a111b99c4de3b1d32e31634b867fd1fc2cf726e7b7bba9854", size = 200055 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/a4/cf8d779feb133a27a2e3bc833bccb9e13aa332cdf820497ebf72c10ce8c3/jiter-0.15.0-cp314-cp314t-win_arm64.whl", hash = "sha256:66b1880df2d01e206e8339769d1c7c1753bcb653efd6289e203f6f24ebada0c0", size = 191244 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/43/1fc62172aa98b50a7de9a25554060db510f85c89cfbed0dfe13e1907a139/jiter-0.15.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:411fa4dfa5a7ae3d11491027ffb9beadec3996010a986862db70d91abba1c750", size = 305585 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e8/c4/dd58fcd9e2df83666e5c1c1347bef58ce919cd8efc3ffa38aeea62ce493b/jiter-0.15.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:2b0074e2f56eb2dacca1689760fd2852a068f85a0547a157b82cb4cafeb6768b", size = 306936 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/86/b695e16f1180c07f43ea98e73ecd21cf63fa2e1b0c1103739013784d11ae/jiter-0.15.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:913d02d29c9606643418d9ccfc3b72492ab25a6bf7889934e09a3490f8d3438b", size = 342453 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/56/55d76614af37fe3f22a3347d1e410d2a15da581997cb2da499a625000bb5/jiter-0.15.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b15d3ec9b0449c40e85319bdb4caa8b77ab526e74f5532ed94bec15e2f66822c", size = 345606 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/73/38/505941b2b092fd5bbbd60a52a880db1173f1690ae6751bed3af1c9ddcb4e/jiter-0.15.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:631f13a3d04e97d4e083993b10f4b99530e3a10d953e2eb5e196b7dc7f812ce0", size = 303769 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/95/a06692b29e77473f286e1ec1f426d3ca44d7b5843be8ad21d7a5f3fcdcc0/jiter-0.15.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:b6c0ffae686c39bf3737be60793783267628783ea42545632c10b291105aee45", size = 305128 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/85/7270d7ad41d6061a25b950c6bf91d638bd9aacb113200a8c8d57a055fd67/jiter-0.15.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d54fb5b31dea401a41af3f8a7d2512e9b6a6a005491e6166c7e4ffab9639a9c", size = 340459 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/c8/8d/302cb2057b7513327b4d575cff6b1d066ee6431a5357fc3f8867cd684406/jiter-0.15.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54d5d6090cdc1b7c9e780dfb04949a990adb1e301a2fc0bbcee7de4638d33f9a", size = 344469 }, -] - -[[package]] -name = "joblib" -version = "1.5.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071 }, -] - -[[package]] -name = "json-repair" -version = "0.61.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/92/b46b19c400cdb87c634778cc7eec140c922f7ec0af3941d1e4a04ebeda81/json_repair-0.61.1.tar.gz", hash = "sha256:24a68de2891c696ad3bd9a94874e8d3ef2d309c56af2973094b8297c975b5b58", size = 50069 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/0d/abd5fe1251c8588c6c8d441fab263734bcaa71dfd895ec4b88c202a86254/json_repair-0.61.1-py3-none-any.whl", hash = "sha256:7ab26583e4c73418b8b60cc61202f64f119984a9b5fed61087e84158fa29e7d0", size = 48543 }, -] - -[[package]] -name = "json5" -version = "0.15.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/7d/05c46a96a78147ae3bf99c2f4169ce144a70220b8d6fcd56f6ec368b8ce9/json5-0.15.0.tar.gz", hash = "sha256:7424d1f1eb1d56da6e3d70643f53619862b4ce81440bdb8ecfd6f875e5ba4a71", size = 53278 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/eb/be/59527c99478aade6bb33a68d72e6e18dd4e6ff6eacfc7d01bdb15bc76912/json5-0.15.0-py3-none-any.whl", hash = "sha256:56636a30c0e8a4665fe2179c0212f32eae3796dea89ea6f649b9436ecdb39618", size = 36570 }, -] - -[[package]] -name = "jsonschema" -version = "4.26.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "attrs" }, - { name = "jsonschema-specifications" }, - { name = "referencing" }, - { name = "rpds-py" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630 }, -] - -[[package]] -name = "jsonschema-specifications" -version = "2025.9.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "referencing" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437 }, -] - -[[package]] -name = "lightgbm" -version = "4.6.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "numpy" }, - { name = "scipy" }, -] -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/68/0b/a2e9f5c5da7ef047cc60cef37f86185088845e8433e54d2e7ed439cce8a3/lightgbm-4.6.0.tar.gz", hash = "sha256:cb1c59720eb569389c0ba74d14f52351b573af489f230032a1c9f314f8bab7fe", size = 1703705 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/75/cffc9962cca296bc5536896b7e65b4a7cdeb8db208e71b9c0133c08f8f7e/lightgbm-4.6.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:b7a393de8a334d5c8e490df91270f0763f83f959574d504c7ccb9eee4aef70ed", size = 2010151 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/21/1b/550ee378512b78847930f5d74228ca1fdba2a7fbdeaac9aeccc085b0e257/lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:2dafd98d4e02b844ceb0b61450a660681076b1ea6c7adb8c566dfd66832aafad", size = 1592172 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/41/4fbde2c3d29e25ee7c41d87df2f2e5eda65b431ee154d4d462c31041846c/lightgbm-4.6.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4d68712bbd2b57a0b14390cbf9376c1d5ed773fa2e71e099cac588703b590336", size = 3454567 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/42/86/dabda8fbcb1b00bcfb0003c3776e8ade1aa7b413dff0a2c08f457dace22f/lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:cb19b5afea55b5b61cbb2131095f50538bd608a00655f23ad5d25ae3e3bf1c8d", size = 3569831 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/23/f8b28ca248bb629b9e08f877dd2965d1994e1674a03d67cd10c5246da248/lightgbm-4.6.0-py3-none-win_amd64.whl", hash = "sha256:37089ee95664b6550a7189d887dbf098e3eadab03537e411f52c63c121e3ba4b", size = 1451509 }, -] - -[[package]] -name = "llvmlite" -version = "0.47.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/88/a8952b6d5c21e74cbf158515b779666f692846502623e9e3c39d8e8ba25f/llvmlite-0.47.0.tar.gz", hash = "sha256:62031ce968ec74e95092184d4b0e857e444f8fdff0b8f9213707699570c33ccc", size = 193614 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/34/0b/b9d1911cfefa61399821dfb37f486d83e0f42630a8d12f7194270c417002/llvmlite-0.47.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74090f0dcfd6f24ebbef3f21f11e38111c4d7e6919b54c4416e1e357c3446b07", size = 37232770 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/27/5799b020e4cdfb25a7c951c06a96397c135efcdc21b78d853bbd9c814c7d/llvmlite-0.47.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ca14f02e29134e837982497959a8e2193d6035235de1cb41a9cb2bd6da4eedbb", size = 56275177 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/51/48a53fedf01cb1f3f43ef200be17ebf83c8d9a04018d3783c1a226c342c2/llvmlite-0.47.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:12a69d4bb05f402f30477e21eeabe81911e7c251cecb192bed82cd83c9db10d8", size = 55128631 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/50/59227d06bdc96e23322713c381af4e77420949d8cd8a042c79e0043096cc/llvmlite-0.47.0-cp311-cp311-win_amd64.whl", hash = "sha256:c37d6eb7aaabfa83ab9c2ff5b5cdb95a5e6830403937b2c588b7490724e05327", size = 38138400 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/48/4b7fe0e34c169fa2f12532916133e0b219d2823b540733651b34fdac509a/llvmlite-0.47.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:306a265f408c259067257a732c8e159284334018b4083a9e35f67d19792b164f", size = 37232769 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/4b/e3f2cd17822cf772a4a51a0a8080b0032e6d37b2dbe8cfb724eac4e31c52/llvmlite-0.47.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5853bf26160857c0c2573415ff4efe01c4c651e59e2c55c2a088740acfee51cd", size = 56275178 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/55/a3b4a543185305a9bdf3d9759d53646ed96e55e7dfd43f53e7a421b8fbae/llvmlite-0.47.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:003bcf7fa579e14db59c1a1e113f93ab8a06b56a4be31c7f08264d1d4072d077", size = 
55128632 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/f5/d281ae0f79378a5a91f308ea9fdb9f9cc068fddd09629edc0725a5a8fde1/llvmlite-0.47.0-cp312-cp312-win_amd64.whl", hash = "sha256:f3079f25bdc24cd9d27c4b2b5e68f5f60c4fdb7e8ad5ee2b9b006007558f9df7", size = 38138692 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/6f/4615353e016799f80fa52ccb270a843c413b22361fadda2589b2922fb9b0/llvmlite-0.47.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a3c6a735d4e1041808434f9d440faa3d78d9b4af2ee64d05a66f351883b6ceec", size = 37232771 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/b8/69f5565f1a280d032525878a86511eebed0645818492feeb169dfb20ae8e/llvmlite-0.47.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2699a74321189e812d476a43d6d7f652f51811e7b5aad9d9bba842a1c7927acb", size = 56275178 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/da/b32cafcb926fb0ce2aa25553bf32cb8764af31438f40e2481df08884c947/llvmlite-0.47.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6c6951e2b29930227963e53ee152441f0e14be92e9d4231852102d986c761e40", size = 55128632 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/9f/4898b44e4042c60fafcb1162dfb7014f6f15b1ec19bf29cfea6bf26df90d/llvmlite-0.47.0-cp313-cp313-win_amd64.whl", hash = "sha256:c2e9adf8698d813a9a5efb2d4370caf344dbc1e145019851fee6a6f319ba760e", size = 38138695 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/d4/33c8af00f0bf6f552d74f3a054f648af2c5bc6bece97972f3bfadce4f5ec/llvmlite-0.47.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:de966c626c35c9dff5ae7bf12db25637738d0df83fc370cf793bc94d43d92d14", size = 37232773 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/1d/a760e993e0c0ba6db38d46b9f48f6c7dceb8ac838824997fb9e25f97bc04/llvmlite-0.47.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ddbccff2aeaff8670368340a158abefc032fe9b3ccf7d9c496639263d00151aa", size = 56275176 }, - { url 
= "https://pypi.tuna.tsinghua.edu.cn/packages/84/3b/e679bc3b29127182a7f4aa2d2e9e5bea42adb93fb840484147d59c236299/llvmlite-0.47.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4a7b778a2e144fc64468fb9bf509ac1226c9813a00b4d7afea5d988c4e22fca", size = 55128631 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/f7/19e2a09c62809c9e63bbd14ce71fb92c6ff7b7b3045741bb00c781efc3c9/llvmlite-0.47.0-cp314-cp314-win_amd64.whl", hash = "sha256:694e3c2cdc472ed2bd8bd4555ca002eec4310961dd58ef791d508f57b5cc4c94", size = 39153826 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/a1/581a8c707b5e80efdbbe1dd94527404d33fe50bceb71f39d5a7e11bd57b7/llvmlite-0.47.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:92ec8a169a20b473c1c54d4695e371bde36489fc1efa3688e11e99beba0abf9c", size = 37232772 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/03/16090dd6f74ba2b8b922276047f15962fbeea0a75d5601607edb301ba945/llvmlite-0.47.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fa1cbd800edd3b20bc141521f7fd45a6185a5b84109aa6855134e81397ffe72b", size = 56275178 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/cb/0abf1dd4c5286a95ffe0c1d8c67aec06b515894a0dd2ac97f5e27b82ab0b/llvmlite-0.47.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6725179b89f03b17dabe236ff3422cb8291b4c1bf40af152826dfd34e350ae8", size = 55128632 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/79/d3bbab197e86e0ff4f9c07122895b66a3e0d024247fcff7f12c473cb36d9/llvmlite-0.47.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6842cf6f707ec4be3d985a385ad03f72b2d724439e118fcbe99b2929964f0453", size = 39153839 }, -] - -[[package]] -name = "lxml" -version = "6.1.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/3b/aab6728cae887456f409b4d75e8a01856e4f04bd510de38052a47768b680/lxml-6.1.1.tar.gz", hash = 
"sha256:ba96ae44888e0185281e937633a743ea90d5a196c6000f82565ebb0580012d40", size = 4197430 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/b0/83f481780d1548750b8ce2ec824073deef2f452d9cd1a6faff8507e3d16d/lxml-6.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:53b7d2b7a10b1c35c0a5e21e9224accf60c1bbfba523990732e521b2b73adef2", size = 8526461 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/d5/30fa0f808002c7329397bfbb24e306789c0b29f04aa5842c07b174b4216f/lxml-6.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ff3f333630ab480244a1bff72043e511a91eb22e7595dead8653ee5612dd8f3d", size = 4595375 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/d2/edb71cf0e561581a7c5eb2626244320eb04e9f8ce6d563184fd668b45073/lxml-6.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a4bbea04c97f6d78a48e3fbc1cb9116d2780b1b39e03a23f6eb9b603fd61f510", size = 4923654 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/77/1bc7eeb0de4577d783fb625aa092cc9357883bba35845a3666bf1259f3dc/lxml-6.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db1d75f6617a49c1c01bc7023713e0ff59ab32c9579ae62a7674c0e34f3b0b0a", size = 5067921 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/3c/c0690d74bd2bc17bc03b5b0d093569ead597dd0bfa088bf99eef8c24e19c/lxml-6.1.1-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a12689be69a28ddaa0ab99a5a1137da2afd5f8f16df7b5680b66f616d3eda1d", size = 5002456 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/8d/d1b3271af0c0f1e27e8472a849e4d2c65bc7766884b9ad2da9e76e145c88/lxml-6.1.1-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b73c339ae29b90fd2d06e58ebd555a751bde9cd6bbd36cc0281b9a2c94e9d8", size = 5202776 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/45/689824ffb237fd10125ad273f32b28ff04dc6203c2822c85ff65a93df65e/lxml-6.1.1-cp311-cp311-manylinux_2_28_i686.whl", hash 
= "sha256:752d3bbfe874715ccd0aec7f88d7fc623c0f1fd7aa7b3238a084e017bad2a009", size = 5329945 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/c0/ef73af53767e958fd87d437c170f272e2f6e6c0f854939f133a895f1e711/lxml-6.1.1-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:6b1761fbf9ec984e2e9d9c589ef5f5fd684b7c19f92aadd567a26c5224958db6", size = 4659237 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/5e/e1158e40397585e91cb0472374a1f63d0926a1ddeaa92f13d1a1ffe306d5/lxml-6.1.1-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d680fbcb768404c601ecb43519ecd8461f6954cb11c06a78962f666832ccfca8", size = 5265904 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/16/8687e5d1400ed1c0bc41dace232ebb7553952b618ea1f2e5fb6e2cfbbe23/lxml-6.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:162af1091cd785f2f27e62d3547ae9bc58ec5c86dd314d67021fd02463708d83", size = 5045225 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/18/d877bd1ae2e5ffdfd4836565aba350db31feb2f2656d6ce70316ed66a05e/lxml-6.1.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e9308ff8241c532df3f3e570f9a5aeed6c853f888512ba4b75638d7c11c95ef6", size = 4712721 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/4d/1f44fd1d770b10dacbf6b5c6e520f4d6e0708744930f719dc04e67cab981/lxml-6.1.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:5f6994074ebae6ffb04447268e37dc16edc304f9859cf91acb86e0af6c1b395c", size = 5252549 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/5d/1d66b84f850089254c230ef6ea6b267a5a54e2e179a5d960036a05d501d7/lxml-6.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80c2dfadb855da477cf73373ad29a333535dedb9b12bad02c9814c8e2b43bf08", size = 5226877 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/00/84c4b5302d42a2d0184f38d538c8a197f33b52a50bd4f7bcfe990bce3036/lxml-6.1.1-cp311-cp311-win32.whl", hash = "sha256:30a89d3ac8faec007453fb541f3f46807eeec88edd5826f6e3fe001752a2c621", size = 
3594072 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/9d/2e2f7d876349f45e0f3e29f72da311668853d59b58d473a2dea4f0160135/lxml-6.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:abbefa31eee84842140f67acef1c828e28bba8bbf0c3bc6e5492a9af88152c28", size = 4025469 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/d5/570e6390e4110331e6208b2ba83d1482cc9146808ee118b22824a34c1070/lxml-6.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:dcb292aa7fe485ceff7af4f92e46c5af397daec5dff64871a528f0fc47a3cc5b", size = 3667640 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/6e/c4add832b6fc1e887125b96f880d7b9b70aae5248718e046b1704bcac4b9/lxml-6.1.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:104c09bda8d2a562824c0e319d0768ce26a779b7601e0931d33b09b53c392ef7", size = 8570821 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/00/ff3009c88e65de8011630acf8ab5a09cb2becd2aaf47fba2f3449f6224e9/lxml-6.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:25c6997a9a534e016695a0ba06b2f07945de682731ff01065b6d5a4474179da1", size = 4624252 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/42/95/bb63f0fd62e554fe078e1fb3c8fe9083c14ddc7ad7fa178d10e57e071ac7/lxml-6.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c921ba5c51e4e9f63b8b00267d06566e1f63407408a0496da2d1d0bfc819c7fc", size = 4930746 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/99/0013e8d9b5960f4f041cf0b73e2f80c23eb5205b1f7bfb20203243651359/lxml-6.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:54a7f95e4de5fb94e2f9f4b9055c6ba33bf3d628fd77a1d647c5923caa2cdcdc", size = 5093723 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/91/317b332636bfc7bddcff828d41b3307f50043f4b237e40849c333d80fa1a/lxml-6.1.1-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f2ec43df44b1f76249ee0a615334f9b5b060e1c8bd90e706dad2d14d02f383", size = 5005557 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/42/2f/cc9bf06afe70f9c9093ae60855d9759da9db601ec4080f7473319666ffd7/lxml-6.1.1-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:70ef8a7e102a1508f8121aae5b0867abd663f72c14f0a9c937e6554cb4587b7b", size = 5631036 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/f6/af32e23e563971ffb0fb86be52bc5be5c2c118858ffc119bf6a9039b173d/lxml-6.1.1-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ebe6af670449830d6d9b752c256a983291c766a1365ba5d5460048f9e33a7818", size = 5240367 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/83/8555d40948b09ce86f1bd0c68a7ac31d07b1929f92cc1b074006c97ef2d2/lxml-6.1.1-cp312-cp312-manylinux_2_28_i686.whl", hash = "sha256:27acc820660aaffa4f7c087f29120e12980f7779d56d8492d263170111284740", size = 5350171 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/63/75/5d92da93729b7bad783689e6496049fa40927b45bec7bf183c981de3ca70/lxml-6.1.1-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:1db753c9115ec7100d073b744d17e25e88a8f90f5c39b2f5dd878149af59671f", size = 4694874 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/b5/3aad415a9a25b822e783f15deeb4dffccf5113030f1afa2222dd929313d9/lxml-6.1.1-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4f469aebd783bb741c2ecb2a681008fd26bfe5c16a9a72ed5467f834e810df2", size = 5244492 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/a1/5fcf7eb9904b80086aa47dcf0027de07b1bb990afad2e6823144c368ae04/lxml-6.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:766b010012d59470072c1816b5b6c69f1d243e5db36ea5968e94accf430a4635", size = 5048232 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/74/1f601b63c7a69fcdf10fa9b148c81da8442204194f6c55509cc485c786b9/lxml-6.1.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b8d812c6011c08b8111a15e54dd990b8923692d80adf35488bee34026c35accf", size = 4777023 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a2/b9/7a78f51aec95b1bf780d78e12705a9f6533284f8693dc5c0e6724fa53d3f/lxml-6.1.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:fe0306bd29505a9177aac19f1877174b0e7422c222a59f70b2cd41633448c3dc", size = 5645773 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/6e/98a7b7ad54e4e74fa1f20fff776913980619d0ebe5558232d7da6580bdd8/lxml-6.1.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5ba186ad207446c65d3bb3d3e0412b032b1d9f595e59861e2354798c5703d955", size = 5233088 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/d1/bc0ed2427bf609f2ee10da303a6a226f9c8bce94f945dc29a32ce55de6e4/lxml-6.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aa366a1e55b8ebfe8ca8ddc3cfe75c8ebade181aeb0f661d0cb05986b647f72a", size = 5260995 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/8b/6772e1a4b513fc50a8d931f19edde0e13ae6918510a1e13ff67864f3e5ed/lxml-6.1.1-cp312-cp312-win32.whl", hash = "sha256:126c93f7f56f0eda92f6d8c619edc463a4f23d9252f1c9d0405a76f25fa9f11a", size = 3596382 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/89/45198e9624762af2dfd2cb8782598477ceb29f6e59caab560388ae1f4ec1/lxml-6.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:26e6eda8d38c1fcab1090dd196ee87cbd13788e531937610e2589085de074e77", size = 3997255 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/a9/7a54b6834088d9ae528a7b780584ba6a39a9457b0ac330479f20ffbc9449/lxml-6.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:6540377fbd53fe1b629172288c464fb18db11ce1fa7dc15891da10aa9dcc3e7f", size = 3659610 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a5/eb/7e6f37c5584ccbb2ff267f56fd0339016938c1c8684cfefab9b33ffc2f36/lxml-6.1.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:68a9198d0fc122d14bb76837de9aa80cf84caed990b5b237f532ed87d3706736", size = 8559780 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a1/36/587c2521cf23a2cd6c9c22108aa7528f683a1f195ed7ccd23a4b1786ad36/lxml-6.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7d47866cb32fb503450b6edc9df355d10dc49836af2e89901bd6ac6b0896d9d9", size = 4618006 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/ca/ab7bfe2bf4c972af5e7878262845ead3a24a929a9b04bc11c7c1ece6c82a/lxml-6.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb7c9811bfaa8b1ed5ed319f5d370dfbcaa59d52ea64be2a5a85e18195930354", size = 4924139 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/55/a0c72851dfee5ecc689f949723a73dea457758912542cb955b108eaf0d8f/lxml-6.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:762ff394d5bd56da0cf034a23dcce4e13923f15321a2adfa2ac00201dc6d3fca", size = 5082329 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/b6/0608f7d61a3b96cc67e5648a3d906e31a5082093e10e7be65b3886289938/lxml-6.1.1-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a088f287f7d8275a33c07f2cac6c50b9319309a0200a39e7e75d80c707723099", size = 4993564 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/66/ae227524b066d29d55bf0b453d93d2d793c40218657d643dcbbca13b8faf/lxml-6.1.1-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e902da4b04e6b52e5893900d4b8ab46068f75f3561f01bf1080957f9fd932ed6", size = 5613467 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/76/dbe4a00b50385e40194231dcfe5a12c059de7cf90e89c83407d2b085b719/lxml-6.1.1-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1d4962d4c66bf830a7e59ed6cfc17d148149898a3aefa8ec6e59763e6e3ed085", size = 5228304 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/01/00b1b8442ed2041793336868ba0b9ea4b13d7da7c085c6404c207a63bf79/lxml-6.1.1-cp313-cp313-manylinux_2_28_i686.whl", hash = "sha256:581d4c8ae690a6609e64862dd6b7c2489635c2d13907fc2b20f2bc200ff1d21e", size = 
5341607 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/63/36/1ad29931e9a4638bb707869f01d423a6c815f82152138d1a40dfcfde2b95/lxml-6.1.1-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:876e1ff5930ed8bf295ec5ef9a8155e9b6b1876bbf1deed8b3a8069311875a8f", size = 4700168 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/d1/a9536cecf9be18a0dc72d32bead283a2332d1ffebd2dd3ac70ce444686e5/lxml-6.1.1-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9eb9b5a968f6e0f6d640092a567e14529ff8cea2e29d00da6f78a79fa49f013c", size = 5232487 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/77/b4fb1e03bf5d130e879214d3100092e386418807fb74dd0adc4b0a48f351/lxml-6.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:aa49e06d94aba782c6a02eecb7e507969e7e7a41b267f1b359bb35585f295d5b", size = 5044231 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/4c/d00daeeb0a5530c4028a9232aa1b93db3ef4ed2158c116ea73c79a9765b3/lxml-6.1.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:70cdfd80589d59e43e18005dd7244e8895e93db8ab6a620b7e23df5445a4e3d2", size = 4769450 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/6a/715a3a8d156ce42f29cf014706f5410c2ff3b02267774110fc23266409fe/lxml-6.1.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:aad9aa39483ed8ec44d6d2e59e5b98a0d80676ef0d92f44bfc374836111f62f5", size = 5635874 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/37/0544bc21dde2a88f3a17b504e6fc79c0e01d25a33c2f6079724e9e72b9c7/lxml-6.1.1-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:d49514be2f28d895c38cf9d2b72d7b9a07d00314519f456c0b50b53cfcf4c785", size = 5223987 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/f8/f6a5e8185bcb28c2befae3d31f8e3df3b811cb0f47746517a81279fcafe1/lxml-6.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:47402e62c52ff5988c1e8c6c63177f5708bccf48e366dea4e3dcf1e645e04947", size = 5250276 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/c7/f2/1a2b9f1b7a49d45495369be7ef9ad05b262930f2eab3e3145706fca8083f/lxml-6.1.1-cp313-cp313-win32.whl", hash = "sha256:3483644525531e1d5762b0c44a8e18b6efba321b6dcf8a8952de10b037618bca", size = 3596903 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/99/f4ffb024f238eec2131aaa09f3278fb6129cf892741bf68e1fc1afb8c100/lxml-6.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:a10bd2fd62e8ce916ececb342f348f190724a098c1faa056fdfb2a22ad5e8660", size = 3995869 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/53/70eb8c5c6037f27448f1e3c54ebede9545a801ae63f0a7254afca4fe8e45/lxml-6.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:424aa57aca0897eb922aef34395bd1289b3b6f04e6bae20ea123c0c7e333cffc", size = 3658490 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/13/e2/2e325795566de01d0d7c3bb57d3c370616b2d07b01214e84eec5d3b10963/lxml-6.1.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:19b7ab10b210b0b3ad7985d9ac4eb66ab09a90b20fe6e2f7ba55d01a234345d0", size = 8577146 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/cf/5630b5e4be7d2e6bee8efe83865c925221103cf0221303b104ce134b01e2/lxml-6.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c08e5c694306507275f2290073350c4f32e383db15213b2c69e7ff39c1193840", size = 4623866 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/51/3904907c063451cf8d4a5c9fe0cad95fa1f4ec57f4e3884fa0731bd7a305/lxml-6.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:74a9717fd0d82effef5c2854f0d917231d5324b5a3eb7275c43ac9fa32f97a14", size = 4950022 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/cd/9c7611a51c37a2830928405817cc5d56a97f64fab83cc3f628748b135749/lxml-6.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:efe0374196335f93b53269acd811b944f2e6bdc88e8894f214bd636455484909", size = 5086695 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/da/d6/24e3b5906abb0b674ff2ae195bc3ce59708df2bcd17cf17703b2d7dd643a/lxml-6.1.1-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac931cdc9442c1763b8a8f6cd62c0c938737eafc5be75eff88df55fc73bc0d00", size = 5031642 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2d/db/6ec54f99019838bff54785c51da07f189eb4676861c5f2730962b0d8d665/lxml-6.1.1-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:aee395f5d0927f947758b4ec119fd5fc8ec71f07a1c5c52077b30b04c0fa6955", size = 5647338 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/42/3d/ef4dcfffd22d27a61805d8ed9f7fb888495bc6aa88648fa07c1eaa5586b6/lxml-6.1.1-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9395002973c827b3ed67db77e6ec09f092919a587022174554096a269378fb13", size = 5239528 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/bb/37fb3f0dff146bdcfa78eec47879273820b2a0bf350ec236ce14bd0b1c26/lxml-6.1.1-cp314-cp314-manylinux_2_28_i686.whl", hash = "sha256:73bc2086f141224ebddb7fc5c6a36ca58b31b94b561e1dfe8e073e3270fad1e7", size = 5350730 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/42/43253f168388df4fae1f38c01df36ddb9bee39e2048167b54cdcbae85ea3/lxml-6.1.1-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:3779def59032b81e44a5f70096ef6bf2082f8d901937dca354474ba09782e245", size = 4697530 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/a8/c5a8504f81bbdfc8e7094c2c850cdb4ed6777fc4d5ddd9e5ab819f3b0d54/lxml-6.1.1-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:86c89b9d55ebf820ad7c90bc533410f0d098054f293351f10603c0c46ff598f5", size = 5250670 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/b7/c7e76ab18744d75e21f320ebf9ff9d1ceae2b54dd431ea5a64caf26c9672/lxml-6.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19607c6bbff2a44cf3fe8250abccd20942d3462473e0a721d01d379ed017e462", size = 5084485 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/31/31/b35c53f8ef7b7c31cacd23d3638652fff7bcd1deb6eedb709ab43b685908/lxml-6.1.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:c6ed5141a5c7507cf3ee76bd363b0d6f801e3321adc35b5d825a23115faa5465", size = 4737635 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/06/31f23c813a7fe8e0cb1b175e915b08c9bf4e86d225b210feadbdbe519667/lxml-6.1.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:62aeb7e85b5d60320b9d77eef2e773994e2c0ce10121b277e0a19804e1654a5a", size = 5670681 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/bc/ce619bccc89b1fd9ad8a8e1330ee3f3beff9f2ff95b712d7bbcdd6e22fc3/lxml-6.1.1-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b1b963fd8f5caa68e99dfae060d54de1fe9cba899b8718b44a00cdca53c3e590", size = 5238229 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/5d/b329acbbedc0b619ebc2be6cf7ee9ed07e80892c88d4dfd612c33805789a/lxml-6.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:63876be28efefa04a1df615b46770e82042cce445cfdce55160522f57b231ccb", size = 5264191 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/85/be36fb1425b30db3c3f9df75fe86343ebffb79e6320bd7f588e25bfeac39/lxml-6.1.1-cp314-cp314-win32.whl", hash = "sha256:7f7a92e8583f06b1fd49d01158143b8461cfcd135dcb10ec807270a3051bd603", size = 3657202 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/ce/3cf9a827342269f54d405a6202397de63f07c69cbd6ce7d183a3f0cba1e9/lxml-6.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:b2d444f2e66624d68e9c6b211e28a76e22fff5fcabcfff4deac18b529b7d4137", size = 4064497 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/3e/1a957bde8f0760039e627f94699f82caa782c9d838d86c3d28245ee67212/lxml-6.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:3fd9728a2735fda14f4e8235830c86b539e9661e849665bf926d3f867943b4bf", size = 3741991 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/78/b2/00ed55b3a2efa4658fb795c38d1090ec9b3e8a6c3683d4441fa517f09c3b/lxml-6.1.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:787b2496d0dbe8cd180984e8d29e3a6f76e7ea34db781cb3bd55e4ba1ef8b4ee", size = 8827545 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/73/74573db19baa618d5f266f2407898b087ff6927115b00b71e5fc1b700847/lxml-6.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2c8daa471358dc2d6fcf02165e80ec68f77871a286df95bc5cc3816153b0fd2c", size = 4735736 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/02/6f7061f4f95f51e545d48e87647c54791d204a4e881be4156e7a26ba5338/lxml-6.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:acd7d70b64c0aae0c7922cca83d288a16f5f6da523637697872253415269baef", size = 4970291 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/02/55fc057d8283427dea7d6edb102e7a840239c77a64a983d92f62a304c0e9/lxml-6.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4f0dd2f01f9f8a89f565d000e03abcf0a13d692a346c8d22f628d49af098777a", size = 5102822 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/48/8e1cf78d89d66850121d9255a2a24414c98f775da93b90cf976956c24b14/lxml-6.1.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b7e8a14c8634bf6f7a568634cb395305a6d964aeb5b7ee32248094bed3a7e2c", size = 5027923 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/00/0632a0647612c8af24d26997b3b961397daa9d5b2581444805933629a4cb/lxml-6.1.1-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:86281fbdd6a8162756f8d603f37e3435bfa38043adb79c6dc6a2dfee065e7525", size = 5595843 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/86/ab008a7dc360711b66858d61c80a5979a70a09f2aa2b05d9698df80b803d/lxml-6.1.1-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5d7152ec39ca7c402d8fb9bad86140a15b9503bd0c54484e3f1bbe3dd37ceca", 
size = 5224515 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/c6/2702ff375e728e34f56d9a45339a9cf7e4427e917f542225242d63a05afa/lxml-6.1.1-cp314-cp314t-manylinux_2_28_i686.whl", hash = "sha256:88d8cb75b9d82858497a5393e3c63cfbf03035225e4b35a49ed7ccb151e4dc0e", size = 5312511 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/57/a5807c98f87a86f10ef9ffab35516df7c0f0c4b6d5d33e9f608ab9c04a31/lxml-6.1.1-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:f64ec5397ea6a41fc1b4af0380d79b44a755b5531dcaccd9940fb260dca93038", size = 4639206 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/e1/8a0a2c35734812395f4da4eaf33748a7e5705bfb2a58b128da764339d5ec/lxml-6.1.1-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d34bbf07dbc7ca5970671b1512e928991fb5e9d95365636c9b2d8b4f53af405e", size = 5232404 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/e2/0e6a4dd5ad84d01d99aa7bae7cfefd4a760a0e0f8176818241de17d9b6c0/lxml-6.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:17e0e18d4ad8adbd0399291bc44845b69d9dd68439a3cdebdf35ff902ec05072", size = 5083769 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/7e/161f33d463f6ffc1c7679104b65086dea120080d49dde4d238f015aaee2f/lxml-6.1.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:3ab541146f1f6968c462d6c2ac495148e8cdba2f8347700b2141b6ec5a75bf52", size = 4758936 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/fb/2369825e3f6ca99305bf9f7b7085fda91c8b0922a89e54d900974aa3ef85/lxml-6.1.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:2a0217714657e023ef4293500f65aa20fce6164c8fd6b08fa5bd4a859fb14b9b", size = 5620296 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/90/d61e383146f74c5ab683947ea14dc7b82778838ab9b95ea73a23b60d0191/lxml-6.1.1-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:05a82eb6e1530a64f26225b55cbd178113bd0b5af1c2b625f25e5296742c26d2", size = 5228598 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/76/2d/2dafd8149e94b05bb070690efd5bb2680720681e03ff03fc57d2b70a1105/lxml-6.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9e36f163528fc50cbef305f02a5fd66d404edf7049cdaff211dbc2cba5a7013e", size = 5247845 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/68/b30e913340c380ddac9580c6e6230991fc37240ec4f64704833e4f3e2769/lxml-6.1.1-cp314-cp314t-win32.whl", hash = "sha256:649dda677cf3bd6ac9ae14007ba0c824ded8ce5808b53fc7431d9140399118c1", size = 3897345 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/4e/9eb2af5335545f9fbcd7af57bcf87c6025d31eaa31b14ec184a6c8675328/lxml-6.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:793033d6c5cdf33a573f910d9bea14ef8f5771820411d118da8e1182edb53d5e", size = 4393350 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/2c/0f1e93c636720e8a3eb59af2bfda99d98b55891e1c53bc30c2e0e865f01b/lxml-6.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:58bb955caba94e467d2a96da17660d2d704e0675894cba21ab8a775b8621fd1c", size = 3817223 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/32/86a3f0f724a3a402d4627937a7fc27b160e45e7012b4adf47f6e1e844511/lxml-6.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:31033dc34636ea6b7d5cc11b1ddbda78a14de858ba9d3e1ed4b69a3085bc521e", size = 3930127 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/44/d832e82af08723761556d004b1d04d281c09f9a8cecd7d3148548c9941a3/lxml-6.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3893c14c4b6ac5b2d54ba8cf03e99fe5104e592de491f19bd6b82756c09f8004", size = 4210769 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/39/0dc5949f759ed7d951e0bb8c2f2d9d7aca1908d22352fa84a8afd2ea54af/lxml-6.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c07da4cebf6889f03ebac8d238f62318e29f495de0aa18a51ea14e61ae907e2e", size = 4318163 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e6/fb/8ab3845fe046ba4cbf74536bcf6801a774b7caf4350de1c5d37f1f0a9e90/lxml-6.1.1-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6f0ce10945fab9c4c06ce14e22af9059d1a87493a9af4501a5b0b9187e21cf2", size = 4250945 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/1b/7553ab136894374ffae8851ec06f98f511cd8e66246e41b6be059d0a7289/lxml-6.1.1-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f8844cd288697c6425c9beba919302241e3278871dc6519515e72b04e987abcf", size = 4401664 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/db/a4/441aee36c6f6b249823d20fd91f9be9ab89d7c5a8ae542a4a4ca6d342d56/lxml-6.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:ed21202aec73cda4d55d1ce57b389aadb90ffb044e6cd1080b8347efe1b1ec84", size = 3508989 }, -] - -[[package]] -name = "markdown-it-py" -version = "4.2.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "mdurl" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/ff/7841249c247aa650a76b9ee4bbaeae59370dc8bfd2f6c01f3630c35eb134/markdown_it_py-4.2.0.tar.gz", hash = "sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49", size = 82454 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687 }, -] - -[[package]] -name = "markupsafe" -version = "3.0.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = 
"sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 
14073 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = 
"sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146 }, -] - -[[package]] -name = "mcp" -version = "1.28.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "anyio" }, - { name = "httpx" }, - { name = "httpx-sse" }, - { name = "jsonschema" }, - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "pyjwt", extra = ["crypto"] }, - { name = "python-multipart" }, - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "sse-starlette" }, - { name = "starlette" }, - { name = "typing-extensions" }, - { name = "typing-inspection" }, - { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/77/9450b8f251a13affb6281997d0523c4615f8a8b35d0b21ff30db3a5aac9d/mcp-1.28.1.tar.gz", hash = 
"sha256:d51e36a5f5644faea4f85ea649bfffa6bc6c26770d42798ad6a3de3d2ba69683", size = 638501 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e2/5e/d118fce19f87a2e7d8101c35c8ae0ec289098a4df0ff244cec23e415aca0/mcp-1.28.1-py3-none-any.whl", hash = "sha256:2726bca5e7193f61c5dde8b12500a6de2d9acf6d1a1c0be9e8c2e706437991df", size = 222620 }, -] - -[[package]] -name = "mdurl" -version = "0.1.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, -] - -[[package]] -name = "multidict" -version = "6.7.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/f1/a90635c4f88fb913fbf4ce660b83b7445b7a02615bda034b2f8eb38fd597/multidict-6.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7ff981b266af91d7b4b3793ca3382e53229088d193a85dfad6f5f4c27fc73e5d", size = 76626 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/9b/267e64eaf6fc637a15b35f5de31a566634a2740f97d8d094a69d34f524a4/multidict-6.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:844c5bca0b5444adb44a623fb0a1310c2f4cd41f402126bb269cd44c9b3f3e1e", size = 44706 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/dd/a4/d45caf2b97b035c57267791ecfaafbd59c68212004b3842830954bb4b02e/multidict-6.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f2a0a924d4c2e9afcd7ec64f9de35fcd96915149b2216e1cb2c10a56df483855", size = 44356 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/d2/0a36c8473f0cbaeadd5db6c8b72d15bbceeec275807772bfcd059bef487d/multidict-6.7.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8be1802715a8e892c784c0197c2ace276ea52702a0ede98b6310c8f255a5afb3", size = 244355 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/16/8c65be997fd7dd311b7d39c7b6e71a0cb449bad093761481eccbbe4b42a2/multidict-6.7.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2e2d2ed645ea29f31c4c7ea1552fcfd7cb7ba656e1eafd4134a6620c9f5fdd9e", size = 246433 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/fb/4dbd7e848d2799c6a026ec88ad39cf2b8416aa167fcc903baa55ecaa045c/multidict-6.7.1-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:95922cee9a778659e91db6497596435777bd25ed116701a4c034f8e46544955a", size = 225376 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/8a/4a3a6341eac3830f6053062f8fbc9a9e54407c80755b3f05bc427295c2d0/multidict-6.7.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6b83cabdc375ffaaa15edd97eb7c0c672ad788e2687004990074d7d6c9b140c8", size = 257365 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f7/a2/dd575a69c1aa206e12d27d0770cdf9b92434b48a9ef0cd0d1afdecaa93c4/multidict-6.7.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:38fb49540705369bab8484db0689d86c0a33a0a9f2c1b197f506b71b4b6c19b0", size = 254747 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/5a/56/21b27c560c13822ed93133f08aa6372c53a8e067f11fbed37b4adcdac922/multidict-6.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:439cbebd499f92e9aa6793016a8acaa161dfa749ae86d20960189f5398a19144", size = 246293 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/a4/23466059dc3854763423d0ad6c0f3683a379d97673b1b89ec33826e46728/multidict-6.7.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6d3bc717b6fe763b8be3f2bee2701d3c8eb1b2a8ae9f60910f1b2860c82b6c49", size = 242962 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/67/51dd754a3524d685958001e8fa20a0f5f90a6a856e0a9dcabff69be3dbb7/multidict-6.7.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:619e5a1ac57986dbfec9f0b301d865dddf763696435e2962f6d9cf2fdff2bb71", size = 237360 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/3f/036dfc8c174934d4b55d86ff4f978e558b0e585cef70cfc1ad01adc6bf18/multidict-6.7.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0b38ebffd9be37c1170d33bc0f36f4f262e0a09bc1aac1c34c7aa51a7293f0b3", size = 245940 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/20/6214d3c105928ebc353a1c644a6ef1408bc5794fcb4f170bb524a3c16311/multidict-6.7.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:10ae39c9cfe6adedcdb764f5e8411d4a92b055e35573a2eaa88d3323289ef93c", size = 253502 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/e2/c653bc4ae1be70a0f836b82172d643fcf1dade042ba2676ab08ec08bff0f/multidict-6.7.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:25167cc263257660290fba06b9318d2026e3c910be240a146e1f66dd114af2b0", size = 247065 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/11/a854b4154cd3bd8b1fd375e8a8ca9d73be37610c361543d56f764109509b/multidict-6.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:128441d052254f42989ef98b7b6a6ecb1e6f708aa962c7984235316db59f50fa", size = 241870 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/13/bf/9676c0392309b5fdae322333d22a829715b570edb9baa8016a517b55b558/multidict-6.7.1-cp311-cp311-win32.whl", hash = "sha256:d62b7f64ffde3b99d06b707a280db04fb3855b55f5a06df387236051d0668f4a", size = 41302 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/68/f16a3a8ba6f7b6dc92a1f19669c0810bd2c43fc5a02da13b1cbf8e253845/multidict-6.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:bdbf9f3b332abd0cdb306e7c2113818ab1e922dc84b8f8fd06ec89ed2a19ab8b", size = 45981 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/ad/9dd5305253fa00cd3c7555dbef69d5bf4133debc53b87ab8d6a44d411665/multidict-6.7.1-cp311-cp311-win_arm64.whl", hash = "sha256:b8c990b037d2fff2f4e33d3f21b9b531c5745b33a49a7d6dbe7a177266af44f6", size = 43159 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/9c/f20e0e2cf80e4b2e4b1c365bf5fe104ee633c751a724246262db8f1a0b13/multidict-6.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172", size = 76893 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/cf/18ef143a81610136d3da8193da9d80bfe1cb548a1e2d1c775f26b23d024a/multidict-6.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fccb473e87eaa1382689053e4a4618e7ba7b9b9b8d6adf2027ee474597128cd", size = 45456 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/65/1caac9d4cd32e8433908683446eebc953e82d22b03d10d41a5f0fefe991b/multidict-6.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7", size = 43872 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/3b/d6bd75dc4f3ff7c73766e04e705b00ed6dbbaccf670d9e05a12b006f5a21/multidict-6.7.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cb2a55f408c3043e42b40cc8eecd575afa27b7e0b956dfb190de0f8499a57a53", size = 251018 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/fd/80/c959c5933adedb9ac15152e4067c702a808ea183a8b64cf8f31af8ad3155/multidict-6.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75", size = 258883 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/85/7ed40adafea3d4f1c8b916e3b5cc3a8e07dfcdcb9cd72800f4ed3ca1b387/multidict-6.7.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b", size = 242413 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/57/b8565ff533e48595503c785f8361ff9a4fde4d67de25c207cd0ba3befd03/multidict-6.7.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733", size = 268404 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/50/9810c5c29350f7258180dfdcb2e52783a0632862eb334c4896ac717cebcb/multidict-6.7.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a", size = 269456 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/8d/5e5be3ced1d12966fefb5c4ea3b2a5b480afcea36406559442c6e31d4a48/multidict-6.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961", size = 256322 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/6e/d8a26d81ac166a5592782d208dd90dfdc0a7a218adaa52b45a672b46c122/multidict-6.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582", size = 253955 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/59/4c/7c672c8aad41534ba619bcd4ade7a0dc87ed6b8b5c06149b85d3dd03f0cd/multidict-6.7.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e", size = 251254 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/bd/84c24de512cbafbdbc39439f74e967f19570ce7924e3007174a29c348916/multidict-6.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c102791b1c4f3ab36ce4101154549105a53dc828f016356b3e3bcae2e3a039d3", size = 252059 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/ba/f5449385510825b73d01c2d4087bf6d2fccc20a2d42ac34df93191d3dd03/multidict-6.7.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6", size = 263588 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/11/afc7c677f68f75c84a69fe37184f0f82fce13ce4b92f49f3db280b7e92b3/multidict-6.7.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a", size = 259642 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/17/ebb9644da78c4ab36403739e0e6e0e30ebb135b9caf3440825001a0bddcb/multidict-6.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba", size = 251377 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/a4/840f5b97339e27846c46307f2530a2805d9d537d8b8bd416af031cad7fa0/multidict-6.7.1-cp312-cp312-win32.whl", hash = "sha256:28ca5ce2fd9716631133d0e9a9b9a745ad7f60bac2bccafb56aa380fc0b6c511", size = 41887 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/31/0b2517913687895f5904325c2069d6a3b78f66cc641a86a2baf75a05dcbb/multidict-6.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcee94dfbd638784645b066074b338bc9cc155d4b4bffa4adce1615c5a426c19", size = 46053 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/0c/5b/aba28e4ee4006ae4c7df8d327d31025d760ffa992ea23812a601d226e682/multidict-6.7.1-cp312-cp312-win_arm64.whl", hash = "sha256:ba0a9fb644d0c1a2194cf7ffb043bd852cea63a57f66fbd33959f7dae18517bf", size = 43307 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/22/929c141d6c0dba87d3e1d38fbdf1ba8baba86b7776469f2bc2d3227a1e67/multidict-6.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2b41f5fed0ed563624f1c17630cb9941cf2309d4df00e494b551b5f3e3d67a23", size = 76174 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/75/bc704ae15fee974f8fccd871305e254754167dce5f9e42d88a2def741a1d/multidict-6.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84e61e3af5463c19b67ced91f6c634effb89ef8bfc5ca0267f954451ed4bb6a2", size = 45116 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/76/55cd7186f498ed080a18440c9013011eb548f77ae1b297206d030eb1180a/multidict-6.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:935434b9853c7c112eee7ac891bc4cb86455aa631269ae35442cb316790c1445", size = 43524 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/3c/414842ef8d5a1628d68edee29ba0e5bcf235dbfb3ccd3ea303a7fe8c72ff/multidict-6.7.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432feb25a1cb67fe82a9680b4d65fb542e4635cb3166cd9c01560651ad60f177", size = 249368 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/32/befed7f74c458b4a525e60519fe8d87eef72bb1e99924fa2b0f9d97a221e/multidict-6.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e82d14e3c948952a1a85503817e038cba5905a3352de76b9a465075d072fba23", size = 256952 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/d6/c878a44ba877f366630c860fdf74bfb203c33778f12b6ac274936853c451/multidict-6.7.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = 
"sha256:4cfb48c6ea66c83bcaaf7e4dfa7ec1b6bbcf751b7db85a328902796dfde4c060", size = 240317 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/49/57421b4d7ad2e9e60e25922b08ceb37e077b90444bde6ead629095327a6f/multidict-6.7.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1d540e51b7e8e170174555edecddbd5538105443754539193e3e1061864d444d", size = 267132 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/fe/ec0edd52ddbcea2a2e89e174f0206444a61440b40f39704e64dc807a70bd/multidict-6.7.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:273d23f4b40f3dce4d6c8a821c741a86dec62cded82e1175ba3d99be128147ed", size = 268140 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/73/6e1b01cbeb458807aa0831742232dbdd1fa92bfa33f52a3f176b4ff3dc11/multidict-6.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d624335fd4fa1c08a53f8b4be7676ebde19cd092b3895c421045ca87895b429", size = 254277 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/b2/5fb8c124d7561a4974c342bc8c778b471ebbeb3cc17df696f034a7e9afe7/multidict-6.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:12fad252f8b267cc75b66e8fc51b3079604e8d43a75428ffe193cd9e2195dfd6", size = 252291 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/96/51d4e4e06bcce92577fcd488e22600bd38e4fd59c20cb49434d054903bd2/multidict-6.7.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:03ede2a6ffbe8ef936b92cb4529f27f42be7f56afcdab5ab739cd5f27fb1cbf9", size = 250156 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/db/6b/420e173eec5fba721a50e2a9f89eda89d9c98fded1124f8d5c675f7a0c0f/multidict-6.7.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:90efbcf47dbe33dcf643a1e400d67d59abeac5db07dc3f27d6bdeae497a2198c", size = 249742 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/44/a3/ec5b5bd98f306bc2aa297b8c6f11a46714a56b1e6ef5ebda50a4f5d7c5fb/multidict-6.7.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:5c4b9bfc148f5a91be9244d6264c53035c8a0dcd2f51f1c3c6e30e30ebaa1c84", size = 262221 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cd/f7/e8c0d0da0cd1e28d10e624604e1a36bcc3353aaebdfdc3a43c72bc683a12/multidict-6.7.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:401c5a650f3add2472d1d288c26deebc540f99e2fb83e9525007a74cd2116f1d", size = 258664 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/da/151a44e8016dd33feed44f730bd856a66257c1ee7aed4f44b649fb7edeb3/multidict-6.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:97891f3b1b3ffbded884e2916cacf3c6fc87b66bb0dde46f7357404750559f33", size = 249490 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/af/a3b86bf9630b732897f6fc3f4c4714b90aa4361983ccbdcd6c0339b21b0c/multidict-6.7.1-cp313-cp313-win32.whl", hash = "sha256:e1c5988359516095535c4301af38d8a8838534158f649c05dd1050222321bcb3", size = 41695 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/35/e994121b0e90e46134673422dd564623f93304614f5d11886b1b3e06f503/multidict-6.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:960c83bf01a95b12b08fd54324a4eb1d5b52c88932b5cba5d6e712bb3ed12eb5", size = 45884 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/61/42d3e5dbf661242a69c97ea363f2d7b46c567da8eadef8890022be6e2ab0/multidict-6.7.1-cp313-cp313-win_arm64.whl", hash = "sha256:563fe25c678aaba333d5399408f5ec3c383ca5b663e7f774dd179a520b8144df", size = 43122 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/b3/e6b21c6c4f314bb956016b0b3ef2162590a529b84cb831c257519e7fde44/multidict-6.7.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c76c4bec1538375dad9d452d246ca5368ad6e1c9039dadcf007ae59c70619ea1", size = 83175 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/fb/76/23ecd2abfe0957b234f6c960f4ade497f55f2c16aeb684d4ecdbf1c95791/multidict-6.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:57b46b24b5d5ebcc978da4ec23a819a9402b4228b8a90d9c656422b4bdd8a963", size = 48460 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/57/a0ed92b23f3a042c36bc4227b72b97eca803f5f1801c1ab77c8a212d455e/multidict-6.7.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e954b24433c768ce78ab7929e84ccf3422e46deb45a4dc9f93438f8217fa2d34", size = 46930 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/66/02ec7ace29162e447f6382c495dc95826bf931d3818799bbef11e8f7df1a/multidict-6.7.1-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3bd231490fa7217cc832528e1cd8752a96f0125ddd2b5749390f7c3ec8721b65", size = 242582 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/18/64f5a795e7677670e872673aca234162514696274597b3708b2c0d276cce/multidict-6.7.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:253282d70d67885a15c8a7716f3a73edf2d635793ceda8173b9ecc21f2fb8292", size = 250031 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/ed/e192291dbbe51a8290c5686f482084d31bcd9d09af24f63358c3d42fd284/multidict-6.7.1-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0b4c48648d7649c9335cf1927a8b87fa692de3dcb15faa676c6a6f1f1aabda43", size = 228596 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/7e/3562a15a60cf747397e7f2180b0a11dc0c38d9175a650e75fa1b4d325e15/multidict-6.7.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:98bc624954ec4d2c7cb074b8eefc2b5d0ce7d482e410df446414355d158fe4ca", size = 257492 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/24/02/7d0f9eae92b5249bb50ac1595b295f10e263dd0078ebb55115c31e0eaccd/multidict-6.7.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1b99af4d9eec0b49927b4402bcbb58dea89d3e0db8806a4086117019939ad3dd", size = 255899 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/00/e3/9b60ed9e23e64c73a5cde95269ef1330678e9c6e34dd4eb6b431b85b5a10/multidict-6.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aac4f16b472d5b7dc6f66a0d49dd57b0e0902090be16594dc9ebfd3d17c47e7", size = 247970 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/06/538e58a63ed5cfb0bd4517e346b91da32fde409d839720f664e9a4ae4f9d/multidict-6.7.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:21f830fe223215dffd51f538e78c172ed7c7f60c9b96a2bf05c4848ad49921c3", size = 245060 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/2f/d743a3045a97c895d401e9bd29aaa09b94f5cbdf1bd561609e5a6c431c70/multidict-6.7.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f5dd81c45b05518b9aa4da4aa74e1c93d715efa234fd3e8a179df611cc85e5f4", size = 235888 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/83/5a325cac191ab28b63c52f14f1131f3b0a55ba3b9aa65a6d0bf2a9b921a0/multidict-6.7.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eb304767bca2bb92fb9c5bd33cedc95baee5bb5f6c88e63706533a1c06ad08c8", size = 243554 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/1f/9d2327086bd15da2725ef6aae624208e2ef828ed99892b17f60c344e57ed/multidict-6.7.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:c9035dde0f916702850ef66460bc4239d89d08df4d02023a5926e7446724212c", size = 252341 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e8/2c/2a1aa0280cf579d0f6eed8ee5211c4f1730bd7e06c636ba2ee6aafda302e/multidict-6.7.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:af959b9beeb66c822380f222f0e0a1889331597e81f1ded7f374f3ecb0fd6c52", size = 
246391 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/03/7ca022ffc36c5a3f6e03b179a5ceb829be9da5783e6fe395f347c0794680/multidict-6.7.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:41f2952231456154ee479651491e94118229844dd7226541788be783be2b5108", size = 243422 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/1d/b31650eab6c5778aceed46ba735bd97f7c7d2f54b319fa916c0f96e7805b/multidict-6.7.1-cp313-cp313t-win32.whl", hash = "sha256:df9f19c28adcb40b6aae30bbaa1478c389efd50c28d541d76760199fc1037c32", size = 47770 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/5b/2d2d1d522e51285bd61b1e20df8f47ae1a9d80839db0b24ea783b3832832/multidict-6.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:d54ecf9f301853f2c5e802da559604b3e95bb7a3b01a9c295c6ee591b9882de8", size = 53109 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/a3/cc409ba012c83ca024a308516703cf339bdc4b696195644a7215a5164a24/multidict-6.7.1-cp313-cp313t-win_arm64.whl", hash = "sha256:5a37ca18e360377cfda1d62f5f382ff41f2b8c4ccb329ed974cc2e1643440118", size = 45573 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/cc/db74228a8be41884a567e88a62fd589a913708fcf180d029898c17a9a371/multidict-6.7.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8f333ec9c5eb1b7105e3b84b53141e66ca05a19a605368c55450b6ba208cb9ee", size = 75190 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/22/492f2246bb5b534abd44804292e81eeaf835388901f0c574bac4eeec73c5/multidict-6.7.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a407f13c188f804c759fc6a9f88286a565c242a76b27626594c133b82883b5c2", size = 44486 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/4f/733c48f270565d78b4544f2baddc2fb2a245e5a8640254b12c36ac7ac68e/multidict-6.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e161ddf326db5577c3a4cc2d8648f81456e8a20d40415541587a71620d7a7d1", size = 43219 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/24/bb/2c0c2287963f4259c85e8bcbba9182ced8d7fca65c780c38e99e61629d11/multidict-6.7.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1e3a8bb24342a8201d178c3b4984c26ba81a577c80d4d525727427460a50c22d", size = 245132 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/f9/44d4b3064c65079d2467888794dea218d1601898ac50222ab8a9a8094460/multidict-6.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97231140a50f5d447d3164f994b86a0bed7cd016e2682f8650d6a9158e14fd31", size = 252420 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/13/78f7275e73fa17b24c9a51b0bd9d73ba64bb32d0ed51b02a746eb876abe7/multidict-6.7.1-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6b10359683bd8806a200fd2909e7c8ca3a7b24ec1d8132e483d58e791d881048", size = 233510 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/25/8167187f62ae3cbd52da7893f58cb036b47ea3fb67138787c76800158982/multidict-6.7.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:283ddac99f7ac25a4acadbf004cb5ae34480bbeb063520f70ce397b281859362", size = 264094 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/e7/69a3a83b7b030cf283fb06ce074a05a02322359783424d7edf0f15fe5022/multidict-6.7.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:538cec1e18c067d0e6103aa9a74f9e832904c957adc260e61cd9d8cf0c3b3d37", size = 260786 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/3b/8ec5074bcfc450fe84273713b4b0a0dd47c0249358f5d82eb8104ffe2520/multidict-6.7.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7eee46ccb30ff48a1e35bb818cc90846c6be2b68240e42a78599166722cea709", size = 248483 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/48/5a/d5a99e3acbca0e29c5d9cba8f92ceb15dce78bab963b308ae692981e3a5d/multidict-6.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa263a02f4f2dd2d11a7b1bb4362aa7cb1049f84a9235d31adf63f30143469a0", size = 248403 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/48/e58cd31f6c7d5102f2a4bf89f96b9cf7e00b6c6f3d04ecc44417c00a5a3c/multidict-6.7.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:2e1425e2f99ec5bd36c15a01b690a1a2456209c5deed58f95469ffb46039ccbb", size = 240315 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/33/1cd210229559cb90b6786c30676bb0c58249ff42f942765f88793b41fdce/multidict-6.7.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:497394b3239fc6f0e13a78a3e1b61296e72bf1c5f94b4c4eb80b265c37a131cd", size = 245528 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/f2/6e1107d226278c876c783056b7db43d800bb64c6131cec9c8dfb6903698e/multidict-6.7.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:233b398c29d3f1b9676b4b6f75c518a06fcb2ea0b925119fb2c1bc35c05e1601", size = 258784 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/c1/11f664f14d525e4a1b5327a82d4de61a1db604ab34c6603bb3c2cc63ad34/multidict-6.7.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:93b1818e4a6e0930454f0f2af7dfce69307ca03cdcfb3739bf4d91241967b6c1", size = 251980 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e1/9f/75a9ac888121d0c5bbd4ecf4eead45668b1766f6baabfb3b7f66a410e231/multidict-6.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f33dc2a3abe9249ea5d8360f969ec7f4142e7ac45ee7014d8f8d5acddf178b7b", size = 243602 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/e7/50bf7b004cc8525d80dbbbedfdc7aed3e4c323810890be4413e589074032/multidict-6.7.1-cp314-cp314-win32.whl", hash = "sha256:3ab8b9d8b75aef9df299595d5388b14530839f6422333357af1339443cff777d", size = 40930 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e0/bf/52f25716bbe93745595800f36fb17b73711f14da59ed0bb2eba141bc9f0f/multidict-6.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:5e01429a929600e7dab7b166062d9bb54a5eed752384c7384c968c2afab8f50f", size = 45074 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/ab/22803b03285fa3a525f48217963da3a65ae40f6a1b6f6cf2768879e208f9/multidict-6.7.1-cp314-cp314-win_arm64.whl", hash = "sha256:4885cb0e817aef5d00a2e8451d4665c1808378dc27c2705f1bf4ef8505c0d2e5", size = 42471 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/6d/f9293baa6146ba9507e360ea0292b6422b016907c393e2f63fc40ab7b7b5/multidict-6.7.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:0458c978acd8e6ea53c81eefaddbbee9c6c5e591f41b3f5e8e194780fe026581", size = 82401 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/68/53b5494738d83558d87c3c71a486504d8373421c3e0dbb6d0db48ad42ee0/multidict-6.7.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:c0abd12629b0af3cf590982c0b413b1e7395cd4ec026f30986818ab95bfaa94a", size = 48143 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/e8/5284c53310dcdc99ce5d66563f6e5773531a9b9fe9ec7a615e9bc306b05f/multidict-6.7.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:14525a5f61d7d0c94b368a42cff4c9a4e7ba2d52e2672a7b23d84dc86fb02b0c", size = 46507 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/fc/6800d0e5b3875568b4083ecf5f310dcf91d86d52573160834fb4bfcf5e4f/multidict-6.7.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:17307b22c217b4cf05033dabefe68255a534d637c6c9b0cc8382718f87be4262", size = 239358 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/75/4ad0973179361cdf3a113905e6e088173198349131be2b390f9fa4da5fc6/multidict-6.7.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a7e590ff876a3eaf1c02a4dfe0724b6e69a9e9de6d8f556816f29c496046e59", size = 246884 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/c3/9c/095bb28b5da139bd41fb9a5d5caff412584f377914bd8787c2aa98717130/multidict-6.7.1-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:5fa6a95dfee63893d80a34758cd0e0c118a30b8dcb46372bf75106c591b77889", size = 225878 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/d0/c0a72000243756e8f5a277b6b514fa005f2c73d481b7d9e47cd4568aa2e4/multidict-6.7.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a0543217a6a017692aa6ae5cc39adb75e587af0f3a82288b1492eb73dd6cc2a4", size = 253542 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/6b/f69da15289e384ecf2a68837ec8b5ad8c33e973aa18b266f50fe55f24b8c/multidict-6.7.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f99fe611c312b3c1c0ace793f92464d8cd263cc3b26b5721950d977b006b6c4d", size = 252403 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/76/b9669547afa5a1a25cd93eaca91c0da1c095b06b6d2d8ec25b713588d3a1/multidict-6.7.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9004d8386d133b7e6135679424c91b0b854d2d164af6ea3f289f8f2761064609", size = 244889 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/a9/a50d2669e506dad33cfc45b5d574a205587b7b8a5f426f2fbb2e90882588/multidict-6.7.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e628ef0e6859ffd8273c69412a2465c4be4a9517d07261b33334b5ec6f3c7489", size = 241982 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/bb/1609558ad8b456b4827d3c5a5b775c93b87878fd3117ed3db3423dfbce1b/multidict-6.7.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:841189848ba629c3552035a6a7f5bf3b02eb304e9fea7492ca220a8eda6b0e5c", size = 232415 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/59/6f61039d2aa9261871e03ab9dc058a550d240f25859b05b67fd70f80d4b3/multidict-6.7.1-cp314-cp314t-musllinux_1_2_i686.whl", 
hash = "sha256:ce1bbd7d780bb5a0da032e095c951f7014d6b0a205f8318308140f1a6aba159e", size = 240337 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/29/fdc6a43c203890dc2ae9249971ecd0c41deaedfe00d25cb6564b2edd99eb/multidict-6.7.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b26684587228afed0d50cf804cc71062cc9c1cdf55051c4c6345d372947b268c", size = 248788 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/14/a153a06101323e4cf086ecee3faadba52ff71633d471f9685c42e3736163/multidict-6.7.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9f9af11306994335398293f9958071019e3ab95e9a707dc1383a35613f6abcb9", size = 242842 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/5f/604ae839e64a4a6efc80db94465348d3b328ee955e37acb24badbcd24d83/multidict-6.7.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b4938326284c4f1224178a560987b6cf8b4d38458b113d9b8c1db1a836e640a2", size = 240237 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5f/60/c3a5187bf66f6fb546ff4ab8fb5a077cbdd832d7b1908d4365c7f74a1917/multidict-6.7.1-cp314-cp314t-win32.whl", hash = "sha256:98655c737850c064a65e006a3df7c997cd3b220be4ec8fe26215760b9697d4d7", size = 48008 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/f7/addf1087b860ac60e6f382240f64fb99f8bfb532bb06f7c542b83c29ca61/multidict-6.7.1-cp314-cp314t-win_amd64.whl", hash = "sha256:497bde6223c212ba11d462853cfa4f0ae6ef97465033e7dc9940cdb3ab5b48e5", size = 53542 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/81/4629d0aa32302ef7b2ec65c75a728cc5ff4fa410c50096174c1632e70b3e/multidict-6.7.1-cp314-cp314t-win_arm64.whl", hash = "sha256:2bbd113e0d4af5db41d5ebfe9ccaff89de2120578164f86a5d17d5a576d1e5b2", size = 44719 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319 }, -] - -[[package]] -name = 
"narwhals" -version = "2.22.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/3c/c4ef2164a71c1a63d7f1ae411c4082c5fa872405106db60a4b7114989ad7/narwhals-2.22.1.tar.gz", hash = "sha256:d62920805a0a43b7ff8b54b0c0d3142d796f8a9301836ada37e573d6a33cbcd9", size = 647493 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/ca/36339329c4604adbcc99c899b7eb1ce1a555c499b6a6860757dc9bfed36d/narwhals-2.22.1-py3-none-any.whl", hash = "sha256:60567d774edf77db53906f89d9fbd164e66e56d66d388e1e6990f17ac33cfb53", size = 454815 }, -] - -[[package]] -name = "numba" -version = "0.65.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "llvmlite" }, - { name = "numpy" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/c5/db2ac3685833d626c0dcae6bd2330cd68433e1fd248d15f70998160d3ad7/numba-0.65.1.tar.gz", hash = "sha256:19357146c32fe9ed25059ab915e8465fb13951cf6b0aace3826b76886373ab23", size = 2765600 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/b3/650500c2eab4534d98e9166f4298e0f3c69c742afdf24e6eabccd1f16ad8/numba-0.65.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:7020d74b19cdb8cff16506542fdd510756e28c5e7f3bd0b7f574f0f42272fcd9", size = 2680563 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/0b/0615dbedb98f5b32a35a53290fbdc6e22306968109278d7e58df82d7a9f6/numba-0.65.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f80ed83774b5173abd6581cd8d2165d1d38e13d2e5c8155c0c0b421784745420", size = 3745018 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/aa/4361698f35bf63bff67dfe6c90493731177f48ede954f77b0588731537bc/numba-0.65.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ed425a43b0a5f9772f2f4e2dd0bbd12eabecae1af0b24efcfd4e053f012aac6", size = 3450962 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/bd/9a/af61ec03b3116c161fd7a06b9e8a265729a8718458333e8ffbb06d9a3978/numba-0.65.1-cp311-cp311-win_amd64.whl", hash = "sha256:df40a5028a975b9ea66f6a2a3f7abbdbd541a863070e34ed367aff21141248e4", size = 2747417 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/bc/76f8f8c5cf9adee47fdb7bbb03be8900f76f902d451d7477cf12b845e1de/numba-0.65.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ac3f1e77c352dd0ea9712732c2d8f9ca507717435eec5b5013bf138ac33c4a08", size = 2681371 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/47/a415af0283e4db0398104c6d1c11c9861a98dc67a7aa442a7769ed5d6196/numba-0.65.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:52bc6f3ceb8fcaff9b2ae26b4c6b1e9fee39db8d355534c0fe4f39a901246b84", size = 3802467 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/36/246f73ec99cfeab2f2cb2ce7d4218766cc36a2da418901223f4f4da9c813/numba-0.65.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:90ca10b3463bae0bd70589726fe3c77d01d6b5fc86bee54bcdf9fb6b47c28977", size = 3502628 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/db/9e/3c679b2ee078425b9e99a91e44f8d132a6830d8ccce5227bc5e9181aeed8/numba-0.65.1-cp312-cp312-win_amd64.whl", hash = "sha256:5971c632be2a2351500431f46213821dba8d02b18a9f7d02fd36bd2743e41a6a", size = 2750611 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/37/14a4579049c1eb673afd0de0cb4842982acd55b9ce2643e763db858bcea0/numba-0.65.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1735c15c1134a5108b4d6a5c77fc0947924ea066a738dc09a52008c13df9cad3", size = 2681344 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/22/b8d873f6466b20aa563fc9b33acd48dec89a07803ddaa2f1c8ca1cd33126/numba-0.65.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c09f49117ef255e1f1c6dad0c7a1ed39868243862a73be5706793241a3755f1b", size = 3810619 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/62/08/e16a8b5d9a018962ebb5c66be662317cde32b9f5dab08441f90bed5522fb/numba-0.65.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:594a8680b3fadac99e97e489b1fd89007177e5336713745c3b769528c635a464", size = 3509783 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/a5/03c970d57f4c1741354837353ce39fb5206952ae1dba8922d29c86f64805/numba-0.65.1-cp313-cp313-win_amd64.whl", hash = "sha256:85be74c0d036842699a30058f82fb88fc5ffdc59f7615cab5792ea92914c9b62", size = 2750534 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/2e/8aed9b726d9ba5f11ad287645fd479e88278db3060a25cb1225d730eb2b7/numba-0.65.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:33f5eb68eb1c843511615d14663ce60258525d6a4c65ab040e2c2b0c4cf17450", size = 2681554 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/96/f3eb235fafa82a34e2ab5dd7dc9ffff998ebf5f0bbc23fa56a96aeb44da6/numba-0.65.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71e73029bf53a62cc6afcf96be4bd942290d8b4c55f0a454fb536158115790f7", size = 3779602 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/90/b0f09b48752d23640b8284f22aa597737e8adaddc7fbfacc4708b7f73a4c/numba-0.65.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a07635e0be926b9bdbffb09137c230fb13f6ec0e564914ba937cee12ce3eb35", size = 3479532 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/56/46/3f7fc04fb853559e74b210e0b62c19974ec844cefec611f9e535f4da3761/numba-0.65.1-cp314-cp314-win_amd64.whl", hash = "sha256:2a20fcdabdefbdacf88d85caf70c3b18c4bcb7ebb8f82e6a19486383dd26ab63", size = 2752637 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/7b/c1a341a9067367778f4152a5f01061cf281fb09582c92c510ec4918cabf6/numba-0.65.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:548dd4b3a4508d5062768d1514b2cd7b015f9a25ec7af651c50dee243965e652", size = 2684600 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/03/36/98ddbcf3e4f04a6dd07e1c67249955920579ba4af6bb6868e3088f4ed282/numba-0.65.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:78abc28feff2c2ff8307fff3975b6438352759c9acb797ecd6b1fb6e7e39e31d", size = 3817198 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/83/0dad21057ece5a835599f5d24099b091703995e23dbbf894f259e91c010b/numba-0.65.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee7676cb389555805f9b9a1840cbcd1ea6c8bd5376ab6918e3a29c5ea1dbda20", size = 3533862 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/36/8be7118ffd4c8440881046eac3d0982cc5ab42909508cf5d67024d62a2e4/numba-0.65.1-cp314-cp314t-win_amd64.whl", hash = "sha256:20609346e3bd75204950dcbbfe383a8d7dbf4902f442aedbf00f97fef4aa8f38", size = 2758237 }, -] - -[[package]] -name = "numpy" -version = "1.26.4" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/57/baae43d14fe163fa0e4c47f307b6b2511ab8d7d30177c491960504252053/numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", size = 20630554 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/2e/151484f49fd03944c4a3ad9c418ed193cfd02724e138ac8a9505d056c582/numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", size = 13997127 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/ae/7e5b85136806f9dadf4878bf73cf223fe5c2636818ba3ab1c585d0403164/numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", size = 14222994 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/d0/edc009c27b406c4f9cbc79274d6e46d634d139075492ad055e3d68445925/numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", size = 18252005 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/bf/2b1aaf8f525f2923ff6cfcf134ae5e750e279ac65ebf386c75a0cf6da06a/numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", size = 13885297 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/a0/4e0f14d847cfc2a633a1c8621d00724f3206cfeddeb66d35698c4e2cf3d2/numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", size = 18093567 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/b7/a734c733286e10a7f1a8ad1ae8c90f2d33bf604a96548e0a4a3a6739b468/numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", size = 5968812 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/6b/5610004206cf7f8e7ad91c5a85a8c71b2f2f8051a0c0c4d5916b76d6cbb2/numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", size = 15811913 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/12/8f2020a8e8b8383ac0177dc9570aad031a3beb12e38847f7129bacd96228/numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", size = 20335901 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", size = 13685868 
}, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/f8/97f10e6755e2a7d027ca783f63044d5b1bc1ae7acb12afe6a9b4286eac17/numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", size = 13925109 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", size = 17950613 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/0c/9c603826b6465e82591e05ca230dfc13376da512b25ccd0894709b054ed0/numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", size = 13572172 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/8c/2ba3902e1a0fc1c74962ea9bb33a534bb05984ad7ff9515bf8d07527cadd/numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", size = 17786643 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", size = 5677803 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", size = 15517754 }, -] - -[[package]] -name = "openai" -version = "2.44.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "anyio" }, - { name = "distro" }, - { name = "httpx" }, - { name = "jiter" }, - { name = "pydantic" }, - { name = "sniffio" }, - { name = "tqdm" }, - { name = "typing-extensions" 
}, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/f5/7c7cb955305cb41f7f3c5fd7e0e38bf6bbf2658468863d4b7b868a5cb8df/openai-2.44.0.tar.gz", hash = "sha256:68a5a5ffad82b8ff7d451c437529fb64f7c3b8123aaf0c021966a882d9e3947d", size = 988753 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/f4/561ed79fd94876160018a5e75254cfcb9b0e62d4dded9dcb20072e86d623/openai-2.44.0-py3-none-any.whl", hash = "sha256:0a2a3ab2e29aeda368700f662ff9ba0f9df17ba4c54577a64e08b8115a3cc0ad", size = 1366216 }, -] - -[[package]] -name = "opentelemetry-api" -version = "1.43.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/cc/e4c9584181f86494df0f6bdec1a4f3280c50db44704dc2a407e994fc87bb/opentelemetry_api-1.43.0.tar.gz", hash = "sha256:107d0d03857ea8fc7c5fcbbbd83f800c281f0d560553d61c1d675fccfd1761c1", size = 73476 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/83/6dba32b85f31868400440dc7ad2ca1eab94cbbf3a7b0459ed39f8311a9e2/opentelemetry_api-1.43.0-py3-none-any.whl", hash = "sha256:20acf45e9b21851926835292e4045d290acade1edd2ff3de86d2f069687ba1fd", size = 61912 }, -] - -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.43.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "opentelemetry-exporter-otlp-proto-grpc" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/47/b77366bcbe719373a8cac2b4e8ad01f8ff3c9f2c223374d77ece280aae6f/opentelemetry_exporter_otlp-1.43.0.tar.gz", hash = "sha256:65aded6c50ee7dd2b9948c9d0e59ddb4ed4eea6e8532fba95cbe6a4a64a566ba", size = 6086 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/26/d28cc854d2eb779f91351216c51ed0d54886f89f2dd56db9d493ba9fd429/opentelemetry_exporter_otlp-1.43.0-py3-none-any.whl", hash = 
"sha256:70f3fe740a64596d4157588a2ee7e4fd37d2acc0c0f522a2882b8c29316cd0f0", size = 6726 }, -] - -[[package]] -name = "opentelemetry-exporter-otlp-proto-common" -version = "1.43.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "opentelemetry-proto" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/c1/e8098490ab15abf116dcaf9fa89ededcb35547c7d08d4b5a62f573dc1e63/opentelemetry_exporter_otlp_proto_common-1.43.0.tar.gz", hash = "sha256:c4e32ba6d6b13bdb2b8f6764c4fd28d00192826561aa04f6d14eedfce7ac076f", size = 20197 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/b2/41ebc74ae1d5859901f1b69305de58724bf043381103d6ef413521cbc35a/opentelemetry_exporter_otlp_proto_common-1.43.0-py3-none-any.whl", hash = "sha256:123c3f9cc87218562490c63b36f497bf3a722faf174a515d1443f31ababa6264", size = 17048 }, -] - -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.43.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "googleapis-common-protos" }, - { name = "grpcio" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-common" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e2/1d/6336453716ca0a240d4417d19e6d5b77a5e7163e5670ec4f7ec4d3ede7bf/opentelemetry_exporter_otlp_proto_grpc-1.43.0.tar.gz", hash = "sha256:1b3e0627daa9bc21884d4a13946807c255eb558bfe5bdd543dffb6f4c9faee0d", size = 27213 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/74/2700b5d5c946bf2dba87073fce3dfc198c46bc92ea3d5693f54bc51c90b1/opentelemetry_exporter_otlp_proto_grpc-1.43.0-py3-none-any.whl", hash = "sha256:6a10d1feacffffda19acacbf277b736094b1e2f4dbb98c90ccb2c6e1962e2ec6", size = 19626 }, -] - -[[package]] -name = "opentelemetry-exporter-otlp-proto-http" -version = "1.43.0" -source = { 
registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "googleapis-common-protos" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-common" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "requests" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fc/92/0b9f56412483a8891d4843890294796c9df8ab42417bd9bad8035d840cb3/opentelemetry_exporter_otlp_proto_http-1.43.0.tar.gz", hash = "sha256:fa8a42bb7d00ee5391f4c0b04d8e6a46c03caa437903296ab73a81dc11ba118f", size = 25406 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/20/b685ed7af2e17c29ffc8af56f1fa8bc2033258fc30fb0d2b722f49d13ba0/opentelemetry_exporter_otlp_proto_http-1.43.0-py3-none-any.whl", hash = "sha256:647f603aa8efdbdb4dbff842e0729d0406a6fff26b295a72d3d60e7d963b2610", size = 21795 }, -] - -[[package]] -name = "opentelemetry-proto" -version = "1.43.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "protobuf" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/b9/d357faefb40bda1d4799913e6af611171ff22a2dedcb93576bc92242d056/opentelemetry_proto-1.43.0.tar.gz", hash = "sha256:224778df17e1f3fafeaaa21d874236ca5f6ffc2f86e0899298ec7351aac27924", size = 46481 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/a7/3e5308cf548b8f72529c7db1afdb3a404211982376a12927fd7759f77bf3/opentelemetry_proto-1.43.0-py3-none-any.whl", hash = "sha256:c58f1f7ef84bc7dc2834016c0c37fe0081dde7ca9f6339be1970fbf9cdaaa90d", size = 72489 }, -] - -[[package]] -name = "opentelemetry-sdk" -version = "1.43.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "typing-extensions" }, -] -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/3e/eb/5041074274ac0956b03637cc039d434569112468e875eddfcc9a0674ce06/opentelemetry_sdk-1.43.0.tar.gz", hash = "sha256:d8187c81c162df9913e4003dd6485f7390d9a24fc17026ec7387b8b8218b08e9", size = 254744 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/e3/b17be23af124201c9f52eececd4cc8ddfed1597d37b4ee771895d325805c/opentelemetry_sdk-1.43.0-py3-none-any.whl", hash = "sha256:d1323a547c1ce69d6a069a17a44b7da82bb8b332051ecb074041f87642c86823", size = 178852 }, -] - -[[package]] -name = "opentelemetry-semantic-conventions" -version = "0.64b0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/30/5f26df29509eccd86b99b481ac9ffa39da49ba9577cc69071c552ae30447/opentelemetry_semantic_conventions-0.64b0.tar.gz", hash = "sha256:72f76fb2d1582d9d033dd1fcd84532e961e6ff3d90d24ba6fabc72975a83864c", size = 148340 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/ca/23ba87a221b574a7c5a99d48849d80bfe8b047624681357e2b002e566187/opentelemetry_semantic_conventions-0.64b0-py3-none-any.whl", hash = "sha256:ea77e85e354b8f604ddbe5f3d9135216f982fa4d77e5859ac30f6d8a50505aa6", size = 203713 }, -] - -[[package]] -name = "packaging" -version = "26.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195 }, -] - -[[package]] -name = 
"pandas" -version = "2.3.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "numpy" }, - { name = "python-dateutil" }, - { name = "pytz" }, - { name = "tzdata" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175 }, -] - -[[package]] -name = "pluggy" -version = 
"1.6.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, -] - -[[package]] -name = "propcache" -version = "0.5.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/44/c87281c333769159c50594f22610f77398a47ccbfbbf23074e744e86f87c/propcache-0.5.2.tar.gz", hash = "sha256:01c4fc7480cd0598bb4b57022df55b9ca296da7fc5a8760bd8451a7e63a7d427", size = 50208 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/f1/8a8cc1c2c7e7934ab77e0163414f736fadbc0f5e8dd9673b952355ac175b/propcache-0.5.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:74b70780220e2dd89175ca24b81b68b67c83db499ae611e7f2313cb329801c78", size = 90744 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/f4/651b1225e976bd1a2ba5cfba0c29d096581c2636b437e3a9a7ab6276270a/propcache-0.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a4840ab0ae0216d952f4b53dc6d0b992bfc2bedbfe360bdd9b548bc184c08959", size = 52033 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/a8/8ede85d6aa1f79fc7dc2f8fd2c8d65920b8272c3892903c8a1affde48cfb/propcache-0.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c6844ba6364fb12f403928a82cfd295ab103a2b315c77c747b2dbe4a41894ea7", size = 52754 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/7d/fe/b3551b41bbc2f5b5bb088fc6920567cd43101253e68fbaa261339eb96fe1/propcache-0.5.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2293949b855ce597f2826452d17c2d545fb5622379c4ea6fdf525e9b8e8a2511", size = 57573 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/27/ab851ebd1b7172e3e161f5f8d39e315d54a91bea246f01f4d872d3376aef/propcache-0.5.2-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0fd59b5af35f74da48d905dcbad55449ba13be91823cb05a9bd590bbf5b61660", size = 60645 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/7d/466b3d18022e9897cbda9c735c493c5bd747d7a4c6f5ea1480b4cec434b6/propcache-0.5.2-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29f9309a2e42b0d273be006fdb4be2d6c39a47f6f57d8fb1cf9f81481df81b66", size = 61563 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/27/1b/16ab7f2cf2041da2f60d156ba64c2484eadf9168075b4ff43c3ef60045af/propcache-0.5.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5aaa2b923c1944ac8febd6609cb373540a5563e7cbcb0fd770f75dace2eb817b", size = 58888 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/67/bb777ffd907633563bf35fd859c4ce97b0512c32f4633cf5d1eb7c33512b/propcache-0.5.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:66ea454f095ddf5b6b14f56c064c0941c4788be11e18d2464cf643bf7203ff67", size = 59253 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/42/64f8d90b73fd9cdc1499b48057ff6d9cd2a98a25734c9bb62ecf07e87061/propcache-0.5.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:95f1e3f4760d404b13c9976c0229b2b49a3c8e2c62a9ce92efdd2b11ada75e3f", size = 57558 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/eb/02/dba5bc03c9041f2092ea55a449caf5dfe68352c6654511b29ba0654ddb69/propcache-0.5.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:85341b12b9d55bad0bded24cac341bb34289469e03a11f3f583ea1cc1db0326c", size = 55007 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/c0/43f649c7aa2a77a3b100d84e9dea3a483120ecb608bfe36ce49eaff517fe/propcache-0.5.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:26a4dca084132874e639895c3135dfad5eb20bae209f62d1aeb31b03e601c3c0", size = 60355 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/c0/435dafd27f1cb4a495381dae60e25883ccfe4020bb72818e8184c1678092/propcache-0.5.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:3b199b9b2b3d6a7edf3183ba8a9a137a22b97f7df525feb5ae1eccf026d2a9c6", size = 59057 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/ae/6e292df9135d659944e96cb3389258e4a663e5b2b5f6c217ef0ddc8d2f73/propcache-0.5.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e59bc9e66329185b93dab73f210f1a37f81cb40f321501db8017c9aea15dba27", size = 61938 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/42/314ebc50d8159055411fd6b0bda322ff510e4b1f7d2e4927940ad0f6af20/propcache-0.5.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:552ffadf6ad409844bc5919c42a0a83d88314cedddaea0e41e80a8b8fffe881f", size = 59731 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/9b/2da6dee38871c3c8772fabc2758325a5c9077d6d18c597737dc04dd884cd/propcache-0.5.2-cp311-cp311-win32.whl", hash = "sha256:cd416c1de191973c52ff1a12a57446bfc7642797b282d7caf2162d7d1b8aa9a0", size = 38966 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/42/4e/f17363fb58c0afe05b067361cb6d86ed2d29de6506779a27547c4d183075/propcache-0.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:44e488ef40dbb452700b2b1f8188934121f6648f52c295055662d2191959ff82", size = 42135 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/c6/eb/6af6685077d22e8b33358d3c548e3282706a0b3cd85044ffba4e5dd08e3b/propcache-0.5.2-cp311-cp311-win_arm64.whl", hash = "sha256:54adaa85a22078d1e306304a40984dc5be99d599bf3dc0a24dc98f7daeab89ab", size = 38381 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/cb/e27bc2b2737a0bb49962b275efa051e8f1c35a936df7d5139b6b658b7dc9/propcache-0.5.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:806719138ecd720339a12410fb9614ac9b2b2d3a5fdf8235d56981c36f4039ba", size = 95887 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/13/b8ae04c59392f8d11c6cd9fb4011d1dc7c86b81225c770280300e259ffe1/propcache-0.5.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:db2b80ea58eab4f86b2beec3cc8b39e8ff9276ac20e96b7cce43c8ae84cd6b5a", size = 54654 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/7d/49777a3e20b55863d4794384a38acd460c04157b0a00f8602b0d508b8431/propcache-0.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e5cbfac9f61484f7e9f3597775500cd3ebe8274e9b050c38f9525c77c97520bf", size = 55190 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/c7/085d0cd63062e84044e3f05797749c3f8e3938ff3aeb0eb2f69d43fafc91/propcache-0.5.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5dbc581d2814337da56222fab8dc5f161cd798a434e49bac27930aaef798e144", size = 59995 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/42/32cf8e3009e92b2645cf1e944f701e8ea4e924dffde1ee26db860bcbf7e4/propcache-0.5.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:857187f381f88c8e2fa2fe56ab94879d011b883d5a2ee5a1b60a8cd2a06846d9", size = 63422 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/1b/f112433f99fc979431b87a39ef169e3f8df070d99a72792c56d6937ac48b/propcache-0.5.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:178b4a2cdaac1818e2bf1c5a99b94383fa73ea5382e032a48dec07dc5668dc42", size = 64342 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/15/5574111ae50dd6e879456888c0eadd4c5a869959775854e18e18a6b345f3/propcache-0.5.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f328175a2cde1f0ff2c4ed8ce968b9dcfb55f3a7153f39e2957ed994da13476", size = 61639 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cc/da/4d775080b1490c0ae604acda868bd71aabe3a89ed16f2aa4339eb8a283e7/propcache-0.5.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:5671d09a36b06d0fd4a3da0fccbcae360e9b1570924171a15e9e0997f0249fba", size = 61588 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/ac/f076982cbe2195ee9cf32de5a1e46951d9fb399fc207f390562dd0fd8fb2/propcache-0.5.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80168e2ebe4d3ec6599d10ad8f520304ae1cad9b6c5a95372aef1b66b7bfb53a", size = 60029 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/60/189be62e0dd898dce3b331e1b8c7a543cd3a405ac0c81fe8ee8a9d5d77e1/propcache-0.5.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:45f11346f884bc47444f6e6647131055844134c3175b629f84952e2b5cd62b64", size = 56774 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ea/9e/93377b9c7939c1ffae98f878dee955efadfd638078bc86dbc21f9d52f651/propcache-0.5.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8e778ebd44ef4f66ed60a0416b06b489687db264a9c0b3620362f26489492913", size = 63532 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/f9/590ef6cfb9b8028d516d287812ece32bb0bc5f11fbb9c8bf6b2e6313fec8/propcache-0.5.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:c0cb9ed24c8964e172768d455a38254c2dd8a552905729ce006cad3d3dda59b1", size = 61592 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/5e/70958b3034c297a630bba2f17ca7abc2d5f39a803ad7e370ab79d1ecd022/propcache-0.5.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:1d1ad32d9d4355e2be65574fd0bfd3677e7066b009cd5b9b2dee8aa6a6393b33", size = 64788 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/12/fd/77fe5936d8c3086ca9048f7f415f122ed82e53884a9ec193646b42deef06/propcache-0.5.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c80f4ba3e8f00189165999a742ee526ebeccedf6c3f7beb0c7df821e9772435a", size = 62514 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/74/66bd798b5b3be70aa1b391f5cc9d6a0a5532d7fd3b19ec0b213e72e6ad9d/propcache-0.5.2-cp312-cp312-win32.whl", hash = "sha256:8c7972d8f193740d9175f0998ab38717e6cd322d5935c5b0fef8c0d323fd9031", size = 39018 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/7c/5c0d34aa3024694d6dcb9271cdbdd08c4e47c1c0ad95ec7e7bc74cdea145/propcache-0.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:d9ee8826a7d47863a08ac44e1a5f611a462eefc3a194b492da242128bec75b42", size = 42322 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/91/875812f1a3feb20ceba818ef39fbe4d92f1081e04ac815c822496d0d038b/propcache-0.5.2-cp312-cp312-win_arm64.whl", hash = "sha256:2800a4a8ead6b28cccd1ec54b59346f0def7922ee1c7598e8499c733cfbb7c84", size = 38172 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/09/f049e45385503fe67db75a6b6186a7b9f0c3930366dc960522c312a825b1/propcache-0.5.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:099aaf4b4d1a02265b92a977edf00b5c4f63b3b17ac6de39b0d637c9cac0188a", size = 94457 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/65/83d1d05655baf63113731bd5a1008435e14f8d1e5a06cbe4ec5b23ad7a31/propcache-0.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:68ce1c44c7a813a7f71ea04315a8c7b330b63db99d059a797a4651bb6f69f117", size = 53835 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/12/a6ba6482bb5ea3260c000c9b20881c95fa11c6b30173715668259f844ed7/propcache-0.5.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fc299c129490f55f254cd90be0deca4764e36e9a7c08b4aa588479a3bbed3098", size = 54545 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a9/19/7fa086f5764c59ec8a8e157cd93aa8497acc00aba9dcdec56bfffb32602d/propcache-0.5.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a6ae2198be502c10f09b2516e7b5d019816924bc3183a43ce792a7bd6625e6f4", size = 59886 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/e4/5d7663dc8235956c8f5281698a3af1d351d8820341ddd890f59d9a9127f2/propcache-0.5.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6041d31504dc1779d700e1edcfb08eea334b357620b06681a4eabb57a74e574e", size = 63261 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/4a/15a03adee24d6350da4292caeac44c34c033d2afe5e87eb370f38854560f/propcache-0.5.2-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7eabc04151c78a9f4d5bbb5f1faf571e4defeb4b585e0fe95b60ff2dbe4d3d7", size = 64184 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/c6/979176efdaa3d239e36d503d5af63a0a773b36662ed8f52e5b6a6d9fd40e/propcache-0.5.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4db0ba63d693afd40d249bd93f842b5f144f8fcbb83de05660373bcf30517b1d", size = 61534 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/22/63e8cd1bae4c2d2be6493b6b7d10566ddafad88137cfbc99964a1119853c/propcache-0.5.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1dbcf7675229b35d31abb6547d8ebc8c27a830ac3f9a794edff6254873ec7c0a", size = 61500 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/5a/28e5d9acbac1cc9ccb67045e8c1b943aa8d79fdf39c93bd73cacd68008ea/propcache-0.5.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d310c013aad2c72f1c3f2f8dd3279d460a858c551f97aeb8c63e4693cca7b4d2", size = 59994 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/f3/40/db650677f554a95b9c01a7c9d93d629e93a15562f5deb4573c9ee136fed2/propcache-0.5.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:06187263ddad280d05b4d8a8b3bb7d164cbebd469236544a42e6d9b28ac6a4fa", size = 56884 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/45/70b39b89516ff8b96bf732fa6fded8cef20f293cb1508690101c3c07ec51/propcache-0.5.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3115559b8effafd63b142ea5ed53d63a16ea6469cbc63dce4ee194b42db5d853", size = 63464 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/e2/fa59d3a89eac5534293124af4f1d0d0ada091ce4a0ab4610ce03fd2bdd8d/propcache-0.5.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c60462af8e6dc30c35407c7237ea908d777b22862bbee27bc4699c0d8bcdc45a", size = 61588 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/97/efb547a55c4bc7381cfb202d6a2239ac621045277bc1ea5dfd3a7f0516c0/propcache-0.5.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40314bca9ac559716fe374094fc81c11dcc34b64fd6c585360f5775690505704", size = 64667 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/56/f5c7d9b4b7595d5127da38974d791b2153f3d1eae6c674af3583ace92ad3/propcache-0.5.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cfa21e036ce1e1db2be04ba3b85d2df1bb1702fa01932d984c5464c665228ff4", size = 62463 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bd/3b/484a3a65fc9f9f60c41dcd17b428bace5389544e2c680994534a20755066/propcache-0.5.2-cp313-cp313-win32.whl", hash = "sha256:f156a3529f38063b6dbaf356e15602a7f95f8055b1295a438433a6386f10463d", size = 38621 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/fd/3f0f10dba4dabad3bf53102be007abf55481067952bde0fdddff439e7c61/propcache-0.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:dfed59d0a5aeb01e242e66ff0300bc4a265a7c05f612d30016f0b60b1017d757", size = 41649 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/90/ec/6ce619cc32bb500a482f811f9cd509368b4e58e638d13f2c68f370d6b475/propcache-0.5.2-cp313-cp313-win_arm64.whl", hash = "sha256:ba338430e87ceb9c8f0cf754de38a9860560261e56c00376debd628698a7364f", size = 37636 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/82/c1d268bbbf2ef981c5bf0fbbe746db617c66e3bcefe431a1aa8943fbe23a/propcache-0.5.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:a592f5f3da71c8691c788c13cb6734b6d17663d2e1cb8caddf0673d01ef8847d", size = 98872 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/d4/52c871e73e864e6b34c0e2d58ac1ec5ccd149497ddc7ad2137ae98323a35/propcache-0.5.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6a997d0489e9668a384fcfd5061b857aa5361de73191cac204d04b889cfbbafa", size = 56257 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/f0/9b90ca2a210b3d09bcfcd96ecd0f55545c091535abce2a45de2775cfd357/propcache-0.5.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:10734b5484ea113152ee25a91dccedf81631791805d2c9ccb054958e51842c94", size = 56696 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/0e/6e9d4ba07c8e56e21ddec1e75f12148142b21ca83a51871babce095334f4/propcache-0.5.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cafca7e56c12bb02ae16d283742bef25a61122e9dab2b5b3f2ccbe589ce32164", size = 62378 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/19/c10badaa463dde8a27ce884f8ee2ec37e6035b7c9f5ff0c8f74f06f08dac/propcache-0.5.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f064f8d2b59177878b7615df1735cd8fe3462ed6be8c7b217d17a276489c2b7f", size = 65283 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/b6/93bea99ca80e19cef6512a8580e5b7857bbe09422d9daa7fd4ef5723306c/propcache-0.5.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:f78abfa8dfc32376fd1aacf597b2f2fbbe0ea751419aee718af5d4f82537ef8c", size = 66616 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/e4/5c7462e50625f051f37fb38b8224f7639f667184bbd34424ec83819bb1b7/propcache-0.5.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7467da8a9822bf1a55336f877340c5bcbd3c482afc43a99771169f74a26dedc", size = 63773 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/b6/99238894047b13c823be25027e736626cd414a52a5e30d2c3347c2733529/propcache-0.5.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a6ddc6ac9e25de626c1f129c1b467d7ecd33ce2237d3fd0c4e429feef0a7ee1f", size = 63664 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/1e/a3a1a63116a2b8edb415a8bb9a6f0c34bd03830b1e18e8ce2904e1dc1cf4/propcache-0.5.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2f22cbbac9e26a8e864c0985ff1268d5d939d53d9d9411a9824279097e03a2cb", size = 62643 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/03/893cf147de2fc6543c5eaa07ad833170e7e2a2385725bbebe8c0503723bb/propcache-0.5.2-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:fc76378c62a0f04d0cd82fbb1a2cd2d7e28fcb40d5873f28a6c44e388aaa2751", size = 59595 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/3b/04c1a2e12c57766568ba75ba72b3bf2042818d4c1425fab6fc07155c7cff/propcache-0.5.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:acd2c8edba48e31e58a363b8cf4e5c7db3b04b3f9e371f601df30d9b0d244836", size = 65711 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/34/80f8d0099f8d6bacc4de1624c85672681c8cd1149ca2da0e38fd120b817f/propcache-0.5.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:452b5065457eb9991ec5eb38ff41d6cd4c991c9ac7c531c4d5849ae473a9a13f", size = 64247 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/1a/8b08f3a5f1037e9e370c55883ceeeee0f6dd0416fb2d2d67b8bfc91f2a79/propcache-0.5.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = 
"sha256:3430bb2bfe1331885c427745a751e774ee679fd4344f80b97bf879815fe8fa55", size = 67102 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/68/8bdb7bb7756d76e005490649d10e4a8369e610c74d619f71e1aedf889e9c/propcache-0.5.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cef6cea3922890dd6c9654971001fa797b526c16ab5e1e46c05fd6f877be7568", size = 64964 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/aa/50fb0b5d3968b61a510926ff8b8465f1d6e976b3ab74496d7a4b9fc42515/propcache-0.5.2-cp313-cp313t-win32.whl", hash = "sha256:72d61e16dd78228b58c5d47be830ff3da7e5f139abdf0aef9d86cde1c5cf2191", size = 42546 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/4c/0ddbae64321bd4a95bcbfc19307238016b5b1fee645c84626c8d539e5b74/propcache-0.5.2-cp313-cp313t-win_amd64.whl", hash = "sha256:0958834041a0166d343b8d2cedcd8bcbaeb4fdbe0cf08320c5379f143c3be6e7", size = 46330 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/00/d9/9cddc8efb78d8af264c5ec9f6d10b62f57c515feda8d321595f56010fb23/propcache-0.5.2-cp313-cp313t-win_arm64.whl", hash = "sha256:6de8bd93ddde9b992cf2b2e0d796d501a19026b5b9fd87356d7d0779531a8d96", size = 40521 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e2/ea/23ee535d90ce8bcc465a3028eb3cc0ce3bd1005f4bb27710b30587de798d/propcache-0.5.2-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:46088abff4cba581dea21ae0467a480526cb25aa5f3c269e909f800328bc3999", size = 94662 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/06/c5a52f419b5d8972f8d46a7577476090d8e3263ff589ce40b5ca4968d5be/propcache-0.5.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fc88b26f08d634f7bc819a7852e5214f5802641ab8d9fd5326892292eee1993e", size = 53928 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/63/b1/4260d67d6bd85e58a66b72d54ce15d5de789b6f3870cc6bedf8ff9667401/propcache-0.5.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:97797ebb098e670a2f92dd66f32897e30d7615b14e7f59711de23e30a9072539", size = 54650 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/70/06/2f46c318e3307cd7a6a7481def374ce838c0fe20084b39dd54b0879d0e99/propcache-0.5.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba57fffe4ac99c5d30076161b5866336d97600769bad35cc68f7774b15298a4e", size = 59912 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/29/fe1aebec2ce57ab985a9c382bded1124431f85078113aa222c5d278430d4/propcache-0.5.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:583c19759d9eec1e5b69e2fbef36a7d9c326041be9746cb822d335c8cedc2979", size = 63300 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/18/2334b26768b6c82be8c69e83671b767d5ef426aa09b0cba6c2ea47816774/propcache-0.5.2-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d0326e2e5e1f3163fa306c834e48e8d490e5fae607a097a40c0648109b47ba80", size = 64208 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/76/7f1bfd6afff4c5e38e36a3c6d68eb5f4b7311ea80baf693db78d95b603c4/propcache-0.5.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e00820e192c8dbebcafb383ebbf99030895f09905e7a0eb2e0340a0bcc2bc825", size = 61633 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/46/b3ff8aba2b4953a3e50de2cf72f1b5748b8eca93b15f3dc2c84339084c09/propcache-0.5.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c66afea89b1e43725731d2004732a046fe6fe955d51f952c3e95a7314a284a39", size = 61724 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/01/814cfcafbcff954f94c01cf30e097ddc88a076b5440fbcf4570753437d40/propcache-0.5.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc37dec6c6cdad0b57881a5658fd14fbf53e333b1a86cf86559f190e1d9ec4", size = 60069 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/da/68/5c6f7622d510cc666a300687e06fd060c1a43361c0c9b20d284f06d8096a/propcache-0.5.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:5570dbcc97571c15f68068e529c92715a12f8d54030e272d264b377e22bd17a5", size = 57099 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/27/9cb0b4c679124085327957d42521c99dba04c88c90c3e55a6f0b633ebccc/propcache-0.5.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f814362777a9f841adddb200ecdf8f5cb1e5a3c4b7a86378edbd6ccb26edd702", size = 63391 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/9d/7258aaa5bdf60fc6f27591eef6fe52768cb0beda7140be477c8b12c9794a/propcache-0.5.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:196913dea116aeb5a2ba95af4ddcb7ea85559ae07d8eee8751688310d09168c3", size = 61626 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/0d/41c602003e8a9b16fe1e7eadf62c7bfba9d5474370b24200bf48b315f45f/propcache-0.5.2-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:6e7b8719005dd1175be4ab1cd25e9b98659a5e0347331506ec6760d2773a7fb5", size = 64781 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/f3/38e66b1856e9bd079deea015bc4a55f7767c0e4db2f7dcf69e7e680ba4ce/propcache-0.5.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:51f96d685ab16e88cab128cd37a52c5da540809c8b879fa047731bfcb4ad35a4", size = 62570 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/ca/bbfe9b910ce57dde8bb4876b4520fc02a4e89497c10de26be936758a3aaa/propcache-0.5.2-cp314-cp314-win32.whl", hash = "sha256:cc6fc3cc62e8501d3ed62894425040d2728ecddb1ed072737a5c70bd537aa9f0", size = 39436 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/d2/45c9defbaa1ea297035d9d4cce9e8f80daafbf19319c6007f157c6256ea9/propcache-0.5.2-cp314-cp314-win_amd64.whl", hash = "sha256:81e3a30b0bb60caa22033dd0f8a3618d1d67356212514f62c57db75cb0ef410c", size = 42373 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/44/68/9ea5103f41d5217d7d6ec24db90018e23aebec070c3f9a6e54d12b841fd8/propcache-0.5.2-cp314-cp314-win_arm64.whl", hash = "sha256:0d2c9bf8528f135dbb805ce027567e09164f7efa51a2be07458a2c0420f292d0", size = 38554 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/81/fadf555f42d3b762eea8a53950b0489fdc0aa9da5f8ed9e10ce0a4e01b48/propcache-0.5.2-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:4bc8ff1feffc6a61c7002ffe84634c41b822e104990ae009f44a0834430070bb", size = 99395 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/c9/c61e134a686949cf7971af3a390148b1156f7be81c73bc0cd12c873e2d48/propcache-0.5.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:79aa3ff0a9b566633b642fa9caf7e21ed1c13d6feca718187873f199e1514078", size = 56653 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/73/daf935ea7048ddd7ec8eec5345b4a40b619d2d178b3c0a0900796bc3c794/propcache-0.5.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1b31822f4474c4036bae62de9402710051d431a606d6a0f907fec79935a071aa", size = 56914 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/9f/aba959b435ea18617edd7cf0a7ad0b9c574b8fc7e3d2cd55fb59cb255d33/propcache-0.5.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:13fef48778b5a2a756523fdb781326b028ca75e32858b04f2cdd19f394564917", size = 62567 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6c/a1/859942de9a791ff42f6141736f5b37749b8f53e65edfa49638c67dd67e6a/propcache-0.5.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8b73ab70f1a3351fbc71f663b3e645af6dd0329100c353081cf69c37433fc6fe", size = 65542 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/61/315bc0fd6c0fc7f80a528b8afd209e5fc4a875ea79571b91b8f50f442907/propcache-0.5.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:5538d2c13d93e4698af7e092b57bc7298fd35d1d58e656ae18f23ee0d0378e03", size = 66845 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/f7/9f8122e3132e8e354ac41975ef8f1099be7d5a16bc7ae562734e993665c0/propcache-0.5.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cd645f03898405cabe694fb8bc35241e3a9c332ec85627584fe3de201452b335", size = 63985 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/54/c317819ec157cbf6f35df9df9657a6f82daf34d5faf15948b2f639c2192e/propcache-0.5.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a473b3440261e0c60706e732b2ed2f517857344fc21bf48fdfe211e2d98eb285", size = 63999 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/56/387e3f7dfce0a9233df41fb888aa1c30222cb4bbbf09537c02dd9bd85fe2/propcache-0.5.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7afa37062e6650640e932e4cc9297d81f9f42d9944029cc386b8247dea4da837", size = 62779 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/9c/596784cb5824ed61ee960d3f8655a3f0993e107c6e98ab6c818b7fb92ccb/propcache-0.5.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:8a90efd5777e996e42d568db9ac740b944d691e565cbfd31b2f7832f9184b2b8", size = 59796 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/3d/1a6cfa1726a48542c1e8784a0761421476a5b68e09b7f36bf95eb954aaba/propcache-0.5.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:f19bb891234d72535764d703bfed1153cc34f4214d5bd7150aee1eec9e8f4366", size = 66023 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/0e/05fd6990369477076e4e280bcb970de760fddf0161a46e988bc95f7940ec/propcache-0.5.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:32775082acd2d807ee3db715c7770d38767b817870acfa08c29e057f3c4d5b56", size = 64448 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cd/86/5f8da315a4309c62c10c0b2516b17492d5d3bbe1bb862b96604db67e2a37/propcache-0.5.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = 
"sha256:9282fb1a3bccd038da9f768b927b24a0c753e466c086b7c4f3c6982851eefb2d", size = 67329 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/d3/3368efe79ab21f0cdf86ef49895811c9cc933131d4cde1f28a624e22e712/propcache-0.5.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cc49723e2f60d6b32a0f0b08a3fd6d13203c07f1cd9566cfce0f12a917c967a2", size = 65172 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/07/127e8b0bacfb325396196f9d976a22453049b89b9b2b08477cc3145faa44/propcache-0.5.2-cp314-cp314t-win32.whl", hash = "sha256:2d7aa89ebca5acc98cba9d1472d976e394782f587bad6661003602a619fd1821", size = 43813 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/fb/46dad6c0ae49ed230ab1b16c890c2b6314e2403e6c412976f4a72d64a527/propcache-0.5.2-cp314-cp314t-win_amd64.whl", hash = "sha256:d447bb0b3054be5818458fbb171208b1d9ff11eba14e18ca18b90cbb45767370", size = 47764 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/c4/a47d0a63aa309d10d59ede6e9d4cff03a344a79d1f0f4cd0cd74997b53e0/propcache-0.5.2-cp314-cp314t-win_arm64.whl", hash = "sha256:fe67a3d11cd9b4efabfa45c3d00ffba2b26811442a73a581a94b67c2b5faccf6", size = 41140 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/ed/1cdcab6ba3d6ab7feca11fc14f0eeea80755bb53ef4e892079f31b10a25f/propcache-0.5.2-py3-none-any.whl", hash = "sha256:be1ddfcbb376e3de5d2e2db1d58d6d67463e6b4f9f040c000de8e300295465fe", size = 14036 }, -] - -[[package]] -name = "protobuf" -version = "7.35.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/01/9ef0afd7999eb9badb3a768b4aedd78c86d4c65cfaf1958ab276199e76b4/protobuf-7.35.1.tar.gz", hash = "sha256:ce115a26fe0c39a2c29973d914d327e516a6455464489fe3cd1e51a1b354f81a", size = 458717 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/10/03/8aeeb7458d22546bf64b5250ca1daeb5ff757d900e8e4a7476c6f0db843e/protobuf-7.35.1-cp310-abi3-macosx_10_9_universal2.whl", hash = 
"sha256:24f857477359a85c0c235261b8ba905fd51b2562f4a64ca1df5473f29850cbf6", size = 433226 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/4b/dfb89eb0e652a1ff073c39a59fb5e3a83cfe9b57a2c83fa6d78270101767/protobuf-7.35.1-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:11d6b0ec246892d85215b0a13ca6e0233cf5284b68f0ac02646427f4ff88a799", size = 328847 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/58/dc12f2cd484951524af6e3382c785869b9b3fb5e52ee95ae23add53ee8f9/protobuf-7.35.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:b73f9489a4b8b1c9cb1f8ed951c736392592edb24b9d6819f36d2e10b171d5b4", size = 344030 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/be/5b3cfe508bfab6761414ff944e3366eb13be4fd71efcd69450f89ba39f43/protobuf-7.35.1-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:74758715c53d7158fb76caf4f0cfdacc5329a4b1bb994f865d6cf302d413a1c4", size = 327130 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/bc/6d6c7ba8709c85f8f2c390b2b118d6fb08a783676a572271851bf45a7d22/protobuf-7.35.1-cp310-abi3-win32.whl", hash = "sha256:353652e4efd0bca5b5fc2656abf8307ef351f0cf938c9eba09f0e09c20a25c30", size = 428945 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/19/8d0cb6f20a1ef7b18f1c8986ad5783f22f84cce39c6ce9a6e645ea55192e/protobuf-7.35.1-cp310-abi3-win_amd64.whl", hash = "sha256:230a75ddfc2de4806e56696ce9640c1cdfdb6543b7cfce98d42a4c0a0e7bdb87", size = 439996 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/c7/5f7c636ec43e0c545e28d1f1db71990108306f7bdcb89f069ba97e428e7f/protobuf-7.35.1-py3-none-any.whl", hash = "sha256:4bc97768d8fe4ad6743c8a19403e314511ed9f6d13205b687e52421c023ac1b9", size = 171659 }, -] - -[[package]] -name = "pyarrow" -version = "24.0.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/13/13e1069b351bdc3881266e11147ffccf687505dbb0ea74036237f5d454a5/pyarrow-24.0.0.tar.gz", hash = 
"sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83", size = 1180261 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/c9/a47ab7ece0d86cbe6678418a0fbd1ac4bb493b9184a3891dfa0e7f287ae0/pyarrow-24.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b0e131f880cda8d04e076cee175a46fc0e8bc8b65c99c6c09dff6669335fde74", size = 35068898 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/bc/8db86617a9a58008acf8913d6fed68ea2a46acb6de928db28d724c891a68/pyarrow-24.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:1b2fe7f9a5566401a0ef2571f197eb92358925c1f0c8dba305d6e43ea0871bb3", size = 36679915 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/8e/fb178720400ef69db251eb4a9c3ccf4af269bc1feb5055529b8fc87170d1/pyarrow-24.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:0b3537c00fb8d384f15ac1e79b6eb6db04a16514c8c1d22e59a9b95c8ba42868", size = 45697931 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/27/99c42abe8e21b44f4917f62631f3aa31404882a2c41d8a4cd5c110e13d52/pyarrow-24.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:14e31a3c9e35f1ab6356c6378f6f72830e6d2d5f1791df3774a7b097d18a6a1e", size = 48837449 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/b6/333749e2666e9032891125bf9c691146e92901bece62030ac1430e2e7c88/pyarrow-24.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7d9a514e73bc42711e6a35aaccf3587c520024fe0a25d830a1a8a27c15f4f57", size = 49395949 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/25/c5201706a2dd374e8ba6ee3fd7a8c89fb7ffc16eed5217a91fd2bd7f7626/pyarrow-24.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b196eb3f931862af3fa84c2a253514d859c08e0d8fe020e07be12e75a5a9780c", size = 51912986 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/d2/4d1bbba65320b21a49678d6fbdc6ff7c649251359fdcfc03568c4136231d/pyarrow-24.0.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:35405aecb474e683fb36af650618fd5340ee5471fc65a21b36076a18bbc6c981", size = 27255371 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/a9/9686d9f07837f91f775e8932659192e02c74f9d8920524b480b85212cc68/pyarrow-24.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6233c9ed9ab9d1db47de57d9753256d9dcffbf42db341576099f0fd9f6bf4810", size = 34981559 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/b6/0ddf0e9b6ead3474ab087ae598c76b031fc45532bf6a63f3a553440fb258/pyarrow-24.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f7616236ec1bc2b15bfdec22a71ab38851c86f8f05ff64f379e1278cf20c634a", size = 36663654 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/3b/926382efe8ce27ba729071d3566ade6dfb86bdf112f366000196b2f5780a/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1617043b99bd33e5318ae18eb2919af09c71322ef1ca46566cdafc6e6712fb66", size = 45679394 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/7a/829f7d9dfd37c207206081d6dad474d81dde29952401f07f2ba507814818/pyarrow-24.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6165461f55ef6314f026de6638d661188e3455d3ec49834556a0ebbdbace18bb", size = 48863122 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5f/e8/f88ce625fe8babaae64e8db2d417c7653adb3019b08aae85c5ed787dc816/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3b13dedfe76a0ad2d1d859b0811b53827a4e9d93a0bcb05cf59333ab4980cc7e", size = 49376032 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/1c/e3e72c8014ad2743ca64a701652c733cc5cbcee15c0463a32a8c55518d9e/pyarrow-24.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:295f0a7f2e242dabd513737cf076007dc5b2d59237e3eca37b05c0c6446f3826", size 
= 27355660 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/d3/a1abf004482026ddc17f4503db227787fa3cfe41ec5091ff20e4fea55e57/pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba", size = 34976759 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/4a/34f0a36d28a2dd32225301b79daad44e243dc1a2bb77d43b60749be255c4/pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68", size = 36658471 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/78/543b94712ae8bb1a6023bcc1acf1a740fbff8286747c289cd9468fced2a5/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2", size = 45675981 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/84/9f/8fb7c222b100d314137fa40ec050de56cd8c6d957d1cfff685ce72f15b17/pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0", size = 48859172 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/d3/1ea72538e6c8b3b475ed78d1049a2c518e655761ea50fe1171fc855fcab7/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495", size = 49385733 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c3/be/c3d8b06a1ba35f2260f8e1f771abbee7d5e345c0937aab90675706b1690a/pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f", size = 51934335 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/62/89e07a1e7329d2cde3e3c6994ba0839a24977a2beda8be6005ea3d860b99/pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91", size = 27271748 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/17/1a/cff3a59f80b5b1658549d46611b67163f65e0664431c076ad728bf9d5af4/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275", size = 35238554 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a8/99/cce0f42a327bfef2c420fb6078a3eb834826e5d6697bf3009fe11d2ad051/pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b", size = 36782301 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/66/8e560d5ff6793ca29aca213c53eec0dd482dd46cb93b2819e5aab52e4252/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42", size = 45721929 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/27/0c/a26e25505d030716e078d9f16eb74973cbf0b33b672884e9f9da1c83b871/pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b", size = 48825365 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5f/eb/771f9ecb0c65e73fe9dccdd1717901b9594f08c4515d000c7c62df573811/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37", size = 49451819 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/da/61ae89a88732f5a785646f3ec6125dbb640fa98a540eb2b9889caa561403/pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca", size = 51909252 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/1a/8dd5cafab7b66573fa91c03d06d213356ad4edd71813aa75e08ce2b3a844/pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d", size = 27388127 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ad/80/d022a34ff05d2cbedd8ccf841fc1f532ecfa9eb5ed1711b56d0e0ea71fc9/pyarrow-24.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:1cc9057f0319e26333b357e17f3c2c022f1a83739b48a88b25bfd5fa2dc18838", size = 35007997 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/ff/f01485fda6f4e5d441afb8dd5e7681e4db18826c1e271852f5d3957d6a80/pyarrow-24.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e6f1278ee4785b6db21229374a1c9e54ec7c549de5d1efc9630b6207de7e170b", size = 36678720 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/c2/2d2d5fea814237923f71b36495211f20b43a1576f9a4d6da7e751a64ec6f/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:adbbedc55506cbdabb830890444fb856bfb0060c46c6f8026c6c2f2cf86ae795", size = 45741852 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/3a/28ba9c1c1ebdbb5f1b94dfebb46f207e52e6a554b7fe4132540fde29a3a0/pyarrow-24.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ae8a1145af31d903fa9bb166824d7abe9b4681a000b0159c9fb99c11bc11ad26", size = 48889852 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/51/4a389acfd31dca009f8fb82d7f510bb4130f2b3a8e18cf00194d0687d8ac/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d7027eba1df3b2069e2e8d80f644fa0918b68c46432af3d088ddd390d063ecde", size = 49445207 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/4b/0bab2b23d2ae901b1b9a03c0efd4b2d070256f8ce3fc43f6e58c167b2081/pyarrow-24.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e56a1ffe9bf7b727432b89104cc0849c21582949dd7bdcb34f17b2001a351a76", size = 51954117 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/88/f4e9145da0417b3d2c12035a8492b35ff4a3dbc653e614fcfb51d9dedb38/pyarrow-24.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:38be1808cdd068605b787e6ca9119b27eb275a0234e50212c3492331680c3b1e", size = 28001155 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/79/4f/46a49a63f43526da895b1a45bbb51d5baf8e4d77159f8528fc3e5490007f/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:418e48ce50a45a6a6c73c454677203a9c75c966cb1e92ca3370959185f197a05", size = 35250387 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/da/d5e0cd5ef00796922404806d5f00325cdadc3441ce2c13fe7115f2df9a64/pyarrow-24.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:2f16197705a230a78270cdd4ea8a1d57e86b2fdcbc34a1f6aebc72e65c986f9a", size = 36797102 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/c7/5904145b0a593a05236c882933d439b5720f0a145381179063722fbfc123/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fb24ac194bfc5e86839d7dcd52092ee31e5fe6733fe11f5e3b06ef0812b20072", size = 45745118 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/13/d3/cca42fe166d1c6e4d5b80e530b7949104d10e17508a90ae202dac205ce2a/pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9700ebd9a51f5895ce75ff4ac4b3c47a7d4b42bc618be8e713e5d56bacf5f931", size = 48844765 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/49/942c3b79878ba928324d1e17c274ed84581db8c0a749b24bcf4cbdf15bd3/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d8ddd2768da81d3ee08cfea9b597f4abb4e8e1dc8ae7e204b608d23a0d3ab699", size = 49471890 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/97/ff71431000a75d84135a1ace5ca4ba11726a231a8007bbb320a4c54075d5/pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:61a3d7eaa97a14768b542f3d284dc6400dd2470d9f080708b13cd46b6ae18136", size = 51932250 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/be/6f79d55816d5c22557cf27533543d5d70dfe692adfbee4b99f2760674f38/pyarrow-24.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:c91d00057f23b8d353039520dc3a6c09d8608164c692e9f59a175a42b2ae0c19", size = 28131282 }, -] - -[[package]] -name = "pycparser" -version = "3.0" -source = { registry = 
"https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172 }, -] - -[[package]] -name = "pydantic" -version = "2.13.4" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "annotated-types" }, - { name = "pydantic-core" }, - { name = "typing-extensions" }, - { name = "typing-inspection" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/a5/b60d21ac674192f8ab0ba4e9fd860690f9b4a6e51ca5df118733b487d8d6/pydantic-2.13.4.tar.gz", hash = "sha256:c40756b57adaa8b1efeeced5c196f3f3b7c435f90e84ea7f443901bec8099ef6", size = 844775 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/7b/122376b1fd3c62c1ed9dc80c931ace4844b3c55407b6fb2d199377c9736f/pydantic-2.13.4-py3-none-any.whl", hash = "sha256:45a282cde31d808236fd7ea9d919b128653c8b38b393d1c4ab335c62924d9aba", size = 472262 }, -] - -[[package]] -name = "pydantic-core" -version = "2.46.4" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/56/921726b776ace8d8f5db44c4ef961006580d91dc52b803c489fafd1aa249/pydantic_core-2.46.4.tar.gz", hash = "sha256:62f875393d7f270851f20523dd2e29f082bcc82292d66db2b64ea71f64b6e1c1", size = 471464 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/fa/6d7708d2cfc1a832acb6aeb0cd16e801902df8a0f583bb3b4b527fde022e/pydantic_core-2.46.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:0e96592440881c74a213e5ad528e2b24d3d4f940de2766bed9010ab1d9e51594", size = 2111872 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/6f/aa064a3e74b5745afbdf250594f38e7ead05e2d651bcb35994b9417a0d4d/pydantic_core-2.46.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0d65b8c354be7fb5f720c3caa8bc940bc2d20ce749c8e06135f07f8ed95dd7c", size = 1948255 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/43/3a/41114a9f7569b84b4d84e7a018c57c56347dac30c0d4a872946ec4e36c46/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bfb192b3f4b9e8a89b6277b6ce787564f62cfd272055f6e685726b111dc7826", size = 1972827 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/25/1ab42e8048fe551934d9884e8d64daa7e990ad386f310a15981aeb6a5b08/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9037063db01f09b09e237c282b6792bd4da634b5402c4e7f0c61effed7701a04", size = 2041051 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/c2/1a934597ddf08da410385b3b7aae91956a5a76c635effef456074fad7e88/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc010ab034c8c7452522748bf937df58020d256ccae0874463d1f4d01758af8e", size = 2221314 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/6d/9e8ad178c9c4df27ad3c8f25d1fe2a7ab0d2ba0559fad4aee5d3d1f16771/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c5dac79fa1614d1e06ca695109c6105923bd9c7d1d6c918d4e637b7e6b32fd3", size = 2285146 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/50/540cd3aeefc041beb111125c4bff779831a2111fc6b15a9138cda277d32c/pydantic_core-2.46.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9fa868638bf362d3d138ea55829cefb3d5f4b0d7f142234382a15e2485dbec4", size = 2089685 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/6b/a4/b440ad35f05f6a38f89fa0f149accb3f0e02be94ca5e15f3c449a61b4bc9/pydantic_core-2.46.4-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:17299feefe090f2caa5b8e37222bb5f663e4935a8bfa6931d4102e5df1a9f398", size = 2115420 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/99/61/de4f55db8dfd57bfdfa9a12ec90fe1b57c4f41062f7ca86f08586b3e0ac0/pydantic_core-2.46.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4c63ebc82684aa89d9a3bcbd13d515b3be44250dc68dd3bd81526c1cb31286c3", size = 2165122 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f7/52/7c529d7bdb2d1068bd52f51fe32572c8301f9a4febf1948f10639f1436f5/pydantic_core-2.46.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aaa2a54443eff1950ba5ddc6b6ccda0d9c84a364276a62f969bdf2a390650848", size = 2182573 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/b3/7c40325848ba78247f2812dcf9c7274e38cd801820ca6dd9fe63bcfb0eb4/pydantic_core-2.46.4-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:18e5ceec2ab67e6d5f1a9085e5a24c9c4e2ac4545730bfe668680bca05e555f3", size = 2317139 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/37/f913f81a657c865b75da6c0dbed79876073c2a43b5bd9edbe8da785e4d49/pydantic_core-2.46.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a0f62d0a58f4e7da165457e995725421e0064f2255d8eccebc49f41bbc23b109", size = 2360433 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/67/6acaa1be2567f9256b056d8477158cac7240813956ce86e49deae8e173b4/pydantic_core-2.46.4-cp311-cp311-win32.whl", hash = "sha256:041bde0a48fd37cf71cab1c9d56d3e8625a3793fef1f7dd232b3ff37e978ecda", size = 1985513 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/aa/e6/c505f83dfeda9a2e5c995cfd872949e4d05e12f7feb3dca72f633daefa94/pydantic_core-2.46.4-cp311-cp311-win_amd64.whl", hash = "sha256:6f2eeda33a839975441c86a4119e1383c50b47faf0cbb5176985565c6bb02c33", size = 2071114 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/0f/da/7a263a96d965d9d0df5e8de8a475f33495451117035b09acb110288c381f/pydantic_core-2.46.4-cp311-cp311-win_arm64.whl", hash = "sha256:14f4c5d6db102bd796a627bbb3a17b4cf4574b9ae861d8b7c9a9661c6dd3362d", size = 2044298 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/8c/af022f0af448d7747c5154288d46b5f2bc5f17366eaa0e23e9aa04d59f3b/pydantic_core-2.46.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3245406455a5d98187ec35530fd772b1d799b26667980872c8d4614991e2c4a2", size = 2106158 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/95/6195171e385007300f0f5574592e467c568becce2d937a0b6804f218bc49/pydantic_core-2.46.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:962ccbab7b642487b1d8b7df90ef677e03134cf1fd8880bf698649b22a69371f", size = 1951724 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/bc/f47d1ff9cbb1620e1b5b697eef06010035735f07820180e74178226b27b3/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8233f2947cf85404441fd7e0085f53b10c93e0ee78611099b5c7237e36aacbf7", size = 1975742 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/11/9b9a5b0306345664a2da6410877af6e8082481b5884b3ddd78d47c6013ce/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a233125ac121aa3ffba9a2b59edfc4a985a76092dc8279586ab4b71390875e7", size = 2052418 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/b7/a65fec226f5d78fc39f4a13c4cc0c768c22b113438f60c14adc9d2865038/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b712b53160b79a5850310b912a5ef8e57e56947c8ad690c227f5c9d7e561712", size = 2232274 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/f0/92039db98b907ef49269a8271f67db9cb78ae2fc68062ef7e4e77adb5f61/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:9401557acd873c3a7f3eb9383edef8ac4968f9510e340f4808d427e75667e7b4", size = 2309940 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5f/97/2aab507d3d00ca626e8e57c1eac6a79e4e5fbcc63eb99733ff55d1717f65/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:926c9541b14b12b1681dca8a0b75feb510b06c6341b70a8e500c2fdcff837cce", size = 2094516 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/37/a8aca44d40d737dde2bc05b3c6c07dff0de07ce6f82e9f3167aeaf4d5dea/pydantic_core-2.46.4-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:56cb4851bcaf3d117eddcef4fe66afd750a50274b0da8e22be256d10e5611987", size = 2136854 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/99/fcef1b79238c06a8cbec70819ac722ba76e02bc8ada9b0fd66eba40da01b/pydantic_core-2.46.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c68fcd102d71ea85c5b2dfac3f4f8476eff42a9e078fd5faefff6d145063536b", size = 2180306 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/6c/fc44000918855b42779d007ae63b0532794739027b2f417321cddbc44f6a/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b2f69dec1725e79a012d920df1707de5caf7ed5e08f3be4435e25803efc47458", size = 2190044 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/65/d9cadc9f1920d7a127ad2edba16c1db7916e59719285cd6c94600b0080ba/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:8d0820e8192167f80d88d64038e609c31452eeca865b4e1d9950a27a4609b00b", size = 2329133 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/cf/c873d91679f3a30bcf5e7ac280ce5573483e72295307685120d0d5ad3416/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fbdb89b3e1c94a30cc5edfce477c6e6a5dc4d8f84665b455c27582f211a1c72c", size = 2374464 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/bd/6f2fc8188f31bf10590f1e98e7b306336161fac930a8c514cd7bd828c7dc/pydantic_core-2.46.4-cp312-cp312-win32.whl", hash = 
"sha256:9aa768456404a8bf48a4406685ac2bec8e72b62c69313734fa3b73cf33b3a894", size = 1974823 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/8c/985c1d41ea1107c2534abd9870e4ed5c8e7669b5c308297835c001e7a1c4/pydantic_core-2.46.4-cp312-cp312-win_amd64.whl", hash = "sha256:e9c26f834c65f5752f3f06cb08cb86a913ceb7274d0db6e267808a708b46bc89", size = 2072919 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/ba/f463d006e0c47373ca7ec5e1a261c59dc01ef4d62b2657af925fb0deee3a/pydantic_core-2.46.4-cp312-cp312-win_arm64.whl", hash = "sha256:4fc73cb559bdb54b1134a706a2802a4cddd27a0633f5abb7e53056268751ac6a", size = 2027604 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/51/a2/5d30b469c5267a17b39dec53208222f76a8d351dfac4af661888c5aee77d/pydantic_core-2.46.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5d5902252db0d3cedf8d4a1bc68f70eeb430f7e4c7104c8c476753519b423008", size = 2106306 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/81/4fa520eaffa8bd7d1525e644cd6d39e7d60b1592bc5b516693c7340b50f1/pydantic_core-2.46.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94f0688e7b8d0a67abf40e57a7eaaecd17cc9586706a31b76c031f63df052b4", size = 1951906 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/d5/fd02da45b659668b05923b17ba3a0100a0a3d5541e3bd8fcc4ecb711309e/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f027324c56cd5406ca49c124b0db10e56c69064fec039acc571c29020cc87c76", size = 1976802 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/21/f2/95727e1368be3d3ed485eaab7adbd7dda408f33f7a36e8b48e0144002b91/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e739fee756ba1010f8bcccb534252e85a35fe45ae92c295a06059ce58b74ccd3", size = 2052446 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/9c/86/5d99feea3f77c7234b8718075b23db11532773c1a0dbd9b9490215dc2eeb/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d56801be94b86a9da183e5f3766e6310752b99ff647e38b09a9500d88e46e76", size = 2232757 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/3a/508ac615935ef7588cf6d9e9b91309fdc2da751af865e02a9098de88258c/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2412e734dcb48da14d4e4006b82b46b74f2518b8a26ee7e58c6844a6cd6d03c4", size = 2309275 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/f8/41db9de19d7987d6b04715a02b3b40aea467000275d9d758ffaa31af7d50/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9551187363ffc0de2a00b2e47c25aeaeb1020b69b668762966df15fc5659dd5a", size = 2094467 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/e2/f35033184cb11d0052daf4416e8e10a502ea2ac006fc4f459aee872727d1/pydantic_core-2.46.4-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0186750b482eefa11d7f435892b09c5c606193ef3375bcf94aa00ae6bfb66262", size = 2134417 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/7b/6ceeb1cc90e193862f444ebe373d8fdf613f0a82572dde03fb10734c6c71/pydantic_core-2.46.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5855698a4856556d86e8e6cd8434bc3ac0314ee8e12089ae0e143f64c6256e4e", size = 2179782 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/f2/c8d7773ede6af08036423a00ae0ceffce266c3c52a096c435d68c896083f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cbaf13819775b7f769bf4a1f066cb6df7a28d4480081a589828ef190226881cd", size = 2188782 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/31/0c864784e31f09f05cdd87606f08923b9c9e7f6e51dd27f20f62f975ce9f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_armv7l.whl", hash = 
"sha256:633147d34cf4550417f12e2b1a0383973bdf5cdfde212cb09e9a581cf10820be", size = 2328334 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/eb/4f6c8a41efa30baa755590f4141abf3a8c370fab610915733e74134a7270/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:82cf5301172168103724d49a1444d3378cb20cdee30b116a1bd6031236298a5d", size = 2372986 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/24/b375a480d53113860c299764bfe9f349a3dc9108b3adc0d7f0d786492ebf/pydantic_core-2.46.4-cp313-cp313-win32.whl", hash = "sha256:9fa8ae11da9e2b3126c6426f147e0fba88d96d65921799bb30c6abd1cb2c97fb", size = 1973693 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/e8/cff247591966f2d22ec8c003cd7587e27b7ba7b81ab2fb888e3ab75dc285/pydantic_core-2.46.4-cp313-cp313-win_amd64.whl", hash = "sha256:6b3ace8194b0e5204818c92802dcdca7fc6d88aabbb799d7c795540d9cd6d292", size = 2071819 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c6/1a/f4aee670d5670e9e148e0c82c7db98d780be566c6e6a97ee8035528ca0b3/pydantic_core-2.46.4-cp313-cp313-win_arm64.whl", hash = "sha256:184c081504d17f1c1066e430e117142b2c77d9448a97f7b65c6ac9fd9aee238d", size = 2027411 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/74/228a26ddad29c6672b805d9fd78e8d251cd04004fa7eed0e622096cd0250/pydantic_core-2.46.4-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:428e04521a40150c85216fc8b85e8d39fece235a9cf5e383761238c7fa9b96fb", size = 2102079 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/1f/8970b150a4b4365623ae00fc88603491f763c627311ae8031e3111356d6e/pydantic_core-2.46.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23ace664830ee0bfe014a0c7bc248b1f7f25ed7ad103852c317624a1083af462", size = 1952179 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/30/5211a831ae054928054b2f79731661087a2bc5c01e825c672b3a4a8f1b3e/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ce5c1d2a8b27468f433ca974829c44060b8097eedc39933e3c206a90ee49c4a9", size = 1978926 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/e9/689668733b1eb67adeef047db3c2e8788fcf65a7fd9c9e2b46b7744fe245/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7283d57845ecf5a163403eb0702dfc220cc4fbdd18919cb5ccea4f95ee1cdab4", size = 2046785 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/d9/6715260422ff50a2109878fd24d948a6c3446bb2664f34ee78cd972b3acd/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8daafc69c93ee8a0204506a3b6b30f586ef54028f52aeeeb5c4cfc5184fd5914", size = 2228733 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/ae/fdb2f64316afca925640f8e70bb1a564b0ec2721c1389e25b8eb4bf9a299/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2213145bcc2ba85884d0ac63d222fece9209678f77b9b4d76f054c561adb28", size = 2307534 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/89/1d/8eff589b45bb8190a9d12c49cfad0f176a5cbd1534908a6b5125e2886239/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a5f930472650a82629163023e630d160863fce524c616f4e5186e5de9d9a49b", size = 2099732 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/d5/ee5a3366637fee41dee51a1fc91562dcf12ddbc68fda34e6b253da2324bb/pydantic_core-2.46.4-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:c1b3f518abeca3aa13c712fd202306e145abf59a18b094a6bafb2d2bbf59192c", size = 2129627 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/33/2414be571d2c6a6c4d08be21f9292b6d3fdb08949a97b6dfe985017821db/pydantic_core-2.46.4-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a7dd0b3ee80d90150e3495a3a13ac34dbcbfd4f012996a6a1d8900e91b5c0fb", size = 2179141 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/7b/79/7daa95be995be0eecc4cf75064cb33f9bbbfe3fe0158caf2f0d4a996a5c7/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:3fb702cd90b0446a3a1c5e470bfa0dd23c0233b676a9099ddcc964fa6ca13898", size = 2184325 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/cb/d0a382f5c0de8a222dc61c65348e0ce831b1f68e0a018450d31c2cace3a5/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:b8458003118a712e66286df6a707db01c52c0f52f7db8e4a38f0da1d3b94fc4e", size = 2323990 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/db/d9ba624cc4a5aced1598e88c04fdbd8310c8a69b9d38b9a3d39ce3a61ed7/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:372429a130e469c9cd698925ce5fc50940b7a1336b0d82038e63d5bbc4edc519", size = 2369978 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/20/d15df15ba918c423461905802bfd2981c3af0bfa0e40d05e13edbfa48bc3/pydantic_core-2.46.4-cp314-cp314-win32.whl", hash = "sha256:85bb3611ff1802f3ee7fdd7dbff26b56f343fb432d57a4728fdd49b6ef35e2f4", size = 1966354 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fc/b6/6b8de4c0a7d7ab3004c439c80c5c1e0a3e8d78bbae19379b01960383d9e5/pydantic_core-2.46.4-cp314-cp314-win_amd64.whl", hash = "sha256:811ff8e9c313ab425368bcbb36e5c4ebd7108c2bbf4e4089cfbb0b01eff63fac", size = 2072238 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/36/51eb763beec1f4cf59b1db243a7dcc39cbb41230f050a09b9d69faaf0a48/pydantic_core-2.46.4-cp314-cp314-win_arm64.whl", hash = "sha256:bfec22eab3c8cc2ceec0248aec886624116dc079afa027ecc8ad4a7e62010f8a", size = 2018251 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e8/91/855af51d625b23aa987116a19e231d2aaef9c4a415273ddc189b79a45fee/pydantic_core-2.46.4-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:af8244b2bef6aaad6d92cda81372de7f8c8d36c9f0c3ea36e827c60e7d9467a0", size = 2099593 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/fb/1b/8784a54c65edb5f49f0a14d6977cf1b209bba85a4c77445b255c2de58ab3/pydantic_core-2.46.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a4330cdbc57162e4b3aa303f588ba752257694c9c9be3e7ebb11b4aca659b5d", size = 1935226 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e8/e7/1955d28d1afc56dd4b3ad7cc0cf39df1b9852964cf16e5d13912756d6d6b/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c61fc04a3d840155ff08e475a04809278972fe6aef51e2720554e96367e34b", size = 1974605 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/e2/3fedbf0ba7a22850e6e9fd78117f1c0f10f950182344d8a6c535d468fdd8/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c50f2528cf200c5eed56faf3f4e22fcd5f38c157a8b78576e6ba3168ec35f000", size = 2030777 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/61/46be275fcaaba0b4f5b9669dd852267ce1ff616592dccf7a7845588df091/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cbe8b01f948de4286c74cdd6c667aceb38f5c1e26f0693b3983d9d74887c65e", size = 2236641 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/db/12e93e46a8bac9988be3c016860f83293daea8c716c029c9ace279036f2f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:617d7e2ca7dcb8c5cf6bcb8c59b8832c94b36196bbf1cbd1bfb56ed341905edd", size = 2286404 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e2/4a/4d8b19008f38d31c53b8219cfedc2e3d5de5fe99d90076b7e767de29274f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7027560ee92211647d0d34e3f7cd6f50da56399d26a9c8ad0da286d3869a53f3", size = 2109219 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/70/3cbc40978fefb7bb09c6708d40d4ad1a5d70fd7213c3d17f971de868ec1f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = 
"sha256:f99626688942fb746e545232e7726926f3be91b5975f8b55327665fafda991c7", size = 2110594 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/20/b8d36736216e29491125531685b2f9e61aa5b4b2599893f8268551da3338/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc3e9034a63de20e15e8ade85358bc6efc614008cab72898b4b4952bea0509ff", size = 2159542 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1d/a2/367df868eb584dacf6bf82a389272406d7178e301c4ac82545ab98bc2dd9/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:97e7cf2be5c77b7d1a9713a05605d49460d02c6078d38d8bef3cbe323c548424", size = 2168146 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/b8/4460f77f7e201893f649a29ab355dddd3beee8a97bcb1a320db414f9a06e/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:3bf92c5d0e00fefaab325a4d27828fe6b6e2a21848686b5b60d2d9eeb09d76c6", size = 2306309 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/c4/be2639293acd87dc8ddbcec41a73cee9b2ebf996fe6d892a1a74e88ad3f7/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:3ecbc122d18468d06ca279dc26a8c2e2d5acb10943bb35e36ae92096dc3b5565", size = 2369736 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/a6/9f9f380dbb301f67023bf8f707aaa75daadf84f7152d95c410fd7e81d994/pydantic_core-2.46.4-cp314-cp314t-win32.whl", hash = "sha256:e846ae7835bf0703ae43f534ab79a867146dadd59dc9ca5c8b53d5c8f7c9ef02", size = 1955575 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/1f/f1eb9eb350e795d1af8586289746f5c5677d16043040d63710e22abc43c9/pydantic_core-2.46.4-cp314-cp314t-win_amd64.whl", hash = "sha256:2108ba5c1c1eca18030634489dc544844144ee36357f2f9f780b93e7ddbb44b5", size = 2051624 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f6/d2/42dd53d0a85c27606f316d3aa5d2869c4e8470a5ed6dec30e4a1abe19192/pydantic_core-2.46.4-cp314-cp314t-win_arm64.whl", hash = 
"sha256:4fcbe087dbc2068af7eda3aa87634eba216dbda64d1ae73c8684b621d33f6596", size = 2017325 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/a4/73995fd4ebbb46ba0ee51e6fa049b8f02c40daebb762208feda8a6b7894d/pydantic_core-2.46.4-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:14d4edf427bdcf950a8a02d7cb44a08614388dd6e1bdcbf4f67504fa7887da9c", size = 2111589 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/7f/f37d3a5e8bfcc2e403f5c57a730f2d815693fb42119e8ea48b3789335af1/pydantic_core-2.46.4-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:0ce40cd7b21210e99342afafbd4d0f76d784eb5b1d60f3bdc566be4983c6c73b", size = 1944552 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/3c/d7eb777b3ff43e8433a4efb39a17aa8fd98a4ee8561a24a67ef5db07b2d6/pydantic_core-2.46.4-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90884113d8b48f760e9587002789ddd741e76ab9f89518cd1e43b1f1a52ec44b", size = 1982984 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/63/87/70b9f40170a81afd55ca26c9b2acb25c20d64bcfbf888fafecb3ba077d4c/pydantic_core-2.46.4-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66ce7632c22d837c95301830e111ad0128a32b8207533b60896a96c4915192ea", size = 2138417 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/1d/8987ad40f65ae1432753072f214fb5c74fe47ffbd0698bb9cbbb585664f8/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:1d8ba486450b14f3b1d63bc521d410ec7565e52f887b9fb671791886436a42f7", size = 2095527 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/d3/84c282a7eee1d3ac4c0377546ef5a1ea436ce26840d9ac3b7ed54a377507/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:3009f12e4e90b7f88b4f9adb1b0c4a3d58fe7820f3238c190047209d148026df", size = 1936024 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d7/ca/eac61596cdeb4d7e174d3dc0bd8a6238f14f75f97a24e7b7db4c7e7340a0/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad785e92e6dc634c21555edc8bd6b64957ab844541bcb96a1366c202951ae526", size = 1990696 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/c3/7c8b240552251faf6b3a957db200fcfbbcec36763c050428b601e0c9b83b/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00c603d540afdd6b80eb39f078f33ebd46211f02f33e34a32d9f053bba711de0", size = 2147590 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/cb/428de0385b6c8d44b716feba566abfacfbd23ee3c4439faa789a1456242f/pydantic_core-2.46.4-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:0c563b08bca408dc7f65f700633d8442fffb2421fc47b8101377e9fd65051ff0", size = 2112782 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/b5/6a17bdadd0fc1f170adfd05a20d37c832f52b117b4d9131da1f41bb097ce/pydantic_core-2.46.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:db06ffe51636ffe9ca531fe9023dd64bdd794be8754cb5df57c5498ae5b518a7", size = 1952146 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/dc/03734d80e362cd43ef65428e9de77c730ce7f2f11c60d2b1e1b39f0fbf99/pydantic_core-2.46.4-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:133878133d271ade3d41d1bfb2a45ec38dbdbda40bc065921c6b04e4630127e2", size = 2134492 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/df/5e5ffc085ed07cc22d298134d3d911c63e91f6a0eb91fe646750a3209910/pydantic_core-2.46.4-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9bc519fbf2b7578398853d815009ae5e4d4603d12f4e3f91da8c06852d3da3e9", size = 2156604 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/81/44/6e112a4253e56f5705467cbab7ab5e91ee7398ba3d56d358635958893d3e/pydantic_core-2.46.4-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c7a7bd4e39e8e4c12c39cd480356842b6a8a06e41b23a55a5e3e191718838ddf", size = 2183828 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/ad/5565071e937d8e752842ac241463944c9eb14c87e2d269f2658a5bd05e98/pydantic_core-2.46.4-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:d396ec2b979760aaf3218e76c24e65bd0aca24983298653b3a9d7a45f9e47b30", size = 2310000 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/c3/66883a5cec183e7fba4d024b4cbbe61851a63750ef606b0afecc46d1f2bf/pydantic_core-2.46.4-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:86e1a4418c6cd97d60c95c71164158eaf7324fae7b0923264016baa993eba6fc", size = 2361286 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/2d/69abac8f838090bbecd5df894befb2c2619e7996a98ddb949db9f3b93225/pydantic_core-2.46.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:d51026d73fcfd93610abc7b27789c26b313920fcfb20e27462d74a7f8b06e983", size = 2193071 }, -] - -[[package]] -name = "pydantic-settings" -version = "2.14.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "python-dotenv" }, - { name = "typing-inspection" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/b5/8f48e906c3e0205276e8bd8cb7512217a87b2685304d64be27cad5b3019f/pydantic_settings-2.14.2.tar.gz", hash = "sha256:c19dd64b19097f1de80184f0cc7b0272a13ae6e170cbf240a3e27e381ed14a5f", size = 237700 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/c1/6e422f34e569cf8e18df68d1939c81c099d2b61e4f7d9621c8a77560799c/pydantic_settings-2.14.2-py3-none-any.whl", hash = "sha256:a20c97b37910b6550d5ea50fbcc2d4187defe58cd57070b73863d069419c9440", size = 61715 }, -] - -[[package]] -name = "pygments" -version = "2.20.0" -source = { registry = 
"https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151 }, -] - -[[package]] -name = "pyjwt" -version = "2.13.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/81/58d0ac84e1ef3a3843791d6954d94c0b33d526c75eeb1efbce9d0a4c4077/pyjwt-2.13.0.tar.gz", hash = "sha256:41571c89ca91598c79e8ef18a2d07367d4810fbbd6f637794879baf1b7703423", size = 107515 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/5e/ecf12fdb62546d64385c158514e9b2b671f7832108ef2ecd2020ce0af2d1/pyjwt-2.13.0-py3-none-any.whl", hash = "sha256:66adcc2aff09b3f1bbd95fc1e1577df8ac8723c978552fd43304c8a290ac5728", size = 31274 }, -] - -[package.optional-dependencies] -crypto = [ - { name = "cryptography" }, -] - -[[package]] -name = "pyparsing" -version = "3.3.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/91/9c6ee907786a473bf81c5f53cf703ba0957b23ab84c264080fb5a450416f/pyparsing-3.3.2.tar.gz", hash = "sha256:c777f4d763f140633dcb6d8a3eda953bf7a214dc4eff598413c070bcdc117cbc", size = 6851574 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781 }, -] - -[[package]] -name = "pytest" -version = "9.1.1" -source = { registry = 
"https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "iniconfig" }, - { name = "packaging" }, - { name = "pluggy" }, - { name = "pygments" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e4/47/b9efed96c114afcfa3c9d3fe98a76a1d14c74a9e266d397cf6eb64be5e01/pytest-9.1.1.tar.gz", hash = "sha256:1088fbde8f2b49d95a549a195707afa7a76a3ce9bcadc26b6d71f0ffda5fe313", size = 1636369 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/25/1de2678b631f5a49215c6c96fff41ba892b0a34df68d6d80292b1b48aa7f/pytest-9.1.1-py3-none-any.whl", hash = "sha256:37a86b45efb9a47a61a36449063e8e18d0cab3161329fc099eb21783169c4f0c", size = 386536 }, -] - -[[package]] -name = "python-datauri" -version = "3.0.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "cached-property" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/3b/8a9a2ec12424a8617678d663fa70de43d917d3590416d3a2b9c7fc065d5b/python_datauri-3.0.2.tar.gz", hash = "sha256:d77c37f1f734fc035de424e643464990b2b840e9b8c7c1817c11fca19b71eeb7", size = 9746 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/b6/3332df034d7f322506f2267517b051cd3605e129ecc7f9d46a6fbd540279/python_datauri-3.0.2-py2.py3-none-any.whl", hash = "sha256:b365690a1d7d1b7777009eb11a86bd069db4f194e50f4f871a47302f0587c144", size = 5803 }, -] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, -] - -[[package]] -name = "python-dotenv" -version = "1.2.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101 }, -] - -[[package]] -name = "python-engineio" -version = "4.13.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "simple-websocket" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/a0/f75491f942184d9960b15e763270f765fe9f239745ca5f9e16289011aed4/python_engineio-4.13.3.tar.gz", hash = "sha256:572b7783e341fed21edbc7cea297ccd378dad79265fdde96aa4664420a7c06c9", size = 79734 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/96/82f6328e410515fab21d5602ba35b9377a47b5a141a0c1f9efa00ce21eb4/python_engineio-4.13.3-py3-none-any.whl", hash = "sha256:1f60ecaf1358190f0e26c48c578a60428dc02a8f1295bc3dbf53d1b31116821f", size = 59993 }, -] - -[[package]] -name = "python-frontmatter" -version = "1.3.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "pyyaml" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/e8/79cbe69864d44f3b48e70ebee0a872a7d5a4e7150c9f8577ed7a5beefff0/python_frontmatter-1.3.0.tar.gz", hash = 
"sha256:acc73e477a568dc2a25c9e130c6c68ae8daa8c204c8f7e813db47d6a7280dcf2", size = 8322 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/a3/17c284b4f4d8ad50f0f9ba70ad8fcc35c777aeafcdbbffdd91bbdc5ab379/python_frontmatter-1.3.0-py3-none-any.whl", hash = "sha256:9f7dd9260bec99044219159a329f64f039087f9d1a2124c9442556f2fe6f82ec", size = 10562 }, -] - -[[package]] -name = "python-multipart" -version = "0.0.32" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/42/55c32bb9b12693c092ad250a0e82edb5b31ddeda6eb772de5f308b3804ad/python_multipart-0.0.32.tar.gz", hash = "sha256:be54b7f3fa167bb83e4fcd936b887b708f4e57fe75911c02aebf53efaf8d938e", size = 46881 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e1/04/e8135ebd1ad02c56ec633277529b2602ff99ff634be76cdba5744cf554fd/python_multipart-0.0.32-py3-none-any.whl", hash = "sha256:ff6d3f776f16878c894e52e107296ffc890e913c611b1a4ec6c44e2821fe2e23", size = 30042 }, -] - -[[package]] -name = "python-socketio" -version = "5.16.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "bidict" }, - { name = "python-engineio" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/2d/ffce71017c106b75099fea569df6518c63fee5d6202ce0cfe7b01e6f22c3/python_socketio-5.16.3.tar.gz", hash = "sha256:89b136f677ae65607a84cecda9b4d6c5377b40a97582c504c25df89af16d520e", size = 128095 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/38/8c5e72d53ff8eb27497c4f268a7f6d9121e727a50b65248288ad79a93053/python_socketio-5.16.3-py3-none-any.whl", hash = "sha256:e7ad14202a5e6448824c7c2f86161d04e13dec05992257df5c709e6a2798c041", size = 82087 }, -] - -[[package]] -name = "pytz" -version = "2026.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ff/46/dd499ec9038423421951e4fad73051febaa13d2df82b4064f87af8b8c0c3/pytz-2026.2.tar.gz", hash = "sha256:0e60b47b29f21574376f218fe21abc009894a2321ea16c6754f3cad6eb7cdd6a", size = 320861 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/dd/96da98f892250475bdf2328112d7468abdd4acc7b902b6af23f4ed958ea0/pytz-2026.2-py2.py3-none-any.whl", hash = "sha256:04156e608bee23d3792fd45c94ae47fae1036688e75032eea2e3bf0323d1f126", size = 510141 }, -] - -[[package]] -name = "pywin32" -version = "312" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/f5/10a6e845a00fc5e7afd0a988b744f403d4d57162a28d160a093c4d9322f0/pywin32-312-cp311-cp311-win32.whl", hash = "sha256:17948aeadbdb091f0ced6ef0841620794e68327b94ee415571c1203594b7215c", size = 6362659 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/c4/dcd2d62b5944b6d5db53413a5899016ccd57ffcb7278f3f81655d25d2027/pywin32-312-cp311-cp311-win_amd64.whl", hash = "sha256:d11417d84412f859b722fad0841b3614459ed0047f7542d8362e77884f6b6e8a", size = 6928825 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/56/3cbb433fe4501cdba2eb9040f56a4e1a8243faa4186b25295564d1a7a79d/pywin32-312-cp311-cp311-win_arm64.whl", hash = "sha256:b2200a054ca6d6625c4842fc56a4976a4b47f96b73dbe5538c3f813a80359f47", size = 6721875 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/ff/32aa7d2ed0ab12b323aaa64f9b75e6ad4f8fd09f9ccfc28c79414d46838d/pywin32-312-cp312-cp312-win32.whl", hash = "sha256:dab4f65ac9c4e48400a2a0530c46c3c579cd5905ecd11b80692373915269208b", size = 6371877 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/d9/77040d3b43df3f3be32ea289433d660d2727f5ba327bc73be835127d9d60/pywin32-312-cp312-cp312-win_amd64.whl", hash = "sha256:b457f6d628a47e8a7346ce22acb7e1a46a4a78b52e1d17e1af56871bd19a93bc", size = 6914841 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e3/cc/7b1ec671775756020a0ee7f4feeaf3c568f0ab86bd3900088cf986937a92/pywin32-312-cp312-cp312-win_arm64.whl", hash = "sha256:6017c58e12f6809fbb0555b75df144c2922a9ffd18e4b9b5afa863b6c1a9d950", size = 6727901 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2d/41/12fbfd7f36ed2146d8bc9de96c2741296bf0d490b98508496cff322e274c/pywin32-312-cp313-cp313-win32.whl", hash = "sha256:7a27df850933d16a8eabfbaeb73d52b273e2da667f80d70b01a89d1f6828d02c", size = 6370184 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/db/36a78e3403099d31d9746d13fdcde5accc43c1155f375a34d15983a479a7/pywin32-312-cp313-cp313-win_amd64.whl", hash = "sha256:c53e878d15a1c44788082bfe712a905433473aa38f86375b7cf8b45e3acbaaf9", size = 6914298 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/84/37/c1697194092b76de9ed47ca124323f02c57ffc8a45c06f88a3d5acaf01eb/pywin32-312-cp313-cp313-win_arm64.whl", hash = "sha256:59aba5d5940842075343a5ddc6b11f1cdf0d1567fe745290359dfbcc7c2eb831", size = 6727640 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fc/2b/1f3cded5822fd49c02f40544cbb5f58c7cfd6b1694869fd476cb6170ee97/pywin32-312-cp314-cp314-win32.whl", hash = "sha256:a77a90fbb6881238d2ca9c6fd797b25817f3768fe78d214a90137ff055a75f5b", size = 6468928 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/21/82/3bf86d2e2808902013132e1ce905a7da0da53790f3836c64bf44d55e24f3/pywin32-312-cp314-cp314-win_amd64.whl", hash = "sha256:a4dd3a848290ef724347b19f301045831d8e802fa4464f491b98b1e0a081432e", size = 7024157 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/0e/73f6d6800b4f27655abd9e9f6aaeaefcddb2b946e4674efa2bab184a7f7b/pywin32-312-cp314-cp314-win_arm64.whl", hash = "sha256:9fce94568364e0155e6dfb781ac5d95903be8baf28670632beab1b523f300daa", size = 6839598 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/61/caa39686032d2ebdd04ff0ab5cbe163126c0066d98e00c9018646e42393b/pywin32-312-cp315-cp315-win32.whl", hash = 
"sha256:5c1fbe4a937a73ae9297384a3da38518cbc694c68ad8a809b2e19acd350f03ed", size = 6471159 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/cd/7e1de64a4a6f69c04214169657ccab0d93a670ea50e35eb8f489d7378249/pywin32-312-cp315-cp315-win_amd64.whl", hash = "sha256:c2f03a0f73f804a13c2735b99392b0cd426bb4f2c4d0178e5ac966a0f21618d5", size = 7025293 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/ed/4532e9388e65fa16b46776ef47ad631a64eda1631884488af707666350ed/pywin32-312-cp315-cp315-win_arm64.whl", hash = "sha256:a8597d28f267b39074aef51fa593530082b39cbe5a074226096857b1fed2dfb9", size = 6840337 }, -] - -[[package]] -name = "pyyaml" -version = "6.0.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 
137658 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 
143912 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 }, -] - -[[package]] -name = "referencing" -version = "0.37.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "attrs" }, - { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766 }, -] - -[[package]] -name = "requests" -version = "2.34.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "certifi" }, - { name = "charset-normalizer" }, - { name = "idna" }, - { name = "urllib3" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/c3/e2a2b89f2d3e2179abd6d00ebd70bff6273f37fb3e0cc209f48b39d00cbf/requests-2.34.2.tar.gz", hash = "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed", size = 142856 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a0/f4/c67b0b3f1b9245e8d266f0f112c500d50e5b4e83cb6f3b71b6528104182a/requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0", size = 73075 }, -] - -[[package]] -name = "rich" -version = "15.0.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "markdown-it-py" }, - { name = "pygments" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654 }, -] - -[[package]] -name = "rpds-py" -version = "2026.5.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2e/43/25a8dcd3feedd735039a8f0b5b7e3b118232b5eae288c4fd9ab200d41094/rpds_py-2026.5.1.tar.gz", hash = "sha256:07b24fea40541e28570e5b795a4a38fbdcd12550c06bd0748005ecc8116ca256", size = 64459 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/a0/acf8b6fc20bfdcd3a45bd3f57680fb198e157b7e997b9123b10763798bd2/rpds_py-2026.5.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3397a5ed7174dc2786bb214030232fc36fe8e5584fec43a9952cc542b1a12036", size = 355609 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/95/f8203fd997484b1690a6869cd0e503b6c3c6be55b0ecc36d1a491fe742f0/rpds_py-2026.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:99ab6ba7bfa2cb0f96a04e3652355bf04e3f51aceb1e943b8541dab7ba4828cc", size = 348460 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/33/8c/b47326ad2f0be545a5e5c1a55937a12afaea7d392ba2837bb9680f57e6c9/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0efbe45632665e53e3db8fe1e5692db58fc5cb9bab4459d570b83efefe11164", size = 381031 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/22/0b/e83bbd97ffac6f6389b605cd4e1c8ac5761dc7e977769c9255d8c5adb7bd/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:01d17b29c0c23d82b1f4751147ec49cf451f1fc2554eb9ef5f957e55d2656ead", size = 387121 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/0e/d285d1bc8864245919c61e1ca82263e4a66d337759c3a4cef72766ff9afc/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7559f72b94ae52659086c595dfa017cde03155f7832071d30959049052cb3ece", size = 501026 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/06/ccb2109a1e543437b5e43816f2b43b9554cc6783145528a4e3711e05c011/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e25b7088f9ccbfc0dfcaa52bf969300ca229e10ecf758974ebcbb080a4b37bb", size = 391865 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/33/237173db1cfef10105b3839a24de00eb8d2a523711add4632447cdf0aedd/rpds_py-2026.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:613fc4ee9eaef26dc5840666214dd6fbcebcf32f46e76f4abc473059f4e13dda", size = 378012 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/64/1eae54e34d5161f9969295e80bd6b62a55f2b6ac5f2a5b60d02c2140e758/rpds_py-2026.5.1-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:85264a90ff4c05c1568dd65f5921c837614b67c60358fb4c17df3b7f2e90690a", size = 391111 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/34/5bb334a5a0f65d77869217c4654f34c78a7d11b93938a3c076a2edeafc52/rpds_py-2026.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:fe71bca7d547acb17027c7fd1624ff8aae623499c498d3e7011182c4de5c25e0", size = 409225 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/16/0f/007ec21283b5b040b4ec3bd95e0402591e22bfa7d5c93dfe01c465c2d2d7/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05fa4f41f37ec97c9c260441a940450a192f78d774d2b097eee1379f1e1246a", size = 556487 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/10/5437c94508169b6b22d8418fef7a66e9ffb5f3b9e9c94460f2eedafe06ff/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df1d2a1996755b24b9ecee92cb4d36c28f86f464a6a173349c26bab41e94b8c2", size = 620798 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/d5/9937dce4d6bda74157b954e7d1460db05a22f5929dccfeeba1ed27a93df0/rpds_py-2026.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8895840ac4809e5f60c88fd07617cd71326e73d6e5a8aa783c5c0f7c24985de2", size = 584053 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6c/31/750617dd0ae1752471bf43f9e41d263398fae7cde7849d23b8574a70e617/rpds_py-2026.5.1-cp311-cp311-win32.whl", hash = "sha256:3684a59b158a7683aaeb8e25352e9a9dd2122cec78f2d8530266e4f91b4c7b3f", size = 214390 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/bb/3dcab0e1d9516303f2eb672a5d6f62eca5a69e2886301e9c8c54b520c39b/rpds_py-2026.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:7bd530e6a530bb3ea892f194fafa455f3516ac25ecf7143fd33c09be62b0470a", size = 231097 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/49/d6/c6bbf5cb1cf12b9732df8074b57f6ef8341ba884c95d40632ae8bddb44e4/rpds_py-2026.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:0a5ae4dbe43c1076983b72616496919872ae7bbe7a1e21cc48336bc3154d130b", size = 226361 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d4/e7/a78582dc57caa592dcc7d4fb69b61390561e908eb3d2f5df5928a8e354c0/rpds_py-2026.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3abe24a66e57adcfa645d718063a5fa5103ecc71ddbf26d78af8f9368018ff1d", size = 353040 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a3/43/35e3f136343aef451e545ce8c38d36c2f93c0ed88703db8b64ba2b205c68/rpds_py-2026.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58b1d94308ddf0b1982f61f2eb54bf92997c9ece8a8093ef014250f4a517906c", size = 345775 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/e1/0f2160c5982d3157734d5cb3ed63d8b2d583a73c9864f77b666449f32cf8/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa92420128dadce7f54bd73ba1825a273e9268fe9e35dbf7e6362890efa4e08", size = 376329 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/11/ee0ba42aff83bf4effdbc576673c6be64c5e173978c3f6d537e94482f77d/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca653c6546386227cd9800d1bef6a348099acf8db4250341da6d90f663d6dfcb", size = 383539 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/df/d94aa6a499d4ac40afe2d7620f2c597fd3c0f182e854ad7cf3f596a81cb6/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66c93681c4729e4e3ecba31b8179fae083ff3118841672835140338b4b9867c1", size = 494674 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/75/33d30f43bb2f458de11979486a591b1bf6e5651765ed1704c6197c2dc773/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40ff257542e04796880e011e15cd4dc21c2599975df2aaa8f2c8495ca574e1a5", size = 389268 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/1e/2c9096fc19d5fd084b0184ca2b651e659aa0a37e6fdbecf6ece47f147fe1/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6825cc329b290e93c5f6a9be2393118a763f6ccf6abd83704e0c102ca583644", size = 376280 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/e5/61ec9f8be8211ea7f48448195549e4aaf02004083475493b0e137702ecb2/rpds_py-2026.5.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = 
"sha256:de42116e69cb53b911cc34aee5ab98f36c597b822545045d49e938818b99e5e4", size = 387233 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0d/ca/bcec1005c4f4a234f92a29078631fee49206c7265ccae966f18fd332e80e/rpds_py-2026.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0f920015df2a504bebaba6d4c31ccf3fcf942f92655c086da30b671aad19aa6", size = 405009 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/e6/4d5718c5cf26c522dc7c9999e238da1e77380b81d0c5d1df11e271ddfeb1/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0408a24e44feb919423dc6d9da677cb5cddb894d2ca9e763967d156d9c60fab4", size = 553113 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d4/25/2ee807bdb3e1f0b7eddf7782acd5665a8b5205a331a7d7244a52c4812fd9/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cea68bcd53467561ae2f96a6bdad1544299ba97b5b0ddcd5ac3d376e5c781c24", size = 618838 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/c1/7d4c26f167f8c41501cc073d30ee22082b16ce358cf5b00ec97cbc7804ea/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4be8b1d2a705cc37d08256004e1d07de143fa0075c8e85a3df020b776f62b732", size = 582436 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/1d/9d12b0a337bab46f4769f8857f4007e3b2d639e14f9a44a0efe157696e64/rpds_py-2026.5.1-cp312-cp312-win32.whl", hash = "sha256:6736718bd4fc49cbcb538ba30516fdbef161522acefb739657d48b97bd864fed", size = 212734 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/93/e4116f2de7f56bc7406a76033dc501811ddeb22b7f056b92d632871ebb0c/rpds_py-2026.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:0a7d1eec967df0e9b22614a5e177622e0c89611d03727fa0cb48e45028907870", size = 229045 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/53/6c3419d85eb2ec5938a37627c585b42d76a63bb731d6e42ed4b079ebf486/rpds_py-2026.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:1841d067089e117142d79b98aa0df2f08b52f2ecc1819dd2700636c0db74a473", size = 223967 }, 
- { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6c/32/14c961ad295f490eb0849ada8b79683e93a59b9de3afdd983eaf55fa6867/rpds_py-2026.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:efef4ac29c6ff495531eb17ee705b62841ecaa291b7c7077e848ea03e237164d", size = 352787 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/bb/d1b85117967c11191441a7274ae616c65d93901d082c588f89a50a8da5ae/rpds_py-2026.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c39f5b67a8a2e67179ada2a954227d670fe65fa9098457f698f56ddf248709b3", size = 345179 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/46/d84105f062e626a1b233f863907288a4708c2d833b8b4c6fb2764bc080c0/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5c30f3f04eef4fbd362226a6f31d7c8895ca4fbb6e0b790f6890a98d8da8559", size = 376173 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e2/ae/469d7959ce5b1201e1de135dc735b86db3b35dd0d1734f6a44246d5f061c/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:277f6c82f0580848796c7ecc8a7173aa3bfb928e4ff831261c2f60a81dc270db", size = 383162 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/a2/57853d31a1116a561aa072794602ad3f6341e18d70a8523f1bd5b9fc1e5a/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63c2c4c213f1a4e3f3de28ecab029dbdee976324e729c0d7a55211be72576b02", size = 495093 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/99/63/3a8eabcad9314b7daf5c65f451d2c33d989235cd8a5762186cf2c3f5a4f8/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3350ec808fb538fe71a1f94dfaa0e29c598dfad805ce49f0caec5ae3183c652b", size = 389829 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/25/05678d97fc25e2622df14dc530fb82023174ecfff6733991ed0d78f167bd/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b1b964e3ab599e718dc46c018d104b1ebc007cbc6567d827c94a687fca56d77e", size = 374786 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/d1/8c90b6431e80a3b91b284a5c7c8c0c4f9c006444d90477a740d6e0f9c694/rpds_py-2026.5.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:19cb09fab7b7fc96b2a6e28f2e34b72a3705ff27b37edb77455316e5d3f3dc9b", size = 386920 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ff/99/4638f672ab356682d633ee0da9255f5b67ce6efd0b85eb94ad3e255e65a5/rpds_py-2026.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abe76bcdba31e576cb83eeb8797aa0d882b738fef6dc65d0601fc753806a5b46", size = 405059 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/3f/3546524b6eb4cc2e1f363a3d638fa52f6c24faae3500c25fb488b02f1740/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8bff7073db3899158fff55ebf57b113a67030af26f80a18978f9f0aa60250ddf", size = 553030 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c6/c3/7b3388c796fcf471bd17194242d4dc1a7608567c0fa422bcc1c5e79f9c1e/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8ba264fa49be666cd9cc56bf34ec7002fb3d27a4aee5bcb4d43d0d18feb1bb6f", size = 618975 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/1e/a3cb07f2795075d1d88efddae2f541359fde5f08c81ee114c29c2949c90a/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4860b603ddda0475a8885499b3729e90229d480105b42651962a5397d995fa89", size = 581178 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/74/e758c03a5ef46f04c37f2651a2893db846d569ba8a7bca469d4b58939bcd/rpds_py-2026.5.1-cp313-cp313-win32.whl", hash = "sha256:7944270ae71383f6e2657dd7d5ce4eeb4ac2d0059a6738f0510583d462ab4842", size = 212481 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/ec/a2aca432db9c7359b40fa393eeeaa0d166c2f70175be956e75fa24197c44/rpds_py-2026.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:88647f43a73c4e01be19b04ceef0c8d3a1958153604d13c773becd8016f2a0cf", 
size = 228519 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/60/a73bfdd45b096574556acf303bbd9fa9eed36ca8a818b514e2a5d5fe2b9d/rpds_py-2026.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:453895624ecf7db7063b1004e44037522bbaef9ff6a945e59bc71662d7a03abd", size = 223446 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/e2/408105fd611823f00882aea810f3989a30d26b1bab8b6beb20f98c724e0e/rpds_py-2026.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:b4e4bc98639ec915f512fde3aa7a95e0041d95d9c3cc86eea841fa63cb1e8600", size = 355287 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/58/5c4a43436843c90d0f6d19f82c200c80e3843ca9fa07b237623327f6d384/rpds_py-2026.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cacedb7a6e167680acba45ad5716e89067d225dc80da0d7040cae8c81d4572fa", size = 347033 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/c2/1a71acdacaf4e259b10278fb87b039ded3cf80041bcd89dd8a3ea702ded6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68700371c5d7ae1412862ddfa719090925c93ecf351c566d66f09d04b136ea00", size = 376891 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/c8/535f3d9b65addd8e28aa87b83c6e526799c3717a88273db8ea795beeef7a/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:296c799becfa849c779c8725494fe9ed94959ed886787df4364b058465bad7f0", size = 385646 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/91/dc033f313345c354ade914dbe73cdb90b615a4409ea02430d5356794f3d8/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3858b908218ee108d0bbfb2095ccc237648053c9bf98affad7cb079acaf1d97", size = 498830 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/27/fc/90fcbea459dbb8ddc18a2e0fd1de9412b48bc84ffff2db771cf714bacfd6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:4fb8d2e7cb2f850b169806d61d1b991738acec96500a75c30f49caf064ce7cef", size = 392830 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/1d/46cd11a228c9750684a798d98f878be6f614aa762438da7378f035e79e35/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b74c10ed6a8f190f4287f53bcfea348b92a84a9c9f70d30183d1e6172d580d", size = 379613 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/24/4a/d9b0c6af3a1de03eb93741bbe8be2bdce84d8fda8224f3005451d86df389/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:b9a6528956191c48c52294a592dbd4a8386d7048bdb25c0efcb6b966466c6d83", size = 388183 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/b4/db7aaabdda6d020afc87d981bcc2f57a434c7dec60ecfc2ab3dd50b20351/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af03e34e860047bc7a352b842856fcf78798fbb81132cc98bd2f907ab4eb9cd2", size = 408578 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/d6/070f6a41cbb343e2ac4171859bf3f3623e0ab002f72619d6d505313ec2de/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fea6e836d10abbe191d557d33bd58bd5987725fe63aa1eefe557d230209855bd", size = 553573 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/ab/1a71ea3589c4345dac0a0518f0e6a031cb42689277851b683c46d27463a5/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:fc0c0f878ea770a0a8a462456c5ad36fc9fe6358e6b76fdadc7f17575e0b8bf1", size = 620861 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/22/9bf80a56069c0c443fcfefac639a86a744550a2898817a6dfd3e26654924/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e0b360f316d966b048b085857630b3cc51f3db2f07b06f440eac8f695374d1e3", size = 585633 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/68/3b2c0a75c9e04125696f84ebdbbf304acf5a40b58ba4481cdb98a922c3ba/rpds_py-2026.5.1-cp313-cp313t-win32.whl", hash = 
"sha256:a2999883eedf72fdfb7520b92c7d4ec2572a71ff40239377aa604cc529eecafc", size = 210074 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e7/8b/609157d5a25d37d4f29f92840ba531f416907c34ae5c5739dd21fc2bef98/rpds_py-2026.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e07be2a9d7122bd6e82dea89814ef8dc893feb1aae97fec1630f3263bbb30e55", size = 228635 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d4/6f/19c1918a4b590d8de87e712e4abe4b3875771eff60216fb6153cf6665c68/rpds_py-2026.5.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:1f2c391c3059798093b65df23aca2cac150460ae9c630d99dec83d703d9485b9", size = 349756 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/60/a06fe7da34eca79dacbf958a2ba0c6eea85bc2b29de20080bf40f72f66fa/rpds_py-2026.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:413b424f7c4ee65ab5e5be91f5731be0f8b41a1ee2b12dfe810d716312e95a78", size = 343831 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bf/ec/b2333b97b90e2a6ef6ca8ad386ee284968e74bcfe113b3f1a8d9036429a9/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c595a1d9255dce0599e13130d1440ab2506654f2b50294226ee06402f8fef63", size = 375127 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/7f/e00aae54067f2b488c4637961d5f58204d470795fc791085fa3f15060d2e/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c27c5f6102eac8c03e7595a00827a53b271ba40a53b59ff8709170e0855ea4a", size = 379034 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/cc/423999bbb8ae8dc93c77fc1d5e984ade5eb89d237d3bb884ccfa72ae2890/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c7fcf61d44cacecaf3aea542b0e053db77972a4573e7ceda16fb2b399161195", size = 490823 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/0f/aa/c671bf660f12e68d3c52ff86c7066ed1372df5a0f4f2ff584e419b8207e7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c817a189d4ee14290420e5ff051e4dd6baa13f3edf84685071dee07a6d538ee", size = 388144 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/19/c8/d63bb75b68afe77b229e3021c6031bcaf01da5db5b0e69d0d10f9ba679a7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21846aac0ed2e0589f38c12dc44e77bb64e494b771eadbcf169cba00566ba7ba", size = 371959 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/35/c51122014d8274ff37dc606d60049c3db7d83da02b5b282511e5a906a9a6/rpds_py-2026.5.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b317c87a13f769a4e787819bd508aaa5d69aa09b0880de9af6d3a8a54571cdec", size = 383558 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e3/f9/2790cb99c136a5363acdeacf5c27c56f3de0d4118a1f48fca83404c99c89/rpds_py-2026.5.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce87129d9f2c14fa6c4a8601fb80eb4488c80d38a20cd13758ef11123e14995d", size = 402789 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/1b/e4fb584f8c75d35c38150ff6a332cda949e6f97acba1f4fd123b14ab56fe/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9cdddb6c1207d284d94fd1530adf57fbd797fe7c4b8704ba85f49414f2557e7d", size = 551405 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/f7/a6731b4216cb3793ea1af5391da240f5683dacc0d13e034fe5fc3503f240/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4e237e139f94d3c036fd28eb9f564c99055476ff4ff05cd42be55ce349b5aa02", size = 616975 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/ea/2e051a81d95d8e63f4b35a1c463a87e8766bc3d083c067c5dfb6bf220747/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ed0954b524873214369184a9c82b0eaa45a3fbb9a798cd95b17e0d98499e7ea0", size = 578701 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/65/56/b5f6fdb2083e32bca8a8993d89e70db114b4756c9e2c38421328126689d2/rpds_py-2026.5.1-cp314-cp314-win32.whl", hash = "sha256:2d88621d6a7d4dfa633d21abe90f280bb205274e16b1d1e61c6ad4640b2453b7", size = 209806 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/80/65a5aa96c155e611d1ed844e4e1f57f3e36b021f396d9f8585d756e6b90d/rpds_py-2026.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:cef8ac28d26f4dda3533060c20fbf80a325458fa9fd23ea72a73cdfa8e978838", size = 225985 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/27/7c/ad185212e87b05f196daef92bc5f3caf07298eb47c295b5585c3dd3093ac/rpds_py-2026.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:eaaea962c68cdc68d4a533ba985ab8e9484277910bbfaa2ab3ef7732667bfed8", size = 221219 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/58/e14ae18759020334646b031e708ab4158d653a938822bfb7b95ef2e93aa3/rpds_py-2026.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:21942f52dbbd5f8758bf021213d28bd45c39e873e65e2407faf5f1846f5761ad", size = 352148 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/9b/5f4a1e2f960bca3ac5d052b139dd31eed97b259f9d909173821760d542e8/rpds_py-2026.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f414556f6e3958300ff941e40c9f97e3dc9774ddd1b3434c475d73dd354bbed3", size = 345196 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/71/1d9574d6a2fa20ab60eaa55c7467f5aa20cbc770f341a05f09c0876f59e2/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1013a8625c74043210190b246f5b1551e09757c1f356c6e4160ef96c5bc081", size = 374981 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/9a/37e99f4915a80aa71670263c1267f7ae0af95f53a3f61e6c3bdc016d4515/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cc68e231a77a5f0d774ae278a1f8e55c0456501820847c1e4efb3829f3441df6", size = 379961 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a8/ff/6e73f74b89d2e0715e0fc86b7dde893f9a61ae2f9b256ff3bdfe41ac4e94/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9baffb505aff33acc69b422a19f77806680f3c8632227d79f48de8a810d1c2c5", size = 495965 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ea/e0/425faba25f59d74d4638b267f7c7a80e8649d2ef4db10a19b0c4a71e6e6f/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8d2f912928d426e8cfa396f7f3f8d29a59e6689c86dcca3c420730c1096322b", size = 389526 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c6/76/7a41960e3fddae47fab43a28684d5da981401dffd88253de0944148654cb/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f628283be835db980c941767d41c9a27b5239e54ba0a9c1335247e82406964", size = 376190 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/27/60/5f38dc70824fc6951b51d35377e577a3a3a4c81a6769cc5a2de25ebe0ad1/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:1ebb2f0ab7e16132995a72de805170e0203df0c3dd22e1ef1cd1fdd90bd7a131", size = 383921 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/1a/d60a38caa1505f4b9483c3fbbde12c94e1079154f4f401a6da96f7e77621/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f3df3d16ded76f1f8c9cdebd0e1ea55fdf4c23b812de189814da7cf229c22a81", size = 404766 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/ff/602fd3f174d6425f0bce05ad0dfbec0e96b38d0f7d08a79af5aa20083885/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9af8905b8f854990e40d5206aa5ac58d9b0fe0b7f351ff2bb086c20f6c8c6a47", size = 551343 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/c1/1be13327acdbead3eca1fde03b6a34dbb011f1e864e217f0d32cc1779a7f/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:036a36a87fb1cd3b214d11c4b3c4f7d2ddad933625dca1c900b56a057c07740a", size = 
618502 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/d7/afb49b49d7f2be8b7ba1a9f0977fa5168003437b93086726f066544e8351/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ae3853454fe9ef283a03c96c2d835d39e84b14643a9d62c82ef0fb87d702ca", size = 581916 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/25/d1/dbef8c1f8a10f07beb62b5f054e20099fd9924b3ec001b8f0b6ac7813a85/rpds_py-2026.5.1-cp314-cp314t-win32.whl", hash = "sha256:6c3d771a46ec18b12af06ce36243a9a80b07a5d0515236332d90863ca8bb326a", size = 207855 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/72/bfa4e61ab8e7dc1c8adf397e05e6cbdd4239357bd72b248d3de662f23915/rpds_py-2026.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c93c629be4636cf54337bd5f06c104d55e42ced54d681f6fe21ae510a65116f6", size = 225422 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/27/3a/7b5da92b640f67b6717ccafc83cdd06bfa7ff2395c3685c68922bb54d703/rpds_py-2026.5.1-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:3574b55c604b8f75dacb007136508bbc0db406e626301778096a133327e7f2fb", size = 349576 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/8a/2aafd7ad355a1bd48ca76e2262b74b15e6432b5a1efe150efd4d779cd55d/rpds_py-2026.5.1-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:94068eb3ae6d43f5a786b7db96a406a34e6d5c24489feef32fd6e8946ea7b291", size = 343640 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f7/7d/6c9523c1abbe840a1b7fba3c516d48e1d3487cc80fea4366c4071cf56784/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a5b10e8ce894825f380a8f1b6444cf73c294dfea62afbb2d13e3a9e630cec1", size = 375322 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/5d/0b7b03fb1dc509321f01de3149784ab773e34c8573022029af8076afcb9c/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc09f82e63d4bcd58149572f857a431bae851dc747e313c3b5bdf7abb907fda8", size = 379066 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d7/e2/8ef6012999ebf1cb1c22f876d9ce5e63d960fd4631d2af3202d3f480aa25/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e10464d17df3b582745c25cec695cb9558bca2cb6ddb631aee1787fc72c767b2", size = 494586 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/af/1eeb029bec67582c226b7809172207cd005073af4ebd906e65ff494f4983/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba05adbf15d994c38ec0b7ab32e858e5110c21e9009a00a86545fd220f84e038", size = 388415 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/23/ffbe10711c4d766c1cab0557d6906c074f795814863c67b351355d29354a/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77c004fdc7b891967106f78ddfd7b076bfe6813c6139c6fff6aed3bcaa960b26", size = 372427 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bd/3a/30ba4a6ad457e5b070c18d742a33fb77d8d922b565cc881f8a5313d63bfe/rpds_py-2026.5.1-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:83bcf894486c9d78dd290d3c0124ff6dd8875d3025e2090a8ec49fcc37c55fdd", size = 383615 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/69/62e242b53ce39c0814bd24e1a6e6eba6c92be716277745f317f9540a2e7b/rpds_py-2026.5.1-cp315-cp315-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3df104083952a0e0c6f10de33e440eabe98fb6317d23e1a58c68f6df08d01b9", size = 402786 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/c1/a770b9c186928a1ed0f7e6d7ae50e7f3950ed23e3f9e366dbc8e38cb55de/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:980450826cf22e133c57e0835070bdd0dd3f73b9b708c3ce223def2cb9469e14", size = 551583 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/21/7c/68e8579b95375b70d2a963103c42e705856cdb98569258bd807f4423891c/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_i686.whl", hash = "sha256:205dde846f24332ab0c1188699a043b8d165b79bb84529ce272c45048ff6be01", size = 616941 
}, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/a1/a6135aed5730ff03ab957182259987ac11e55fb392a28dc6f0592048a280/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:3966b82dd563176396df030f3dd52a6e54cb69b718e95e78bd555ed3d1e0185d", size = 578349 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/6e/f24201a76a84e6c49d0bdfdfcb735210e21701e9b21c5bfc0ba497dd62f6/rpds_py-2026.5.1-cp315-cp315-win32.whl", hash = "sha256:7818f8d0a415be74d2be3590b0a1c1f463a642f4d0217e7d10602dceef5b79aa", size = 209922 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/e4/966bc240bb0485fc265278f6de44d05834bf0b3618886e0b22e33d54c49a/rpds_py-2026.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:b3cc20c0d800af78fd0fac68086e28c1856cec51ea528bb81ea851aa40d39325", size = 226003 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/5c/a15a59269cd5e74472734516c73795c15eccfc841b3d4b0228c3f53f19d0/rpds_py-2026.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:3609e9939a8a76cd904cf98a3f1f13b5dc7e150adeaee89e0ea09652ea213e16", size = 221245 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/22/135ce03804e179a71ceb13be095deda4a279bc88f7a6b8fa161c5ad44e12/rpds_py-2026.5.1-cp315-cp315t-macosx_10_12_x86_64.whl", hash = "sha256:5d333a7127d4b307601ac37792bee01bb95c867cbfacf21b6375b804d6bbd723", size = 352015 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/5f/f1f6d2652eb9d848f6eb369d8db83a2da6249bb49ad2c2a48f45d54538d3/rpds_py-2026.5.1-cp315-cp315t-macosx_11_0_arm64.whl", hash = "sha256:b5f077b44a4f7808520f66dae234988d867deb9aed9be5da057ce9ba831b2a41", size = 345016 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/66/b74182775691ea2290c99e52ac8d5db844e56fbec90ce421f107658c8314/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d8f9b7b78c9538fc9e04e82ec0e888ff0c3cffcfad152c77e57cd09351a98a", size = 374775 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ff/8f/15e5a61d9f0a43902d36561d4f07cae6ae9f4716be825159fd72717f33af/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e3a8ae58895ac107ed934a6bf51e5846f95c53b9b940c2c6d310838fd5846358", size = 380270 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/c3/f859b12763a80540cdf2af0f15b19904cf756a71d7bdd3f82ff3e5b1bbf9/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0957cf3c2b8632ec7aaebffebea8005b353cc2a237b6e2ae3c2cac0820704cfb", size = 495285 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/c7/ff27c2ac8411d30b03b1829fd88cae8dad1a4d0da48dd25e57c4038042e6/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c396c1304de421050b3681ea70f371874b54d41b0151e96109758144c231e30b", size = 389581 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6e/67/fe92ee32a6cc05c77228a2f8b1762e7124f386ec20ff83d0757b762d58d0/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad1bff7f666b9598e573815affd666aac6a13a585dde336f843e33350c7fadc", size = 376041 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f8/91/b4d6685c27aba55bd82f25b278be8237038117d05f9659a6213ad3408130/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_31_riscv64.whl", hash = "sha256:656a042550878f12d45752452d47094b7cfe5ad1e9d7b87b5a22ad3ae5ff8015", size = 383946 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bd/79/2c1d832a53c8e0f8e98fc970ec257b950fecd4f62be2ab7182b500a0cbc8/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c4bd4f70294737b5206a3e8e30ccadbf8a60301831c8ea23eec5dbeea1ecfa", size = 405526 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/c4/c98117b03c6a8581ab2c2dfccfe9a5ad82bd8128a3c28b46a6ad2d97c393/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = 
"sha256:43bca78665423cabae77146f2fe7ce55272b6c8d55d82cca83effd42c7e13972", size = 551165 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/c1/bc479ca069200af730881b1bd525e3114b2b391a351509fcb1b772f28086/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_i686.whl", hash = "sha256:42d0f20e85e549c870749d0e247f0c10d318a45b7e9676d575d2dcb04a1b2e66", size = 618778 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/65/38ab2f90df44c2febfb63cc10ced40763d9b4bc94d173e734528663fe7f5/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:b1be5c35683684d5331b93600c210e8367c254683d8a6df6bd21bd2da3a334fb", size = 581839 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/2d/ce1f605fe036aadd460e5822e578c6c7ec3a860936cca37d6e0f299daa77/rpds_py-2026.5.1-cp315-cp315t-win32.whl", hash = "sha256:75808f6c38ce7749bb68cc2770161aae5045e6c6f6781a9782e74b93304399df", size = 207866 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/cb/966040123eb102371559746908ef2c9471f4d43e17ec9a645a2258dab64b/rpds_py-2026.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:90bd6630002a1c7f09e7843dd79f0d24f3d2897cc25a753480917865d14f15b3", size = 225441 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/42/56/3fe0fb34820ff667be791b3a3c22b85e8bcba54e9c832f47438c191fa7be/rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:edf2765d84e42447f112ad877af8fe1db0089aaec5b28e88d6eab45e7fe99cea", size = 357151 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/f2/3eb9ccdb9f143b8c9b003978898cb497f942a324c077401e6b8834238e63/rpds_py-2026.5.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ad3773236e95f7f33991eb125224b7da66f206504d032a253a02da7e134519fb", size = 350195 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/24/dbda232bc4f3ed732120692ab0d2c8402cb020516556d8bee622dcef2413/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a04df86b3f0fade39ec8fd0e0aab089b1da9fbd2b48df778a57ef96f5e7d38df", size = 381850 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/30/32e769839a358f78810c234f160f2cc21d1e4e47e1c0e0e0d535be5a0219/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6142dbd80c4df62a5d899f0d616d417f84e0bc8d32526c8e5589019d75d028a7", size = 387899 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/86/ec84d243aadb3b34b71dd26a010d0930b2d284ff5fc9a69fec53810ee6fd/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b35217adefe87f2fe4db7e9766cabe84744bfe9616d9667be18988928c7f2dc", size = 501618 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/74/25/b60e52686bbff777a64f9e4f4d3dd57980dc846913777177a2c92e4937aa/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b95d5e11fc712b752081183a55a244c03cd00570489edd7014d8899f8ceb8162", size = 394003 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/c7/b3a6a588cc2219510ef3f42e207483a93950bedd1e3a0fd4015c95cff9e5/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141c9498daf2ace9eda35d2b0e376f9ea8b058d84f2aef4f96fccfd449a2f251", size = 379778 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/00/c7dba3fc8a3da8cb3f6db1eb3386be4d79c2e97c6890d20eb9ac66ae8c43/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:6f249f8b860a200ad35193af961183ebe9132710484e6f6ce0cf89fd83c63a9a", size = 392359 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/93/dd/472ba494c70753f93745992c99855bee0636daf74e6984e5e003f150316f/rpds_py-2026.5.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4abbf391a70be864920858bf360f4fb380577c9a0f732438a1996726e2c195b", size = 412820 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/1d/6f/93831a3bfe789542ed0c1d0d74b78b440f055d6dc3ea4640eba2d95e6e23/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c74005a7bb87752acf351c93897ec63ad77a07a0da7ecad9c050e32e7286ba34", size = 557243 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/ff/0b3d604614ffc77522c6b288fdbce68957eb583da1002aa65ba38ac0ee40/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:8213afbe8a3a906fb9acb2014423fe3359ee783d0bf90995f70623a3217bfa6c", size = 623541 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ea/ea/e7b0251441da9adfeaebcf29601d10f2a1455fcf0772fae9e7e19032bd96/rpds_py-2026.5.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8c43a8a973270fd173bf48cdf80bbe66312421cba68d40845034f174f2389049", size = 586326 }, -] - -[[package]] -name = "scikit-learn" -version = "1.9.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "joblib" }, - { name = "narwhals" }, - { name = "numpy" }, - { name = "scipy" }, - { name = "threadpoolctl" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/6f/37092bdb25f712817231799fc5674d8e704066a8a70c1d2d40517e18b4ab/scikit_learn-1.9.0.tar.gz", hash = "sha256:8833266989d3a5110178a9fae30783675460724d0e1efb13b14901d2c660c557", size = 7750767 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/be/e844fd9586e66540a15b71924d17a6cbc1bb749e81ddd0a796bcdba4c055/scikit_learn-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9db6f4d34e68c8899e4cab27fdf8eafe6ed21f2ba52ceb25ea250cd237f8e47b", size = 8789686 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/42/e2/ff880f62677a17d035817d543cb0fc8727d01eccbee81c5f7fc733a9d856/scikit_learn-1.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f401448645a3e7bc115aa3c094097865155b34bff1cba8101857d9104e99074c", size = 8256782 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/25/64/eb40435e1a508ab1b4e284ce43ae80f6a162e5be5e38ed5a6fab467a9ea4/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd3a8ef0c758555a3b23c03adaa858af32f7736785ded50ad5991f59c4ed03fa", size = 8992419 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/da/4810a28e473185429e45a57eebcc91fc991b33d889cc0676063e671db03d/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7e254636164090da847715a27f8e5478feb98c40a9e0ee90cbd277de9e5ceb8", size = 9281411 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3b/67/be3d369f40d8178ba3bd86635d132e08cb5329b023e4669d9426d84bc007/scikit_learn-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:5dc1818c77575d149e25fce9ef82dd7b7263ae372f03494158668ad632a69759", size = 8272736 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/79/a733f02dc2118da7e77a134b34f39f40201a353311b011d20859d2db3556/scikit_learn-1.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:366652351f092b219c248f1e72821e841960a63d8f358f1dcfd54dc1cbdbbc28", size = 7919564 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/20/75f915ff375d6249e6550ac740fdbbd66159a068fd3af1400ff62036b07a/scikit_learn-1.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2bd41b0d201bc81575531b96b713d3eb5e5f50fb0b82101ff0f92294fdc236ac", size = 8741122 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cc/d5/2b5148f2279196775e1db2aeb85d14b70ac80e7e32b3b28e7ebeafb0901d/scikit_learn-1.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5be45aa4a42a68a533913a6ed736cf309de2226411c79ef8d609a5456f1939b1", size = 8261512 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a0/ee/5adbc77656b71f9456a2f5a7a9fdb4bcf9207a6b962889f1c2f9323afa4e/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e50ed4da51974e86e940690e9a3d82e729b62b5a49f7c9bac534d515d39d86f", size = 8837603 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/6c/c2/63fdda36c56437eeb44aaf9493c8bcd62ce230ab1598924fc626ffbfa943/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:056c92bb67ad4c28463c2f2653d9701449201e7e7a9e94e321be0f71c4fef2b8", size = 9132097 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/a4/c8e67227c680e2259c8864ae72ff48b06e16a6f51253a22167aa02a8aa4e/scikit_learn-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4306775fad04cc4b472a1b15af1ae9cede1540fbfcc17fbce3767cd8dc7ae283", size = 8211173 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/fd/3c0863792e98e67e9184aa4029288a175935eb65443afcd30d4f143450cf/scikit_learn-1.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:26e22435f63bcdcf396b574273f29f13dd531f5ea035801f5be10ba1540a4e60", size = 7867451 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/01/cf3310626b6d48d3e9be69a1223f9180360b5e6edb045f50fade723ce494/scikit_learn-1.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:80746d63bd4b6eaca54d36fe5feaf4d28bb38dc6f9470f81c7cad7c40155f119", size = 8705188 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/04/5acd7ae280c5f93b6ac5ef6cdec14eef4c8d1cd91d85b3292989c94d96b1/scikit_learn-1.9.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5b934c45c252844a91d69fda3a34cff5e7307e1db10d77cb10a3980312c74713", size = 8228299 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/39/ffe829a5b8ecb40a518724a997794657fdc354ada5e8fe8e64d998c0bac9/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38c3dcb9a1ffb85505ec53d54c7b4aea0cff70050425a7760c2af661ac85df05", size = 8789690 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/88/8dab5de10c638c083772a6be83a3d8106ced492f74a928c8693638e5bb50/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da76d09304a4706db7cc1e3ebaa3b6b98a67365cc11d2996c4f1e58ba47df714", size = 9087723 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/20/3f/7917ca72464038f6240ec70c29f94862d08a34a74291ae4d4ec5eb8186a0/scikit_learn-1.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5808d98f15c6bf6d9d96d2348c1997392a5888ce7097e664105f930c4bca1277", size = 8184330 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/c7/15739eb2f61fda3c54639e9942414e5a19ad8a8d1f5a3266afad7cb7df80/scikit_learn-1.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:d77f54c017633791bc0225a43e2f8d03745fdcfe4880268fcc4df15f505dec2e", size = 7840653 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f4/7d/c9a35cf59b20a86fec24d306f1547b78dec194b08d367ce2a3e4854169d9/scikit_learn-1.9.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9656acd4e93f74e0b66c8a36c88830a99252dfa900044d36bc2212ae89a47162", size = 8713289 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/a7/552a7821597c632b907f7bfe8f36f9f572777af8ef8a48353041cf8e091a/scikit_learn-1.9.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:24360002ae845e7866522b0a5bbf690802e7bc388cac8663502e78aa98598aa2", size = 8245141 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7d/79/f4a0c4fe9711154cddabf913471153af79056382ddc612cfe5ee0ff4b72e/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5162ad10a418c8a282dde04c9aa06965de3e9a65f33c1440c0ae69bb1a09d913", size = 8847671 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/af/4d72d9e475ac83719160c662619e4bf7b95c19507cd582e7d0167a3c3dae/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fea2cc5677ab49d6f5bade978c866da44957b712d92e9635e8b4f723013c3cb", size = 9118104 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/d5/6a58eea2cb9abbb9b3f2bb8b2cfb3243d1152d69f442d256c7af71304769/scikit_learn-1.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:64fa347efc1c839c487433e40c5144d38c336e8a2b59c81aa8660373945c2673", size = 8290674 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/65/5b/d4c879cf358f1187141cf90ced473f087183489090244f50c124a2ee478b/scikit_learn-1.9.0-cp314-cp314-win_arm64.whl", hash = "sha256:1b944b6db288f6b926e3650026ddafb988929de95d11fc2cc5fa117773c9ba42", size = 7978807 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8a/43/bfae3121ec67ae09150d453c442c7c1cc166e9aefe056e6ab3b7728a5cfc/scikit_learn-1.9.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4ccacf04ca5f4b492158a5f28afe0ace43f81b2571e4b9a66d34848b46128949", size = 9031941 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/75/b0/20a4546eb17f3b25d3c66df15810411c14ed5065bcfab50b53c96fb627b2/scikit_learn-1.9.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ee1a8db2c18c08e34c7412d4b10be1cac214cd4ea7dc9715a6a327eb49a37c96", size = 8613528 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/3c/e440e039bb82cd19004edaaad00acbde0fb9b461083c3ecf37941c557312/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:147e9329ef0e39f75d4cffa02b2aa48d827832684926cd5210d9a2cb5c57246b", size = 8855050 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/43/26/b341b8dab5998da6270a3a42c2152c578501354d36f944b5856757035ef8/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bad8f8b9950321b54c965fdcbac6c6c55e79e16646b49977bcf3668d3870a1a", size = 9097190 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/de/b650b4d69b84468cfa2e28a3ff7b8103743029e6446ce1a97fe060ef688c/scikit_learn-1.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:78fc56eafd4edb9575d2d8950d1dd152061abb573341a1cb7e099fc40f6c6666", size = 8963204 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/f3/ff83d76d7418112e5a61326443cdda87be3545dd8d6599c95b2481a4419e/scikit_learn-1.9.0-cp314-cp314t-win_arm64.whl", hash = "sha256:051075bda8b7aab87b1906ab3d4740a1e1224a19d7b3781a576736edc94e76aa", size = 8222661 }, -] - -[[package]] -name = "scipy" 
-version = "1.17.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165 }, -] - -[[package]] -name = "seekalpha" -version = "0.1.0" -source = { editable = "." 
} -dependencies = [ - { name = "joblib" }, - { name = "numba" }, - { name = "numpy" }, - { name = "pandas" }, - { name = "pyarrow" }, - { name = "pyparsing" }, - { name = "python-dotenv" }, - { name = "pyyaml" }, - { name = "tushare" }, -] - -[package.optional-dependencies] -dev = [ - { name = "pytest" }, -] -mining = [ - { name = "agentscope" }, - { name = "openai" }, -] -model = [ - { name = "lightgbm" }, - { name = "scikit-learn" }, -] - -[package.dev-dependencies] -dev = [ - { name = "pytest" }, -] - -[package.metadata] -requires-dist = [ - { name = "agentscope", marker = "extra == 'mining'", specifier = ">=2.0.0" }, - { name = "joblib", specifier = ">=1.4" }, - { name = "lightgbm", marker = "extra == 'model'", specifier = ">=4.0" }, - { name = "numba", specifier = ">=0.59" }, - { name = "numpy", specifier = ">=1.24,<2" }, - { name = "openai", marker = "extra == 'mining'", specifier = ">=1.0" }, - { name = "pandas", specifier = ">=2.0,<3" }, - { name = "pyarrow", specifier = ">=14" }, - { name = "pyparsing", specifier = ">=3.2" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=8" }, - { name = "python-dotenv", specifier = ">=1.0" }, - { name = "pyyaml", specifier = ">=6.0" }, - { name = "scikit-learn", marker = "extra == 'model'", specifier = ">=1.4" }, - { name = "tushare", specifier = ">=1.4" }, -] -provides-extras = ["dev", "model", "mining", "qmt"] - -[package.metadata.requires-dev] -dev = [{ name = "pytest", specifier = ">=8" }] - -[[package]] -name = "shellingham" -version = "1.5.4" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, -] - -[[package]] -name = "shortuuid" -version = "1.0.13" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/e2/bcf761f3bff95856203f9559baf3741c416071dd200c0fc19fad7f078f86/shortuuid-1.0.13.tar.gz", hash = "sha256:3bb9cf07f606260584b1df46399c0b87dd84773e7b25912b7e391e30797c5e72", size = 9662 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/44/21d6bf170bf40b41396480d8d49ad640bca3f2b02139cd52aa1e272830a5/shortuuid-1.0.13-py3-none-any.whl", hash = "sha256:a482a497300b49b4953e15108a7913244e1bb0d41f9d332f5e9925dba33a3c5a", size = 10529 }, -] - -[[package]] -name = "simple-websocket" -version = "1.1.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "wsproto" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b0/d4/bfa032f961103eba93de583b161f0e6a5b63cebb8f2c7d0c6e6efe1e3d2e/simple_websocket-1.1.0.tar.gz", hash = "sha256:7939234e7aa067c534abdab3a9ed933ec9ce4691b0713c78acb195560aa52ae4", size = 17300 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 }, -] - -[[package]] -name = "simplejson" -version = "4.1.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0e/2a/54837395a3487c725669428d513293612a48d82b95a0642c936932e5d898/simplejson-4.1.1.tar.gz", hash = "sha256:c08eb9f7a90f77ae470e19a07472e9a79ebc0d1c2315d86a72767665bd5ba79f", size = 118860 } -wheels = [ - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/1e/25/39013ffe279d90093ec1c848565b3683c586906c10fa55d9000ec29d046b/simplejson-4.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2867c64d92abd1992c15666fae198203093f593e43d6b81adf176bae530d493a", size = 111538 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/ae/2c272971c8a87e2539c54a98eb6ff037bee1e2e93943c3986cf7500a4f3a/simplejson-4.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c47c46e16c8ea9e4850061e6ed5aa2b9cd2074cb2274bfd9c138cba15ce7453", size = 90594 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4e/a2/6eebfb99dedc139f549200f61ade6d1890ac5707c5d427bdfa6fe39c9313/simplejson-4.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e294e33dbf316a9bbdd4030d46503c9b0f19470ae7ad6af5bae6c426bc2e869f", size = 90718 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/7e/c9e6c0c4ad8415e64dad0c47f619b556b02680a41631b4dbc281d55dc54d/simplejson-4.1.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7ce252b28fddbdd83db5bd7d93dad2a8a591d7ada098afec9c1b23d6b722a7a4", size = 180901 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/09/69e331e3994b1ed9be6ce9ace4ade704e7ed503edf869929ca7bb404eda8/simplejson-4.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c44ef6b02a4eb67ed17a72342341792149b3ff46f15426c26e970e49addf327", size = 178133 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/40/ed806f24afef295c1032448f5ff6f6f2979392d5645ddb9f4fed7f38194d/simplejson-4.1.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:82bfca2b85a34178c25829c703f0a9e9f113a5af7539285bd3efb583a0bf1ba3", size = 188155 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/94/8d6f515b827b0f7881a49c8c1ac6920b7ae9428939ef04238c973278b42a/simplejson-4.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:0e4b23f71dd781f8830f1663dc01a4944d3dbf87a1f93d78fba1cf64722d0ccf", size = 176225 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c9/fd/6dffb4956563d48bbe46b91ff341adae34920e94008fd6b8d728072abfc7/simplejson-4.1.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:82fee635d7b73ad801030b05a75fbd34a098da0c2ecf600667a03636d09e1e42", size = 185535 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/d2/a509ee37763e79aec75d68f8521db1440306edeba3b8b4064ab4ee8bf1d9/simplejson-4.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:68e62eda21192c5ea9bb92d571ca46a4477fef48762f50d433de2b4253051551", size = 179302 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/23/5b343bfd2a79d3b6818e4db3586c405a001a090d4c89d336e31273ce7177/simplejson-4.1.1-cp311-cp311-win32.whl", hash = "sha256:ffd3d82294b47f5ec64050021ace95fd62628a0c1cc8bbf4d06d2d1fb697e055", size = 88408 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/04/df9b37aedbd524dca20840d25ebe01d6ae486b89792aeff5d15b9c4114f7/simplejson-4.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:78a3fe0995be42bed62a26aa78e0e0b4d87c6545785346b9cc898f3389569a35", size = 90526 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/25/e90998fe8e480eb43b966c09e835379887d427567ebd496563d3b1e16b19/simplejson-4.1.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:19040a17154dc03d289bab68d73ce0a6a0be01de30c584bbdd93490bead14b22", size = 112414 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/a0/abd4785f36c3400f1fbb21f517be39295a750a714f04b7ee175adf6ef580/simplejson-4.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a94ebaecdbaa80d9551a3ec6bf0c9302fc8b53ab6c1b2bfd498a1df4cb28158d", size = 91120 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b8/78/fc060d2e3b13c6ec59288574b8efac64075e316b2afba4396a56b2422f78/simplejson-4.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:67341c95c0a168ab4a6d1e807e50463f1c8da932c3286d81e201266c427061fa", size = 91055 }, - { 
url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/b6/156a8de1e1b47694f0e7de6675866936608d45dc68388fd017d36f8693be/simplejson-4.1.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:45ec18e337fec538b7e902d489505c450b2454653d1290f3f50385e6fd8aa607", size = 190297 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/86/1c/e4d0eab695be3eb21d0f46bce820752031f03e7113f9c80a9b3c73ee7157/simplejson-4.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:820c69a4710400e9b248d5670647d60be58824369282d3925e516b3ff1a7cd82", size = 187002 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/0e/7f5a59d29426b062d5928fb88b403c3f797129d53be7102f955dbe51aa44/simplejson-4.1.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e708d373a10e4378ef2d59f8361850c7150fd907ed49efe49bc5492160476d1", size = 195146 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/18/9943db224dd4d5fa3c090c3e56a94c37b254338c83995ec5680285111c40/simplejson-4.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:980fc33353f81fd12d8c49d44f8c2760d1dc8192285e627c5180d141035b228a", size = 183931 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/08/9a690da9a766161c06c627d805362cf159f1abe480969372b2897649b955/simplejson-4.1.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:de2ed102fff88dacf543699f53ee3a533cc11539a39baa176b7e09dd783069d6", size = 192228 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/88/bd8aad36b451ffb0e0a3f721d695a88befa6d1ac7d1e02ae788ca7ff4029/simplejson-4.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2785ff8edc0e28bf773a32543a6bbed46351453c997b3f6709c744e3c2f7eabb", size = 187808 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/04/ee/14f91db0d1f481533b651dafbf8cd0da088d9817f7af30c68f7f19f9c847/simplejson-4.1.1-cp312-cp312-win32.whl", hash = 
"sha256:2e0d5ead6d14610467ec356ec1f6b5d8a56aa216abaad8d41c8b873b16cf313f", size = 88512 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/c4/90de06b2d8737c68c05ff9274113f854dbf6a5f28b7a955212111672cb57/simplejson-4.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:63a5451f557d6be48a231bae932458655c620902b868170b2f1c8afed496f6b4", size = 90748 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/37/a9/47b445eeb559c9593453a0648e0fd6d08e8adff64dd5e5ced66726da8a09/simplejson-4.1.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dff52fc7af272e84fc21cc5a06c927c823ca6ae00af14f3b0d7707b42775ed98", size = 113160 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/65/cb72db31523c164dea5dc55b02dad065a40c478856bc7534b279d2b51906/simplejson-4.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:971aed0647ad6e840a3943bec812fcda5f2d26a5497a4981d1fb49aa4f9a396c", size = 91521 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9a/e5/54cb7c50ad5fdc1e0a86b7df4b135c2cbd5c4623605aa94466659098e8da/simplejson-4.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:249e2e220aa6d9b9d936bde84eb7bf79d5b6c5a8273c6e411f8b1635a9073f2d", size = 91407 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/38/2e/21a3ede87f0bf82d6c7bcb90480d50a6490eb974c6ab20881188e440957c/simplejson-4.1.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e5cdd6a5d52299f345c15ab5678cc4249e24f383f361d986afbc3c7072a6b6b", size = 192451 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/59/df/9903edd3102bf0b5984edfcb90c88612330996efa3b4fbf8a971d6e17839/simplejson-4.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642cec364e0676e2d5a73fa4d31d0c7c55886997caa2fde24e8292ca44d32728", size = 189015 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/98/cd/33230927a780e1398b857e3944abb914556994d252b1d765ae40d112cb25/simplejson-4.1.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:76fe296ca1df23d290033f10aaacf534fd1b3e3007e7f9ff8aa68b21413aaa78", size = 196658 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cd/84/2c5a7444eb53e9a86d3738299bffddd9f53aeed799ded2f45368221fdb19/simplejson-4.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f0ad25b7dc4e0fb23858355819f2e994f1a5badcdcde8737eac7921c2f1ed2a", size = 185967 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/68/454378e06d059cd412a7ed5d87fb6d29fd5b60f13a4d89fc1f764ff434df/simplejson-4.1.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a59ebd0533f03fd06ff0c42ba0f02d93cbcdd7944922bf3b93911327a95b901f", size = 193940 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/d5/a15bf915f623a2c5a079d6e3be8256fdb8ef06f110669493a09b9d6933e0/simplejson-4.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bccbf4419676b517939852e5aeff2af6aee4dc046881c67a1581fa6f1cb01abd", size = 189795 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/c9/37212ae7dc4b607f0978c408e8633f05c810884e054c33113184c6c2c8a2/simplejson-4.1.1-cp313-cp313-win32.whl", hash = "sha256:6c845363eb5fd166fb7c72243da38f4fcfde666ede7fdf2cc6fd7762894626f7", size = 88773 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/a5/c7a0a47883a9015b54c9d8a4b62f2aba17bd4335b1787b9b8a0fc2fa6d52/simplejson-4.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:104d8324c34f25b4b90800bc5fa363780cbc3d8496aef061cba7ce1af9162270", size = 90888 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/18/4a118a6a92eb33bb08c8e2fe7ec85cb96f0673491bb2b829930831ee4fbe/simplejson-4.1.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:ed7473602b6625de793b6acba49aa949f144a475f538792067e4cf2fda2071f5", size = 110492 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/07/f4/84d160e9fa8cada1e0a9381cae4fa81eecd573577a5b34366d8ced59bdf7/simplejson-4.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:225c9caa324c5b554d009fb9cac22aee7711e71bd96f487938c659af467e828e", size = 90152 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/68/31/9a5432c433a7671107182cdc9a20ea78a70f99c4e5334aa54b6d4d0d79ed/simplejson-4.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:95407269340c7f22f09776ea7b717a52cf56cfcf119b5e45f66faa4a26445bea", size = 90115 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/91/3635cdb13318cb0a328abaa69e2b91251caad39d6779aa308098f341f6cb/simplejson-4.1.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3851658d642c1184d2023f0e6c9ce44a21eb1629e74e7c84ef956b128841fe12", size = 184036 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/ba/149b6ec5393f6849d98c59cadba888b710a8ef4b805ab91e11a566960d40/simplejson-4.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:95a3bb0f78e85f4937f99092239f2011ce06f0f2d803df5c299cc05abbeae008", size = 180543 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/7c/a5d968d0b527a748b667e62bea94309ccbcb1e2b108e8f0cf8547efaa12b/simplejson-4.1.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bbfdaa7c0603f75b7b14b211b7f2be44696d4e26833ad2d91d5c87bf5fb9a920", size = 188725 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/db/e3/6a8d11181d587ef00e2db9112357e6832111e56dd56b01b5c11758a1965d/simplejson-4.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:39e3c584071dced8c21b4689f0254303521daeb9b5bc1f4289755d71fa3cb0d3", size = 177492 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/e3/8b0eb8b06e8198cfbd1270487da163d0093df05cc4f557350cd65e2f7e79/simplejson-4.1.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = 
"sha256:036a27bd0469b9d79557cbddb392969f876cd7f278cfbd0fba81534927a06575", size = 185281 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/5f/64990f07ec9e2cb1a814c674e2e21b5693207f74ac70eb72151b847ea4e6/simplejson-4.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b70bfd2f67f3351baba08aa3ae9233c83f21fd95ae5e6b3d0ecb8c647929112f", size = 181848 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/a5/bbc1bc0447f339f79f99ab8c37f7f037cb2f1f93af75d6a4d553096bb0c3/simplejson-4.1.1-cp314-cp314-win32.whl", hash = "sha256:37233c72ce88d06acb92747347742b3c07871eba6789f060c179c9302dde8efe", size = 88761 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/72/ec1b5cbdcb140c132e6c7bdf99bd73e4f675439e77126c88f472fcffa09c/simplejson-4.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:cc0442dea71cd9cbf30a0b8b9929ab5aa6c02c0443a3d977351e6ec5bada4388", size = 91018 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/97/4fa437f68ff72219bac3bf3d050de9c6265691f3a170e16954bd69d7cddd/simplejson-4.1.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:c996a4d38290c515af347740659ce095b425449c164a5c9fa3977caa6eff5dbe", size = 113919 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/83/59de041d09eb4a9577f7015d7263c32095dfb7fde49717dff62145d89809/simplejson-4.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:c65c763fb20d7ca113c1c14dce2fc04a0fc3a57aceff533d6fdac707c7bffb40", size = 91904 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/8e/46bb345d540f6eb31427d984a4e518cdb182d0621814fee4fee045e8815b/simplejson-4.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0da5c9f57206ee7ef280ff7f1d924937b0a64f9a271a5ef371a2ecdbebba7421", size = 91752 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/e2/1b2ce97f068835eb3d253c116a4df7a3f436b7bf2fb5ff1ba29287e8b0ec/simplejson-4.1.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:ea3426e786425d10e9e82f8a6eda74a7d6eb10d99165ac3d0d3bbcb65c0ea343", size = 214021 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/70/d93e556df6a0786298644a7c08304fcbeddc248325f23f38acbebeb21165/simplejson-4.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d75cea7a1025edd7e439b2966b3d977c45b5b899e2adaf422811b3ac702ed9fb", size = 213530 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/a5/c93bf305b9f00d7259e09e713d60e75bd0f7f53da970f716ab90491770e7/simplejson-4.1.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63c2ada8e58f266491f19eed2eeeb7c25c6141e52f8f9e820f6bb94156cf8dbc", size = 218282 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/20/a9b5d2e27ec44b069ee251bd55544fc76929a067107b1050001566ba86f3/simplejson-4.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d1fffb56305c5b475ee746cf9e04f97423ba5aaacd292dc1255bd75b1d3b124b", size = 209249 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/e4/e06ee682ed5df67592181f5ecb062e35878967e27f5b6e087237d4548d95/simplejson-4.1.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:a6525ec733f43d0541206cffa64fd2aad5a7ae3eb76566aff49cd4db6382209a", size = 213963 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/9f/1e160e4cd8cdbf062bf6a454cdf814dc7a48eb47e566fdb8f80ccb202605/simplejson-4.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:861e393260508efa64d8805a8e49c416c3484907e3f146ce966c69552b49b9a3", size = 210474 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/e6/cecd913df322df5bbe7ebb8ba39e0708e505a165553900da8a7761026d6f/simplejson-4.1.1-cp314-cp314t-win32.whl", hash = "sha256:d083b89d30948a751d3d97476c2ed91e4caaa24a1a1459bdbadb8876242c71fe", size = 91134 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/97/73/f540dde99cc1d393bd062ab3b5735b777561a5d8f8a5f2e241164444d77a/simplejson-4.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4cbb299d0528ec0447fe366d8c9641860e28f997a62730690fef905f1f41046e", size = 94467 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/6a/8b74c52ffd33dbbde00fe7251fee6a0acdc8cea33f7a43805aed258fb79b/simplejson-4.1.1-py3-none-any.whl", hash = "sha256:2ce92b3748f02423e26d2bfb636fb9d7a8f67c8f5854dcae69d350d123b2eee2", size = 69195 }, -] - -[[package]] -name = "six" -version = "1.17.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, -] - -[[package]] -name = "sniffio" -version = "1.3.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, -] - -[[package]] -name = "soupsieve" -version = "2.8.4" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/47/2c/0a5f6f8ee0d5589e48c7640213ed5175d52cf540a06725b628cc1a45d6ce/soupsieve-2.8.4.tar.gz", hash = "sha256:e121fd02e975c695e4e9e8774a5ee35d74714b59307868dcc5319ad2d9e3328e", size = 121110 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/f5/0c41cb68dcae6b7de4fac4188a3a9589e21fb31df21ea3a2e888db95e6c9/soupsieve-2.8.4-py3-none-any.whl", hash = "sha256:e7e6b0769c8f51ed59acab6e994b00621096cfb1c640a7509295987388fbaf65", size = 37304 }, -] - -[[package]] -name = "sse-starlette" -version = "3.4.5" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "anyio" }, - { name = "starlette" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/1b/bc9e3e7a72dcdad7dc7888758f5d00f56f8909ed5cfdff822bd72bb4c520/sse_starlette-3.4.5.tar.gz", hash = "sha256:83072538bc211a2f68b7b0422226c4af3e9b62e106e07034664b832ca019842a", size = 35249 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/75/c88d3f5dafd59c791da1ce27650d30bf5b70cbf1cbf01cd00e5f9e360915/sse_starlette-3.4.5-py3-none-any.whl", hash = "sha256:e71bad53323f65573c3864a6c3bd0c1eb6e5f092b2e48082b0c35927d19ca296", size = 16518 }, -] - -[[package]] -name = "starlette" -version = "1.3.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/e3/7c1dc7381d9f8ab7d854328ebfa884e62cb3f3d8549ddfd37c7814f42afa/starlette-1.3.1.tar.gz", hash = "sha256:05d0213193f2fbaae60e2ecb593b4add4262ad4e46536b54abe36f11a71724e0", size = 2703240 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/bb/2799cc2ede3ed41131f8975621e7213dfc7ef4acbbaadfa440f32500c370/starlette-1.3.1-py3-none-any.whl", hash = "sha256:c7372aae11c3c3f26a42df7bd626cec2f47d03483d261d369516a615a53714c6", size = 73632 }, -] - 
-[[package]] -name = "threadpoolctl" -version = "3.6.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638 }, -] - -[[package]] -name = "tqdm" -version = "4.68.3" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/d7/0535a28b1f5f24f6612fb3ff1e89fb1a8d160fee0f976e0aa6803862134b/tqdm-4.68.3.tar.gz", hash = "sha256:00dfa48452b6b6cfae3dd9885636c23d3422d1ec97c66d96818cbd5e0821d482", size = 170596 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d8/8e/bb97bb0c71802080bfc8952937d174e49cfc50de5c951dd47b2496f0dcdb/tqdm-4.68.3-py3-none-any.whl", hash = "sha256:39832cc2def2789a6f29df83f172db7416cea70052c0907a57801c5f2fdccb03", size = 78337 }, -] - -[[package]] -name = "tree-sitter" -version = "0.25.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz", hash = "sha256:fe43c158555da46723b28b52e058ad444195afd1db3ca7720c59a254544e9c20", size = 177961 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/22/88a1e00b906d26fa8a075dd19c6c3116997cb884bf1b3c023deb065a344d/tree_sitter-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:b8ca72d841215b6573ed0655b3a5cd1133f9b69a6fa561aecad40dca9029d75b", size = 146752 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/1c/22cc14f3910017b7a76d7358df5cd315a84fe0c7f6f7b443b49db2e2790d/tree_sitter-0.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc0351cfe5022cec5a77645f647f92a936b38850346ed3f6d6babfbeeeca4d26", size = 137765 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/0c/d0de46ded7d5b34631e0f630d9866dab22d3183195bf0f3b81de406d6622/tree_sitter-0.25.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1799609636c0193e16c38f366bda5af15b1ce476df79ddaae7dd274df9e44266", size = 604643 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/38/b735a58c1c2f60a168a678ca27b4c1a9df725d0bf2d1a8a1c571c033111e/tree_sitter-0.25.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e65ae456ad0d210ee71a89ee112ac7e72e6c2e5aac1b95846ecc7afa68a194c", size = 632229 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/32/f6/cda1e1e6cbff5e28d8433578e2556d7ba0b0209d95a796128155b97e7693/tree_sitter-0.25.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:49ee3c348caa459244ec437ccc7ff3831f35977d143f65311572b8ba0a5f265f", size = 629861 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/19/427e5943b276a0dd74c2a1f1d7a7393443f13d1ee47dedb3f8127903c080/tree_sitter-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:56ac6602c7d09c2c507c55e58dc7026b8988e0475bd0002f8a386cce5e8e8adc", size = 127304 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/d9/eef856dc15f784d85d1397a17f3ee0f82df7778efce9e1961203abfe376a/tree_sitter-0.25.2-cp311-cp311-win_arm64.whl", hash = "sha256:b3d11a3a3ac89bb8a2543d75597f905a9926f9c806f40fcca8242922d1cc6ad5", size = 113990 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/3c/9e/20c2a00a862f1c2897a436b17edb774e831b22218083b459d0d081c9db33/tree_sitter-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ddabfff809ffc983fc9963455ba1cecc90295803e06e140a4c83e94c1fa3d960", size = 146941 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ef/04/8512e2062e652a1016e840ce36ba1cc33258b0dcc4e500d8089b4054afec/tree_sitter-0.25.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c0c0ab5f94938a23fe81928a21cc0fac44143133ccc4eb7eeb1b92f84748331c", size = 137699 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/8a/d48c0414db19307b0fb3bb10d76a3a0cbe275bb293f145ee7fba2abd668e/tree_sitter-0.25.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dd12d80d91d4114ca097626eb82714618dcdfacd6a5e0955216c6485c350ef99", size = 607125 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/d1/b95f545e9fc5001b8a78636ef942a4e4e536580caa6a99e73dd0a02e87aa/tree_sitter-0.25.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b43a9e4c89d4d0839de27cd4d6902d33396de700e9ff4c5ab7631f277a85ead9", size = 635418 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/de/4d/b734bde3fb6f3513a010fa91f1f2875442cdc0382d6a949005cd84563d8f/tree_sitter-0.25.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbb1706407c0e451c4f8cc016fec27d72d4b211fdd3173320b1ada7a6c74c3ac", size = 631250 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/f2/5f654994f36d10c64d50a192239599fcae46677491c8dd53e7579c35a3e3/tree_sitter-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:6d0302550bbe4620a5dc7649517c4409d74ef18558276ce758419cf09e578897", size = 127156 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/23/148c468d410efcf0a9535272d81c258d840c27b34781d625f1f627e2e27d/tree_sitter-0.25.2-cp312-cp312-win_arm64.whl", hash = "sha256:0c8b6682cac77e37cfe5cf7ec388844957f48b7bd8d6321d0ca2d852994e10d5", size = 113984 
}, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/67/67492014ce32729b63d7ef318a19f9cfedd855d677de5773476caf771e96/tree_sitter-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0628671f0de69bb279558ef6b640bcfc97864fe0026d840f872728a86cd6b6cd", size = 146926 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4e/9c/a278b15e6b263e86c5e301c82a60923fa7c59d44f78d7a110a89a413e640/tree_sitter-0.25.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f5ddcd3e291a749b62521f71fc953f66f5fd9743973fd6dd962b092773569601", size = 137712 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/9a/423bba15d2bf6473ba67846ba5244b988cd97a4b1ea2b146822162256794/tree_sitter-0.25.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd88fbb0f6c3a0f28f0a68d72df88e9755cf5215bae146f5a1bdc8362b772053", size = 607873 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ed/4c/b430d2cb43f8badfb3a3fa9d6cd7c8247698187b5674008c9d67b2a90c8e/tree_sitter-0.25.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b878e296e63661c8e124177cc3084b041ba3f5936b43076d57c487822426f614", size = 636313 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/27/5f97098dbba807331d666a0997662e82d066e84b17d92efab575d283822f/tree_sitter-0.25.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d77605e0d353ba3fe5627e5490f0fbfe44141bafa4478d88ef7954a61a848dae", size = 631370 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d4/3c/87caaed663fabc35e18dc704cd0e9800a0ee2f22bd18b9cbe7c10799895d/tree_sitter-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:463c032bd02052d934daa5f45d183e0521ceb783c2548501cf034b0beba92c9b", size = 127157 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d5/23/f8467b408b7988aff4ea40946a4bd1a2c1a73d17156a9d039bbaff1e2ceb/tree_sitter-0.25.2-cp313-cp313-win_arm64.whl", hash = "sha256:b3f63a1796886249bd22c559a5944d64d05d43f2be72961624278eff0dcc5cb8", 
size = 113975 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/e3/d9526ba71dfbbe4eba5e51d89432b4b333a49a1e70712aa5590cd22fc74f/tree_sitter-0.25.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:65d3c931013ea798b502782acab986bbf47ba2c452610ab0776cf4a8ef150fc0", size = 146776 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/42/97/4bd4ad97f85a23011dd8a535534bb1035c4e0bac1234d58f438e15cff51f/tree_sitter-0.25.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:bda059af9d621918efb813b22fb06b3fe00c3e94079c6143fcb2c565eb44cb87", size = 137732 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b6/19/1e968aa0b1b567988ed522f836498a6a9529a74aab15f09dd9ac1e41f505/tree_sitter-0.25.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eac4e8e4c7060c75f395feec46421eb61212cb73998dbe004b7384724f3682ab", size = 609456 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/b6/cf08f4f20f4c9094006ef8828555484e842fc468827ad6e56011ab668dbd/tree_sitter-0.25.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:260586381b23be33b6191a07cea3d44ecbd6c01aa4c6b027a0439145fcbc3358", size = 636772 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/57/e2/d42d55bf56360987c32bc7b16adb06744e425670b823fb8a5786a1cea991/tree_sitter-0.25.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7d2ee1acbacebe50ba0f85fff1bc05e65d877958f00880f49f9b2af38dce1af0", size = 631522 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/87/af9604ebe275a9345d88c3ace0cf2a1341aa3f8ef49dd9fc11662132df8a/tree_sitter-0.25.2-cp314-cp314-win_amd64.whl", hash = "sha256:4973b718fcadfb04e59e746abfbb0288694159c6aeecd2add59320c03368c721", size = 130864 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/6e/e64621037357acb83d912276ffd30a859ef117f9c680f2e3cb955f47c680/tree_sitter-0.25.2-cp314-cp314-win_arm64.whl", hash = 
"sha256:b8d4429954a3beb3e844e2872610d2a4800ba4eb42bb1990c6a4b1949b18459f", size = 117470 }, -] - -[[package]] -name = "tree-sitter-bash" -version = "0.25.1" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/0e/f0108be910f1eef6499eabce517e79fe3b12057280ed398da67ce2426cba/tree_sitter_bash-0.25.1.tar.gz", hash = "sha256:bfc0bdaa77bc1e86e3c6652e5a6e140c40c0a16b84185c2b63ad7cd809b88f14", size = 419703 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/30/8e/37e7364d9c9c58da89e05c510671d8c45818afd7b31c6939ab72f8dc6c04/tree_sitter_bash-0.25.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:0e6235f59e366d220dde7d830196bed597d01e853e44d8ccd1a82c5dd2500acf", size = 194160 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/bb/2d2cfbb1f89aaeb1ec892624f069d92d058d06bb66f16b9ec9fb5873ab60/tree_sitter_bash-0.25.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:f4a34a6504c7c5b2a9b8c5c4065531dea19ca2c35026e706cf2eeeebe2c92512", size = 202659 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/25/f0/1bb25519be27460255d3899db677313cfa1e6306988fbf456a3d7e211bbb/tree_sitter_bash-0.25.1-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e76c4cfb20b076552406782b7f8c2a3946835993df0a44df006de54b7030c7dc", size = 230596 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/22/9f70bc3d3b942ab9fc0f89c1dc9e087519a3a94f64ae6b7377aae3a7a0f0/tree_sitter_bash-0.25.1-cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3f484c4bb8796cde7a87ca351e6116f09653edac0eb3c6d238566359dd28b117", size = 231981 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/c3/f1540e42cd41b323c6821e45e52e1aed6ed386209aad52db996f05703963/tree_sitter_bash-0.25.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:5e76af6df46d958c7f5b6d5884c9743218e3902a00ccb493ec92728b1084430b", size = 228364 }, - { 
url = "https://pypi.tuna.tsinghua.edu.cn/packages/f7/a0/c3050a6277dfcac8c480f514dc4fe49f3f65f0eac68b4702cbaca2584e85/tree_sitter_bash-0.25.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a3332d71c7b7d5f78259b19d02d0ea111fcb82b72712ee4a93aaa5b226d3f0a8", size = 230074 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/0f/203fe6b27211387f4b9ba8c4a321567ca4ded2624dae6ccdbd2b6e940e17/tree_sitter_bash-0.25.1-cp310-abi3-win_amd64.whl", hash = "sha256:52a6802d9218f86278aa3e8b459c3abdad67eed0fde1f9f13aca5b6c634217a6", size = 195574 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/75/4ca1a9fabd8fb5aea78cea70f7837ce4dbf2afae115f62051e5fa99cba1c/tree_sitter_bash-0.25.1-cp310-abi3-win_arm64.whl", hash = "sha256:59115057ec2bae319e8082ff29559861045002964c3431ccb0fc92aa4bc9bccb", size = 191196 }, -] - -[[package]] -name = "tushare" -version = "1.4.29" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "bs4" }, - { name = "lxml" }, - { name = "pandas" }, - { name = "requests" }, - { name = "simplejson" }, - { name = "tqdm" }, - { name = "websocket-client" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/a4/5ce99585209410463e4b203d13c73d356c6ac92daea48af182932af7cb61/tushare-1.4.29.tar.gz", hash = "sha256:f578fb778868c0b744ac9173a837b695fd82e8d1b1e0d39c1f882b0e4fef7ecb", size = 128623 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/33/3e/d426a56e5feac9b0aaada1c6b0745ed03422d4a713295e0bbb44c8ea86fe/tushare-1.4.29-py3-none-any.whl", hash = "sha256:82554af953ea5ac3d8771d42330493181031c7e68dccce03a491c7356e9ba4b2", size = 142920 }, -] - -[[package]] -name = "typer" -version = "0.26.8" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "annotated-doc" }, - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "rich" }, - { name = "shellingham" }, -] -sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/7c/f7/68adc395201b20b872d68e975386832e8005ffeacedd43a1d837a32815be/typer-0.26.8.tar.gz", hash = "sha256:c244a6bd558886fe3f8780efb6bdd28bb9aff005a94eedebaa5cb32926fe2f7e", size = 202097 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/80/87/b9fd69c92c6102a066e1b86a35243f53e70bd4c709f2a26d9f4fee4f4dc0/typer-0.26.8-py3-none-any.whl", hash = "sha256:3512ca79ac5c11113414b36e80281b872884477722440691c89d1112e321a49c", size = 122564 }, -] - -[[package]] -name = "typing-extensions" -version = "4.15.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, -] - -[[package]] -name = "typing-inspection" -version = "0.4.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611 }, -] - -[[package]] -name = "tzdata" -version = "2026.2" -source = { registry = 
"https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/19/1b9b0e29f30c6d35cb345486df41110984ea67ae69dddbc0e8a100999493/tzdata-2026.2.tar.gz", hash = "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10", size = 198254 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/e4/dccd7f47c4b64213ac01ef921a1337ee6e30e8c6466046018326977efd95/tzdata-2026.2-py2.py3-none-any.whl", hash = "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7", size = 349321 }, -] - -[[package]] -name = "urllib3" -version = "2.7.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087 }, -] - -[[package]] -name = "uvicorn" -version = "0.49.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "click", marker = "sys_platform != 'emscripten'" }, - { name = "h11", marker = "sys_platform != 'emscripten'" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/1f/fa18009dea8469069cca78a4e877a008ab78f08b064bfc9ab891579077ff/uvicorn-0.49.0.tar.gz", hash = "sha256:ebf4271aa580d9de97f93192d4595176df6e91f9aae919ca73e4fc07df1e66a3", size = 91284 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/fa/e1388bbcf24ef3274f45c0c1c7b501fd14971037c1b6ee23610553307497/uvicorn-0.49.0-py3-none-any.whl", hash = "sha256:ba3d14c3ee7e41c6c654c46c9eb489d33213cdd30aa1696eab1374337c13f68f", size = 71376 }, -] - 
-[[package]] -name = "websocket-client" -version = "1.9.0" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/41/aa4bf9664e4cda14c3b39865b12251e8e7d239f4cd0e3cc1b6c2ccde25c1/websocket_client-1.9.0.tar.gz", hash = "sha256:9e813624b6eb619999a97dc7958469217c3176312b3a16a4bd1bc7e08a46ec98", size = 70576 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl", hash = "sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef", size = 82616 }, -] - -[[package]] -name = "wsproto" -version = "1.3.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "h11" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/79/12135bdf8b9c9367b8701c2c19a14c913c120b882d50b014ca0d38083c2c/wsproto-1.3.2.tar.gz", hash = "sha256:b86885dcf294e15204919950f666e06ffc6c7c114ca900b060d6e16293528294", size = 50116 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405 }, -] - -[[package]] -name = "yarl" -version = "1.24.2" -source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } -dependencies = [ - { name = "idna" }, - { name = "multidict" }, - { name = "propcache" }, -] -sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/12/1e8f37460ea0f7eb59c221fdaf0ed75e7ac43e97f8093b9c6f411df50a78/yarl-1.24.2.tar.gz", hash = "sha256:9ac374123c6fd7abf64d1fec93962b0bd4ee2c19751755a762a72dd96c0378f8", size = 210798 } -wheels = [ - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/c5/1ce244152ff2839645e7cae92f90e7bafcb2c52bea7ff586ac714f14f5df/yarl-1.24.2-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:36348bebb147b83818b9d7e673ea4debc75970afc6ffdc7e3975ad05ce5a58c1", size = 128971 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/5a/00f36967203ed89cb3acd2c8ed526cc3fed9418eb70ce128160a911c8499/yarl-1.24.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a97e42c8a2233f2f279ecadd9e4a037bcb5d813b78435e8eedd4db5a9e9708c", size = 91507 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/d0/1fb0c1cd27288f39f6974da4318c32768d72c9890984541fdf1e2e32a51d/yarl-1.24.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8d027d56f1035e339d1001ac33eceab5b2ec8e42e449787bb75e289fb9a5cd1d", size = 91343 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/ce/d4a646508bed2f8dec6435b40166fe9308dd191262033d3f307b2bbcaecd/yarl-1.24.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a6377060e7927187a42b7eb202090cbe2b34933a4eeaf90e3bd9e33432e5cae", size = 105704 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/07/b3278e82d8bc41485bcf6d856cd0433262593de615b1d3dc43bd3f5bead4/yarl-1.24.2-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:17076578bce0049a5ce57d14ad1bded391b68a3b213e9b81b0097b090244999a", size = 97281 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/17/5b/4cee6e7c92e487bebe7afc797da0aa54a248ab4e776a68fe369ec29665a5/yarl-1.24.2-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:50713f1d4d6be6375bb178bb43d140ee1acb8abe589cd723320b7925a275be1e", size = 114020 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5c/82/111076571545a7d4f9cca3fbd5c6f40615af58642be09f12328f48022468/yarl-1.24.2-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:34263e2fa8fb5bb63a0d97706cda38edbad62fddb58c7f12d6acbc092812aa50", size = 111450 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/b6/ec/08f671f69a444d704aeecebf92af659b67b97a869942411d0a578b08c334/yarl-1.24.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49016d82f032b1bd1e10b01078a7d29ae71bf468eeae0ea22df8bab691e60003", size = 106384 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/86/ce41e7a7a199340b2330d52b60f25c4074b6636dd0e60b1a80d31a9db042/yarl-1.24.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3f6d2c216318f8f32038ca3f72501ba08536f0fd18a36e858836b121b2deed9f", size = 106153 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/5d/31be8a729531ab3e55ac3e7e5c800be8c89ea98947f418b2f6ea259fb6ee/yarl-1.24.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:08d3a33218e0c64393e7610284e770409a9c31c429b078bcb24096ed0a783b8f", size = 105322 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/47/9b/b57afb22b386ae87ac9940f09878b98d8c333f89113e6fc96fcf4ca9eb64/yarl-1.24.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:5d699376c4ca3cba49bbfae3a05b5b70ded572937171ce1e0b8d87118e2ba294", size = 99057 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/4f/06348c27c8389256c313e8a57d796808fc0264c915dd5e7cfd3c0e314dc7/yarl-1.24.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a1cab588b4fa14bea2e55ebea27478adfb05372f47573738e1acc4a36c0b05d2", size = 113502 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5f/1c/284f307b298e4a17b7943b07d9d7ecc4151537f8d137ba51f3bb6c31ca20/yarl-1.24.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:ec87ccc31bd21db7ad009d8572c127c1000f268517618a4cc09adba3c2a7f21c", size = 105253 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c8/bf/0de123bec8619e45c80cbded9085f61b5b4a9eddb8abe6d25d28ee1ec866/yarl-1.24.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:d1dd47a22843b212baa8d74f37796815d43bd046b42a0f41e9da433386c3136b", size = 111345 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/90/af/0248eb065e51129d2a9b2436cd1b5c772c19a6b04e5b6a186955671e3319/yarl-1.24.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7b54b9c67c2b06bd7b9a77253d242124b9c95d2c02def5a1144001ee547dd9d5", size = 106558 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/21/3c/f960d7a65ef97d8ba9b424fb5128796a4bc710fc6df2ddbbd7dfdc3bbd20/yarl-1.24.2-cp311-cp311-win_amd64.whl", hash = "sha256:f8fdbcff8b2c7c9284e60c196f693588598ddcee31e11c18e14949ce44519d45", size = 92808 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/03/1a/49fb03750e4de4d2284cd5b885a383133c34eef45bd59631b2bb8b7e81e8/yarl-1.24.2-cp311-cp311-win_arm64.whl", hash = "sha256:b32c37a7a337e90822c45797bf3d79d60875cfcccd3ecc80e9f453d87026c122", size = 87610 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/da/866bcb01076ba49d2b42b309867bed3826421f1c479655eb7a607b44f20b/yarl-1.24.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b975866c184564c827e0877380f0dae57dcca7e52782128381b72feff6dfceb8", size = 129957 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bf/1d/fcefb70922ea2268a8971d8e5874d9a8218644200fb8465f1dcad55e6851/yarl-1.24.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3b075301a2836a0e297b1b658cb6d6135df535d62efefdd60366bd589c2c82f2", size = 92164 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/29/b6/170e2b8d4e3bc30e6bfdcca53556537f5bf595e938632dfcb059311f3ff6/yarl-1.24.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8ae44649b00947634ab0dab2a374a638f52923a6e67083f2c156cd5cbd1a881d", size = 91688 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/a5/c9f655d5553ea0b99fdac9d6a99ad3f9b3e73b8e5758bb46f58c9831f74c/yarl-1.24.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:507cc19f0b45454e2d6dcd62ff7d062b9f77a2812404e62dbdaec05b50faa035", size = 102902 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/5d/bc/6b9664d815d79af4ee553337f9d606c56bbf269186ada9172de45f1b5f60/yarl-1.24.2-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c4c17bad5a530912d2111825d3f05e89bab2dd376aaa8cbc77e449e6db63e576", size = 97931 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/98/ec/32ba48acae30fecd60928f5791188b80a9d6ee3840507ffda29fecd37b71/yarl-1.24.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f5f0cbb112838a4a293985b6ed73948a547dadcc1ba6d2089938e7abdedceef8", size = 111030 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/5a/6f4cd081e5f4934d2ae3a8ef4abe3afacc010d26f0035ee91b35cd7d7c37/yarl-1.24.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5ec8356b8a6afcf81fc7aeeef13b1ff7a49dec00f313394bbb9e83830d32ccd7", size = 110392 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/da/323a01c349bd5fb01bb6652e314d9bb218cee630a736bdb810ad50e4013f/yarl-1.24.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7e7ebcdef69dec6c6451e616f32b622a6d4a2e92b445c992f7c8e5274a6bbc4c", size = 105612 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7c/80/264ab684f181e1a876389374519ff05d10248725535ae2ac4e8ac4e563d6/yarl-1.24.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:47a55d6cf6db2f401017a9e96e5288844e5051911fb4e0c8311a3980f5e59a7d", size = 104487 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/07/efabe5df87e96d7ad5959760b888344be48cd6884db127b407c6b5503adc/yarl-1.24.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3065657c80a2321225e804048597ad55658a7e76b32d6f5ee4074d04c50401db", size = 102333 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/44/0c/bcf7c42603e1009295f586d8890f2ba032c8b53310e815adf0a202c73d9f/yarl-1.24.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = 
"sha256:cb84b80d88e19ede158619b80813968713d8d008b0e2497a576e6a0557d50712", size = 99025 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4f/82/84482ab1a57a0f21a08afe6a7004c61d741f8f2ecc3b05c321577c612164/yarl-1.24.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:990de4f680b1c217e77ff0d6aa0029f9eb79889c11fb3e9a3942c7eba29c1996", size = 110507 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/8d/a546ba1dfe1b0f290e05fef145cd07614c0f15df1a707195e512d1e39d1d/yarl-1.24.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:abb8ec0323b80161e3802da3150ef660b41d0e9be2048b76a363d93eee992c2b", size = 103719 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/b6/267f2a09213138473adfce6b8a6e17791d7fee70bd4d9003218e4dec58b0/yarl-1.24.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e7977781f83638a4c73e0f88425563d70173e0dfd90ac006a45c65036293ee3c", size = 110438 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/48/2d/1c8d89c7c5f9cad9fb2902445d94e2ab1d7aa35de029afbb8ae95c42d00f/yarl-1.24.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e30dd55825dc554ec5b66a94953b8eda8745926514c5089dfcacecb9c99b5bd1", size = 105719 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/25/722e3b93bd687009afb2d59a35e13d30ddd8f80571445bb0c4e4ce26ec66/yarl-1.24.2-cp312-cp312-win_amd64.whl", hash = "sha256:7dafe10c12ddd4d120d528c4b5599c953bd7b12845347d507b95451195bb6cad", size = 92901 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/47/4486ccfb674c04854a1ef8aa77868b6a6f765feaf69633409d7ca4f02cb8/yarl-1.24.2-cp312-cp312-win_arm64.whl", hash = "sha256:044a09d8401fcf8681977faef6d286b8ade1e2d2e9dceda175d1cfa5ca496f30", size = 87229 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/62/fcf0ce677f17e5c471c06311dd25964be38a4c586993632910d2e75278bc/yarl-1.24.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:491ac9141decf49ee8030199e1ee251cdff0e131f25678817ff6aa5f837a3536", size = 128978 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/d3/58/8e63299bb71ed61a834121d9d3fe6c9fcf2a6a5d09754ff4f20f2d20baf5/yarl-1.24.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e89418f65eda18f99030386305bd44d7d504e328a7945db1ead514fbe03a0607", size = 91733 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/24/16748d5dab6daec8b0ed81ccec639a1cded0f18dcc62a4f696b4fe366c37/yarl-1.24.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cdfcce633b4a4bb8281913c57fcafd4b5933fbc19111a5e3930bbd299d6102f1", size = 91113 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/66/b63fff7b71211e866624b21432d5943cbb633eb0c2872d9ee3070648f22c/yarl-1.24.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:863297ddede92ee49024e9a9b11ecb59f310ca85b60d8537f56bed9bbb5b1986", size = 103899 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/ac/ba1974b8533909636f7733fe86cf677e3619527c3c2fa913e0ea89c48757/yarl-1.24.2-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:374423f70754a2c96942ede36a29d37dc6b0cb8f92f8d009ddf3ed78d3da5488", size = 97862 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/a5/123ac993b5c2ba6f554a140305620cb8f150fa543711bbc49be3ec0a65a4/yarl-1.24.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:33a29b5d00ccbf3219bb3e351d7875739c19481e030779f48cc46a7a71681a9b", size = 111060 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/23/37/c472d3af3509688392134a88a825276770a187f1daa4de3f6dc0a327a751/yarl-1.24.2-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a9532c57211730c515341af11fef6e9b61d157487272a096d0c04da445642592", size = 110613 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/df/88/09c28dad91e662ccfaa1b78f1c57badde74fc9d0b23e74aef644750ecd73/yarl-1.24.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91e72cf093fd833483a97ee648e0c053c7c629f51ff4a0e7edd84f806b0c5617", size = 107012 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/ab/9d4f69d571a94f4d112fa7e2e007200f5a54d319f58c82ac7b7baa61f5c6/yarl-1.24.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b3177bc0a768ef3bacceb4f272632990b7bea352f1b2f1eee9d6d6ff16516f92", size = 105887 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8e/9a/000b2b66c0d772a499fc531d21dab92dfeb73b640a12eed6ba89f49bb2d0/yarl-1.24.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e196952aacaf3b232e265ff02980b64d483dc0972bd49bcb061171ff22ac203a", size = 103620 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/7c/7c1050f73450fbdaa3f0c72017059f00ce5e13366692f3dba25275a1083d/yarl-1.24.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:204e7a61ce99919c0de1bf904ab5d7aa188a129ea8f690a8f76cfb6e2844dc44", size = 100599 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/b1/29e5756b3926705f5f6089bd5b9f50a56eaac550da6e260bf713ead44d04/yarl-1.24.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4b156914620f0b9d78dc1adb3751141daee561cfec796088abb89ed49d220f1a", size = 110604 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a3/4b/8415bc96e9b150cde942fbac9a8182985e58f40ce5c54c34ed015407d3ee/yarl-1.24.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8372a2b976cf70654b2be6619ab6068acabb35f724c0fda7b277fbf53d66a5cf", size = 105161 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/d4/cde059abfa229553b7298a2eadde2752e723d50aeedaef86ce59da2718ee/yarl-1.24.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f9a1e9b622ca284143aab5d885848686dcd85453bb1ca9abcdb7503e64dc0056", size = 110619 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/e7/2c/d6a6c9a61549f7b6c7e6dc6937d195bcf069582b47b7200dcd0e7b256acf/yarl-1.24.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:810e19b685c8c3c5862f6a38160a1f4e4c0916c9390024ec347b6157a45a0992", size = 107362 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/dd/3ae5fe417e9d1c353a548553326eb9935e76b6b727161563b424cc296df3/yarl-1.24.2-cp313-cp313-win_amd64.whl", hash = "sha256:7d37fb7c38f2b6edab0f845c4f85148d4c44204f52bc127021bd2bc9fdbf1656", size = 92667 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/10/cc/a7beb239f78f27fca1b053c8e8595e4179c02e62249b4687ec218c370c50/yarl-1.24.2-cp313-cp313-win_arm64.whl", hash = "sha256:1e831894be7c2954240e49791fa4b50c05a0dc881de2552cfe3ffd8631c7f461", size = 87069 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/40/0e/e08087695fc12789263821c5dc0f8dc52b5b17efd0887cacf419f8a43ba3/yarl-1.24.2-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:f9312b3c02d9b3d23840f67952913c9c8721d7f1b7db305289faefa878f364c2", size = 129670 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/98/ab4b5ed1b1b5cd973c8a3eb994c3a6aefb6ce6d399e21bb5f0316c33815c/yarl-1.24.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:a4f4d6cd615823bfc7fb7e9b5987c3f41666371d870d51058f77e2680fbe9630", size = 91916 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/b1/5297bb6a7df4782f7605bffc43b31f5044070935fbbcaa6c705a07e6ac65/yarl-1.24.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0c3063e5c0a8e8e62fae6c2596fa01da1561e4cd1da6fec5789f5cf99a8aefd8", size = 91625 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/02/a7/45baabfff76829264e623b185cff0c340d7e11bf3e1cd9ea37e7d17934bd/yarl-1.24.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fecd17873a096036c1c87ab3486f1aef7f269ada7f23f7f856f93b1cc7744f14", size = 104574 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/f3/40/3a5ab144d3d650ca37d4f4b57e56169be8af3ca34c448793e064b30baaed/yarl-1.24.2-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a46d1ab4ba4d32e6dc80daf8a28ce0bd83d08df52fbc32f3e288663427734535", size = 97534 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9c/b5/5658fef3681fb5776b4513b052bec750009f47b3a592251c705d75375798/yarl-1.24.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:73e68edf6dfd5f73f9ca127d84e2a6f9213c65bdffb736bda19524c0564fcd14", size = 111481 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/4c/06/fdcd7dde037f00866dce123ed4ba23dba94beb56fc4cf561668d27be37f2/yarl-1.24.2-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a296ca617f2d25fbceafb962b88750d627e5984e75732c712154d058ae8d79a3", size = 111529 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/53/d81269aaafccea0d33396c03035de997b743f11e648e6e27a0df99c72980/yarl-1.24.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e51b2cf5ec89a8b8470177641ed62a3ba22d74e1e898e06ad53aa77972487208", size = 107338 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/04/23049463f729bd899df203a7960505a75333edd499cda8aa1d5a82b64df5/yarl-1.24.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:310fc687f7b2044ec54e372c8cbe923bb88f5c37bded0d3079e5791c2fc3cf50", size = 106147 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/14/18/04a4b5830b43ed5e4c5015b40e9f6241ad91487d71611061b4e111d6ac80/yarl-1.24.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:297a2fe352ecf858b30a98f87948746ec16f001d279f84aebdbd3bd965e2f1bd", size = 104272 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5a/f7/8cffdf319aee7a7c1dbd07b61d91c3e3fda460c7a93b5f93e445f3806c4c/yarl-1.24.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = 
"sha256:2a263e76b97bc42bdcd7c5f4953dec1f7cd62a1112fa7f869e57255229390d67", size = 99962 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d7/39/b3cce3b7dbef64ac700ad4cea156a207d01bede0f507587616c364b5468e/yarl-1.24.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:822519b64cf0b474f1a0aaef1dc621438ea46bb77c94df97a5b4d213a7d8a8b1", size = 111063 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a1/ea/100818505e7ebf165c7242ff17fdf7d9fee79e27234aeca871c1082920d7/yarl-1.24.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b6067060d9dc594899ba83e6db6c48c68d1e494a6dab158156ed86977ca7bcb1", size = 105438 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/d2/e075a0b32aa6625087de9e653087df0759fed5de4a435fef594181102a77/yarl-1.24.2-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:0063adad533e57171b79db3943b229d40dfafeeee579767f96541f106bac5f1b", size = 111458 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/5c/ceea7ba98b65c8eb8d947fdc52f9bedfcd43c6a57c9e3c90c17be8f324a3/yarl-1.24.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ee8e3fb34513e8dc082b586ef4910c98335d43a6fab688cd44d4851bacfce3e8", size = 107589 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/d9/5582d57e2b2db9b85eb6663a22efdd78e08805f3f5389566e9fcad254d1b/yarl-1.24.2-cp314-cp314-win_amd64.whl", hash = "sha256:afb00d7fd8e0f285ca29a44cc50df2d622ff2f7a6d933fa641577b5f9d5f3db0", size = 94424 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/92/10/7dc07a0e22806a9280f42a57361395506e800c64e22737cd7b0886feab42/yarl-1.24.2-cp314-cp314-win_arm64.whl", hash = "sha256:68cf6eacd6028ef1142bc4b48376b81566385ca6f9e7dde3b0fa91be08ffcb57", size = 88690 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/13/d5b8e2c8667db955bcb3de233f18798fefe7edf1d7429c2c9d4f9c401114/yarl-1.24.2-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:221ce1dd921ac4f603957f17d7c18c5cc0797fbb52f156941f92e04605d1d67b", size = 136248 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/de/46/a4a97c05c9c9b8fd266bb2a0df12992c7fbd02391eb9640583411b6dab32/yarl-1.24.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5f3224db28173a00d7afacdee07045cc4673dfab2b15492c7ae10deddbece761", size = 95084 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/b2/845cf2074a015e6fe0d0808cf1a2d9e868386c4220d657ebd8302b199043/yarl-1.24.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c557165320d6244ebe3a02431b2a201a20080e02f41f0cfa0ccc47a183765da8", size = 95272 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/16/e69d4aa244aef45235ddfebc0e04036a6829842bc5a6a795aedc6c998d23/yarl-1.24.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:904065e6e85b1fa54d0d87438bd58c14c0bad97aad654ad1077fd9d87e8478ed", size = 101497 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/15/94/c07107715d621076863ee88b3ddf183fa5e9d4aba5769623c9979828410a/yarl-1.24.2-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:8cec2a38d70edc10e0e856ceda886af5327a017ccbde8e1de1bd44d300357543", size = 94002 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a9/35/fc1bbdd895b5e4010b8fdd037f7ed3aa289d3863e08231b30231ca9a0815/yarl-1.24.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e7484b9361ed222ee1ca5b4337aa4cbdcc4618ce5aff57d9ef1582fd95893fc0", size = 106524 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1f/f2/32b66d0a4ba47c296cf86d03e2c67bff58399fe6d6d84d5205c04c66cc6d/yarl-1.24.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:84f9670b89f34db07f81e53aee83e0b938a3412329d51c8f922488be7fcc4024", size = 106165 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/95/47/37cb5ff50c5e825d4d38e81bb04d1b7e96bf960f7ab89f9850b162f3f114/yarl-1.24.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:abb2759733d63a28b4956500a5dd57140f26486c92b2caedfb964ab7d9b79dbf", size = 103010 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6f/d2/4597912315096f7bb359e46e13bf8b60994fcbb2db29b804c0902ef4eff5/yarl-1.24.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:081c2bf54efe03774d0311172bc04fedf9ca01e644d4cd8c805688e527209bdc", size = 101128 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/d5/c8e86e120521e646013d02a8e3b8884392e28494be8f392366e50d208efc/yarl-1.24.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:86746bef442aa479107fe28132e1277237f9c24c2f00b0b0cf22b3ee0904f2bb", size = 101382 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/98/70b229236118f89dbeb739b76f10225bbf53b5497725502594c9a01d699a/yarl-1.24.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:2d07d21d0bc4b17558e8de0b02fbfdf1e347d3bb3699edd00bb92e7c57925420", size = 95964 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/f8/56c386981e3c8648d279fdef2397ffec577e8320fd5649745e34d54faeb7/yarl-1.24.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:4fb1ac3fc5fecd8ae7453ea237e4d22b49befa70266dfe1629924245c21a0c7f", size = 106204 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1a/1e/765afe97811ca35933e2a7de70ac57b1997ea2e4ee895719ee7a231fb7e5/yarl-1.24.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:4da31a5512ed1729ca8d8aacde3f7faeb8843cde3165d6bcf7f88f74f17bb8aa", size = 101510 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ee/78/393913f4b9039e1edd09ae8a9bbb9d539be909a8abf6d8a2084585bed4b7/yarl-1.24.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:533ded4dceb5f1f3da7906244f4e82cf46cfd40d84c69a1faf5ac506aa65ecbe", size = 105584 }, - { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/78/87/deb17b7049bbe74ea11a713b86f8f27800cc1c8648b0b797243ebb4830ba/yarl-1.24.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7b3a85525f6e7eeabcfdd372862b21ee1915db1b498a04e8bf0e389b607ff0bd", size = 103410 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/be/f9f7594e23b5b93affff0318e4593c1920331bcaefda326cabcad94296a1/yarl-1.24.2-cp314-cp314t-win_amd64.whl", hash = "sha256:a7624b1ca46ca5d7b864ef0d2f8efe3091454085ee1855b4e992314529972215", size = 102980 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/65/a4/ba80dccd3593ff1f01051a818694d07b58cb8232677ee9a22a5a1f93a9fc/yarl-1.24.2-cp314-cp314t-win_arm64.whl", hash = "sha256:e434a45ce2e7a947f951fc5a8944c8cc080b7e59f9c50ae80fd39107cf88126d", size = 91219 }, - { url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/4d/4b880086bd0d3e034d25647be1d830afc3e3f610e98c4ab3490af6b1b6d5/yarl-1.24.2-py3-none-any.whl", hash = "sha256:2783d9226db8797636cd6896e4de81feed252d1db72265686c9558d97a4d94b9", size = 53576 }, -]