From df5dc130bdb8b94f1909e3a7c80944a43b04ad6d Mon Sep 17 00:00:00 2001 From: Shudipto Trafder Date: Sat, 13 Jun 2026 20:50:27 +0600 Subject: [PATCH 1/2] feat: add evaluation report JSON structure and tests for import integrity - Added a new evaluation report JSON file to capture evaluation results and metadata. - Updated test files to ensure no dead imports from removed module paths in documentation and examples. - Implemented a regression guard for import order to prevent ImportError issues. - Refactored imports in example files to align with the new module structure. --- CLAUDE.md | 182 ++++ README.md | 87 +- agentflow/core/__init__.py | 71 +- agentflow/core/graph/__init__.py | 2 +- agentflow/core/skills/__init__.py | 4 +- agentflow/qa/evaluation/__init__.py | 4 +- .../qa/evaluation/collectors/__init__.py | 2 +- agentflow/qa/evaluation/config/__init__.py | 4 +- agentflow/qa/evaluation/criteria/__init__.py | 2 +- agentflow/qa/evaluation/dataset/__init__.py | 2 +- agentflow/qa/evaluation/evaluator.py | 4 +- agentflow/qa/evaluation/reporters/manager.py | 4 +- .../evaluation/simulators/user_simulator.py | 2 +- agentflow/qa/evaluation/testing.py | 4 +- agentflow/qa/testing/__init__.py | 4 +- eval_reports/s-file_20260613_204737.html | 954 ++++++++++++++++++ eval_reports/s-file_20260613_204737.json | 123 +++ .../evaluation/test1/test_weather_agent.py | 5 +- examples/evaluation/test_graph/__init__.py | 2 +- examples/github-mcp/git_mcp.py | 2 +- examples/tool-decorator/README.md | 2 +- tests/test_docs_imports.py | 115 +++ tests/test_import_order.py | 66 ++ 23 files changed, 1564 insertions(+), 83 deletions(-) create mode 100644 CLAUDE.md create mode 100644 eval_reports/s-file_20260613_204737.html create mode 100644 eval_reports/s-file_20260613_204737.json create mode 100644 tests/test_docs_imports.py create mode 100644 tests/test_import_order.py diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..ed462493 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,182 @@ 
+# agentflow (core Python library) — Engineering Guide + +This file documents the **core Python framework** only (`10xscale-agentflow`, the package that +lives in this folder). For the API/CLI, TS client, docs, or playground, see the CLAUDE.md in +their respective folders and the workspace-root `CLAUDE.md` for the monorepo overview. + +- Package name (PyPI): `10xscale-agentflow` +- Version: `0.7.5.1` (single source of truth: `pyproject.toml`) +- Requires: Python >= 3.12 +- Importable top-level package lives at `agentflow/agentflow/` (this folder is the repo root; + the importable package is the nested `agentflow/` directory). + +## What this package is + +A graph-based orchestration engine for multi-agent LLM systems. It is **LLM-agnostic**: you bring +the provider SDK (OpenAI / Google GenAI), and Agentflow provides the workflow engine, state, +persistence, tools, memory, evaluation, and event publishing. Inspired by LangGraph but simpler. + +## Working principles for this codebase + +- **Read before writing.** The public API is large and re-exported through many `__init__.py` + files. Confirm the real export path before referencing a symbol (see Import Map below). +- **Examples are the source of truth**, not the README. `examples/` uses current import paths; + the README and several docstrings still show pre-refactor paths (see Known Doc Drift). +- **Surgical edits.** This is `Development Status :: 5 - Production/Stable`. Don't refactor + module boundaries or rename exports without checking every `__init__.py` that re-exports them. +- **Keep coverage green.** `pytest` enforces `--cov-fail-under=70`. New code needs tests. +- **Optional deps are optional.** Provider SDKs, MCP, Postgres, Redis, Qdrant, Mem0, Kafka, + RabbitMQ, OTEL, a2a are all extras. Guard imports; never make core import a hard optional dep. + +## Package layout (real, current) + +The importable package is `agentflow/agentflow/`. 
Top-level subpackages: + +| Subpackage | What lives there | +|---|---| +| `core/` | The engine. `graph/` (StateGraph, Agent, ToolNode, CompiledGraph, Node, Edge), `state/` (AgentState, Message, content blocks, reducers, context managers), `llm/` (provider detection + client factory + `call_llm`), `skills/` (dynamic skill injection), `exceptions/` | +| `storage/` | `checkpointer/` (InMemory, Pg), `store/` (vector/long-term memory: Qdrant, Mem0, embeddings), `media/` (multimodal media processing, offload, resolvers, stores) | +| `runtime/` | `adapters/llm/` (OpenAI / OpenAI-Responses / Google GenAI response converters), `publisher/` (Console, Redis, Kafka, RabbitMQ, OTEL, Composite), `protocols/` (a2a, acp) | +| `prebuilt/` | `agent/` (React, RAG, PlanActReflect, SupervisorTeam, Swarm, StructuredOutput), `tools/` (calculator, fetch, files, handoff, memory, search) | +| `qa/` | `evaluation/` (criteria, datasets, evaluator, reporters, simulators) and `testing/` (TestAgent, mocks, quick tests) | +| `utils/` | constants (START/END/ResponseGranularity), `tool` decorator, `convert_messages`, callbacks, validators, id generators, background tasks, graceful shutdown | + +## Import Map (verified) — this is the part that bites people + +The package was restructured into `core/`, `storage/`, `runtime/`, `qa/`. **There are no +top-level `agentflow.graph`, `agentflow.state`, `agentflow.checkpointer`, `agentflow.skills`, +`agentflow.evaluation`, `agentflow.testing`, `agentflow.adapters`, or `agentflow.publisher` +shims.** Those paths raise `ModuleNotFoundError`. Use the canonical paths: + +```python +# Graph engine +from agentflow.core.graph import Agent, StateGraph, ToolNode, CompiledGraph, Node, Edge, RetryConfig +# or the aggregate: from agentflow.core import StateGraph, Agent, ToolNode, AgentState, Message, ... 
+ +# State and messages +from agentflow.core.state import AgentState, Message, TextBlock, ToolResultBlock, add_messages + +# LLM client/provider helpers +from agentflow.core.llm import call_llm, create_llm_client, detect_provider + +# Skills +from agentflow.core.skills import SkillConfig, SkillMeta, SkillsRegistry + +# Persistence +from agentflow.storage.checkpointer import InMemoryCheckpointer, PgCheckpointer, BaseCheckpointer +# Vector / long-term memory +from agentflow.storage.store import QdrantStore, Mem0Store, MemoryConfig, AgentMemoryConfig + +# Publishers / converters +from agentflow.runtime.publisher import ConsolePublisher, RedisPublisher, KafkaPublisher, RabbitMQPublisher +from agentflow.runtime.adapters.llm import OpenAIConverter, GoogleGenAIConverter, OpenAIResponsesConverter + +# Prebuilt +from agentflow.prebuilt.agent import ReactAgent, RAGAgent, SwarmAgent, SupervisorTeamAgent +from agentflow.prebuilt.tools import safe_calculator, fetch_url, create_handoff_tool, memory_tool + +# QA +from agentflow.qa.evaluation import AgentEvaluator, EvalConfig, EvalCase, EvalSet +from agentflow.qa.testing import TestAgent, MockMCPClient, MockToolRegistry + +# Utils +from agentflow.utils import tool, convert_messages, Command +from agentflow.utils.constants import START, END, ResponseGranularity +``` + +Note: the root `agentflow/__init__.py` is intentionally empty. Importing the package does not +eagerly pull in submodules; import the subpackage you need. + +## Core concepts + +**StateGraph -> CompiledGraph.** Build with `StateGraph()`, `add_node`, `add_edge`, +`add_conditional_edges`, `set_entry_point`; then `.compile(...)` returns a `CompiledGraph`. +`compile()` accepts: `checkpointer`, `store`, `media_store`, `interrupt_before`, +`interrupt_after`, `callback_manager`, `shutdown_timeout` (default 30.0). 
+ +**CompiledGraph execution API:** `invoke` / `ainvoke` (run), `stream` / `astream` (incremental), +`stop` / `astop` (interrupt), `override_node`, `attach_remote_tools`, `generate_graph`, `aclose`. +- Input shape: `{"messages": [Message...]}`. +- Config keys: `user_id`, `thread_id`, `run_id`, `recursion_limit` (default 25). +- `response_granularity`: `LOW` (messages only, default), `PARTIAL` (context+summary+messages), + `FULL` (full state). + +**Agent class** (`agentflow.core.graph.Agent`) — the high-level node that wraps LLM calls, +message conversion, and tool integration. Key constructor params: +`model` (required), `output_type="text"`, `system_prompt`, `tool_node` (name or ToolNode), +`extra_messages`, `trim_context`, `tools_tags`, `reasoning_config`, `skills`, `memory`, +`retry_config` (default True), `fallback_models`, `multimodal_config`, `output_schema`. + +**Model strings and providers.** `detect_provider(model)` infers the provider from a +`"provider/model"` prefix or the model name. **It only resolves to `"google"` or `"openai"`.** +Examples: `"gemini/gemini-2.5-flash"`, `"openai/gpt-4o"`, `"gpt-4o-mini"`. Vertex AI is selected +via `use_vertex_ai=True`. There is **no native Anthropic client** in the LLM factory despite +Anthropic/Claude appearing in marketing copy; Claude is reachable only via an OpenAI-compatible +endpoint or the custom-functions approach. Verify before promising native Claude support. + +**ToolNode.** `ToolNode(tools, client=None, pass_user_info_to_mcp=False)`. First positional arg +is `tools` (an iterable of callables). `client` is an MCP client (fastmcp/mcp). Tools run in +**parallel** when the LLM requests several at once. Define tools as plain functions; injectable +params (`tool_call_id`, `state`, `config`, plus InjectQ-provided deps) are filled automatically. + +**State and Message.** `AgentState` is a Pydantic model; subclass it for custom fields. +`Message.text_message(content, role="user")` is the text factory. 
`Message.tool_message(...)` and +`Message.image_message(...)` also exist. There is **no `Message.from_text`** (the README used it before this change; that was +wrong). Content is a list of typed blocks (TextBlock, ImageBlock, ToolCallBlock, ToolResultBlock, +ReasoningBlock, etc.). Reducers (`add_messages`, `replace_messages`, `append_items`) control how +state lists merge. + +**Persistence.** `InMemoryCheckpointer` for dev/tests. `PgCheckpointer` (Postgres + Redis dual +layer) for production; requires `[pg_checkpoint]`. + +**Memory / store.** 3-layer model: working state -> checkpointer (hot/durable) -> vector store +(Qdrant/Mem0) for long-term. `MemoryConfig` / `AgentMemoryConfig` drive it; `memory_tool` and +`create_memory_preload_node` wire it into a graph. + +**Skills.** `SkillConfig(skills_dir=...)` adds dynamic skill injection. Two modes: `on-demand` +(LLM calls `set_skill()` from a trigger table) and `session` (preload a fixed skill from a state +field via `preload_from`). + +**Publishers.** Emit execution events to Console, Redis Pub/Sub, Kafka, RabbitMQ, or OTEL. +`CompositePublisher` fans out to several. OTEL publisher provides tracing (`setup_tracing`). + +**QA.** `agentflow.qa.evaluation` is a full eval framework (criteria incl. LLM-as-judge, +trajectory matching, rubric, safety, hallucination; datasets; console/JSON/HTML/JUnit reporters; +user simulators). `agentflow.qa.testing` provides `TestAgent`, `MockMCPClient`, `MockToolRegistry`, +`TestContext` for unit-testing graphs without live LLMs. + +## Development workflow + +This repo root is `agentflow/`; the importable package is `agentflow/agentflow/`. A `.venv` is +already present. + +```bash +# from this folder (agentflow/) +.venv/bin/python -m pytest # full suite (enforces coverage >= 70%) +.venv/bin/python -m pytest tests/graph # one area +ruff check . && ruff format . 
# lint + format (line-length 100, py312) +# editable install with extras for local dev: +pip install -e ".[google-genai,openai,mcp,pg_checkpoint]" +``` + +- Tests live in `tests/` (mirrors package layout: `graph/`, `state/`, `storage/`, `store/`, + `checkpointer/`, `publisher/`, `prebuilt/`, `evaluation/`, `testing/`, plus `chaos/`, + `benchmarks/`, `integration/`). Markers: `asyncio`, `integration` (needs real DBs), `slow`. +- Lint config is in `pyproject.toml` `[tool.ruff]` (broad rule set; per-file ignores for a few + large modules). `mypy` and `bandit` are also configured there. +- `examples/` is organized by feature (react, rag, swarm, supervisor_team, memory, skills, mcp, + a2a_sdk, evaluation, testing, multimodal, structured_output, ...). Use these as the canonical usage examples. + +## Known doc drift (do not copy from these without checking) + +- **README.md import paths were stale before this change.** It imported `agentflow.graph`, `agentflow.state`, + `agentflow.checkpointer` — all removed. Real paths are `agentflow.core.*` / `agentflow.storage.*`; the README examples now use them. +- **`Message.from_text` does not exist** (the README used it until this change). Use `Message.text_message`. +- **`ToolNode(functions=...)`** keyword is wrong (the README MCP example used it until this change). The param is `tools`. +- A few `examples/` files still use dead paths (`agentflow.state.message`, `agentflow.graph.tool_node`, + `agentflow.evaluation.*`). Treat those specific files as broken until fixed. +- README/docstrings imply native Anthropic support; the LLM factory only builds google/openai + clients. See Model strings above. + +When you touch any of the above, prefer fixing the doc/example to match the code rather than the +reverse, unless the export path itself is the bug. 
diff --git a/README.md b/README.md index d0776d0c..f7cb0290 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # 10xScale Agentflow -![PyPI](https://img.shields.io/pypi/v/agentflow?color=blue) +![PyPI](https://img.shields.io/pypi/v/10xscale-agentflow?color=blue) ![License](https://img.shields.io/github/license/10xhub/agentflow) -![Python](https://img.shields.io/pypi/pyversions/agentflow) +![Python](https://img.shields.io/pypi/pyversions/10xscale-agentflow) [![Coverage](https://img.shields.io/badge/coverage-74%25-yellow.svg)](#) **10xScale Agentflow** is a lightweight Python framework for building intelligent agents and orchestrating multi-agent workflows. It's an **LLM-agnostic orchestration tool** that works with native SDKs from OpenAI, Google Gemini, Anthropic Claude, or any other provider. You choose your LLM library; 10xScale Agentflow provides the workflow orchestration. @@ -17,7 +17,7 @@ - **🤖 Multi-Agent Workflows** - Build complex agent systems with your choice of orchestration patterns - **📊 Structured Responses** - Get `content`, optional `thinking`, and `usage` in a standardized format - **🌊 Streaming Support** - Real-time incremental responses with delta updates -- **🔧 Tool Integration** - Native support for function calling, MCP, Composio, and LangChain tools with **parallel execution** +- **🔧 Tool Integration** - Native support for function calling and MCP tools with **parallel execution** - **🔀 LangGraph-Inspired Engine** - Flexible graph orchestration with nodes, conditional edges, and control flow - **💾 State Management** - Built-in persistence with in-memory and PostgreSQL+Redis checkpointers - **🔄 Human-in-the-Loop** - Pause/resume execution for approval workflows and debugging @@ -51,8 +51,6 @@ Agentflow stands out with powerful features designed for production-grade AI app - Remote tools (via TypeScript SDK) - Agent handoff tools (multi-agent collaboration) - MCP (Model Context Protocol) - - LangChain tools - - Composio tools ### 🎯 
**Intelligent Context Management** @@ -76,7 +74,7 @@ Agentflow stands out with powerful features designed for production-grade AI app - Kafka - RabbitMQ - Redis Pub/Sub - - OpenTelemetry (planned) + - OpenTelemetry - Custom publishers ### 🔄 **Advanced Execution Features** @@ -155,19 +153,21 @@ pip install 10xscale-agentflow[mcp] # Google GenAI adapter (google-genai SDK) pip install 10xscale-agentflow[google-genai] -# Composio tools (adapter) -pip install 10xscale-agentflow[composio] +# OpenAI adapter (openai SDK) +pip install 10xscale-agentflow[openai] -# LangChain tools (registry-based adapter) -pip install 10xscale-agentflow[langchain] +# Vector / long-term memory stores +pip install 10xscale-agentflow[qdrant] # Qdrant store +pip install 10xscale-agentflow[mem0] # Mem0 store # Individual publishers pip install 10xscale-agentflow[redis] # Redis publisher pip install 10xscale-agentflow[kafka] # Kafka publisher pip install 10xscale-agentflow[rabbitmq] # RabbitMQ publisher +pip install 10xscale-agentflow[otel] # OpenTelemetry tracing # Multiple extras -pip install 10xscale-agentflow[pg_checkpoint,mcp,google-genai,composio,langchain] +pip install 10xscale-agentflow[pg_checkpoint,mcp,google-genai,openai] ``` ### Environment Setup @@ -204,8 +204,8 @@ If you have a `.env` file, it will be auto-loaded (via `python-dotenv`). 
Here's a complete tool-calling agent in under 30 lines: ```python -from agentflow.graph import Agent, StateGraph, ToolNode -from agentflow.state import AgentState, Message +from agentflow.core.graph import Agent, StateGraph, ToolNode +from agentflow.core.state import AgentState, Message from agentflow.utils.constants import END @@ -220,7 +220,7 @@ graph = StateGraph() graph.add_node("MAIN", Agent( model="gemini/gemini-2.5-flash", system_prompt=[{"role": "system", "content": "You are a helpful assistant."}], - tool_node_name="TOOL" + tool_node="TOOL" )) graph.add_node("TOOL", ToolNode([get_weather])) @@ -259,12 +259,11 @@ For maximum control, use custom functions instead of the Agent class: from dotenv import load_dotenv from openai import AsyncOpenAI -from agentflow.checkpointer import InMemoryCheckpointer -from agentflow.graph import StateGraph, ToolNode -from agentflow.state.agent_state import AgentState -from agentflow.utils import Message +from agentflow.core.graph import StateGraph, ToolNode +from agentflow.core.state import AgentState, Message +from agentflow.storage.checkpointer import InMemoryCheckpointer +from agentflow.utils import convert_messages from agentflow.utils.constants import END -from agentflow.utils.converter import convert_messages load_dotenv() client = AsyncOpenAI() @@ -353,7 +352,7 @@ graph.set_entry_point("MAIN") # Compile and run app = graph.compile(checkpointer=InMemoryCheckpointer()) -inp = {"messages": [Message.from_text("What's the weather in New York?")]} +inp = {"messages": [Message.text_message("What's the weather in New York?")]} config = {"thread_id": "12345", "recursion_limit": 10} res = app.invoke(inp, config=config) @@ -427,12 +426,11 @@ from dotenv import load_dotenv from fastmcp import Client from openai import AsyncOpenAI -from agentflow.checkpointer import InMemoryCheckpointer -from agentflow.graph import StateGraph, ToolNode -from agentflow.state.agent_state import AgentState -from agentflow.utils import Message +from 
agentflow.core.graph import StateGraph, ToolNode +from agentflow.core.state import AgentState, Message +from agentflow.storage.checkpointer import InMemoryCheckpointer +from agentflow.utils import convert_messages from agentflow.utils.constants import END -from agentflow.utils.converter import convert_messages load_dotenv() client = AsyncOpenAI() @@ -451,7 +449,7 @@ config = { client_http = Client(config) # Initialize ToolNode with MCP client -tool_node = ToolNode(functions=[], client=client_http) +tool_node = ToolNode([], client=client_http) async def main_agent(state: AgentState): @@ -509,7 +507,7 @@ graph.set_entry_point("MAIN") app = graph.compile(checkpointer=checkpointer) # Run the agent -inp = {"messages": [Message.from_text("Please call the get_weather function for New York City")]} +inp = {"messages": [Message.text_message("Please call the get_weather function for New York City")]} config = {"thread_id": "12345", "recursion_limit": 10} res = app.invoke(inp, config=config) @@ -551,12 +549,11 @@ import logging from dotenv import load_dotenv from openai import AsyncOpenAI -from agentflow.checkpointer import InMemoryCheckpointer -from agentflow.graph import StateGraph, ToolNode -from agentflow.state.agent_state import AgentState -from agentflow.utils import Message, ResponseGranularity +from agentflow.core.graph import StateGraph, ToolNode +from agentflow.core.state import AgentState, Message +from agentflow.storage.checkpointer import InMemoryCheckpointer +from agentflow.utils import ResponseGranularity, convert_messages from agentflow.utils.constants import END -from agentflow.utils.converter import convert_messages load_dotenv() client = AsyncOpenAI() @@ -647,7 +644,7 @@ app = graph.compile(checkpointer=checkpointer) async def run_stream_test(): - inp = {"messages": [Message.from_text("Call get_weather for Tokyo, then reply.")]} + inp = {"messages": [Message.text_message("Call get_weather for Tokyo, then reply.")]} config = {"thread_id": "stream-1", 
"recursion_limit": 10} logging.info("--- streaming start ---") @@ -693,7 +690,7 @@ python examples/react_stream/stream_react_agent.py # Parallel execution: max(1.0, 1.5, 0.8) = 1.5 seconds ⚡ ``` -See the [parallel tool execution documentation](https://10xhub.github.io/10xScale Agentflow/Concept/graph/tools/#parallel-tool-execution) for more details. +See the [parallel tool execution documentation](https://10xhub.github.io/Agentflow/Concept/graph/tools/#parallel-tool-execution) for more details. --- @@ -778,14 +775,14 @@ See `pyproject.dev.toml` for complete tool configurations. - ✅ Core graph engine with nodes and edges - ✅ State management and checkpointing -- ✅ Tool integration (MCP, Composio, LangChain) +- ✅ Tool integration (MCP, custom tools, parallel execution) - ✅ **Parallel tool execution** for improved performance - ✅ Streaming and event publishing - ✅ Human-in-the-loop support - ✅ Prebuilt agent patterns -- 🚧 Agent-to-Agent (A2A) communication protocols +- ✅ Agent-to-Agent (A2A) communication protocols +- ✅ Observability and tracing (OpenTelemetry) - 🚧 Remote node execution for distributed processing -- 🚧 Enhanced observability and tracing - 🚧 More persistence backends (Redis, DynamoDB) - 🚧 Parallel/branching strategies - 🚧 Visual graph editor @@ -794,16 +791,16 @@ See `pyproject.dev.toml` for complete tool configurations. ## 📄 License -MIT License - see [LICENSE](https://github.com/10xhub/10xScale Agentflow/blob/main/LICENSE) for details. +MIT License - see [LICENSE](https://github.com/10xHub/agentflow/blob/main/LICENSE) for details. 
--- ## 🔗 Links & Resources -- **[Documentation](https://10xhub.github.io/10xScale Agentflow/)** - Full documentation with tutorials and API reference +- **[Documentation](https://10xhub.github.io/Agentflow/)** - Full documentation with tutorials and API reference - **[GitHub Repository](https://github.com/10xhub/10xScale Agentflow)** - Source code and issues - **[PyPI Project](https://pypi.org/project/10xScale-Agentflow/)** - Package releases -- **[Examples Directory](https://github.com/10xhub/10xScale Agentflow/tree/main/examples)** - Runnable code samples +- **[Examples Directory](https://github.com/10xHub/agentflow/tree/main/examples)** - Runnable code samples --- @@ -820,11 +817,11 @@ Contributions are welcome! Please see our [GitHub repository](https://github.com ## 💬 Support -- **Documentation**: [https://10xhub.github.io/10xScale Agentflow/](https://10xhub.github.io/10xScale Agentflow/) -- **Examples**: Check the [examples directory](https://github.com/10xhub/10xScale Agentflow/tree/main/examples) -- **Issues**: Report bugs on [GitHub Issues](https://github.com/10xhub/10xScale Agentflow/issues) -- **Discussions**: Ask questions in [GitHub Discussions](https://github.com/10xhub/10xScale Agentflow/discussions) +- **Documentation**: [https://10xhub.github.io/Agentflow/](https://10xhub.github.io/Agentflow/) +- **Examples**: Check the [examples directory](https://github.com/10xHub/agentflow/tree/main/examples) +- **Issues**: Report bugs on [GitHub Issues](https://github.com/10xHub/agentflow/issues) +- **Discussions**: Ask questions in [GitHub Discussions](https://github.com/10xHub/agentflow/discussions) --- -**Ready to build intelligent agents?** Check out the [documentation](https://10xhub.github.io/10xScale Agentflow/) to get started! +**Ready to build intelligent agents?** Check out the [documentation](https://10xhub.github.io/Agentflow/) to get started! 
diff --git a/agentflow/core/__init__.py b/agentflow/core/__init__.py index ac03094c..dd80dca1 100644 --- a/agentflow/core/__init__.py +++ b/agentflow/core/__init__.py @@ -10,7 +10,9 @@ from __future__ import annotations -from . import exceptions, graph, skills, state +import typing as _t + +from . import exceptions, skills, state # --- Exceptions --- from .exceptions import ( @@ -25,21 +27,64 @@ TransientStorageError, ) -# --- Graph --- -from .graph import ( - Agent, - BaseAgent, - CompiledGraph, - Edge, - Node, - RetryConfig, - StateGraph, - ToolNode, -) - # --- Skills --- from .skills import SkillConfig, SkillMeta, SkillsRegistry + +# --- Graph (lazy) --- +# The graph engine is imported lazily to avoid an import cycle: ``agentflow.core.graph`` imports +# back into ``agentflow.utils`` and ``agentflow.storage.checkpointer``. Importing it eagerly here +# means that ``import agentflow.utils`` or ``import agentflow.storage.checkpointer`` *as the first +# import* triggers ``agentflow.core`` -> ``graph`` -> back into the half-initialized module and +# raises ImportError. Deferring graph keeps ``from agentflow.core import StateGraph`` working while +# letting those modules be imported in any order. See tests/test_import_order.py. +_GRAPH_EXPORTS = frozenset( + { + "Agent", + "BaseAgent", + "CompiledGraph", + "Edge", + "Node", + "RetryConfig", + "StateGraph", + "ToolNode", + } +) + +if _t.TYPE_CHECKING: + from . import graph + from .graph import ( + Agent, + BaseAgent, + CompiledGraph, + Edge, + Node, + RetryConfig, + StateGraph, + ToolNode, + ) + + +def __getattr__(name: str) -> _t.Any: + """Lazily resolve the graph submodule and its exported symbols (PEP 562). + + Uses ``importlib.import_module`` (not ``from . import graph``) so a re-entrant lookup while + ``graph`` is still importing returns the partial module from ``sys.modules`` directly instead + of recursing back through this hook via the parent-attribute binding. 
+ """ + if name == "graph" or name in _GRAPH_EXPORTS: + import importlib + + graph = importlib.import_module(f"{__name__}.graph") + globals()["graph"] = graph # cache so future lookups skip __getattr__ + return graph if name == "graph" else getattr(graph, name) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__() -> list[str]: + return sorted(set(globals()) | _GRAPH_EXPORTS | {"graph"}) + + # --- State --- from .state import ( AgentState, diff --git a/agentflow/core/graph/__init__.py b/agentflow/core/graph/__init__.py index 5334d30b..2c2affca 100644 --- a/agentflow/core/graph/__init__.py +++ b/agentflow/core/graph/__init__.py @@ -63,7 +63,7 @@ ============== ```python - from agentflow.graph import StateGraph, ToolNode + from agentflow.core.graph import StateGraph, ToolNode from agentflow.utils import START, END diff --git a/agentflow/core/skills/__init__.py b/agentflow/core/skills/__init__.py index f44db151..6fe2a27f 100644 --- a/agentflow/core/skills/__init__.py +++ b/agentflow/core/skills/__init__.py @@ -5,7 +5,7 @@ **on-demand** (default) — the LLM sees a trigger table and calls ``set_skill()`` to load skill content when a user request matches a skill:: - from agentflow.skills import SkillConfig + from agentflow.core.skills import SkillConfig agent = Agent( model="gpt-4o", @@ -17,7 +17,7 @@ domain/persona. 
The framework reads a state field to identify which skill to preload, with no trigger table and no extra tool-call round-trip:: - from agentflow.skills import SkillConfig + from agentflow.core.skills import SkillConfig from agentflow.core.state import AgentState diff --git a/agentflow/qa/evaluation/__init__.py b/agentflow/qa/evaluation/__init__.py index a3124d50..001cca9f 100644 --- a/agentflow/qa/evaluation/__init__.py +++ b/agentflow/qa/evaluation/__init__.py @@ -17,8 +17,8 @@ Example: ```python - from agentflow.evaluation import AgentEvaluator, EvalConfig, CriterionConfig - from agentflow.evaluation.dataset import EvalCase, ToolCall + from agentflow.qa.evaluation import AgentEvaluator, EvalConfig, CriterionConfig + from agentflow.qa.evaluation.dataset import EvalCase, ToolCall case = EvalCase.single_turn( eval_id="test_1", diff --git a/agentflow/qa/evaluation/collectors/__init__.py b/agentflow/qa/evaluation/collectors/__init__.py index 14bf9299..0eaca2ff 100644 --- a/agentflow/qa/evaluation/collectors/__init__.py +++ b/agentflow/qa/evaluation/collectors/__init__.py @@ -12,7 +12,7 @@ Example: ```python - from agentflow.evaluation.collectors import TrajectoryCollector, make_trajectory_callback + from agentflow.qa.evaluation.collectors import TrajectoryCollector, make_trajectory_callback collector = TrajectoryCollector() _, mgr = make_trajectory_callback(collector, config={"thread_id": "eval-1"}) diff --git a/agentflow/qa/evaluation/config/__init__.py b/agentflow/qa/evaluation/config/__init__.py index 6fda9a65..dd09a83b 100644 --- a/agentflow/qa/evaluation/config/__init__.py +++ b/agentflow/qa/evaluation/config/__init__.py @@ -5,8 +5,8 @@ Example: ```python - from agentflow.evaluation.config import EvalConfig, CriterionConfig - from agentflow.evaluation.config import EvalPresets, MatchType, Rubric + from agentflow.qa.evaluation.config import EvalConfig, CriterionConfig + from agentflow.qa.evaluation.config import EvalPresets, MatchType, Rubric # Use a preset config 
= EvalPresets.tool_usage(strict=True) diff --git a/agentflow/qa/evaluation/criteria/__init__.py b/agentflow/qa/evaluation/criteria/__init__.py index b538d49b..2a9c62f1 100644 --- a/agentflow/qa/evaluation/criteria/__init__.py +++ b/agentflow/qa/evaluation/criteria/__init__.py @@ -7,7 +7,7 @@ Example: ```python - from agentflow.evaluation.criteria import ( + from agentflow.qa.evaluation.criteria import ( TrajectoryMatchCriterion, ResponseMatchCriterion, LLMJudgeCriterion, diff --git a/agentflow/qa/evaluation/dataset/__init__.py b/agentflow/qa/evaluation/dataset/__init__.py index ce1b085e..75599a0b 100644 --- a/agentflow/qa/evaluation/dataset/__init__.py +++ b/agentflow/qa/evaluation/dataset/__init__.py @@ -5,7 +5,7 @@ Example: ```python - from agentflow.evaluation.dataset import ( + from agentflow.qa.evaluation.dataset import ( EvalSet, EvalCase, EvalSetBuilder, diff --git a/agentflow/qa/evaluation/evaluator.py b/agentflow/qa/evaluation/evaluator.py index 7f53c486..c80b95d9 100644 --- a/agentflow/qa/evaluation/evaluator.py +++ b/agentflow/qa/evaluation/evaluator.py @@ -55,8 +55,8 @@ class AgentEvaluator: Example: ```python - from agentflow.evaluation import AgentEvaluator, EvalConfig - from agentflow.evaluation.collectors import TrajectoryCollector, make_trajectory_callback + from agentflow.qa.evaluation import AgentEvaluator, EvalConfig + from agentflow.qa.evaluation.collectors import TrajectoryCollector, make_trajectory_callback collector = TrajectoryCollector(capture_all_events=True) _, callback_mgr = make_trajectory_callback(collector) diff --git a/agentflow/qa/evaluation/reporters/manager.py b/agentflow/qa/evaluation/reporters/manager.py index 466587a3..af2d8c68 100644 --- a/agentflow/qa/evaluation/reporters/manager.py +++ b/agentflow/qa/evaluation/reporters/manager.py @@ -61,8 +61,8 @@ class ReporterManager: Example: ```python - from agentflow.evaluation.config.eval_config import ReporterConfig - from agentflow.evaluation.reporters.manager import 
ReporterManager + from agentflow.qa.evaluation.config.eval_config import ReporterConfig + from agentflow.qa.evaluation.reporters.manager import ReporterManager manager = ReporterManager(ReporterConfig()) output = manager.run_all(report) diff --git a/agentflow/qa/evaluation/simulators/user_simulator.py b/agentflow/qa/evaluation/simulators/user_simulator.py index 8baf73f0..70245137 100644 --- a/agentflow/qa/evaluation/simulators/user_simulator.py +++ b/agentflow/qa/evaluation/simulators/user_simulator.py @@ -143,7 +143,7 @@ class UserSimulator: Example: ```python - from agentflow.evaluation import ( + from agentflow.qa.evaluation import ( UserSimulator, ConversationScenario, SimulationGoalsCriterion, diff --git a/agentflow/qa/evaluation/testing.py b/agentflow/qa/evaluation/testing.py index 69c0d598..d5a4b3b2 100644 --- a/agentflow/qa/evaluation/testing.py +++ b/agentflow/qa/evaluation/testing.py @@ -235,7 +235,7 @@ class EvalFixtures: Example: ```python # conftest.py - from agentflow.evaluation.testing import EvalFixtures + from agentflow.qa.evaluation.testing import EvalFixtures fixtures = EvalFixtures() fixtures.register() @@ -353,7 +353,7 @@ def create_eval_app( Example: ```python # conftest.py - from agentflow.evaluation.testing import create_eval_app + from agentflow.qa.evaluation.testing import create_eval_app @pytest.fixture(scope="session") diff --git a/agentflow/qa/testing/__init__.py b/agentflow/qa/testing/__init__.py index f3d4706e..c0f31e1a 100644 --- a/agentflow/qa/testing/__init__.py +++ b/agentflow/qa/testing/__init__.py @@ -10,7 +10,7 @@ Example: ```python - from agentflow.testing import TestAgent, TestContext, MockToolRegistry + from agentflow.qa.testing import TestAgent, TestContext, MockToolRegistry # Use TestAgent as a drop-in replacement for Agent test_agent = TestAgent(responses=["Hello from test!"]) @@ -28,7 +28,7 @@ assert tools.was_called("get_weather") # Use MockMCPClient for testing MCP tools - from agentflow.testing import MockMCPClient + 
from agentflow.qa.testing import MockMCPClient mock_mcp = MockMCPClient() mock_mcp.add_tool( diff --git a/eval_reports/s-file_20260613_204737.html b/eval_reports/s-file_20260613_204737.html new file mode 100644 index 00000000..2d8f33d1 --- /dev/null +++ b/eval_reports/s-file_20260613_204737.html @@ -0,0 +1,954 @@ + + + + + + s-file + + + +
+ + + +
+
+
📋
+
1
+
Total Cases
+
+
+
+
1
+
Passed
+
+
+
+
0
+
Failed
+
+
+
⚠️
+
0
+
Errors
+
+
+
📈
+
100%
+
Pass Rate
+
+
+
+
+
+
⏱️
+
0.00s
+
Duration
+
+ +
+ +
+
+

📊 Criterion Breakdown

+
+
+
+

🎯 Score by Case

+
+
+
+ +
+
+ + + + + +
+
+
+
+ +
+ c1 + Score: 0.00 + 0.00s +
+
+
+
+
+
+ + + +
+ + + \ No newline at end of file diff --git a/eval_reports/s-file_20260613_204737.json b/eval_reports/s-file_20260613_204737.json new file mode 100644 index 00000000..5ea4f953 --- /dev/null +++ b/eval_reports/s-file_20260613_204737.json @@ -0,0 +1,123 @@ +{ + "eval_set_id": "s-file", + "eval_set_name": "", + "results": [ + { + "eval_id": "c1", + "name": "", + "passed": true, + "criterion_results": [], + "actual_trajectory": [], + "actual_tool_calls": [], + "actual_response": "", + "messages": [], + "node_responses": [], + "node_visits": [], + "duration_seconds": 0.0, + "error": null, + "metadata": {}, + "turn_results": [], + "token_usage": { + "input_tokens": 0, + "output_tokens": 0, + "cache_read_tokens": 0, + "cache_creation_tokens": 0, + "total_tokens": 0 + }, + "agent_token_usage": { + "input_tokens": 0, + "output_tokens": 0, + "cache_read_tokens": 0, + "cache_creation_tokens": 0, + "total_tokens": 0 + }, + "node_details": [] + } + ], + "summary": { + "total_cases": 1, + "passed_cases": 1, + "failed_cases": 0, + "error_cases": 0, + "pass_rate": 1.0, + "avg_duration_seconds": 0.0, + "total_duration_seconds": 0.0, + "criterion_stats": {}, + "total_token_usage": { + "input_tokens": 0, + "output_tokens": 0, + "cache_read_tokens": 0, + "cache_creation_tokens": 0, + "total_tokens": 0 + }, + "per_case_token_usage": { + "c1": { + "input_tokens": 0, + "output_tokens": 0, + "cache_read_tokens": 0, + "cache_creation_tokens": 0, + "total_tokens": 0 + } + }, + "avg_tokens_per_case": 0.0 + }, + "config_used": { + "criteria": { + "tool_name_match": null, + "trajectory": { + "threshold": 1.0, + "match_type": "EXACT", + "judge_model": "gemini-2.5-flash", + "num_samples": 3, + "rubrics": [], + "keywords": [], + "check_args": false, + "enabled": true, + "api_style": "responses" + }, + "node_order": null, + "response_match": { + "threshold": 0.8, + "match_type": "EXACT", + "judge_model": "gemini-2.5-flash", + "num_samples": 3, + "rubrics": [], + "keywords": [], + "check_args": 
false, + "enabled": true, + "api_style": "responses" + }, + "rouge_match": null, + "contains_keywords": null, + "llm_judge": null, + "rubric_based": null, + "factual_accuracy": null, + "hallucination": null, + "safety": null, + "simulation_goals": null + }, + "user_simulator_config": null, + "parallel": false, + "max_concurrency": 4, + "timeout": 300.0, + "verbose": false, + "mock_mode": false, + "reporter": { + "enabled": true, + "output_dir": "eval_reports", + "console": true, + "json_report": true, + "html": true, + "junit_xml": false, + "verbose": true, + "include_details": true, + "include_trajectory": true, + "include_node_responses": true, + "include_actual_response": true, + "include_tool_call_details": true, + "timestamp_files": true + } + }, + "timestamp": 1781362057.5080197, + "metadata": {} +} \ No newline at end of file diff --git a/examples/evaluation/test1/test_weather_agent.py b/examples/evaluation/test1/test_weather_agent.py index c458c390..d08d9c99 100644 --- a/examples/evaluation/test1/test_weather_agent.py +++ b/examples/evaluation/test1/test_weather_agent.py @@ -12,9 +12,8 @@ # import pytest -# from agentflow.evaluation.config.eval_config import CriterionConfig, EvalConfig, MatchType -# from agentflow.evaluation.evaluator import AgentEvaluator -# from agentflow.state.message import Message +# from agentflow.qa.evaluation import AgentEvaluator, CriterionConfig, EvalConfig, MatchType +# from agentflow.core.state import Message # from .samples import CAPITAL_QUESTION, LONDON, NYC diff --git a/examples/evaluation/test_graph/__init__.py b/examples/evaluation/test_graph/__init__.py index 4a554b69..2f35cec0 100644 --- a/examples/evaluation/test_graph/__init__.py +++ b/examples/evaluation/test_graph/__init__.py @@ -57,7 +57,7 @@ def get_forecast(location: str, days: int = 3) -> str: ), }, ], - tool_node_name="TOOL", + tool_node="TOOL", ) # ── Routing ────────────────────────────────────────────────────── diff --git a/examples/github-mcp/git_mcp.py 
b/examples/github-mcp/git_mcp.py index ce6617e3..f233e5ac 100644 --- a/examples/github-mcp/git_mcp.py +++ b/examples/github-mcp/git_mcp.py @@ -28,7 +28,7 @@ client_http = Client(config) -tool_node = ToolNode(functions=[], client=client_http) +tool_node = ToolNode([], client=client_http) main_agent = Agent( diff --git a/examples/tool-decorator/README.md b/examples/tool-decorator/README.md index 0a1d8e75..1731489e 100644 --- a/examples/tool-decorator/README.md +++ b/examples/tool-decorator/README.md @@ -77,7 +77,7 @@ def advanced_function(x: int) -> int: ### Tag Filtering ```python -from agentflow.graph.tool_node import ToolNode +from agentflow.core.graph import ToolNode # Create tools with different tags @tool(name="read_tool", tags=["database", "read"]) diff --git a/tests/test_docs_imports.py b/tests/test_docs_imports.py new file mode 100644 index 00000000..49d62401 --- /dev/null +++ b/tests/test_docs_imports.py @@ -0,0 +1,115 @@ +"""Doc/example import guardrail. + +Prevents regressions of the pre-refactor import drift: README and ``examples/`` must not +reference module paths that were removed in the ``core/`` / ``storage/`` / ``runtime/`` / ``qa/`` +restructure, and must not use APIs that do not exist (``Message.from_text``, +``ToolNode(functions=...)``, ``Agent(tool_node_name=...)``). + +Two layers: + 1. Static scan of every ``agentflow.*`` import in README + examples against a denylist of + removed top-level shims, plus a scan for known-bad API call patterns. + 2. A live check that the canonical symbols the README now advertises are importable and real. + +The static scan needs no optional dependencies and is the authoritative regression guard. +""" + +from __future__ import annotations + +import re +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[1] + +# Top-level module prefixes removed in the package restructure. Any import that starts with one +# of these (followed by "." or end of token) is a dead path. 
Note that the canonical paths +# (agentflow.core.state, agentflow.core.graph, ...) do NOT start with any of these. +DEAD_PREFIXES = ( + "agentflow.graph", + "agentflow.state", + "agentflow.checkpointer", + "agentflow.evaluation", + "agentflow.skills", + "agentflow.testing", + "agentflow.adapters", + "agentflow.publisher", +) + +# API call patterns that reference symbols/keywords that do not exist. +BAD_API_PATTERNS = { + r"\bMessage\.from_text\s*\(": "Message.from_text does not exist; use Message.text_message", + r"\bToolNode\s*\(\s*functions\s*=": "ToolNode takes `tools` (positional), not `functions=`", + r"\btool_node_name\s*=": "Agent uses `tool_node=`, not `tool_node_name=`", +} + +_IMPORT_RE = re.compile(r"^\s*(?:from\s+(agentflow[\w.]*)\s+import|import\s+(agentflow[\w.]*))") +_PY_FENCE_RE = re.compile(r"```(?:python|py)\s*\n(.*?)```", re.DOTALL) + + +def _doc_files() -> list[Path]: + files = [REPO_ROOT / "README.md"] + examples = REPO_ROOT / "examples" + if examples.is_dir(): + files += sorted(examples.rglob("*.md")) + files += sorted(examples.rglob("*.py")) + return [f for f in files if f.is_file()] + + +def _code_text(path: Path) -> str: + """Return the Python source contained in a file (fenced blocks for .md, whole file for .py).""" + text = path.read_text(encoding="utf-8", errors="replace") + if path.suffix == ".md": + return "\n".join(_PY_FENCE_RE.findall(text)) + return text + + +def _agentflow_imports(code: str) -> list[str]: + """Yield the imported ``agentflow.*`` module path for each non-commented import line.""" + mods = [] + for line in code.splitlines(): + if line.lstrip().startswith("#"): + continue + m = _IMPORT_RE.match(line) + if m: + mods.append(m.group(1) or m.group(2)) + return mods + + +def _is_dead(mod: str) -> bool: + return any(mod == p or mod.startswith(p + ".") for p in DEAD_PREFIXES) + + +def test_no_dead_import_paths_in_docs(): + """No README/example code references a removed top-level module path.""" + violations = [] + for f in 
_doc_files(): + for mod in _agentflow_imports(_code_text(f)): + if _is_dead(mod): + violations.append(f"{f.relative_to(REPO_ROOT)}: {mod}") + assert not violations, ( + "Dead import paths found (use agentflow.core.* / agentflow.storage.* / agentflow.qa.*):\n" + + "\n".join(violations) + ) + + +def test_no_nonexistent_api_patterns_in_docs(): + """No README/example code uses an API symbol/keyword that does not exist.""" + violations = [] + for f in _doc_files(): + code = _code_text(f) + for pattern, why in BAD_API_PATTERNS.items(): + if re.search(pattern, code): + violations.append(f"{f.relative_to(REPO_ROOT)}: {why}") + assert not violations, "Nonexistent API usage found:\n" + "\n".join(violations) + + +def test_canonical_readme_symbols_are_real(): + """The canonical symbols the README advertises import and exist.""" + from agentflow.core.graph import Agent, StateGraph, ToolNode # noqa: F401 + from agentflow.core.state import AgentState, Message # noqa: F401 + from agentflow.storage.checkpointer import InMemoryCheckpointer # noqa: F401 + from agentflow.utils import ResponseGranularity, convert_messages # noqa: F401 + from agentflow.utils.constants import END # noqa: F401 + + assert hasattr(Message, "text_message") + assert not hasattr(Message, "from_text") diff --git a/tests/test_import_order.py b/tests/test_import_order.py new file mode 100644 index 00000000..7b9351d4 --- /dev/null +++ b/tests/test_import_order.py @@ -0,0 +1,66 @@ +"""Import-order regression guard. + +``agentflow.core.graph`` imports back into ``agentflow.utils`` and +``agentflow.storage.checkpointer``. Historically that made those modules unimportable as the +*first* import in a fresh interpreter (``ImportError: ... partially initialized module``), because +``agentflow.core`` eagerly pulled in ``graph``. ``graph`` is now loaded lazily (PEP 562 +``__getattr__`` in ``agentflow/core/__init__.py``) so every public entry point imports cleanly in +any order. 
+ +Each case runs in a *fresh* subprocess — importing in-process would not catch the bug once pytest +has already loaded ``agentflow.core``. +""" + +from __future__ import annotations + +import subprocess +import sys + +import pytest + + +# Public entry points that must import cleanly as the very first import in a fresh interpreter. +FIRST_IMPORTS = [ + "import agentflow.utils", + "from agentflow.utils import CallbackManager, convert_messages, tool", + "import agentflow.storage", + "import agentflow.storage.checkpointer", + "from agentflow.storage.checkpointer import InMemoryCheckpointer, BaseCheckpointer", + "import agentflow.core", + "from agentflow.core import StateGraph, Agent, ToolNode, CompiledGraph, AgentState, Message", + "from agentflow.core.graph import Agent, StateGraph, ToolNode, CompiledGraph", + "from agentflow.core.state import AgentState, Message", + "import agentflow.runtime.publisher", + "import agentflow.qa.evaluation", + "import agentflow.qa.testing", +] + + +@pytest.mark.parametrize("statement", FIRST_IMPORTS) +def test_importable_as_first_import(statement: str): + """The statement succeeds when it is the only thing a fresh interpreter imports.""" + result = subprocess.run( + [sys.executable, "-c", statement], + capture_output=True, + text=True, + timeout=60, + ) + assert result.returncode == 0, ( + f"`{statement}` failed as a first import:\n{result.stderr}" + ) + + +def test_lazy_graph_symbol_identity(): + """The lazily-resolved aggregate symbol is the same object as the direct submodule symbol.""" + code = ( + "from agentflow.core import StateGraph as A\n" + "from agentflow.core.graph import StateGraph as B\n" + "assert A is B, 'aggregate symbol is not the submodule symbol'\n" + ) + result = subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + text=True, + timeout=60, + ) + assert result.returncode == 0, result.stderr From dd5a7d80dc60fedc7b00bb2b47454f3ed85ffb80 Mon Sep 17 00:00:00 2001 From: Shudipto Trafder Date: Sat, 
13 Jun 2026 21:10:26 +0600 Subject: [PATCH 2/2] feat: enhance provider detection and model resolution in Agent class --- .gitignore | 2 + agentflow/core/graph/agent.py | 17 ++-- .../core/graph/agent_internal/providers.py | 18 +++- agentflow/core/llm/client_factory.py | 45 +++++++++- tests/graph/test_agent_internal.py | 85 +++++++++++++++++++ 5 files changed, 152 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index b2be357e..3ca3c960 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,5 @@ site graphify-out/ service_account.json + +eval_reports/ \ No newline at end of file diff --git a/agentflow/core/graph/agent.py b/agentflow/core/graph/agent.py index 8a6ab099..f175f902 100644 --- a/agentflow/core/graph/agent.py +++ b/agentflow/core/graph/agent.py @@ -264,11 +264,6 @@ class MyState(AgentState): **kwargs, ) - # check user sending model and provider as prefix, if provider is not explicitly provided - if "/" in model and provider is None: - provider, model = model.split("/", 1) - self.model = model - # Store output type self.output_type = output_type.lower() self.output_schema = output_schema @@ -276,15 +271,17 @@ class MyState(AgentState): # Determine provider; self.llm_kwargs is set by super().__init__ and is # already available here for _create_client(). + self.base_url = base_url if provider is not None: + # Provider explicitly supplied — trust it as-is. self.provider = provider.lower() - self.base_url = base_url self.client = self._create_client(self.provider, base_url, use_vertex_ai) else: - # Auto-detect provider from model name - self.provider = self._detect_provider_from_model(model, use_vertex_ai) - self.base_url = base_url - self.client = self._create_client(self.provider, base_url) + # Resolve provider (and strip a recognised ``provider/`` prefix) from + # the model string. Unknown prefixes resolve to ``openai`` and keep + # the full model name (e.g. OpenAI-compatible/self-hosted models). 
+ self.provider, self.model = self._resolve_provider_and_model(model, use_vertex_ai) + self.client = self._create_client(self.provider, base_url, use_vertex_ai) # Validate that provider supports the output type self._validate_output_type() diff --git a/agentflow/core/graph/agent_internal/providers.py b/agentflow/core/graph/agent_internal/providers.py index b51cc49e..4f2627f2 100644 --- a/agentflow/core/graph/agent_internal/providers.py +++ b/agentflow/core/graph/agent_internal/providers.py @@ -5,7 +5,11 @@ import logging from typing import Any, Protocol -from agentflow.core.llm.client_factory import create_llm_client, detect_provider +from agentflow.core.llm.client_factory import ( + create_llm_client, + detect_provider, + resolve_provider_and_model, +) from .constants import ( CLIENT_CONSTRUCTOR_KWARGS, @@ -53,6 +57,18 @@ def _detect_provider_from_model(self, model: str, use_vertex_ai: bool = False) - """Infer the provider from the model name when not explicitly supplied.""" return detect_provider(model, use_vertex_ai=use_vertex_ai) + def _resolve_provider_and_model( + self, model: str, use_vertex_ai: bool = False + ) -> tuple[str, str]: + """Resolve a model string into a ``(provider, model)`` pair. + + Recognised ``provider/`` prefixes (``gemini``, ``google``, ``openai``, + ``gpt``) select the provider and are stripped from the model name. + Unknown prefixes are kept intact and resolve to the ``openai`` provider + so OpenAI-compatible / self-hosted models work out of the box. 
+ """ + return resolve_provider_and_model(model, use_vertex_ai=use_vertex_ai) + def _create_google_vertex_ai_client(self) -> Any: return create_llm_client("google", use_vertex_ai=True) diff --git a/agentflow/core/llm/client_factory.py b/agentflow/core/llm/client_factory.py index 6ec2f5af..35cdfb35 100644 --- a/agentflow/core/llm/client_factory.py +++ b/agentflow/core/llm/client_factory.py @@ -14,6 +14,16 @@ logger = logging.getLogger("agentflow.llm") +# Recognised ``provider/`` prefixes mapped to the concrete provider the client +# factory can build. Anything not listed here is an unknown prefix and resolves +# to ``"openai"`` (the OpenAI SDK is used for OpenAI-compatible endpoints). +_PROVIDER_PREFIXES = { + "gemini": "google", + "google": "google", + "openai": "openai", + "gpt": "openai", +} + # Keys allowed in the AsyncOpenAI constructor but NOT in per-request calls. _CLIENT_CONSTRUCTOR_KWARGS = frozenset( { @@ -44,10 +54,8 @@ def detect_provider(model: str, use_vertex_ai: bool = False) -> str: if "/" in model: prefix = model.split("/", 1)[0].lower() - if prefix in ("gemini", "google"): - return "google" - if prefix in ("openai", "gpt"): - return "openai" + if prefix in _PROVIDER_PREFIXES: + return _PROVIDER_PREFIXES[prefix] # Unknown prefix — fall through to name-based detection using the suffix model = model.split("/", 1)[1] @@ -64,6 +72,35 @@ def detect_provider(model: str, use_vertex_ai: bool = False) -> str: return "openai" +def resolve_provider_and_model( + model: str, use_vertex_ai: bool = False +) -> tuple[str, str]: + """Resolve a model string into a concrete ``(provider, model)`` pair. + + Unlike :func:`detect_provider`, this also returns the model name that should + be sent to the provider. A *recognised* ``provider/`` prefix (e.g. + ``"gemini/..."``, ``"openai/..."``) is stripped, since the provider is + selected from the prefix. An *unrecognised* prefix is kept intact: it may be + an OpenAI-compatible / HuggingFace-style identifier (e.g. 
+ ``"meta-llama/Llama-3-70b"``) where the slash is part of the real model name. + Such models always resolve to the ``"openai"`` provider. + + Args: + model: Model identifier, optionally prefixed with ``"provider/"``. + use_vertex_ai: When True, always selects the ``"google"`` provider. + + Returns: + A ``(provider, model)`` tuple where provider is ``"google"`` or + ``"openai"``. + """ + if "/" in model: + prefix, rest = model.split("/", 1) + if prefix.lower() in _PROVIDER_PREFIXES: + return detect_provider(model, use_vertex_ai=use_vertex_ai), rest + + return detect_provider(model, use_vertex_ai=use_vertex_ai), model + + def create_llm_client( provider: str, *, diff --git a/tests/graph/test_agent_internal.py b/tests/graph/test_agent_internal.py index ca4e4e82..2bfdfa52 100644 --- a/tests/graph/test_agent_internal.py +++ b/tests/graph/test_agent_internal.py @@ -281,6 +281,56 @@ def test_deepseek_defaults_to_openai(self): agent = _make_openai_agent() assert agent._detect_provider_from_model("deepseek-chat") == "openai" + def test_unknown_prefix_falls_back_to_openai(self): + agent = _make_openai_agent() + assert agent._detect_provider_from_model("ollama/llama3") == "openai" + assert agent._detect_provider_from_model("anthropic/claude-3") == "openai" + + +class TestResolveProviderAndModel: + """``_resolve_provider_and_model`` returns ``(provider, model)``: it strips + recognised provider aliases and defaults unknown prefixes to openai.""" + + def test_gemini_alias_maps_to_google(self): + agent = _make_openai_agent() + assert agent._resolve_provider_and_model("gemini/gemini-2.5-flash") == ( + "google", + "gemini-2.5-flash", + ) + + def test_google_alias_maps_to_google(self): + agent = _make_openai_agent() + assert agent._resolve_provider_and_model("google/gemini-2.0-flash") == ( + "google", + "gemini-2.0-flash", + ) + + def test_openai_alias_maps_to_openai(self): + agent = _make_openai_agent() + assert agent._resolve_provider_and_model("openai/gpt-4o") == ("openai", 
"gpt-4o") + + def test_gpt_alias_maps_to_openai(self): + agent = _make_openai_agent() + assert agent._resolve_provider_and_model("gpt/gpt-4o") == ("openai", "gpt-4o") + + def test_unknown_prefix_defaults_to_openai_and_keeps_full_model(self): + agent = _make_openai_agent() + assert agent._resolve_provider_and_model("meta-llama/Llama-3-70b") == ( + "openai", + "meta-llama/Llama-3-70b", + ) + + def test_bare_unknown_model_defaults_to_openai(self): + agent = _make_openai_agent() + assert agent._resolve_provider_and_model("llama3:70b") == ("openai", "llama3:70b") + + def test_use_vertex_ai_forces_google(self): + agent = _make_openai_agent() + assert agent._resolve_provider_and_model("llama3:70b", use_vertex_ai=True) == ( + "google", + "llama3:70b", + ) + class TestValidateOutputType: def test_valid_text_type_does_not_raise(self): @@ -1955,6 +2005,41 @@ def test_unknown_model_without_provider_auto_detects_openai(self): agent = Agent(model="llama3:70b", reasoning_config=None) assert agent.provider == "openai" + def test_gemini_slash_prefix_maps_to_google_provider(self): + """The ``gemini/`` alias must resolve to the ``google`` provider.""" + with patch.object(Agent, "_create_client", return_value=MagicMock()): + agent = Agent(model="gemini/gemini-2.5-flash", reasoning_config=None) + assert agent.provider == "google" + assert agent.model == "gemini-2.5-flash" + + def test_gpt_slash_prefix_maps_to_openai_provider(self): + """The ``gpt/`` alias must resolve to the ``openai`` provider.""" + with patch.object(Agent, "_create_client", return_value=MagicMock()): + agent = Agent(model="gpt/gpt-4o", reasoning_config=None) + assert agent.provider == "openai" + assert agent.model == "gpt-4o" + + def test_unknown_prefix_resolves_to_openai_and_keeps_full_model(self): + """An unrecognised prefix must default to openai, not google, and keep + the full model string (it may be an OpenAI-compatible / HF-style name).""" + with patch.object(Agent, "_create_client", return_value=MagicMock()): 
+ agent = Agent(model="meta-llama/Llama-3-70b", reasoning_config=None) + assert agent.provider == "openai" + assert agent.model == "meta-llama/Llama-3-70b" + + def test_anthropic_prefix_resolves_to_openai(self): + """Claude via an OpenAI-compatible endpoint should not select google.""" + with patch.object(Agent, "_create_client", return_value=MagicMock()): + agent = Agent(model="anthropic/claude-3", reasoning_config=None) + assert agent.provider == "openai" + assert agent.model == "anthropic/claude-3" + + def test_ollama_prefix_resolves_to_openai(self): + with patch.object(Agent, "_create_client", return_value=MagicMock()): + agent = Agent(model="ollama/llama3", reasoning_config=None) + assert agent.provider == "openai" + assert agent.model == "ollama/llama3" + # ── reasoning config normalization ──────────────────────────────────── def test_default_sentinel_produces_medium_effort(self):