-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
113 lines (90 loc) · 4.2 KB
/
Copy pathmain.py
File metadata and controls
113 lines (90 loc) · 4.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# main.py — infrastructure layer.
# Owns: Ollama config, MCP server spawning, FastAPI app, HTTP endpoint.
# The actual agent logic lives in agent.py.
import sys
import time
import traceback
import uuid
from contextlib import asynccontextmanager

from agents import set_default_openai_client, set_default_openai_api
from agents.mcp import MCPServerStdio
from fastapi import FastAPI, HTTPException
from openai import AsyncOpenAI
from pydantic import BaseModel

import agent as ag
# --- Ollama configuration ---------------------------------------------------
# The Agents SDK targets OpenAI by default; the two calls below point it at a
# local Ollama instance instead. They run at import time so the defaults are
# in place before any agent is created.

# OpenAI-compatible endpoint exposed by the local Ollama server.
_OLLAMA_BASE_URL = "http://localhost:11434/v1"

# The client constructor insists on an api_key; Ollama ignores its value.
_ollama_client = AsyncOpenAI(base_url=_OLLAMA_BASE_URL, api_key="ollama")

# use_for_tracing=False keeps the SDK from using this client to upload trace
# events to OpenAI, which would fail without a real OpenAI key.
set_default_openai_client(_ollama_client, use_for_tracing=False)

# Ollama serves the Chat Completions API (/v1/chat/completions) but not the
# newer Responses API that the SDK would otherwise use, so select it
# explicitly; without this, requests would go to an endpoint Ollama lacks.
set_default_openai_api("chat_completions")
# --- Application state ------------------------------------------------------
# Module-level slot for the live agent instance.
# lifespan() assigns it (from ag.create_agent) during startup and resets it
# to None on shutdown; it is also None from process start until lifespan
# startup has finished. chat_completions() answers 503 while it is None.
_agent = None
# --- Lifespan ---------------------------------------------------------------
# FastAPI calls this once when the server starts and once when it stops.
# Everything inside the `async with MCPServerStdio(...)` block is kept alive
# for as long as the server is running.
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the MCP search server and publish the agent while the app is up.

    Used as the FastAPI ``lifespan`` handler: the code before ``yield`` runs
    once at startup, the code after it runs once at shutdown.

    Args:
        app: The FastAPI application being started (unused here, but part of
            the lifespan handler signature).

    Side effects:
        Sets the module-level ``_agent`` for the duration of the ``yield``
        and resets it to ``None`` afterwards.
    """
    global _agent

    # Step 1: spawn search_server.py as a child process that is spoken to
    # over stdin/stdout. sys.executable (rather than the string "python")
    # makes the child use the same interpreter, and therefore the same venv
    # and installed packages, as this process.
    # NOTE(review): "search_server.py" is a relative path, so it is presumably
    # resolved against the working directory the server is launched from —
    # confirm the service is always started from the directory containing it.
    async with MCPServerStdio(
        name="search",
        params={"command": sys.executable, "args": ["search_server.py"]},
    ) as search_server:
        # Step 2: build the agent around the live MCP server connection and
        # publish it for the request handler.
        _agent = ag.create_agent(search_server)
        try:
            # Step 3: hand control back to FastAPI, which now serves requests.
            # Execution stays parked here until shutdown begins.
            yield
        finally:
            # Step 4: unpublish the agent *before* the `async with` block
            # tears down the MCP child process, so no request can pick up an
            # agent whose tool server is already closing. Doing this in
            # `finally` also covers shutdown via an exception or cancellation;
            # from this point the endpoint answers 503.
            _agent = None
# --- FastAPI app ------------------------------------------------------------
# The ASGI application object. Passing lifespan= registers the lifespan()
# context manager so the MCP server and agent are set up at startup and torn
# down at shutdown.
app = FastAPI(lifespan=lifespan)
class ChatRequest(BaseModel):
    # Request body for POST /v1/chat/completions, shaped like a standard
    # Chat Completions request.

    # Model name supplied by the client; falls back to ag.MODEL when omitted.
    model: str = ag.MODEL

    # Conversation so far as a list of message dicts, e.g.
    # [{"role": "user", "content": "what's in the news?"}].
    # Annotated as a bare list, so the element shape is not validated here.
    messages: list
@app.post("/v1/chat/completions")
async def chat_completions(req: ChatRequest):
    """Answer a Chat Completions-style request using the shared agent.

    Args:
        req: Parsed request body. Only ``req.messages`` is forwarded to the
            agent; ``req.model`` is accepted but not used, and the response
            always reports ``ag.MODEL``.

    Returns:
        A dict in the Chat Completions response shape with a single
        assistant choice containing the agent's text.

    Raises:
        HTTPException: 503 when the agent is not available (before startup
            completes or after shutdown); 502 when the agent run fails, with
            the underlying error message as the detail.
    """
    # Guard against requests arriving before startup completes or after
    # shutdown has cleared the agent.
    if _agent is None:
        raise HTTPException(status_code=503, detail="Agent not ready")

    try:
        # Hand off to agent.py — all the model interaction happens in there.
        text = await ag.run(_agent, req.messages)
    except Exception as e:
        # Top-level boundary: print the full traceback server-side, then
        # report the failure as a bad-gateway error. `from e` keeps the
        # original exception attached as the cause.
        traceback.print_exc()
        raise HTTPException(status_code=502, detail=str(e)) from e

    # Standard Chat Completions response envelope. `created` is the Unix
    # timestamp (in seconds) that the chat.completion object defines.
    return {
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": ag.MODEL,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": text},
                "finish_reason": "stop",
            }
        ],
    }