# Source: simple-debug-agent/main.py at main · blastStu/simple-debug-agent · GitHub
# main.py — infrastructure layer.
# Owns: Ollama config, MCP server spawning, FastAPI app, HTTP endpoint.
# The actual agent logic lives in agent.py.

import sys
import time
import traceback
import uuid
from contextlib import asynccontextmanager

from agents import set_default_openai_client, set_default_openai_api
from agents.mcp import MCPServerStdio
from fastapi import FastAPI, HTTPException
from openai import AsyncOpenAI
from pydantic import BaseModel

import agent as ag

# --- Ollama configuration ---------------------------------------------------
# The Agents SDK targets OpenAI out of the box; the calls below repoint it at
# a local Ollama instance. They run at import time so the defaults are already
# in place before any agent is created.

# Client aimed at Ollama's OpenAI-compatible endpoint rather than
# api.openai.com. The constructor insists on an api_key, but Ollama ignores
# it, so a placeholder value is enough.
_ollama_client = AsyncOpenAI(
    api_key="ollama",
    base_url="http://localhost:11434/v1",
)

# Register the client as the SDK default. use_for_tracing=False keeps the SDK
# from trying to ship trace events to OpenAI, which would fail without a real
# OpenAI key.
set_default_openai_client(_ollama_client, use_for_tracing=False)

# Ollama serves the Chat Completions API (/v1/chat/completions). The SDK
# defaults to the newer Responses API, which Ollama doesn't support, so
# without this switch every request would 404.
set_default_openai_api("chat_completions")


# --- Application state ------------------------------------------------------
# Module-level handle to the live agent, shared between lifespan() and the
# request handler. lifespan() assigns it during startup and resets it to None
# on shutdown; it is also None from process start until startup completes,
# which is the condition the handler's 503 guard checks for.
_agent = None


# --- Lifespan ---------------------------------------------------------------
# FastAPI calls this once when the server starts and once when it stops.
# Everything inside the `async with MCPServerStdio(...)` block is kept alive
# for as long as the server is running.

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Own the MCP search server and the agent for the lifetime of the app.

    On startup this spawns ``search_server.py`` as a child process, builds the
    agent around that connection with ``ag.create_agent`` and publishes it in
    the module-level ``_agent``. On shutdown it clears ``_agent`` first and
    then leaves the ``async with`` block, which closes the MCP server
    connection.

    Args:
        app: The FastAPI application. Required by the lifespan protocol but
            not otherwise used here.

    Yields:
        None. FastAPI serves requests while this generator is suspended.
    """
    global _agent

    # Step 1: spawn search_server.py as a child process.
    # MCPServerStdio communicates with it over stdin/stdout (stdio).
    # sys.executable is the current Python interpreter — using it (rather than
    # the string "python") keeps the child in the same venv with the same
    # installed packages.
    # NOTE(review): "search_server.py" is a relative path, so it is presumably
    # resolved against the directory the server is launched from — confirm.
    async with MCPServerStdio(
        name="search",
        params={"command": sys.executable, "args": ["search_server.py"]},
    ) as search_server:

        # Step 2: create the agent, handing it the live MCP server connection.
        # If this raises, _agent is never assigned and stays None.
        _agent = ag.create_agent(search_server)

        try:
            # Step 3: yield — FastAPI starts accepting requests at this point.
            # Execution pauses here until the server is told to shut down.
            yield
        finally:
            # Step 4: withdraw the agent *before* the MCP connection is torn
            # down, and do it in `finally` so it also happens when an
            # exception is thrown in at the yield or when closing the MCP
            # server itself fails. Handlers that see None answer with a 503
            # instead of using an agent whose server is going away.
            _agent = None


# --- FastAPI app ------------------------------------------------------------
# The application instance that the route decorators below register against.
# Passing lifespan= hands startup and shutdown to lifespan() above, which is
# where the agent is created and later cleared.
app = FastAPI(lifespan=lifespan)


class ChatRequest(BaseModel):
    # Request body for POST /v1/chat/completions, shaped like a standard
    # Chat Completions request.

    # Model name supplied by the client; defaults to ag.MODEL when omitted.
    model: str = ag.MODEL
    # Conversation so far as a list of message dicts,
    # e.g. [{"role": "user", "content": "what's in the news?"}]
    # NOTE(review): typed as a bare list, so the shape of each element is not
    # validated at this layer.
    messages: list


@app.post("/v1/chat/completions")
async def chat_completions(req: ChatRequest):
    """Run the agent on the request's messages and wrap the reply.

    Args:
        req: Parsed request body. Only ``req.messages`` is forwarded to the
            agent; ``req.model`` is accepted but not read here, and the
            response always reports ``ag.MODEL``.

    Returns:
        A dict in the Chat Completions response shape with a single
        assistant choice containing the agent's text.

    Raises:
        HTTPException: 503 when no agent is available (before startup has
            finished or after shutdown); 502 when ``ag.run`` raises, with the
            exception text as the detail.
    """
    # Bind the global once so the guard and the call below are guaranteed to
    # see the same object even if lifespan() clears _agent in the meantime.
    agent = _agent
    if agent is None:
        raise HTTPException(status_code=503, detail="Agent not ready")

    try:
        # Hand off to agent.py — all the model interaction happens in there.
        text = await ag.run(agent, req.messages)
    except Exception as e:
        # Boundary handler: print the full traceback server-side, then report
        # the failure as a 502. `from e` keeps the original exception attached
        # as the explicit cause.
        traceback.print_exc()
        raise HTTPException(status_code=502, detail=str(e)) from e

    # Standard Chat Completions response envelope. "created" is the Unix
    # timestamp (in seconds) at which the completion was produced.
    return {
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": ag.MODEL,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": text},
                "finish_reason": "stop",
            }
        ],
    }