diff --git a/README.md b/README.md index 0ff3a86..71092b9 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,15 @@ export OCR_LLM_MODEL=claude-opus-4-6 export OCR_USE_ANTHROPIC=true ``` +For OpenAI Codex / GPT-5 reasoning models, use the Responses endpoint: + +```bash +ocr config set llm.url https://api.openai.com/v1/responses +ocr config set llm.auth_token "$OPENAI_API_KEY" +ocr config set llm.model gpt-5.1-codex-max +ocr config set llm.use_anthropic false +``` + Config is stored in `~/.opencodereview/config.json`. It is also compatible with Claude Code environment variables (`ANTHROPIC_BASE_URL`, `ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_MODEL`) and parses `~/.zshrc` / `~/.bashrc` for those exports. @@ -331,9 +340,9 @@ Config file: `~/.opencodereview/config.json` | Key | Type | Example | |-----|------|---------| -| `llm.url` | string | `https://api.openai.com/v1/chat/completions` | +| `llm.url` | string | `https://api.openai.com/v1/responses` or `https://api.openai.com/v1/chat/completions` | | `llm.auth_token` | string | `sk-xxxxxxx` | -| `llm.model` | string | `claude-opus-4-6` | +| `llm.model` | string | `gpt-5.1-codex-max` or `claude-opus-4-6` | | `llm.use_anthropic` | boolean | `true` \| `false` | | `language` | string | `English` \| `Chinese` (default: Chinese) | | `telemetry.enabled` | boolean | `true` \| `false` | @@ -352,6 +361,8 @@ Environment variables take precedence over the config file. | `OCR_LLM_MODEL` | Model name | | `OCR_USE_ANTHROPIC` | `true` = Anthropic, `false` = OpenAI | +`OCR_LLM_AUTH_TOKEN` and `OCR_LLM_USE_ANTHROPIC` are also accepted for compatibility with the CI examples. 
+ ## Telemetry diff --git a/cmd/opencodereview/flags.go b/cmd/opencodereview/flags.go index 8a73d6d..1d3eb4f 100644 --- a/cmd/opencodereview/flags.go +++ b/cmd/opencodereview/flags.go @@ -247,9 +247,10 @@ Usage: ocr config set Examples: - ocr config set llm.url https://xx/v1/openai/chat/completions + ocr config set llm.url https://api.openai.com/v1/responses ocr config set llm.auth_token xxxxxxxxxx - ocr config set llm.model claude-opus-4-6 + ocr config set llm.model gpt-5.1-codex-max + ocr config set llm.use_anthropic false ocr config set llm.extra_body '{"thinking":{"type":"disabled"}}' ocr config set language English ocr config set telemetry.enabled true diff --git a/internal/llm/client.go b/internal/llm/client.go index 2217bc4..123ddb2 100644 --- a/internal/llm/client.go +++ b/internal/llm/client.go @@ -1,5 +1,6 @@ // Package llm provides LLM client interfaces supporting multiple protocols. -// Supported protocols: Anthropic Messages API, OpenAI Chat Completions API. +// Supported protocols: Anthropic Messages API, OpenAI Chat Completions API, +// and OpenAI Responses API. package llm import ( @@ -197,7 +198,8 @@ type ClientConfig struct { // --- Factory --- // NewLLMClient creates the appropriate client based on the resolved endpoint protocol. -// protocol: "anthropic" -> AnthropicClient, anything else -> OpenAIClient. +// protocol: "anthropic" -> AnthropicClient; OpenAI /responses URLs -> OpenAIResponsesClient; +// anything else -> OpenAIClient. 
// useMaxCompletionTokens reports whether the given model name contains one of
// the family markers ("gpt-5", "codex", "o1", "o3", "o4"). The comparison is a
// case-insensitive substring match, so the marker may appear anywhere in the
// name (for example behind a provider prefix).
func useMaxCompletionTokens(model string) bool {
	name := strings.ToLower(model)
	for _, marker := range []string{"gpt-5", "codex", "o1", "o3", "o4"} {
		if strings.Contains(name, marker) {
			return true
		}
	}
	return false
}
type ChatRequest struct { Model string `json:"model"` @@ -370,15 +392,10 @@ func (c *OpenAIClient) StreamCompletionWithCtx(ctx context.Context, req ChatRequ } return c.withRetryCtx(ctx, func() error { - body := make(map[string]any) - b, _ := json.Marshal(req) - json.Unmarshal(b, &body) - body["model"] = model - for k, v := range c.cfg.ExtraBody { - body[k] = v + payload, err := c.buildRequestPayload(model, req) + if err != nil { + return fmt.Errorf("marshal request body: %w", err) } - - payload, _ := json.Marshal(body) httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.cfg.URL, bytes.NewReader(payload)) if err != nil { return fmt.Errorf("create request: %w", err) @@ -431,8 +448,7 @@ func (c *OpenAIClient) doRequestCtx(ctx context.Context, model string, req ChatR if model == "" { model = c.cfg.Model } - req.Model = model - payload, err := mergeExtraBody(req, c.cfg.ExtraBody) + payload, err := c.buildRequestPayload(model, req) if err != nil { return nil, fmt.Errorf("marshal request body: %w", err) } @@ -478,6 +494,26 @@ func (c *OpenAIClient) doRequestCtx(ctx context.Context, model string, req ChatR }, nil } +func (c *OpenAIClient) buildRequestPayload(model string, req ChatRequest) ([]byte, error) { + req.Model = model + b, err := json.Marshal(req) + if err != nil { + return nil, err + } + var body map[string]any + if err := json.Unmarshal(b, &body); err != nil { + return nil, err + } + if req.MaxTokens > 0 && useMaxCompletionTokens(model) { + delete(body, "max_tokens") + body["max_completion_tokens"] = req.MaxTokens + } + for k, v := range c.cfg.ExtraBody { + body[k] = v + } + return json.Marshal(body) +} + // --- AnthropicClient --- const anthropicVersion = "2023-06-01" diff --git a/internal/llm/client_test.go b/internal/llm/client_test.go index e3adc81..0c8c2e3 100644 --- a/internal/llm/client_test.go +++ b/internal/llm/client_test.go @@ -1,6 +1,9 @@ package llm import ( + "encoding/json" + "net/http" + "net/http/httptest" "testing" ) @@ -47,6 
+50,205 @@ func TestNewOpenAIClient_URLNormalization(t *testing.T) { } } +func TestNewOpenAIResponsesClient_URLNormalization(t *testing.T) { + tests := []struct { + name string + inputURL string + wantURL string + }{ + { + name: "base URL without trailing slash", + inputURL: "https://api.example.com/v1", + wantURL: "https://api.example.com/v1/responses", + }, + { + name: "base URL with trailing slash", + inputURL: "https://api.example.com/v1/", + wantURL: "https://api.example.com/v1/responses", + }, + { + name: "full URL already has responses", + inputURL: "https://api.example.com/v1/responses", + wantURL: "https://api.example.com/v1/responses", + }, + { + name: "full URL with trailing slash", + inputURL: "https://api.example.com/v1/responses/", + wantURL: "https://api.example.com/v1/responses/", + }, + { + name: "bare host", + inputURL: "https://api.example.com", + wantURL: "https://api.example.com/responses", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := NewOpenAIResponsesClient(ClientConfig{URL: tt.inputURL}) + if client.cfg.URL != tt.wantURL { + t.Errorf("got URL %q, want %q", client.cfg.URL, tt.wantURL) + } + }) + } +} + +func TestNewLLMClient_SelectsResponsesClient(t *testing.T) { + client := NewLLMClient(ResolvedEndpoint{ + URL: "https://api.openai.com/v1/responses", + Token: "test-token", + Model: "gpt-5.1-codex-max", + Protocol: "openai", + }) + if _, ok := client.(*OpenAIResponsesClient); !ok { + t.Fatalf("expected *OpenAIResponsesClient, got %T", client) + } +} + +func TestOpenAIClient_UsesMaxCompletionTokensForCodexModels(t *testing.T) { + var got map[string]any + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if err := json.NewDecoder(r.Body).Decode(&got); err != nil { + t.Fatalf("decode request: %v", err) + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = 
w.Write([]byte(`{"id":"chatcmpl_1","model":"gpt-5.1-codex-max","choices":[{"message":{"role":"assistant","content":"ok"},"finish_reason":"stop"}]}`)) + })) + defer server.Close() + + client := NewOpenAIClient(ClientConfig{URL: server.URL, APIKey: "test-token", Model: "gpt-5.1-codex-max"}) + _, err := client.Completions(ChatRequest{ + Messages: []Message{NewTextMessage("user", "hi")}, + MaxTokens: 42, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := got["max_tokens"]; ok { + t.Fatalf("request included max_tokens: %#v", got) + } + if got["max_completion_tokens"] != float64(42) { + t.Fatalf("max_completion_tokens = %#v, want 42", got["max_completion_tokens"]) + } +} + +func TestOpenAIClient_KeepsMaxTokensForLegacyModels(t *testing.T) { + var got map[string]any + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if err := json.NewDecoder(r.Body).Decode(&got); err != nil { + t.Fatalf("decode request: %v", err) + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{"id":"chatcmpl_1","model":"gpt-4o","choices":[{"message":{"role":"assistant","content":"ok"},"finish_reason":"stop"}]}`)) + })) + defer server.Close() + + client := NewOpenAIClient(ClientConfig{URL: server.URL, APIKey: "test-token", Model: "gpt-4o"}) + _, err := client.Completions(ChatRequest{ + Messages: []Message{NewTextMessage("user", "hi")}, + MaxTokens: 42, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := got["max_completion_tokens"]; ok { + t.Fatalf("request included max_completion_tokens: %#v", got) + } + if got["max_tokens"] != float64(42) { + t.Fatalf("max_tokens = %#v, want 42", got["max_tokens"]) + } +} + +func TestOpenAIResponsesClient_RequestAndResponseMapping(t *testing.T) { + var got map[string]any + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/responses" 
{ + t.Fatalf("path = %q, want /v1/responses", r.URL.Path) + } + if err := json.NewDecoder(r.Body).Decode(&got); err != nil { + t.Fatalf("decode request: %v", err) + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(`{ + "id":"resp_1", + "model":"gpt-5.1-codex-max", + "output":[ + {"type":"message","role":"assistant","content":[{"type":"output_text","text":"Need more context"}]}, + {"type":"function_call","call_id":"call_1","name":"code_search","arguments":"{\"query\":\"foo\"}"} + ], + "usage":{"input_tokens":10,"output_tokens":5,"total_tokens":15} + }`)) + })) + defer server.Close() + + client := NewOpenAIResponsesClient(ClientConfig{ + URL: server.URL + "/v1", + APIKey: "test-token", + Model: "gpt-5.1-codex-max", + }) + resp, err := client.Completions(ChatRequest{ + Messages: []Message{ + NewTextMessage("system", "review code"), + NewTextMessage("user", "diff"), + NewToolCallMessage("", []ToolCall{{ + ID: "call_prev", + Type: "function", + Function: FunctionCall{ + Name: "code_search", + Arguments: `{"query":"bar"}`, + }, + }}), + NewToolResultMessage("call_prev", "result"), + }, + Tools: []ToolDef{{ + Type: "function", + Function: FunctionDef{ + Name: "code_search", + Description: "Search code", + Parameters: map[string]any{"type": "object"}, + }, + }}, + MaxTokens: 99, + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got["max_output_tokens"] != float64(99) { + t.Fatalf("max_output_tokens = %#v, want 99", got["max_output_tokens"]) + } + input := got["input"].([]any) + if input[0].(map[string]any)["role"] != "system" { + t.Fatalf("first input item = %#v", input[0]) + } + if input[2].(map[string]any)["type"] != "function_call" { + t.Fatalf("third input item = %#v", input[2]) + } + if input[3].(map[string]any)["type"] != "function_call_output" { + t.Fatalf("fourth input item = %#v", input[3]) + } + tools := got["tools"].([]any) + if tools[0].(map[string]any)["strict"] != false { + 
t.Fatalf("tool strict = %#v, want false", tools[0].(map[string]any)["strict"]) + } + + if resp.Content() != "Need more context" { + t.Fatalf("content = %q", resp.Content()) + } + calls := resp.ToolCalls() + if len(calls) != 1 || calls[0].ID != "call_1" || calls[0].Function.Name != "code_search" { + t.Fatalf("tool calls = %#v", calls) + } + if resp.Usage == nil || resp.Usage.TotalTokens != 15 { + t.Fatalf("usage = %#v", resp.Usage) + } +} + func TestNewAnthropicClient_URLNormalization(t *testing.T) { tests := []struct { name string diff --git a/internal/llm/resolver.go b/internal/llm/resolver.go index 442ff81..4a648a0 100644 --- a/internal/llm/resolver.go +++ b/internal/llm/resolver.go @@ -21,10 +21,12 @@ type ResolvedEndpoint struct { // Environment variable names for OCR-specific configuration. const ( - envOCRLLMURL = "OCR_LLM_URL" - envOCRLLMToken = "OCR_LLM_TOKEN" - envOCRLLMModel = "OCR_LLM_MODEL" - envOCRUseAnthropic = "OCR_USE_ANTHROPIC" + envOCRLLMURL = "OCR_LLM_URL" + envOCRLLMToken = "OCR_LLM_TOKEN" + envOCRLLMAuthToken = "OCR_LLM_AUTH_TOKEN" + envOCRLLMModel = "OCR_LLM_MODEL" + envOCRUseAnthropic = "OCR_USE_ANTHROPIC" + envOCRLLMUseAnthropic = "OCR_LLM_USE_ANTHROPIC" ) // Environment variable names from Claude Code configuration. 
// firstNonEmptyEnv looks up the given environment variables in order and
// returns the first value that is not the empty string. It returns "" when no
// keys are given or when every key is unset or empty.
func firstNonEmptyEnv(keys ...string) string {
	var value string
	// Stop scanning as soon as a non-empty value has been found.
	for i := 0; i < len(keys) && value == ""; i++ {
		value = os.Getenv(keys[i])
	}
	return value
}
type llmFileConfig struct { URL string `json:"url,omitempty"` diff --git a/internal/llm/resolver_test.go b/internal/llm/resolver_test.go index 61ca101..68b5cf2 100644 --- a/internal/llm/resolver_test.go +++ b/internal/llm/resolver_test.go @@ -115,3 +115,61 @@ func TestResolveEndpoint_ConfigFileStripsModelSuffix(t *testing.T) { t.Errorf("expected source %q, got %q", "OCR config file", ep.Source) } } + +func TestResolveEndpoint_OCREnvTakesPriorityOverConfigFile(t *testing.T) { + t.Setenv("OCR_LLM_URL", "https://env.example.com/v1/responses") + t.Setenv("OCR_LLM_TOKEN", "env-token") + t.Setenv("OCR_LLM_MODEL", "gpt-5.1-codex-max") + t.Setenv("OCR_USE_ANTHROPIC", "false") + t.Setenv("ANTHROPIC_BASE_URL", "") + t.Setenv("ANTHROPIC_AUTH_TOKEN", "") + t.Setenv("ANTHROPIC_MODEL", "") + + cfg := configFile{ + Llm: llmFileConfig{ + URL: "https://config.example.com/v1/messages", + AuthToken: "config-token", + Model: "claude-opus-4-7", + }, + } + data, _ := json.Marshal(cfg) + cfgPath := filepath.Join(t.TempDir(), "config.json") + os.WriteFile(cfgPath, data, 0644) + + ep, err := ResolveEndpoint(cfgPath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ep.Source != "OCR environment" { + t.Fatalf("source = %q, want OCR environment", ep.Source) + } + if ep.URL != "https://env.example.com/v1/responses" { + t.Fatalf("url = %q, want env URL", ep.URL) + } + if ep.Protocol != "openai" { + t.Fatalf("protocol = %q, want openai", ep.Protocol) + } +} + +func TestResolveEndpoint_OCREnvCompatibilityNames(t *testing.T) { + t.Setenv("OCR_LLM_URL", "https://api.example.com/v1/responses") + t.Setenv("OCR_LLM_TOKEN", "") + t.Setenv("OCR_LLM_AUTH_TOKEN", "compat-token") + t.Setenv("OCR_LLM_MODEL", "gpt-5.1-codex-max") + t.Setenv("OCR_USE_ANTHROPIC", "") + t.Setenv("OCR_LLM_USE_ANTHROPIC", "false") + t.Setenv("ANTHROPIC_BASE_URL", "") + t.Setenv("ANTHROPIC_AUTH_TOKEN", "") + t.Setenv("ANTHROPIC_MODEL", "") + + ep, err := ResolveEndpoint(filepath.Join(t.TempDir(), 
"nonexistent.json")) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ep.Token != "compat-token" { + t.Fatalf("token = %q, want compatibility token", ep.Token) + } + if ep.Protocol != "openai" { + t.Fatalf("protocol = %q, want openai", ep.Protocol) + } +} diff --git a/internal/llm/responses_client.go b/internal/llm/responses_client.go new file mode 100644 index 0000000..01e95b3 --- /dev/null +++ b/internal/llm/responses_client.go @@ -0,0 +1,339 @@ +package llm + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +// OpenAIResponsesClient sends requests to the OpenAI Responses API. +type OpenAIResponsesClient struct { + cfg ClientConfig + client *http.Client +} + +// NewOpenAIResponsesClient creates a new OpenAI Responses API client. +func NewOpenAIResponsesClient(cfg ClientConfig) *OpenAIResponsesClient { + if cfg.Timeout <= 0 { + cfg.Timeout = 5 * time.Minute + } + baseURL := strings.TrimRight(cfg.URL, "/") + if !strings.HasSuffix(baseURL, "/responses") { + cfg.URL = baseURL + "/responses" + } + return &OpenAIResponsesClient{ + cfg: cfg, + client: &http.Client{ + Timeout: cfg.Timeout, + }, + } +} + +// Completions sends a Responses API request and returns the parsed response. +func (c *OpenAIResponsesClient) Completions(req ChatRequest) (*ChatResponse, error) { + return c.CompletionsWithCtx(context.Background(), req) +} + +// CompletionsWithCtx sends a Responses API request with context support. +func (c *OpenAIResponsesClient) CompletionsWithCtx(ctx context.Context, req ChatRequest) (*ChatResponse, error) { + model := req.Model + if model == "" { + model = c.cfg.Model + } + + var result *ChatResponse + err := c.withRetryCtx(ctx, func() error { + resp, err := c.doRequestCtx(ctx, model, req) + if err != nil { + return err + } + result = resp + return nil + }) + return result, err +} + +// StreamCompletion initiates a streaming Responses API call. 
+func (c *OpenAIResponsesClient) StreamCompletion(req ChatRequest, cb func(chunk []byte) error) error { + return c.StreamCompletionWithCtx(context.Background(), req, cb) +} + +// StreamCompletionWithCtx initiates a streaming Responses API call with context support. +func (c *OpenAIResponsesClient) StreamCompletionWithCtx(ctx context.Context, req ChatRequest, cb func(chunk []byte) error) error { + req.Stream = true + + model := req.Model + if model == "" { + model = c.cfg.Model + } + + return c.withRetryCtx(ctx, func() error { + payload, err := c.buildRequestPayload(model, req) + if err != nil { + return fmt.Errorf("marshal request body: %w", err) + } + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.cfg.URL, bytes.NewReader(payload)) + if err != nil { + return fmt.Errorf("create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + httpReq.Header.Set("Authorization", "Bearer "+c.cfg.APIKey) + httpReq.Header.Set("Accept", "text/event-stream") + httpReq.Header.Set("User-Agent", userAgent("")) + + resp, err := c.client.Do(httpReq) + if err != nil { + return fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + if isRetryableStatus(resp.StatusCode) { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("API error %d: %s", resp.StatusCode, string(bodyBytes)) + } + if resp.StatusCode >= 400 { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("API error %d: %s (non-retryable)", resp.StatusCode, string(bodyBytes)) + } + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + if !strings.HasPrefix(line, "data: ") { + continue + } + data := strings.TrimPrefix(line, "data: ") + if data == "[DONE]" { + break + } + if err := cb([]byte(data)); err != nil { + return err + } + } + return scanner.Err() + }) +} + +func (c *OpenAIResponsesClient) doRequestCtx(ctx context.Context, model string, req ChatRequest) (*ChatResponse, error) { + if model == "" { + model = 
c.cfg.Model + } + payload, err := c.buildRequestPayload(model, req) + if err != nil { + return nil, fmt.Errorf("marshal request body: %w", err) + } + + httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.cfg.URL, bytes.NewReader(payload)) + if err != nil { + return nil, fmt.Errorf("create request: %w", err) + } + httpReq.Header.Set("Content-Type", "application/json") + httpReq.Header.Set("Authorization", "Bearer "+c.cfg.APIKey) + httpReq.Header.Set("User-Agent", userAgent("")) + + resp, err := c.client.Do(httpReq) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + bodyBytes, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("read response body: %w", err) + } + + if resp.StatusCode >= 400 { + detail := extractErrorMessage(bodyBytes) + return nil, fmt.Errorf("API error %d: %s", resp.StatusCode, detail) + } + + chatResp, err := parseResponsesAPIResponse(bodyBytes, resp.Header) + if err != nil { + return nil, fmt.Errorf("decode response: %w", err) + } + return chatResp, nil +} + +func (c *OpenAIResponsesClient) buildRequestPayload(model string, req ChatRequest) ([]byte, error) { + body := map[string]any{ + "model": model, + "input": buildResponsesInput(req.Messages), + } + if len(req.Tools) > 0 { + body["tools"] = buildResponsesTools(req.Tools) + } + if req.Stream { + body["stream"] = true + } + if req.Temperature != nil { + body["temperature"] = req.Temperature + } + if req.MaxTokens > 0 { + body["max_output_tokens"] = req.MaxTokens + } + for k, v := range c.cfg.ExtraBody { + body[k] = v + } + return json.Marshal(body) +} + +func buildResponsesInput(messages []Message) []map[string]any { + items := make([]map[string]any, 0, len(messages)) + for _, msg := range messages { + switch msg.Role { + case "tool": + items = append(items, map[string]any{ + "type": "function_call_output", + "call_id": msg.ToolCallID, + "output": responseContentAsString(msg.Content), + }) + case "assistant": 
+ content := msg.ExtractText() + if content != "" { + items = append(items, map[string]any{ + "role": "assistant", + "content": content, + }) + } + for _, tc := range msg.ToolCalls { + items = append(items, map[string]any{ + "type": "function_call", + "call_id": tc.ID, + "name": tc.Function.Name, + "arguments": tc.Function.Arguments, + }) + } + default: + items = append(items, map[string]any{ + "role": msg.Role, + "content": responseContentAsString(msg.Content), + }) + } + } + return items +} + +func responseContentAsString(content any) string { + switch v := content.(type) { + case string: + return v + case []ContentBlock: + msg := Message{Content: v} + return msg.ExtractText() + default: + return fmt.Sprintf("%v", v) + } +} + +func buildResponsesTools(tools []ToolDef) []map[string]any { + items := make([]map[string]any, 0, len(tools)) + for _, t := range tools { + items = append(items, map[string]any{ + "type": "function", + "name": t.Function.Name, + "description": t.Function.Description, + "parameters": t.Function.Parameters, + "strict": false, + }) + } + return items +} + +func parseResponsesAPIResponse(body []byte, headers http.Header) (*ChatResponse, error) { + type responseContent struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` + } + type responseOutput struct { + Type string `json:"type"` + Role string `json:"role,omitempty"` + Content []responseContent `json:"content,omitempty"` + ID string `json:"id,omitempty"` + CallID string `json:"call_id,omitempty"` + Name string `json:"name,omitempty"` + Arguments string `json:"arguments,omitempty"` + } + var resp struct { + ID string `json:"id"` + Model string `json:"model"` + Output []responseOutput `json:"output"` + OutputText string `json:"output_text,omitempty"` + } + if err := json.Unmarshal(body, &resp); err != nil { + return nil, err + } + + var textParts []string + var toolCalls []ToolCall + role := "assistant" + for _, item := range resp.Output { + switch item.Type { + case 
"message": + if item.Role != "" { + role = item.Role + } + for _, content := range item.Content { + if content.Text != "" { + textParts = append(textParts, content.Text) + } + } + case "function_call": + callID := item.CallID + if callID == "" { + callID = item.ID + } + toolCalls = append(toolCalls, ToolCall{ + ID: callID, + Type: "function", + Function: FunctionCall{ + Name: item.Name, + Arguments: item.Arguments, + }, + }) + } + } + if len(textParts) == 0 && resp.OutputText != "" { + textParts = append(textParts, resp.OutputText) + } + + var contentStr *string + if len(textParts) > 0 { + s := strings.Join(textParts, "\n") + contentStr = &s + } + + finishReason := "stop" + if len(toolCalls) > 0 { + finishReason = "tool_calls" + } + + return &ChatResponse{ + ID: resp.ID, + Model: resp.Model, + Choices: []Choice{{ + Message: ResponseMessage{ + Role: role, + Content: contentStr, + ToolCalls: toolCalls, + }, + FinishReason: finishReason, + }}, + Headers: headers, + Usage: resolveUsage(body), + }, nil +} + +func (c *OpenAIResponsesClient) withRetry(fn func() error) error { + return retryWithCtx(context.Background(), fn) +} + +func (c *OpenAIResponsesClient) withRetryCtx(ctx context.Context, fn func() error) error { + return retryWithCtx(ctx, fn) +} diff --git a/skills/open-code-review/SKILL.md b/skills/open-code-review/SKILL.md index ff61473..6b84d12 100644 --- a/skills/open-code-review/SKILL.md +++ b/skills/open-code-review/SKILL.md @@ -61,6 +61,15 @@ ocr config set llm.model claude-opus-4-6 ocr config set llm.use_anthropic true ``` +For OpenAI Codex / GPT-5 reasoning models, use: + +```bash +ocr config set llm.url https://api.openai.com/v1/responses +ocr config set llm.auth_token +ocr config set llm.model gpt-5.1-codex-max +ocr config set llm.use_anthropic false +``` + Stop here and ask the user to provide credentials — never invent or hardcode API keys. ## Workflow