Skip to content
Merged
8 changes: 7 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -248,14 +248,20 @@ jobs:
type=semver,pattern={{major}},value=${{ needs.release.outputs.tag }}
type=raw,value=latest

- name: Strip v prefix from version
id: version
env:
TAG: ${{ needs.release.outputs.tag }}
run: echo "value=${TAG#v}" >> "$GITHUB_OUTPUT"

- uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: VERSION=${{ needs.release.outputs.tag | trimPrefix 'v' }}
build-args: VERSION=${{ steps.version.outputs.value }}
cache-from: type=gha
cache-to: type=gha,mode=max

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ bin/
configs/config.json
.tmp/
oc-go-cc
routatic-proxy
8 changes: 8 additions & 0 deletions cmd/routatic-proxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ func serveCmd() *cobra.Command {
return err
}
} else {
// Ensure config directory exists before writing PID file.
paths, err := daemon.DefaultPaths()
if err != nil {
return err
}
if err := paths.EnsureConfigDir(); err != nil {
return err
}
// Write PID file for foreground mode.
if err := daemon.WritePID(pidPath, os.Getpid()); err != nil {
return fmt.Errorf("failed to write PID file: %w", err)
Expand Down
1 change: 1 addition & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type Config struct {
type ModelConfig struct {
Provider string `json:"provider"`
ModelID string `json:"model_id"`
WireFormat string `json:"wire_format,omitempty"` // "auto" (default), "openai", "anthropic", "responses", "gemini"
Temperature float64 `json:"temperature"`
MaxTokens int `json:"max_tokens"`
ContextThreshold int `json:"context_threshold"`
Expand Down
33 changes: 33 additions & 0 deletions internal/core/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package core

import "errors"

// Sentinel errors for common provider and routing failures. Each value is a
// distinct errors.New instance, so callers can match them with errors.Is.
var (
	// ErrModelNotFound reports that a model could not be found.
	ErrModelNotFound = errors.New("model not found")
	// ErrProviderNotFound reports that a provider could not be found.
	ErrProviderNotFound = errors.New("provider not found")
	// ErrUnsupportedCapability reports that a model does not support a capability.
	ErrUnsupportedCapability = errors.New("capability not supported by model")
	// ErrRateLimited reports that the provider rate limited the request.
	ErrRateLimited = errors.New("rate limited by provider")
	// ErrStreamIdle reports that the upstream stream went idle.
	ErrStreamIdle = errors.New("upstream stream idle")
	// ErrClientDisconnected reports that the client disconnected.
	ErrClientDisconnected = errors.New("client disconnected")
)

// NormalizedError describes a provider failure together with structured
// context about its origin and whether it may be retried.
type NormalizedError struct {
	// Kind classifies the failure: "api_error", "rate_limit",
	// "invalid_request", etc.
	Kind string
	// Message is the text returned by Error.
	Message string
	// Retryable is the value reported by IsRetryable.
	Retryable bool
	// StatusCode is a numeric status for the failure.
	// NOTE(review): presumably the upstream HTTP status; confirm with the
	// code that populates it.
	StatusCode int
	// Provider and ModelID name the provider and model associated with the
	// failure.
	Provider, ModelID string
}

// Compile-time check that *NormalizedError satisfies the error interface.
var _ error = (*NormalizedError)(nil)

// Error satisfies the error interface by returning Message unchanged.
func (ne *NormalizedError) Error() string { return ne.Message }

// IsRetryable reports whether the error is safe to retry with a fallback model.
func (ne *NormalizedError) IsRetryable() bool { return ne.Retryable }
148 changes: 148 additions & 0 deletions internal/core/normalize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package core

import (
"encoding/json"

"github.com/routatic/proxy/pkg/types"
)

// thinkingConfig mirrors the Anthropic thinking field structure so we can
// decode it without coupling to a specific json.RawMessage layout.
type thinkingConfig struct {
	// Type is decoded from the "type" key.
	Type string `json:"type"`
	// BudgetTokens is decoded from the "budget_tokens" key and stays zero
	// when the key is absent.
	BudgetTokens int `json:"budget_tokens,omitempty"`
}

// NormalizeRequest builds a NormalizedRequest from an Anthropic MessageRequest.
//
// Model, MaxTokens, Temperature, the stream flag, the system prompt text and
// the tool definitions are copied across. A thinking payload that decodes
// successfully supplies ReasoningEffort (its "type") and ThinkingBudget (its
// "budget_tokens"); a payload that fails to decode is ignored.
//
// Each message's content blocks are folded into one NormalizedMessage: text
// blocks are concatenated in order, image blocks are replaced by the literal
// "[Image]" placeholder, thinking blocks are concatenated into Thinking, and
// tool_use / tool_result blocks are collected into ToolCalls / ToolResults.
// Block types other than these are dropped.
func NormalizeRequest(anthropicReq *types.MessageRequest) *NormalizedRequest {
	// A nil Stream pointer means the caller did not ask for streaming.
	streaming := false
	if anthropicReq.Stream != nil {
		streaming = *anthropicReq.Stream
	}

	out := &NormalizedRequest{
		Model:     anthropicReq.Model,
		MaxTokens: anthropicReq.MaxTokens,
		Stream:    streaming,
	}

	// The system prompt may be a string or an array of content blocks;
	// SystemText flattens either form.
	out.SystemPrompt = anthropicReq.SystemText()

	// A nil temperature simply stays nil on the normalized request.
	out.Temperature = anthropicReq.Temperature

	// Reasoning effort and thinking budget come from the thinking payload.
	if len(anthropicReq.Thinking) > 0 {
		var cfg thinkingConfig
		if json.Unmarshal(anthropicReq.Thinking, &cfg) == nil {
			out.ReasoningEffort = cfg.Type
			out.ThinkingBudget = cfg.BudgetTokens
		}
	}

	// Messages: one normalized message per source message.
	for _, m := range anthropicReq.Messages {
		converted := NormalizedMessage{Role: m.Role}

		for _, b := range m.ContentBlocks() {
			switch b.Type {
			case "text":
				converted.Content += b.Text
			case "image":
				converted.Content += "[Image]"
			case "thinking":
				converted.Thinking += b.Thinking
			case "tool_use":
				call := NormalizedToolCall{
					ID:        b.ID,
					Name:      b.Name,
					Arguments: string(b.Input),
				}
				converted.ToolCalls = append(converted.ToolCalls, call)
			case "tool_result":
				result := NormalizedToolResult{
					ToolCallID: b.ToolUseID,
					Content:    b.TextContent(),
				}
				converted.ToolResults = append(converted.ToolResults, result)
			}
		}

		out.Messages = append(out.Messages, converted)
	}

	// Tool definitions.
	for _, t := range anthropicReq.Tools {
		out.Tools = append(out.Tools, NormalizedToolDef{
			Name:        t.Name,
			Description: t.Description,
			InputSchema: t.InputSchema,
		})
	}

	return out
}

// DenormalizeResponse converts a NormalizedResponse to an Anthropic MessageResponse.
//
// ID, Model and the usage counters are copied across. For every message whose
// Role is "assistant" the response Role is set to "assistant" and content
// blocks are appended in the order thinking, text, tool_use; empty thinking
// and text are skipped. Messages with any other role contribute nothing.
//
// StopReason is always populated: "end_turn", "max_tokens" and "tool_use"
// pass through, and any other value (including the empty string) becomes
// "end_turn". The mapping runs after the message loop so that a response
// carrying no messages still receives a stop reason.
func DenormalizeResponse(nr *NormalizedResponse) *types.MessageResponse {
	resp := &types.MessageResponse{
		ID:    nr.ID,
		Type:  "message",
		Model: nr.Model,
		Usage: types.Usage{
			InputTokens:              nr.Usage.InputTokens,
			OutputTokens:             nr.Usage.OutputTokens,
			CacheCreationInputTokens: nr.Usage.CacheCreationTokens,
			CacheReadInputTokens:     nr.Usage.CacheReadTokens,
		},
	}

	// Build content blocks from assistant messages.
	for _, msg := range nr.Messages {
		if msg.Role != "assistant" {
			continue
		}
		resp.Role = "assistant"

		// Add thinking block if present.
		if msg.Thinking != "" {
			resp.Content = append(resp.Content, types.ContentBlock{
				Type:     "thinking",
				Thinking: msg.Thinking,
			})
		}

		// Add text block if present.
		if msg.Content != "" {
			resp.Content = append(resp.Content, types.ContentBlock{
				Type: "text",
				Text: msg.Content,
			})
		}

		// Add tool_use blocks.
		for _, tc := range msg.ToolCalls {
			resp.Content = append(resp.Content, types.ContentBlock{
				Type:  "tool_use",
				ID:    tc.ID,
				Name:  tc.Name,
				Input: []byte(tc.Arguments),
			})
		}
	}

	// Determine stop reason. This does not depend on any individual message,
	// so it must not live inside the loop above: with zero messages the loop
	// body never runs and the stop reason would be left empty.
	switch nr.StopReason {
	case "max_tokens":
		resp.StopReason = "max_tokens"
	case "tool_use":
		resp.StopReason = "tool_use"
	default:
		// Covers "end_turn" as well as unknown or empty values.
		resp.StopReason = "end_turn"
	}

	return resp
}
64 changes: 64 additions & 0 deletions internal/core/normalized.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package core

// NormalizedToolResult is one tool result in the normalized format.
type NormalizedToolResult struct {
	// ToolCallID is the ID of the tool call the result answers; Content is
	// the result's text.
	ToolCallID, Content string
}

// NormalizedMessage is a single message in the internal canonical format.
// All wire formats (Anthropic, OpenAI, Responses, Gemini) map to and from
// this representation.
type NormalizedMessage struct {
	// Role is one of "user", "assistant", "system", "tool".
	Role string
	// Content is the concatenated text content. NormalizeRequest also
	// appends the placeholder "[Image]" here for image blocks.
	Content string
	// ToolCalls is present on assistant messages.
	ToolCalls []NormalizedToolCall
	// ToolResults is present on user messages with tool results.
	ToolResults []NormalizedToolResult
	// ToolCallID is kept for backward compatibility.
	//
	// Deprecated: use ToolResults instead.
	ToolCallID string
	// Thinking is the reasoning/thinking content (assistant only).
	Thinking string
}

// NormalizedToolCall is a tool invocation in the internal format.
type NormalizedToolCall struct {
	// ID identifies the call; Name is the tool being invoked.
	ID, Name string
	// Arguments holds the call arguments as a JSON string.
	Arguments string
}

// NormalizedRequest is the canonical internal request format.
type NormalizedRequest struct {
	// Model is the requested model name.
	Model string
	// SystemPrompt is the system prompt as plain text.
	SystemPrompt string
	// Messages is the conversation, in order.
	Messages []NormalizedMessage
	// MaxTokens is the requested output token limit.
	MaxTokens int
	// Temperature is nil when the request did not specify one.
	Temperature *float64
	// TopP is optional. NormalizeRequest does not populate it.
	TopP *float64
	// Stream is true when a streaming response was requested.
	Stream bool
	// Tools lists the tool definitions offered to the model.
	Tools []NormalizedToolDef
	// ReasoningEffort is documented as "low", "medium", "high".
	// NOTE(review): NormalizeRequest stores the Anthropic thinking "type"
	// value here, which may not be one of those three; confirm the
	// intended vocabulary.
	ReasoningEffort string
	// ThinkingBudget is the budget_tokens value for thinking mode.
	ThinkingBudget int
}

// NormalizedToolDef is a tool definition in the internal format.
type NormalizedToolDef struct {
	// Name and Description identify and describe the tool.
	Name, Description string
	// InputSchema holds the JSON bytes of the tool's input schema.
	InputSchema []byte
}

// NormalizedResponse is the canonical internal response format.
type NormalizedResponse struct {
	// ID is the response identifier.
	ID string
	// Model is the model name reported for the response.
	Model string
	// Messages holds the response messages. DenormalizeResponse emits
	// content only for messages whose Role is "assistant".
	Messages []NormalizedMessage
	// StopReason is one of "end_turn", "max_tokens", "tool_use".
	// DenormalizeResponse maps any other value to "end_turn".
	StopReason string
	// Usage holds the token counts for the response.
	Usage NormalizedUsage
}

// NormalizedUsage holds token counts in the internal format.
type NormalizedUsage struct {
	// Input and output token counts.
	InputTokens, OutputTokens int
	// Cache token counts: tokens read from the cache, then tokens written
	// to create cache entries.
	CacheReadTokens, CacheCreationTokens int
}
92 changes: 92 additions & 0 deletions internal/core/provider.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Package core defines the provider abstraction, wire format types, and
// capability metadata that form the foundation of the routing engine.
package core

import (
"context"
"io"
"time"

"github.com/routatic/proxy/internal/config"
)

// WireFormat identifies the upstream API format a provider uses for a given
// model. The zero value is WireFormatOpenAIChat.
type WireFormat int

const (
	// WireFormatOpenAIChat is the OpenAI Chat Completions format (/v1/chat/completions).
	WireFormatOpenAIChat WireFormat = iota
	// WireFormatAnthropic is the Anthropic Messages format (/v1/messages).
	WireFormatAnthropic
	// WireFormatOpenAIResponses is the OpenAI Responses format (/v1/responses).
	WireFormatOpenAIResponses
	// WireFormatGemini is the Google Gemini format (/v1/models/{id}).
	WireFormatGemini
)

// String returns the human-readable name of the wire format: "openai",
// "anthropic", "responses" or "gemini". Any value outside the declared
// constants yields "unknown".
func (w WireFormat) String() string {
	// Keyed by constant so each name stays attached to its format even if
	// the declaration order changes.
	names := [...]string{
		WireFormatOpenAIChat:      "openai",
		WireFormatAnthropic:       "anthropic",
		WireFormatOpenAIResponses: "responses",
		WireFormatGemini:          "gemini",
	}
	if w >= 0 && int(w) < len(names) {
		return names[w]
	}
	return "unknown"
}

// ProviderCapabilities describes what a provider can do at the provider level.
// Per-model refinements are returned by ModelCapabilities.
type ProviderCapabilities struct {
	// Feature flags: streaming responses, tool use, thinking and image input.
	SupportsStreaming, SupportsTools, SupportsThinking, SupportsImageInput bool
	// MaxContextLength is the maximum context length in tokens.
	MaxContextLength int
	// DefaultMaxTokens is the default max-tokens value.
	DefaultMaxTokens int
	// KnownModels lists the model IDs known for the provider.
	KnownModels []string
}

// ExecuteResult holds the result of a non-streaming provider call.
type ExecuteResult struct {
	// Body is the response payload as raw bytes.
	// NOTE(review): the wire format of these bytes is not visible here;
	// confirm with the Provider implementations.
	Body []byte
	// ModelID is the model ID associated with the result.
	ModelID string
	// Latency is the time the call took.
	// NOTE(review): exact start/stop points are set by the implementation.
	Latency time.Duration
}

// Provider is the abstraction for an upstream LLM provider. It exposes
// capability metadata, the wire format used per model, and the two ways of
// issuing a request: Execute (non-streaming) and Stream (SSE).
type Provider interface {
	// Name returns the provider identifier (e.g. "opencode-go", "opencode-zen").
	Name() string

	// Capabilities returns provider-level capabilities.
	Capabilities() ProviderCapabilities

	// ModelCapabilities returns per-model capabilities. Returns false if the
	// model is unknown to this provider.
	ModelCapabilities(modelID string) (ProviderCapabilities, bool)

	// WireFormat returns the wire format for the given model on this provider.
	WireFormat(modelID string) WireFormat

	// Execute sends a non-streaming request and returns the response.
	// ctx carries cancellation for the call; model supplies the per-model
	// configuration to use.
	Execute(ctx context.Context, req *NormalizedRequest, model config.ModelConfig) (*ExecuteResult, error)

	// Stream sends a streaming request and returns an io.ReadCloser for SSE
	// events. The stream emits raw SSE bytes; the handler is responsible for
	// forwarding them.
	// NOTE(review): the caller presumably owns the returned reader and must
	// Close it; confirm with the implementations.
	Stream(ctx context.Context, req *NormalizedRequest, model config.ModelConfig) (io.ReadCloser, error)

	// RoundTripName returns the model ID to use in the upstream request. This
	// may differ from the config's ModelID (e.g. for model overrides).
	RoundTripName(model config.ModelConfig) string

	// StreamIdleTimeout returns the maximum gap between bytes on an active
	// stream before it is treated as stuck and aborted.
	StreamIdleTimeout(model config.ModelConfig) time.Duration
}
Loading
Loading