routatic · samueltuyizere · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -248,14 +248,20 @@ jobs:
             type=semver,pattern={{major}},value=${{ needs.release.outputs.tag }}
             type=raw,value=latest
 
+      - name: Strip v prefix from version
+        id: version
+        env:
+          TAG: ${{ needs.release.outputs.tag }}
+        run: echo "value=${TAG#v}" >> "$GITHUB_OUTPUT"
+
       - uses: docker/build-push-action@v6
         with:
           context: .
           platforms: linux/amd64,linux/arm64
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          build-args: VERSION=${{ needs.release.outputs.tag | trimPrefix 'v' }}
+          build-args: VERSION=${{ steps.version.outputs.value }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
 

diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,4 @@ bin/
 configs/config.json
 .tmp/
 oc-go-cc
+routatic-proxy
diff --git a/cmd/routatic-proxy/main.go b/cmd/routatic-proxy/main.go
@@ -115,6 +115,14 @@ func serveCmd() *cobra.Command {
 					return err
 				}
 			} else {
+				// Ensure config directory exists before writing PID file.
+				paths, err := daemon.DefaultPaths()
+				if err != nil {
+					return err
+				}
+				if err := paths.EnsureConfigDir(); err != nil {
+					return err
+				}
 				// Write PID file for foreground mode.
 				if err := daemon.WritePID(pidPath, os.Getpid()); err != nil {
 					return fmt.Errorf("failed to write PID file: %w", err)

diff --git a/internal/config/config.go b/internal/config/config.go
@@ -24,6 +24,7 @@ type Config struct {
 type ModelConfig struct {
 	Provider               string          `json:"provider"`
 	ModelID                string          `json:"model_id"`
+	WireFormat             string          `json:"wire_format,omitempty"` // "auto" (default), "openai", "anthropic", "responses", "gemini"
 	Temperature            float64         `json:"temperature"`
 	MaxTokens              int             `json:"max_tokens"`
 	ContextThreshold       int             `json:"context_threshold"`

diff --git a/internal/core/errors.go b/internal/core/errors.go
@@ -0,0 +1,33 @@
+package core
+
+import "errors"
+
+// Sentinel errors for common provider and routing failures; match them with errors.Is.
+var (
+	ErrModelNotFound         = errors.New("model not found")
+	ErrProviderNotFound      = errors.New("provider not found")
+	ErrUnsupportedCapability = errors.New("capability not supported by model")
+	ErrRateLimited           = errors.New("rate limited by provider")
+	ErrStreamIdle            = errors.New("upstream stream idle")
+	ErrClientDisconnected    = errors.New("client disconnected")
+)
+
+// NormalizedError describes a provider failure with structured context.
+type NormalizedError struct {
+	Kind       string // "api_error", "rate_limit", "invalid_request", etc.
+	Message    string // Returned verbatim by Error().
+	Retryable  bool   // Returned by IsRetryable().
+	StatusCode int    // NOTE(review): presumably the upstream HTTP status — confirm.
+	Provider   string
+	ModelID    string
+}
+
+// Error implements the error interface; the text is Message alone, without Kind, Provider, or StatusCode.
+func (e *NormalizedError) Error() string {
+	return e.Message
+}
+
+// IsRetryable reports the Retryable flag, i.e. whether the error is safe to retry with a fallback model.
+func (e *NormalizedError) IsRetryable() bool {
+	return e.Retryable
+}
diff --git a/internal/core/normalize.go b/internal/core/normalize.go
@@ -0,0 +1,148 @@
+package core
+
+import (
+	"encoding/json"
+
+	"github.com/routatic/proxy/pkg/types"
+)
+
+// thinkingConfig is the decode target for the raw JSON held in the request's
+// Thinking field; only the "type" and "budget_tokens" keys are read.
+type thinkingConfig struct {
+	Type         string `json:"type"`                    // copied into NormalizedRequest.ReasoningEffort
+	BudgetTokens int    `json:"budget_tokens,omitempty"` // copied into NormalizedRequest.ThinkingBudget
+}
+
+// NormalizeRequest converts an Anthropic MessageRequest to a NormalizedRequest.
+// Text and thinking blocks are concatenated per message, image blocks are
+// reduced to an "[Image]" marker, and other unlisted block types are skipped.
+// NOTE(review): ReasoningEffort receives the raw thinking "type" value.
+func NormalizeRequest(anthropicReq *types.MessageRequest) *NormalizedRequest {
+	// A missing stream flag means a non-streaming request.
+	streaming := false
+	if anthropicReq.Stream != nil {
+		streaming = *anthropicReq.Stream
+	}
+
+	nr := &NormalizedRequest{
+		Model:        anthropicReq.Model,
+		MaxTokens:    anthropicReq.MaxTokens,
+		Stream:       streaming,
+		SystemPrompt: anthropicReq.SystemText(),
+		Temperature:  anthropicReq.Temperature, // stays nil when not provided
+	}
+
+	// Thinking settings are decoded best-effort: a decode error leaves both
+	// fields at their zero values.
+	if raw := anthropicReq.Thinking; len(raw) > 0 {
+		var cfg thinkingConfig
+		if json.Unmarshal(raw, &cfg) == nil {
+			nr.ReasoningEffort = cfg.Type
+			nr.ThinkingBudget = cfg.BudgetTokens
+		}
+	}
+
+	// Flatten each message's content blocks into one NormalizedMessage.
+	for _, msg := range anthropicReq.Messages {
+		nm := NormalizedMessage{Role: msg.Role}
+		for _, block := range msg.ContentBlocks() {
+			switch block.Type {
+			case "text":
+				nm.Content += block.Text
+			case "image":
+				nm.Content += "[Image]"
+			case "thinking":
+				nm.Thinking += block.Thinking
+			case "tool_use":
+				call := NormalizedToolCall{
+					ID:        block.ID,
+					Name:      block.Name,
+					Arguments: string(block.Input),
+				}
+				nm.ToolCalls = append(nm.ToolCalls, call)
+			case "tool_result":
+				result := NormalizedToolResult{
+					ToolCallID: block.ToolUseID,
+					Content:    block.TextContent(),
+				}
+				nm.ToolResults = append(nm.ToolResults, result)
+			}
+		}
+		nr.Messages = append(nr.Messages, nm)
+	}
+
+	// Copy tool definitions field by field.
+	for _, tool := range anthropicReq.Tools {
+		def := NormalizedToolDef{
+			Name:        tool.Name,
+			Description: tool.Description,
+			InputSchema: tool.InputSchema,
+		}
+		nr.Tools = append(nr.Tools, def)
+	}
+
+	return nr
+}
+
+// DenormalizeResponse converts a NormalizedResponse to an Anthropic MessageResponse.
+// Only assistant messages contribute content (thinking, text, then tool_use); nr must be non-nil.
+func DenormalizeResponse(nr *NormalizedResponse) *types.MessageResponse {
+	resp := &types.MessageResponse{
+		ID:    nr.ID,
+		Type:  "message",
+		Model: nr.Model,
+		Usage: types.Usage{
+			InputTokens:              nr.Usage.InputTokens,
+			OutputTokens:             nr.Usage.OutputTokens,
+			CacheCreationInputTokens: nr.Usage.CacheCreationTokens,
+			CacheReadInputTokens:     nr.Usage.CacheReadTokens,
+		},
+	}
+
+	// The stop reason is a property of the whole response, so resolve it once,
+	// outside the message loop; a response with no messages still gets a value.
+	switch nr.StopReason {
+	case "max_tokens":
+		resp.StopReason = "max_tokens"
+	case "tool_use":
+		resp.StopReason = "tool_use"
+	default: // "end_turn" and any unrecognized or empty value
+		resp.StopReason = "end_turn"
+	}
+
+	// Build content blocks from assistant messages.
+	for _, msg := range nr.Messages {
+		if msg.Role != "assistant" {
+			continue
+		}
+		resp.Role = "assistant"
+
+		// Add thinking block if present.
+		if msg.Thinking != "" {
+			resp.Content = append(resp.Content, types.ContentBlock{
+				Type:     "thinking",
+				Thinking: msg.Thinking,
+			})
+		}
+
+		// Add text block if present.
+		if msg.Content != "" {
+			resp.Content = append(resp.Content, types.ContentBlock{
+				Type: "text",
+				Text: msg.Content,
+			})
+		}
+
+		// Add tool_use blocks.
+		for _, tc := range msg.ToolCalls {
+			resp.Content = append(resp.Content, types.ContentBlock{
+				Type:  "tool_use",
+				ID:    tc.ID,
+				Name:  tc.Name,
+				Input: []byte(tc.Arguments), // NOTE(review): empty Arguments yields empty bytes, not "{}" — confirm this marshals
+			})
+		}
+	}
+
+	return resp
+}
diff --git a/internal/core/normalized.go b/internal/core/normalized.go
@@ -0,0 +1,64 @@
+package core
+
+// NormalizedToolResult represents a single tool result in the normalized format.
+type NormalizedToolResult struct {
+	ToolCallID string // ID of the tool call being answered (ToolUseID of an Anthropic tool_result block)
+	Content    string // Text content of the result
+}
+
+// NormalizedMessage is a single message in the internal canonical format.
+// All wire formats (Anthropic, OpenAI, Responses, Gemini) map to and from
+// this representation.
+type NormalizedMessage struct {
+	Role        string                 // "user", "assistant", "system", "tool"
+	Content     string                 // Concatenated text content; NormalizeRequest inserts "[Image]" for image blocks
+	ToolCalls   []NormalizedToolCall   // Present on assistant messages
+	ToolResults []NormalizedToolResult // Present on user messages with tool results
+	ToolCallID  string                 // Deprecated: use ToolResults instead. Kept for backward compat.
+	Thinking    string                 // Reasoning/thinking content (assistant only)
+}
+
+// NormalizedToolCall represents a tool invocation in the internal format.
+type NormalizedToolCall struct {
+	ID        string
+	Name      string
+	Arguments string // JSON string; NormalizeRequest fills it from the tool_use block's Input bytes
+}
+
+// NormalizedRequest is the canonical internal request format.
+type NormalizedRequest struct {
+	Model           string
+	SystemPrompt    string
+	Messages        []NormalizedMessage
+	MaxTokens       int
+	Temperature     *float64
+	TopP            *float64
+	Stream          bool
+	Tools           []NormalizedToolDef
+	ReasoningEffort string // "low", "medium", "high"; NOTE(review): NormalizeRequest stores the Anthropic thinking "type" here — confirm intent
+	ThinkingBudget  int    // budget_tokens for thinking mode
+}
+
+// NormalizedToolDef is a tool definition in the internal format; NormalizeRequest copies all three fields unchanged.
+type NormalizedToolDef struct {
+	Name        string
+	Description string
+	InputSchema []byte // JSON bytes of the schema
+}
+
+// NormalizedResponse is the canonical internal response format.
+type NormalizedResponse struct {
+	ID         string
+	Model      string
+	Messages   []NormalizedMessage
+	StopReason string // "end_turn", "max_tokens", "tool_use"; DenormalizeResponse emits any other value as "end_turn"
+	Usage      NormalizedUsage
+}
+
+// NormalizedUsage holds token counts in the internal format.
+type NormalizedUsage struct {
+	InputTokens         int // becomes Usage.InputTokens in DenormalizeResponse
+	OutputTokens        int // becomes Usage.OutputTokens in DenormalizeResponse
+	CacheReadTokens     int // becomes Usage.CacheReadInputTokens in DenormalizeResponse
+	CacheCreationTokens int // becomes Usage.CacheCreationInputTokens in DenormalizeResponse
+}
diff --git a/internal/core/provider.go b/internal/core/provider.go
@@ -0,0 +1,92 @@
+// Package core defines the provider abstraction, wire format types, and
+// capability metadata that form the foundation of the routing engine.
+package core
+
+import (
+	"context"
+	"io"
+	"time"
+
+	"github.com/routatic/proxy/internal/config"
+)
+
+// WireFormat describes the upstream API format a provider uses for a given model; its zero value is WireFormatOpenAIChat.
+type WireFormat int
+
+const (
+	// WireFormatOpenAIChat is the OpenAI Chat Completions format (/v1/chat/completions).
+	WireFormatOpenAIChat WireFormat = iota
+	// WireFormatAnthropic is the Anthropic Messages format (/v1/messages).
+	WireFormatAnthropic
+	// WireFormatOpenAIResponses is the OpenAI Responses format (/v1/responses).
+	WireFormatOpenAIResponses
+	// WireFormatGemini is the Google Gemini format (/v1/models/{id}).
+	WireFormatGemini
+)
+
+// String returns a human-readable name for the wire format, or "unknown"
+// for a value outside the declared constants.
+func (w WireFormat) String() string {
+	names := [...]string{
+		WireFormatOpenAIChat:      "openai",
+		WireFormatAnthropic:       "anthropic",
+		WireFormatOpenAIResponses: "responses",
+		WireFormatGemini:          "gemini",
+	}
+
+	if w < 0 || int(w) >= len(names) {
+		return "unknown"
+	}
+	return names[w]
+}
+
+// ProviderCapabilities describes what a provider can do at the provider level.
+// Per-model refinements are returned by Provider.ModelCapabilities, which reuses this type.
+type ProviderCapabilities struct {
+	SupportsStreaming  bool
+	SupportsTools      bool
+	SupportsThinking   bool
+	SupportsImageInput bool
+	MaxContextLength   int // in tokens
+	DefaultMaxTokens   int // NOTE(review): presumably also in tokens — confirm
+	KnownModels        []string
+}
+
+// ExecuteResult holds the result of a non-streaming provider call (see Provider.Execute).
+type ExecuteResult struct {
+	Body    []byte        // NOTE(review): presumably the raw upstream response body — confirm its format
+	ModelID string        // NOTE(review): presumably the model that served the call — confirm
+	Latency time.Duration // NOTE(review): presumably the duration of the upstream call — confirm
+}
+
+// Provider is the abstraction for an upstream LLM provider.
+type Provider interface {
+	// Name returns the provider identifier (e.g. "opencode-go", "opencode-zen").
+	Name() string
+
+	// Capabilities returns provider-level capabilities.
+	Capabilities() ProviderCapabilities
+
+	// ModelCapabilities returns per-model capabilities. The bool result is
+	// false if the model is unknown to this provider.
+	ModelCapabilities(modelID string) (ProviderCapabilities, bool)
+
+	// WireFormat returns the wire format for the given model on this provider.
+	WireFormat(modelID string) WireFormat
+
+	// Execute sends a non-streaming request and returns the response.
+	Execute(ctx context.Context, req *NormalizedRequest, model config.ModelConfig) (*ExecuteResult, error)
+
+	// Stream sends a streaming request and returns an io.ReadCloser for SSE
+	// events. The stream emits raw SSE bytes; the handler is responsible for
+	// forwarding them and, presumably, for closing the reader (confirm).
+	Stream(ctx context.Context, req *NormalizedRequest, model config.ModelConfig) (io.ReadCloser, error)
+
+	// RoundTripName returns the model ID to use in the upstream request. This
+	// may differ from the config's ModelID (e.g. for model overrides).
+	RoundTripName(model config.ModelConfig) string
+
+	// StreamIdleTimeout returns the maximum gap between bytes on an active
+	// stream before it is treated as stuck and aborted.
+	StreamIdleTimeout(model config.ModelConfig) time.Duration
+}