From e137996a5b62422dd36d86f725688280b377f1c8 Mon Sep 17 00:00:00 2001
From: "Peter St. John"
Date: Tue, 27 Jan 2026 15:20:58 -0800
Subject: [PATCH] add Claude Code with NVIDIA inference API

Signed-off-by: Peter St. John
---
 .devcontainer/recipes/Dockerfile           |   7 +-
 .devcontainer/recipes/README.md            |   8 +
 .devcontainer/recipes/devcontainer.json    |   9 +-
 .devcontainer/recipes/initializeCommand.sh |   8 +-
 .devcontainer/recipes/litellm_config.yaml  |  94 ++++
 .devcontainer/recipes/postCreateCommand.sh |   3 +
 .devcontainer/recipes/requirements.txt     |   5 +-
 .devcontainer/recipes/setup_claude_env.sh  | 589 +++++++++++++++++++++
 .secrets.baseline                          |  20 +-
 9 files changed, 730 insertions(+), 13 deletions(-)
 create mode 100644 .devcontainer/recipes/litellm_config.yaml
 create mode 100644 .devcontainer/recipes/setup_claude_env.sh

diff --git a/.devcontainer/recipes/Dockerfile b/.devcontainer/recipes/Dockerfile
index 880e773052..eaac738158 100644
--- a/.devcontainer/recipes/Dockerfile
+++ b/.devcontainer/recipes/Dockerfile
@@ -6,6 +6,11 @@ FROM nvcr.io/nvidia/pytorch:25.12-py3
 # Remove once bug has been addressed in the nvidia/pytorch container.
 RUN rm -f /usr/local/lib/python*/dist-packages/transformer_engine-*.dist-info/direct_url.json
 
+# lsof is required by the Claude environment setup script (setup_claude_env.sh).
+RUN --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
+    --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    apt-get update && apt-get install -y lsof && rm -rf /var/lib/apt/lists/*
+
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements.txt,target=/workspace/requirements.txt \
     PIP_CONSTRAINT= pip install -r /workspace/requirements.txt
@@ -13,5 +18,5 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/
 USER ubuntu
 RUN curl https://cursor.com/install -fsS | bash  # Install cursor-agent CLI tool
-RUN curl -fsSL https://claude.ai/install.sh | bash  # Install Claude CLI tool
+RUN curl -fsSL https://claude.ai/install.sh | bash -s 2.0.32  # Install Claude CLI tool (pinned to avoid context_management parameter issue)
 RUN uv tool install pre-commit --with pre-commit-uv --force-reinstall
diff --git a/.devcontainer/recipes/README.md b/.devcontainer/recipes/README.md
index c2c5e18632..02caf50586 100644
--- a/.devcontainer/recipes/README.md
+++ b/.devcontainer/recipes/README.md
@@ -1,3 +1,11 @@
 # Dev Container Setup
 
 General-purpose dev container for local recipe development.
+
+To use Claude Code, generate an API key at https://inference.nvidia.com/key-management and save it to `~/.nvidia-api-key` in your home directory on the host:
+
+```bash
+echo 'sk-your-key-here' > ~/.nvidia-api-key && chmod 600 ~/.nvidia-api-key
+```
+
+The dev container mounts this file, and `setup_claude_env.sh` reads the key from it when the container is created.
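+
+When the container starts, `postCreateCommand.sh` sources `setup_claude_env.sh`, which launches a local LiteLLM proxy on port 4000 and points Claude Code at it through `~/.claude/settings.json`. As a rough sanity check from inside the container (assuming the default port and config shipped in this directory):
+
+```bash
+curl -s http://localhost:4000/health   # health endpoint polled by the setup script
+claude                                 # Claude Code, routed through the local proxy
+```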
diff --git a/.devcontainer/recipes/devcontainer.json b/.devcontainer/recipes/devcontainer.json index 85da9fef4d..a61f3f942a 100644 --- a/.devcontainer/recipes/devcontainer.json +++ b/.devcontainer/recipes/devcontainer.json @@ -8,13 +8,12 @@ "mounts": [ "source=${localEnv:HOME}/.bash_history_devcontainer,target=/home/ubuntu/.bash_history,type=bind,consistency=cached", "source=${localEnv:HOME}/.cache,target=/home/ubuntu/.cache,type=bind,consistency=cached", - "source=${localEnv:HOME}/.claude,target=/home/ubuntu/.claude,type=bind,consistency=cached", - "source=${localEnv:HOME}/.claude.json,target=/home/ubuntu/.claude.json,type=bind,consistency=cached" "source=${localEnv:HOME}/.config,target=/home/ubuntu/.config,type=bind,consistency=cached", "source=${localEnv:HOME}/.cursor,target=/home/ubuntu/.cursor,type=bind,consistency=cached", - "source=${localEnv:HOME}/.gnupg,target=/home/ubuntu/.gnupg,type=bind,consistency=cached", "source=${localEnv:HOME}/.netrc,target=/home/ubuntu/.netrc,type=bind,consistency=cached", - "source=${localEnv:HOME}/.ssh,target=/home/ubuntu/.ssh,readonly,type=bind,consistency=cached", + "source=${localEnv:HOME}/.nvidia-api-key,target=/home/ubuntu/.nvidia-api-key,type=bind,consistency=cached", + "source=${localEnv:HOME}/.claude-devcontainer.json,target=/home/ubuntu/.claude.json,type=bind,consistency=cached", + "source=${localEnv:HOME}/.ssh,target=/home/ubuntu/.ssh,readonly,type=bind,consistency=cached" ], "postCreateCommand": ".devcontainer/recipes/postCreateCommand.sh", "initializeCommand": ".devcontainer/recipes/initializeCommand.sh", @@ -24,7 +23,7 @@ "--shm-size=4g" ], "containerEnv": { - "PRE_COMMIT_HOME": "/home/ubuntu/.cache/pre-commit-devcontainer", + "PRE_COMMIT_HOME": "/home/ubuntu/.cache/pre-commit-devcontainer" }, "customizations": { "vscode": { diff --git a/.devcontainer/recipes/initializeCommand.sh b/.devcontainer/recipes/initializeCommand.sh index 182db741f8..b5d7c8a2c1 100755 --- a/.devcontainer/recipes/initializeCommand.sh +++ b/.devcontainer/recipes/initializeCommand.sh @@ -4,13 +4,13 @@ mkdir -p ~/.devcontainer_cache mkdir -p ~/.ssh mkdir -p ~/.cache/pre-commit-devcontainer -mkdir -p ~/.gnupg mkdir -p ~/.config mkdir -p ~/.cursor -mkdir -p ~/.claude -[ ! -f ~/.netrc ] && touch ~/.netrc +[ ! -f ~/.netrc ] && touch ~/.netrc +[ ! -f ~/.nvidia-api-key ] && touch ~/.nvidia-api-key +[ ! -f ~/.claude-devcontainer.json ] && touch ~/.claude-devcontainer.json [ ! -f ~/.bash_history_devcontainer ] && touch ~/.bash_history_devcontainer -[ ! -f ~/.claude.json ] && touch ~/.claude.json + exit 0 diff --git a/.devcontainer/recipes/litellm_config.yaml b/.devcontainer/recipes/litellm_config.yaml new file mode 100644 index 0000000000..63301212d4 --- /dev/null +++ b/.devcontainer/recipes/litellm_config.yaml @@ -0,0 +1,94 @@ +model_list: + # =========================================== + # NVIDIA Inference API Model Mappings + # =========================================== + # Maps Claude Code model requests to NVIDIA's hosted Claude models + # + # Available NVIDIA models: + # - aws/anthropic/claude-opus-4-5 (Opus 4.5) + # - aws/anthropic/bedrock-claude-sonnet-4-5-v1 (Sonnet 4.5) + # + # IMPORTANT: NVIDIA's Bedrock-hosted models have smaller context windows + # than direct Anthropic API (~100K vs 200K). We set max_input_tokens to + # enable pre-call validation, which allows Claude Code to trigger + # context compaction before hitting the API limit. + # + # Claude Code requests these models by name, so we map them appropriately. 
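+  #
+  # Once the proxy is running you can list the models it serves to confirm these
+  # mappings (a sketch, assuming the proxy is on localhost:4000 and uses the
+  # master_key defined at the bottom of this file):
+  #
+  #   curl -s http://localhost:4000/v1/models -H "Authorization: Bearer sk-litellm-local-dev"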
+ + # --- Sonnet models → NVIDIA Sonnet --- + - model_name: claude-sonnet-4-5-20250929 + litellm_params: + model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1 + api_base: https://inference-api.nvidia.com + api_key: os.environ/NVIDIA_API_KEY + model_info: + max_input_tokens: 100000 # Tested: NVIDIA limit is ~111K, using 100K for safety + max_output_tokens: 8192 + + - model_name: claude-sonnet-4-20250514 + litellm_params: + model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1 + api_base: https://inference-api.nvidia.com + api_key: os.environ/NVIDIA_API_KEY + model_info: + max_input_tokens: 100000 + max_output_tokens: 8192 + + - model_name: claude-3-5-sonnet-20241022 + litellm_params: + model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1 + api_base: https://inference-api.nvidia.com + api_key: os.environ/NVIDIA_API_KEY + model_info: + max_input_tokens: 100000 + max_output_tokens: 8192 + + # --- Haiku models → NVIDIA Sonnet (no Haiku available) --- + - model_name: claude-haiku-4-5-20251001 + litellm_params: + model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1 + api_base: https://inference-api.nvidia.com + api_key: os.environ/NVIDIA_API_KEY + model_info: + max_input_tokens: 100000 + max_output_tokens: 8192 + + # --- Opus models → NVIDIA Opus --- + - model_name: claude-opus-4-5-20250929 + litellm_params: + model: openai/aws/anthropic/claude-opus-4-5 + api_base: https://inference-api.nvidia.com + api_key: os.environ/NVIDIA_API_KEY + model_info: + max_input_tokens: 100000 # Tested: NVIDIA limit is ~111K, using 100K for safety + max_output_tokens: 8192 + + - model_name: claude-3-opus-20240229 + litellm_params: + model: openai/aws/anthropic/claude-opus-4-5 + api_base: https://inference-api.nvidia.com + api_key: os.environ/NVIDIA_API_KEY + model_info: + max_input_tokens: 100000 + max_output_tokens: 8192 + +general_settings: + master_key: sk-litellm-local-dev + +router_settings: + # Enable pre-call validation of context window limits + # This checks if input exceeds max_input_tokens BEFORE making the API call + # Allows Claude Code to receive ContextWindowExceededError early and trigger compaction + enable_pre_call_checks: true + +litellm_settings: + drop_params: true + num_retries: 2 + # Context window fallbacks: when a model hits context limit, try Opus + # Note: Both models have same ~100K limit on NVIDIA, so fallback may not help + # The real fix is enable_pre_call_checks which lets Claude Code compact early + context_window_fallbacks: + - claude-sonnet-4-5-20250929: ["claude-opus-4-5-20250929"] + - claude-sonnet-4-20250514: ["claude-opus-4-5-20250929"] + - claude-3-5-sonnet-20241022: ["claude-opus-4-5-20250929"] + - claude-haiku-4-5-20251001: ["claude-sonnet-4-5-20250929", "claude-opus-4-5-20250929"] diff --git a/.devcontainer/recipes/postCreateCommand.sh b/.devcontainer/recipes/postCreateCommand.sh index 5e04fd91a5..955b386090 100755 --- a/.devcontainer/recipes/postCreateCommand.sh +++ b/.devcontainer/recipes/postCreateCommand.sh @@ -4,3 +4,6 @@ set -euo pipefail if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then uvx pre-commit install fi + +# Set up Claude environment and proxy server +source .devcontainer/recipes/setup_claude_env.sh diff --git a/.devcontainer/recipes/requirements.txt b/.devcontainer/recipes/requirements.txt index da0dd58c8a..3cd846e2cd 100644 --- a/.devcontainer/recipes/requirements.txt +++ b/.devcontainer/recipes/requirements.txt @@ -2,10 +2,13 @@ accelerate datasets deepspeed hydra-core +litellm[proxy] lm-eval megatron-fsdp +nvdlfw_inspect @ 
git+https://github.com/NVIDIA/nvidia-dlfw-inspect
 peft
 pytest
+seaborn
 torch
 torchao!=0.14.0
 torchdata
@@ -16,5 +19,3 @@ transformers
 typer
 wandb
 zstandard
-nvdlfw_inspect @ git+https://github.com/NVIDIA/nvidia-dlfw-inspect
-seaborn
diff --git a/.devcontainer/recipes/setup_claude_env.sh b/.devcontainer/recipes/setup_claude_env.sh
new file mode 100644
index 0000000000..aa061c706e
--- /dev/null
+++ b/.devcontainer/recipes/setup_claude_env.sh
@@ -0,0 +1,589 @@
+#!/bin/bash
+# setup_claude_env.sh - Environment setup for Claude Code with the NVIDIA hosted endpoint
+#
+# This script sets up Claude Code to use NVIDIA's hosted Claude API via a LiteLLM proxy.
+# It handles everything: API key setup, LiteLLM installation, and proxy startup.
+#
+# Usage:
+#   source .devcontainer/recipes/setup_claude_env.sh            # Basic setup + start proxy
+#   source .devcontainer/recipes/setup_claude_env.sh --venv     # Create/activate virtual environment first
+#   source .devcontainer/recipes/setup_claude_env.sh --restart  # Force restart proxy (e.g., after changing API key)
+#   source .devcontainer/recipes/setup_claude_env.sh --verbose  # Show detailed install output
+#
+# What this script does:
+#   1. Installs uv (fast Python package manager) if not present
+#   2. Installs the Claude Code CLI (via native installer) if not present
+#   3. Reads the NVIDIA API key from ~/.nvidia-api-key, or prompts for it and saves it
+#   4. Installs the LiteLLM proxy using uv
+#   5. Starts the LiteLLM proxy server in the background
+#   6. Configures Claude Code via ~/.claude/settings.json
+#   7. Skips Claude onboarding via ~/.claude.json
+#
+# Prerequisites:
+#   - NVIDIA API key from https://inference.nvidia.com/key-management
+#   - Basic system tools: curl, lsof, python3 (usually pre-installed)
+#
+# API Key Storage:
+#   The script reads your NVIDIA API key from ~/.nvidia-api-key.
+#   If the file doesn't exist or is empty, it prompts you for the key
+#   and saves it to that file with restricted permissions (600).
+
+# ============================================================================
+# Check: Must be sourced, not executed
+# ============================================================================
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+    echo ""
+    echo "❌ ERROR: This script must be SOURCED, not executed directly."
+    echo ""
+    echo "   Wrong:   ./.devcontainer/recipes/setup_claude_env.sh"
+    echo "   Wrong:   bash .devcontainer/recipes/setup_claude_env.sh"
+    echo "   Correct: source .devcontainer/recipes/setup_claude_env.sh"
+    echo ""
+    echo "   Sourcing exports environment variables to your current shell."
+ exit 1 +fi + +# Safe error handling for sourced scripts +# Use 'return' instead of 'exit' to avoid killing the shell +safe_exit() { + return 1 +} + +# Parse arguments +FORCE_RESTART=false +USE_VENV=false +VERBOSE=false +for arg in "$@"; do + case $arg in + --restart) FORCE_RESTART=true ;; + --venv) USE_VENV=true ;; + --verbose) VERBOSE=true ;; + esac +done + +# Log file for install output +INSTALL_LOG="/tmp/claude_nvidia_setup.log" +echo "" > "$INSTALL_LOG" + +# ============================================================================ +# Helper Functions +# ============================================================================ + +# Spinner animation +spin() { + local pid=$1 + local msg=$2 + local spinchars='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏' + local i=0 + + # Hide cursor + tput civis 2>/dev/null || true + + while kill -0 "$pid" 2>/dev/null; do + # Use arithmetic expansion for shell compatibility (bash and zsh) + local idx=$((i % ${#spinchars})) + printf "\r %s %s" "${spinchars:$idx:1}" "$msg" + i=$((i + 1)) + sleep 0.1 + done + + # Show cursor + tput cnorm 2>/dev/null || true + + # Wait for process and get exit code + wait "$pid" + return $? +} + +# Run command with spinner (suppresses output unless verbose) +run_with_spinner() { + local msg=$1 + shift + + if [ "$VERBOSE" = true ]; then + echo " $msg" + "$@" + return $? + fi + + # Run in background, capture output to log + "$@" >> "$INSTALL_LOG" 2>&1 & + local pid=$! + + if spin "$pid" "$msg"; then + printf "\r ✓ %-60s\n" "$msg" + return 0 + else + printf "\r ❌ %-60s\n" "$msg" + echo " See log: $INSTALL_LOG" + return 1 + fi +} + +# Print status line +status() { + printf " ✓ %s\n" "$1" +} + +# Print warning +warn() { + printf " ⚠️ %s\n" "$1" +} + +# Print error +error() { + printf " ❌ %s\n" "$1" +} + +# ============================================================================ +# Header +# ============================================================================ +echo "" +echo "╔════════════════════════════════════════════════════════════════════╗" +echo "║ Claude Code + NVIDIA Inference - Environment Setup ║" +echo "╚════════════════════════════════════════════════════════════════════╝" +echo "" + +# ============================================================================ +# Check System Dependencies +# ============================================================================ +echo "📦 System Dependencies" +echo " ─────────────────────────────────────────────────────────────────" + +# Check basic commands +MISSING_CMDS="" +for cmd in curl lsof python3; do + if ! command -v $cmd &> /dev/null; then + MISSING_CMDS="$MISSING_CMDS $cmd" + fi +done + +if [ -n "$MISSING_CMDS" ]; then + error "Missing required commands:$MISSING_CMDS" + echo " Install with: sudo apt install$MISSING_CMDS" + return 1 +fi +status "curl, lsof, python3" + +# ============================================================================ +# Install uv (Python package manager) +# ============================================================================ +if ! command -v uv &> /dev/null; then + run_with_spinner "Installing uv (Python package manager)..." 
\ + bash -c 'curl -LsSf https://astral.sh/uv/install.sh | sh' + + # Add to PATH for current session + export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH" +fi + +if command -v uv &> /dev/null; then + PKG_MANAGER="uv" + PKG_INSTALL="uv pip install" + status "uv $(uv --version 2>/dev/null | head -1 || echo '')" +elif command -v pip &> /dev/null; then + PKG_MANAGER="pip" + PKG_INSTALL="pip install" + warn "uv not available, using pip" +else + error "Neither 'uv' nor 'pip' available" + return 1 +fi + +# ============================================================================ +# Claude Code CLI Setup (Native Installer) +# ============================================================================ +echo "" +echo "🤖 Claude Code CLI" +echo " ─────────────────────────────────────────────────────────────────" + +# Add common installation paths +export PATH="$HOME/.local/bin:$PATH" + +if command -v claude &> /dev/null; then + CLAUDE_VERSION=$(claude --version 2>/dev/null || echo "unknown") + status "Claude CLI $CLAUDE_VERSION" +else + # Use native installer (npm installation is deprecated) + run_with_spinner "Installing Claude Code CLI (native installer)..." \ + bash -c 'curl -fsSL https://claude.ai/install.sh | bash' + + # Update PATH to include installation location + export PATH="$HOME/.local/bin:$PATH" + + # Verify installation + if command -v claude &> /dev/null; then + status "Claude CLI installed" + else + warn "Installation completed but 'claude' not in PATH" + echo " Try: export PATH=\"\$HOME/.local/bin:\$PATH\"" + echo " Or run: claude install" + fi +fi + +# Optional: Create virtual environment +if [ "$USE_VENV" = true ]; then + echo "" + echo "📦 Virtual Environment" + echo " ─────────────────────────────────────────────────────────────────" + if [ ! -d "venv" ]; then + run_with_spinner "Creating virtual environment..." \ + python3 -m venv venv + fi + # shellcheck disable=SC1091 + source venv/bin/activate + status "Activated: $(which python)" +fi + +# ============================================================================ +# NVIDIA API Key Setup +# ============================================================================ +echo "" +echo "🔑 NVIDIA API Key" +echo " ─────────────────────────────────────────────────────────────────" + +# Define the API key file +API_KEY_FILE="$HOME/.nvidia-api-key" + +# Try to read the key from the file +NVIDIA_API_KEY="" +if [ -f "$API_KEY_FILE" ]; then + # Read the key from file, removing any trailing whitespace + NVIDIA_API_KEY=$(cat "$API_KEY_FILE" 2>/dev/null | tr -d '\n\r' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') + + if [ -n "$NVIDIA_API_KEY" ]; then + # Validate the key format + if [[ ! "$NVIDIA_API_KEY" =~ ^sk- ]]; then + error "API key found in $API_KEY_FILE but format is invalid" + error "Key found: ${NVIDIA_API_KEY:0:30}..." + error "Key must start with 'sk-' to work. Clearing invalid key." + NVIDIA_API_KEY="" + else + status "Loaded NVIDIA_API_KEY from $API_KEY_FILE (${NVIDIA_API_KEY:0:7}...)" + fi + fi +fi + +# If no valid key found, prompt the user +if [ -z "$NVIDIA_API_KEY" ]; then + if [ -t 0 ]; then # Interactive terminal + echo " Get your key: https://inference.nvidia.com/key-management" + echo " Key format: sk-xxxxxxxxxxxxxxxxxxxxxxxx" + echo "" + # Use shell-compatible read syntax (works in both bash and zsh) + if [ -n "${ZSH_VERSION:-}" ]; then + read -s "? 
Enter your NVIDIA API key: " NVIDIA_API_KEY + else + read -sp " Enter your NVIDIA API key: " NVIDIA_API_KEY + fi + echo "" + + if [ -n "$NVIDIA_API_KEY" ]; then + # Validate user-entered key format + if [[ ! "$NVIDIA_API_KEY" =~ ^sk- ]]; then + error "Invalid key format: must start with 'sk-'" + error "You entered: ${NVIDIA_API_KEY:0:20}..." + echo " Please try again with a valid NVIDIA API key." + NVIDIA_API_KEY="" + return 1 + fi + # Save to the API key file + echo "$NVIDIA_API_KEY" > "$API_KEY_FILE" + chmod 600 "$API_KEY_FILE" # Secure file permissions (owner read/write only) + status "Saved to $API_KEY_FILE" + fi + else + error "NVIDIA_API_KEY not found in $API_KEY_FILE (non-interactive mode)" + echo " Create the file: echo 'your-key' > ~/.nvidia-api-key" + echo " Make sure the key starts with 'sk-'" + return 1 + fi +fi + +# Final validation: key MUST start with "sk-" to work +if [ -n "$NVIDIA_API_KEY" ]; then + if [[ ! "$NVIDIA_API_KEY" =~ ^sk- ]]; then + error "Invalid API key format: must start with 'sk-'" + error "Found key starting with: ${NVIDIA_API_KEY:0:20}..." + echo " Please set a valid NVIDIA API key." + echo " Get your key: https://inference.nvidia.com/key-management" + return 1 + fi + export NVIDIA_API_KEY + status "Key configured (${NVIDIA_API_KEY:0:7}...)" +else + error "NVIDIA_API_KEY is not set" + return 1 +fi + +# ============================================================================ +# LiteLLM Proxy Setup +# ============================================================================ +echo "" +echo "🔄 LiteLLM Proxy" +echo " ─────────────────────────────────────────────────────────────────" + +# Check if LiteLLM is installed and working +LITELLM_NEEDS_INSTALL=false +if ! command -v litellm &> /dev/null; then + if [ -f "venv/bin/litellm" ] && [ "$USE_VENV" = false ]; then + warn "LiteLLM found in venv/ but venv not activated" + echo " Re-run with: source scripts/setup_env.sh --venv" + return 1 + fi + LITELLM_NEEDS_INSTALL=true +else + # LiteLLM exists, but check if proxy dependencies are installed + if ! python3 -c "import backoff" 2>/dev/null; then + warn "LiteLLM installed but missing proxy dependencies" + LITELLM_NEEDS_INSTALL=true + fi +fi + +if [ "$LITELLM_NEEDS_INSTALL" = true ]; then + # Use requirements.txt if available (more reliable than extras) + if [ -f "requirements.txt" ]; then + run_with_spinner "Installing LiteLLM from requirements.txt..." \ + $PKG_INSTALL -r requirements.txt + else + # Fallback: install litellm[proxy] directly + run_with_spinner "Installing LiteLLM..." \ + $PKG_INSTALL 'litellm[proxy]' + fi + status "LiteLLM installed" +else + status "LiteLLM ready" +fi + +# Check if config file exists +if [ ! -f ".devcontainer/recipes/litellm_config.yaml" ]; then + error "litellm_config.yaml not found" + return 1 +fi + +# ============================================================================ +# Start LiteLLM Proxy Server +# ============================================================================ +echo "" +echo "🚀 Starting Proxy" +echo " ─────────────────────────────────────────────────────────────────" + +# Function to start proxy with spinner +start_proxy_with_spinner() { + # Backup old log + [ -f /tmp/litellm_proxy.log ] && mv /tmp/litellm_proxy.log /tmp/litellm_proxy.log.bak 2>/dev/null || true + + # Start LiteLLM in background + nohup litellm --config .devcontainer/recipes/litellm_config.yaml --port 4000 > /tmp/litellm_proxy.log 2>&1 & + local PROXY_PID=$! 
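+
+    # Note: the loop below polls the proxy's /health endpoint for up to ~30 seconds
+    # and bails out early (pointing at /tmp/litellm_proxy.log) if the litellm
+    # process exits before the endpoint starts responding.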
+ + # Spinner while waiting for health + local spinchars='⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏' + local i=0 + local msg="Starting proxy on port 4000..." + + tput civis 2>/dev/null || true + + local count=0 + while [ $count -lt 30 ]; do + if curl -s http://localhost:4000/health &> /dev/null; then + tput cnorm 2>/dev/null || true + printf "\r ✓ %-60s\n" "Proxy running (PID: $PROXY_PID)" + return 0 + fi + + if ! kill -0 $PROXY_PID 2>/dev/null; then + tput cnorm 2>/dev/null || true + printf "\r ❌ %-60s\n" "Proxy failed to start" + echo " Check logs: cat /tmp/litellm_proxy.log" + return 1 + fi + + # Use arithmetic expansion for shell compatibility (bash and zsh) + local idx=$((i % ${#spinchars})) + printf "\r %s %s" "${spinchars:$idx:1}" "$msg" + i=$((i + 1)) + count=$((count + 1)) + sleep 1 + done + + tput cnorm 2>/dev/null || true + printf "\r ❌ %-60s\n" "Proxy timeout after 30 seconds" + return 1 +} + +# Check if proxy is already running +if lsof -i :4000 &> /dev/null; then + if curl -s http://localhost:4000/health &> /dev/null; then + if [ "$FORCE_RESTART" = true ]; then + printf " %s %s" "⠋" "Restarting proxy..." + lsof -ti :4000 | xargs kill 2>/dev/null || true + sleep 2 + [ "$(lsof -i :4000 2>/dev/null)" ] && lsof -ti :4000 | xargs kill -9 2>/dev/null || true + sleep 1 + printf "\r ✓ %-60s\n" "Stopped old proxy" + start_proxy_with_spinner || return 1 + else + # Proxy is running - ask if user wants to restart (configs may have changed) + if [ -t 0 ]; then # Interactive terminal + echo " Proxy already running on port 4000" + echo " Re-sourcing may have updated configurations." + echo "" + # Use shell-compatible read syntax + RESTART_CHOICE="" + if [ -n "${ZSH_VERSION:-}" ]; then + read "? Restart proxy to apply changes? [y/N]: " RESTART_CHOICE + else + read -p " Restart proxy to apply changes? [y/N]: " RESTART_CHOICE + fi + + if [[ "$RESTART_CHOICE" =~ ^[Yy]$ ]]; then + printf " %s %s" "⠋" "Restarting proxy..." 
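+                # Try a graceful kill first; fall back to kill -9 only if port 4000
+                # is still held after a short wait.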
+ lsof -ti :4000 | xargs kill 2>/dev/null || true + sleep 2 + [ "$(lsof -i :4000 2>/dev/null)" ] && lsof -ti :4000 | xargs kill -9 2>/dev/null || true + sleep 1 + printf "\r ✓ %-60s\n" "Stopped old proxy" + start_proxy_with_spinner || return 1 + else + status "Keeping existing proxy running" + fi + else + # Non-interactive: just report it's running + status "Proxy already running on port 4000" + fi + fi + else + error "Port 4000 in use by another process" + echo " Kill it: lsof -ti :4000 | xargs kill -9" + return 1 + fi +else + start_proxy_with_spinner || return 1 +fi + +# ============================================================================ +# Configure Claude Code (via settings.json) +# ============================================================================ +echo "" +echo "⚙️ Claude Code Config" +echo " ─────────────────────────────────────────────────────────────────" + +# Create Claude config directory +mkdir -p "$HOME/.claude" + +# Create/update ~/.claude/settings.json with proxy configuration +CLAUDE_SETTINGS="$HOME/.claude/settings.json" +if [ -f "$CLAUDE_SETTINGS" ]; then + # Backup existing settings + cp "$CLAUDE_SETTINGS" "$CLAUDE_SETTINGS.bak" + + # Check if we need to update (using python for JSON handling) + python3 << EOF +import json +import sys + +settings_file = "$CLAUDE_SETTINGS" +try: + with open(settings_file, 'r') as f: + settings = json.load(f) +except (json.JSONDecodeError, FileNotFoundError): + settings = {} + +# Ensure env section exists +if 'env' not in settings: + settings['env'] = {} + +# Update proxy settings +settings['env']['ANTHROPIC_API_KEY'] = 'sk-litellm-local-dev' +settings['env']['ANTHROPIC_BASE_URL'] = 'http://localhost:4000' + +with open(settings_file, 'w') as f: + json.dump(settings, f, indent=2) +EOF + status "Updated $CLAUDE_SETTINGS" +else + # Create new settings.json + cat > "$CLAUDE_SETTINGS" << 'EOF' +{ + "env": { + "ANTHROPIC_API_KEY": "sk-litellm-local-dev", + "ANTHROPIC_BASE_URL": "http://localhost:4000" + } +} +EOF + status "Created $CLAUDE_SETTINGS" +fi + +# Create/update ~/.claude.json to skip onboarding +# When using self-hosted API, we must skip onboarding to avoid sign-in prompts +CLAUDE_JSON="$HOME/.claude.json" +if [ ! -f "$CLAUDE_JSON" ]; then + echo '{"hasCompletedOnboarding": true}' > "$CLAUDE_JSON" + status "Created $CLAUDE_JSON (skip onboarding for self-hosted API)" +else + # Always force hasCompletedOnboarding to true when using self-hosted API + # This ensures onboarding is skipped even if it was previously set to false + python3 << EOF +import json +import sys +config_file = "$CLAUDE_JSON" +try: + with open(config_file, 'r') as f: + config = json.load(f) +except (json.JSONDecodeError, FileNotFoundError): + config = {} + +# Force onboarding to be completed when using self-hosted API +old_value = config.get('hasCompletedOnboarding', None) +config['hasCompletedOnboarding'] = True + +with open(config_file, 'w') as f: + json.dump(config, f, indent=2) + +# Only show message if we changed the value +if old_value != True: + import sys + if old_value is None: + sys.exit(1) # Was missing, now added + elif old_value == False: + sys.exit(2) # Was false, now true + else: + sys.exit(0) # Was already true +else: + sys.exit(0) # Was already true +EOF + exit_code=$? 
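+    # The Python heredoc above reports what changed via its exit code:
+    #   1 = hasCompletedOnboarding was missing and has been added
+    #   2 = it was false and has been forced to true
+    #   0 = it was already true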
+    case $exit_code in
+        1)
+            status "Added hasCompletedOnboarding to $CLAUDE_JSON (skip onboarding)"
+            ;;
+        2)
+            status "Updated $CLAUDE_JSON: forced hasCompletedOnboarding=true (skip onboarding for self-hosted API)"
+            ;;
+        0)
+            status "Onboarding already skipped in $CLAUDE_JSON"
+            ;;
+    esac
+fi
+
+# ============================================================================
+# Summary
+# ============================================================================
+echo ""
+echo "╔════════════════════════════════════════════════════════════════════╗"
+echo "║                        ✓ Setup Complete!                             ║"
+echo "╚════════════════════════════════════════════════════════════════════╝"
+echo ""
+echo "  🟢 Proxy:   http://localhost:4000"
+echo "  🔧 Config:  ~/.claude/settings.json"
+echo ""
+echo "  Run Claude Code:"
+echo ""
+echo "      claude"
+echo ""
+echo "─────────────────────────────────────────────────────────────────────"
+echo "  Proxy logs:  /tmp/litellm_proxy.log"
+echo "  Stop proxy:  lsof -ti :4000 | xargs kill"
+echo "  Restart:     source .devcontainer/recipes/setup_claude_env.sh --restart"
+echo "─────────────────────────────────────────────────────────────────────"
+echo ""
diff --git a/.secrets.baseline b/.secrets.baseline
index 9bf60fbce5..b4881a0fa6 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -142,6 +142,24 @@
     }
   ],
   "results": {
+    ".devcontainer/recipes/litellm_config.yaml": [
+      {
+        "type": "Secret Keyword",
+        "filename": ".devcontainer/recipes/litellm_config.yaml",
+        "hashed_secret": "994633953a121c63f079c7f1b5f56ced78429b3a",
+        "is_verified": false,
+        "line_number": 23
+      }
+    ],
+    ".devcontainer/recipes/setup_claude_env.sh": [
+      {
+        "type": "Secret Keyword",
+        "filename": ".devcontainer/recipes/setup_claude_env.sh",
+        "hashed_secret": "770ec49341b2381b78ffc022f8b004c1853575a4",
+        "is_verified": false,
+        "line_number": 497
+      }
+    ],
     "pyproject.toml": [
       {
         "type": "Hex High Entropy String",
@@ -152,5 +170,5 @@
       }
     ]
   },
-  "generated_at": "2025-12-29T20:49:21Z"
+  "generated_at": "2026-01-27T16:52:56Z"
 }