7 changes: 6 additions & 1 deletion .devcontainer/recipes/Dockerfile
@@ -6,12 +6,17 @@ FROM nvcr.io/nvidia/pytorch:25.12-py3
# Remove once bug has been addressed in the nvidia/pytorch container.
RUN rm -f /usr/local/lib/python*/dist-packages/transformer_engine-*.dist-info/direct_url.json

# lsof is needed for Daniel's Claude script to work.
RUN --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y lsof && rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=requirements.txt,target=/workspace/requirements.txt \
PIP_CONSTRAINT= pip install -r /workspace/requirements.txt

COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/
USER ubuntu
RUN curl https://cursor.com/install -fsS | bash # Install cursor-agent CLI tool
RUN curl -fsSL https://claude.ai/install.sh | bash # Install Claude CLI tool
RUN curl -fsSL https://claude.ai/install.sh | bash -s 2.0.32 # Install Claude CLI tool (pinned to avoid context_management parameter issue)
RUN uv tool install pre-commit --with pre-commit-uv --force-reinstall
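Once the image builds, a quick smoke test confirms the pin took effect (a sketch; it assumes both installers put the CLIs on the ubuntu user's PATH):

```bash
# Expect the pinned release, not whatever is latest.
claude --version        # should report 2.0.32
cursor-agent --version
```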
8 changes: 8 additions & 0 deletions .devcontainer/recipes/README.md
@@ -1,3 +1,11 @@
# Dev Container Setup

General-purpose dev container for local recipe development.

For Claude Code usage, make sure you've generated an API key at inference.nvidia.com and add

```bash
export NVIDIA_INFERENCE_KEY=<your_inference_key>
```

to your `.bashrc` or `.zshrc` file.
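Before rebuilding the container, it's worth sanity-checking the key directly (a sketch; it assumes the NVIDIA gateway exposes an OpenAI-compatible `/v1/models` route, which the LiteLLM config in this PR implies):

```bash
# Should return a JSON model list rather than a 401.
curl -fsS https://inference-api.nvidia.com/v1/models \
  -H "Authorization: Bearer $NVIDIA_INFERENCE_KEY"
```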
9 changes: 4 additions & 5 deletions .devcontainer/recipes/devcontainer.json
@@ -8,13 +8,12 @@
"mounts": [
"source=${localEnv:HOME}/.bash_history_devcontainer,target=/home/ubuntu/.bash_history,type=bind,consistency=cached",
"source=${localEnv:HOME}/.cache,target=/home/ubuntu/.cache,type=bind,consistency=cached",
"source=${localEnv:HOME}/.claude,target=/home/ubuntu/.claude,type=bind,consistency=cached",
"source=${localEnv:HOME}/.claude.json,target=/home/ubuntu/.claude.json,type=bind,consistency=cached"
"source=${localEnv:HOME}/.config,target=/home/ubuntu/.config,type=bind,consistency=cached",
"source=${localEnv:HOME}/.cursor,target=/home/ubuntu/.cursor,type=bind,consistency=cached",
"source=${localEnv:HOME}/.gnupg,target=/home/ubuntu/.gnupg,type=bind,consistency=cached",
"source=${localEnv:HOME}/.netrc,target=/home/ubuntu/.netrc,type=bind,consistency=cached",
"source=${localEnv:HOME}/.ssh,target=/home/ubuntu/.ssh,readonly,type=bind,consistency=cached",
"source=${localEnv:HOME}/.nvidia-api-key,target=/home/ubuntu/.nvidia-api-key,type=bind,consistency=cached",
"source=${localEnv:HOME}/.claude-devcontainer.json,target=/home/ubuntu/.claude.json,type=bind,consistency=cached",
"source=${localEnv:HOME}/.ssh,target=/home/ubuntu/.ssh,readonly,type=bind,consistency=cached"
],
"postCreateCommand": ".devcontainer/recipes/postCreateCommand.sh",
"initializeCommand": ".devcontainer/recipes/initializeCommand.sh",
@@ -24,7 +23,7 @@
"--shm-size=4g"
],
"containerEnv": {
"PRE_COMMIT_HOME": "/home/ubuntu/.cache/pre-commit-devcontainer",
"PRE_COMMIT_HOME": "/home/ubuntu/.cache/pre-commit-devcontainer"
},
"customizations": {
"vscode": {
8 changes: 4 additions & 4 deletions .devcontainer/recipes/initializeCommand.sh
@@ -4,13 +4,13 @@
mkdir -p ~/.devcontainer_cache
mkdir -p ~/.ssh
mkdir -p ~/.cache/pre-commit-devcontainer
mkdir -p ~/.gnupg
mkdir -p ~/.config
mkdir -p ~/.cursor
mkdir -p ~/.claude
[ ! -f ~/.netrc ] && touch ~/.netrc

[ ! -f ~/.netrc ] && touch ~/.netrc
[ ! -f ~/.nvidia-api-key ] && touch ~/.nvidia-api-key
[ ! -f ~/.claude-devcontainer.json ] && touch ~/.claude-devcontainer.json
[ ! -f ~/.bash_history_devcontainer ] && touch ~/.bash_history_devcontainer
[ ! -f ~/.claude.json ] && touch ~/.claude.json


exit 0
94 changes: 94 additions & 0 deletions .devcontainer/recipes/litellm_config.yaml
@@ -0,0 +1,94 @@
model_list:
# ===========================================
# NVIDIA Inference API Model Mappings
# ===========================================
# Maps Claude Code model requests to NVIDIA's hosted Claude models
#
# Available NVIDIA models:
# - aws/anthropic/claude-opus-4-5 (Opus 4.5)
# - aws/anthropic/bedrock-claude-sonnet-4-5-v1 (Sonnet 4.5)
#
# IMPORTANT: NVIDIA's Bedrock-hosted models have smaller context windows
# than direct Anthropic API (~100K vs 200K). We set max_input_tokens to
# enable pre-call validation, which allows Claude Code to trigger
# context compaction before hitting the API limit.
#
# Claude Code requests these models by name, so we map them appropriately.

# --- Sonnet models → NVIDIA Sonnet ---
- model_name: claude-sonnet-4-5-20250929
litellm_params:
model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000 # Tested: NVIDIA limit is ~111K, using 100K for safety
max_output_tokens: 8192

- model_name: claude-sonnet-4-20250514
litellm_params:
model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000
max_output_tokens: 8192

- model_name: claude-3-5-sonnet-20241022
litellm_params:
model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000
max_output_tokens: 8192

# --- Haiku models → NVIDIA Sonnet (no Haiku available) ---
- model_name: claude-haiku-4-5-20251001
litellm_params:
model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000
max_output_tokens: 8192

# --- Opus models → NVIDIA Opus ---
- model_name: claude-opus-4-5-20250929
litellm_params:
model: openai/aws/anthropic/claude-opus-4-5
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000 # Tested: NVIDIA limit is ~111K, using 100K for safety
max_output_tokens: 8192

- model_name: claude-3-opus-20240229
litellm_params:
model: openai/aws/anthropic/claude-opus-4-5
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000
max_output_tokens: 8192

general_settings:
master_key: sk-litellm-local-dev

router_settings:
# Enable pre-call validation of context window limits
# This checks if input exceeds max_input_tokens BEFORE making the API call
# Allows Claude Code to receive ContextWindowExceededError early and trigger compaction
enable_pre_call_checks: true

litellm_settings:
drop_params: true
num_retries: 2
# Context window fallbacks: when a model hits context limit, try Opus
# Note: Both models have same ~100K limit on NVIDIA, so fallback may not help
# The real fix is enable_pre_call_checks which lets Claude Code compact early
context_window_fallbacks:
- claude-sonnet-4-5-20250929: ["claude-opus-4-5-20250929"]
- claude-sonnet-4-20250514: ["claude-opus-4-5-20250929"]
- claude-3-5-sonnet-20241022: ["claude-opus-4-5-20250929"]
- claude-haiku-4-5-20251001: ["claude-sonnet-4-5-20250929", "claude-opus-4-5-20250929"]
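With the proxy running (see `postCreateCommand.sh` below), a request for one of the mapped names is rewritten to the NVIDIA-hosted model. A minimal check against the proxy, assuming LiteLLM's default port 4000:

```bash
# The bearer token is the master_key from general_settings above.
curl -sS http://localhost:4000/v1/chat/completions \
  -H "Authorization: Bearer sk-litellm-local-dev" \
  -H "Content-Type: application/json" \
  -d '{"model": "claude-sonnet-4-5-20250929", "messages": [{"role": "user", "content": "ping"}]}'
```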
3 changes: 3 additions & 0 deletions .devcontainer/recipes/postCreateCommand.sh
@@ -4,3 +4,6 @@ set -euo pipefail
if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
uvx pre-commit install
fi

# Set up Claude environment and proxy server
source .devcontainer/recipes/setup_claude_env.sh
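`setup_claude_env.sh` itself isn't included in this diff. A hypothetical sketch of what it presumably wires up, going by the README and `litellm_config.yaml` (the env-var mapping and port are assumptions):

```bash
# Expose the user's key under the name the LiteLLM config reads.
export NVIDIA_API_KEY="${NVIDIA_INFERENCE_KEY:?set NVIDIA_INFERENCE_KEY first}"

# Start the proxy in the background with the checked-in config.
litellm --config .devcontainer/recipes/litellm_config.yaml --port 4000 &

# Route Claude Code through the proxy; the token matches master_key.
export ANTHROPIC_BASE_URL=http://localhost:4000
export ANTHROPIC_AUTH_TOKEN=sk-litellm-local-dev
```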
5 changes: 3 additions & 2 deletions .devcontainer/recipes/requirements.txt
@@ -2,10 +2,13 @@ accelerate
datasets
deepspeed
hydra-core
litellm[proxy]
lm-eval
megatron-fsdp
nvdlfw_inspect @ git+https://github.com/NVIDIA/nvidia-dlfw-inspect
peft
pytest
seaborn
torch
torchao!=0.14.0
torchdata
@@ -16,5 +19,3 @@ transformers
typer
wandb
zstandard
nvdlfw_inspect @ git+https://github.com/NVIDIA/nvidia-dlfw-inspect
seaborn