7 changes: 6 additions & 1 deletion .devcontainer/recipes/Dockerfile
@@ -6,12 +6,17 @@ FROM nvcr.io/nvidia/pytorch:25.12-py3
# Remove once bug has been addressed in the nvidia/pytorch container.
RUN rm -f /usr/local/lib/python*/dist-packages/transformer_engine-*.dist-info/direct_url.json

# lsof is needed for Daniel's Claude script to work.
RUN --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && apt-get install -y lsof && rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=requirements.txt,target=/workspace/requirements.txt \
PIP_CONSTRAINT= pip install -r /workspace/requirements.txt

COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/
USER ubuntu
RUN curl https://cursor.com/install -fsS | bash # Install cursor-agent CLI tool
RUN curl -fsSL https://claude.ai/install.sh | bash # Install Claude CLI tool
RUN curl -fsSL https://claude.ai/install.sh | bash -s 2.0.32 # Install Claude CLI tool (pinned to avoid context_management parameter issue)
RUN uv tool install pre-commit --with pre-commit-uv --force-reinstall
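Once the image builds, a quick smoke test confirms the pin took effect (a sketch; it assumes both installers put the CLIs on the ubuntu user's PATH):

```bash
# Expect the pinned release, not whatever is latest.
claude --version        # should report 2.0.32
cursor-agent --version
```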
8 changes: 8 additions & 0 deletions .devcontainer/recipes/README.md
@@ -1,3 +1,11 @@
# Dev Container Setup

General-purpose dev container for local recipe development.

For Claude Code usage, make sure you've generated an API key at inference.nvidia.com and add

```bash
export NVIDIA_INFERENCE_KEY=<your_inference_key>
```

to your `.bashrc` or `.zshrc` file.
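Before rebuilding the container, it's worth sanity-checking the key directly (a sketch; it assumes the NVIDIA gateway exposes an OpenAI-compatible `/v1/models` route, which the LiteLLM config in this PR implies):

```bash
# Should return a JSON model list rather than a 401.
curl -fsS https://inference-api.nvidia.com/v1/models \
  -H "Authorization: Bearer $NVIDIA_INFERENCE_KEY"
```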
9 changes: 4 additions & 5 deletions .devcontainer/recipes/devcontainer.json
@@ -8,13 +8,12 @@
"mounts": [
"source=${localEnv:HOME}/.bash_history_devcontainer,target=/home/ubuntu/.bash_history,type=bind,consistency=cached",
"source=${localEnv:HOME}/.cache,target=/home/ubuntu/.cache,type=bind,consistency=cached",
"source=${localEnv:HOME}/.claude,target=/home/ubuntu/.claude,type=bind,consistency=cached",
"source=${localEnv:HOME}/.claude.json,target=/home/ubuntu/.claude.json,type=bind,consistency=cached"
"source=${localEnv:HOME}/.config,target=/home/ubuntu/.config,type=bind,consistency=cached",
"source=${localEnv:HOME}/.cursor,target=/home/ubuntu/.cursor,type=bind,consistency=cached",
"source=${localEnv:HOME}/.gnupg,target=/home/ubuntu/.gnupg,type=bind,consistency=cached",
"source=${localEnv:HOME}/.netrc,target=/home/ubuntu/.netrc,type=bind,consistency=cached",
"source=${localEnv:HOME}/.ssh,target=/home/ubuntu/.ssh,readonly,type=bind,consistency=cached",
"source=${localEnv:HOME}/.nvidia-api-key,target=/home/ubuntu/.nvidia-api-key,type=bind,consistency=cached",
"source=${localEnv:HOME}/.claude-devcontainer.json,target=/home/ubuntu/.claude.json,type=bind,consistency=cached",
"source=${localEnv:HOME}/.ssh,target=/home/ubuntu/.ssh,readonly,type=bind,consistency=cached"
],
"postCreateCommand": ".devcontainer/recipes/postCreateCommand.sh",
"initializeCommand": ".devcontainer/recipes/initializeCommand.sh",
@@ -24,7 +23,7 @@
"--shm-size=4g"
],
"containerEnv": {
"PRE_COMMIT_HOME": "/home/ubuntu/.cache/pre-commit-devcontainer",
"PRE_COMMIT_HOME": "/home/ubuntu/.cache/pre-commit-devcontainer"
},
"customizations": {
"vscode": {
8 changes: 4 additions & 4 deletions .devcontainer/recipes/initializeCommand.sh
@@ -4,13 +4,13 @@
mkdir -p ~/.devcontainer_cache
mkdir -p ~/.ssh
mkdir -p ~/.cache/pre-commit-devcontainer
mkdir -p ~/.gnupg
mkdir -p ~/.config
mkdir -p ~/.cursor
mkdir -p ~/.claude
[ ! -f ~/.netrc ] && touch ~/.netrc

[ ! -f ~/.netrc ] && touch ~/.netrc
[ ! -f ~/.nvidia-api-key ] && touch ~/.nvidia-api-key
[ ! -f ~/.claude-devcontainer.json ] && touch ~/.claude-devcontainer.json
[ ! -f ~/.bash_history_devcontainer ] && touch ~/.bash_history_devcontainer
[ ! -f ~/.claude.json ] && touch ~/.claude.json


exit 0
94 changes: 94 additions & 0 deletions .devcontainer/recipes/litellm_config.yaml
@@ -0,0 +1,94 @@
model_list:
# ===========================================
# NVIDIA Inference API Model Mappings
# ===========================================
# Maps Claude Code model requests to NVIDIA's hosted Claude models
#
# Available NVIDIA models:
# - aws/anthropic/claude-opus-4-5 (Opus 4.5)
# - aws/anthropic/bedrock-claude-sonnet-4-5-v1 (Sonnet 4.5)
#
# IMPORTANT: NVIDIA's Bedrock-hosted models have smaller context windows
# than direct Anthropic API (~100K vs 200K). We set max_input_tokens to
# enable pre-call validation, which allows Claude Code to trigger
# context compaction before hitting the API limit.
#
# Claude Code requests these models by name, so we map them appropriately.

# --- Sonnet models → NVIDIA Sonnet ---
- model_name: claude-sonnet-4-5-20250929
litellm_params:
model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000 # Tested: NVIDIA limit is ~111K, using 100K for safety
max_output_tokens: 8192

- model_name: claude-sonnet-4-20250514
litellm_params:
model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000
max_output_tokens: 8192

- model_name: claude-3-5-sonnet-20241022
litellm_params:
model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000
max_output_tokens: 8192

# --- Haiku models → NVIDIA Sonnet (no Haiku available) ---
- model_name: claude-haiku-4-5-20251001
litellm_params:
model: openai/aws/anthropic/bedrock-claude-sonnet-4-5-v1
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000
max_output_tokens: 8192

# --- Opus models → NVIDIA Opus ---
- model_name: claude-opus-4-5-20250929
litellm_params:
model: openai/aws/anthropic/claude-opus-4-5
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000 # Tested: NVIDIA limit is ~111K, using 100K for safety
max_output_tokens: 8192

- model_name: claude-3-opus-20240229
litellm_params:
model: openai/aws/anthropic/claude-opus-4-5
api_base: https://inference-api.nvidia.com
api_key: os.environ/NVIDIA_API_KEY
model_info:
max_input_tokens: 100000
max_output_tokens: 8192

general_settings:
master_key: sk-litellm-local-dev

router_settings:
# Enable pre-call validation of context window limits
# This checks if input exceeds max_input_tokens BEFORE making the API call
# Allows Claude Code to receive ContextWindowExceededError early and trigger compaction
enable_pre_call_checks: true

litellm_settings:
drop_params: true
num_retries: 2
# Context window fallbacks: when a model hits context limit, try Opus
# Note: Both models have same ~100K limit on NVIDIA, so fallback may not help
# The real fix is enable_pre_call_checks which lets Claude Code compact early
context_window_fallbacks:
- claude-sonnet-4-5-20250929: ["claude-opus-4-5-20250929"]
- claude-sonnet-4-20250514: ["claude-opus-4-5-20250929"]
- claude-3-5-sonnet-20241022: ["claude-opus-4-5-20250929"]
- claude-haiku-4-5-20251001: ["claude-sonnet-4-5-20250929", "claude-opus-4-5-20250929"]
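With the proxy running (see `postCreateCommand.sh` below), a request for one of the mapped names is rewritten to the NVIDIA-hosted model. A minimal check against the proxy, assuming LiteLLM's default port 4000:

```bash
# The bearer token is the master_key from general_settings above.
curl -sS http://localhost:4000/v1/chat/completions \
  -H "Authorization: Bearer sk-litellm-local-dev" \
  -H "Content-Type: application/json" \
  -d '{"model": "claude-sonnet-4-5-20250929", "messages": [{"role": "user", "content": "ping"}]}'
```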
3 changes: 3 additions & 0 deletions .devcontainer/recipes/postCreateCommand.sh
@@ -4,3 +4,6 @@ set -euo pipefail
if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
uvx pre-commit install
fi

# Set up Claude environment and proxy server
source .devcontainer/recipes/setup_claude_env.sh
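`setup_claude_env.sh` itself isn't included in this diff. A hypothetical sketch of what it presumably wires up, going by the README and `litellm_config.yaml` (the env-var mapping and port are assumptions):

```bash
# Expose the user's key under the name the LiteLLM config reads.
export NVIDIA_API_KEY="${NVIDIA_INFERENCE_KEY:?set NVIDIA_INFERENCE_KEY first}"

# Start the proxy in the background with the checked-in config.
litellm --config .devcontainer/recipes/litellm_config.yaml --port 4000 &

# Route Claude Code through the proxy; the token matches master_key.
export ANTHROPIC_BASE_URL=http://localhost:4000
export ANTHROPIC_AUTH_TOKEN=sk-litellm-local-dev
```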
5 changes: 3 additions & 2 deletions .devcontainer/recipes/requirements.txt
@@ -2,10 +2,13 @@ accelerate
datasets
deepspeed
hydra-core
litellm[proxy]
lm-eval
megatron-fsdp
nvdlfw_inspect @ git+https://github.com/NVIDIA/nvidia-dlfw-inspect
peft
pytest
seaborn
torch
torchao!=0.14.0
torchdata
@@ -16,5 +19,3 @@ transformers
typer
wandb
zstandard
nvdlfw_inspect @ git+https://github.com/NVIDIA/nvidia-dlfw-inspect
seaborn