Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 28 additions & 18 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,26 @@ RUN chmod +x /usr/local/bin/nemoclaw-start
# Build args for config that varies per deployment.
# nemoclaw onboard passes these at image build time.
ARG NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b
ARG NEMOCLAW_PROVIDER_KEY=nvidia
ARG NEMOCLAW_PRIMARY_MODEL_REF=nvidia/nemotron-3-super-120b-a12b
ARG CHAT_UI_URL=http://127.0.0.1:18789
ARG NEMOCLAW_INFERENCE_BASE_URL=https://inference.local/v1
ARG NEMOCLAW_INFERENCE_API=openai-completions
ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=
# Unique per build to ensure each image gets a fresh auth token.
# Pass --build-arg NEMOCLAW_BUILD_ID=$(date +%s) to bust the cache.
ARG NEMOCLAW_BUILD_ID=default

# SECURITY: Promote build-args to env vars so the Python script reads them
# via os.environ, never via string interpolation into Python source code.
# Direct ARG interpolation into python3 -c is a code injection vector (C-2).
# Direct ARG interpolation into python3 -c is a code injection vector.
ENV NEMOCLAW_MODEL=${NEMOCLAW_MODEL} \
CHAT_UI_URL=${CHAT_UI_URL}
NEMOCLAW_PROVIDER_KEY=${NEMOCLAW_PROVIDER_KEY} \
NEMOCLAW_PRIMARY_MODEL_REF=${NEMOCLAW_PRIMARY_MODEL_REF} \
CHAT_UI_URL=${CHAT_UI_URL} \
NEMOCLAW_INFERENCE_BASE_URL=${NEMOCLAW_INFERENCE_BASE_URL} \
NEMOCLAW_INFERENCE_API=${NEMOCLAW_INFERENCE_API} \
NEMOCLAW_INFERENCE_COMPAT_B64=${NEMOCLAW_INFERENCE_COMPAT_B64}

WORKDIR /sandbox
USER sandbox
Expand All @@ -98,30 +108,30 @@ USER sandbox
# Build args (NEMOCLAW_MODEL, CHAT_UI_URL) customize per deployment.
# Auth token is generated per build so each image has a unique token.
RUN python3 -c "\
import json, os, secrets; \
import base64, json, os, secrets; \
from urllib.parse import urlparse; \
model = os.environ['NEMOCLAW_MODEL']; \
provider_key = os.environ['NEMOCLAW_PROVIDER_KEY']; \
primary_model_ref = os.environ['NEMOCLAW_PRIMARY_MODEL_REF']; \
chat_ui_url = os.environ['CHAT_UI_URL']; \
inference_base_url = os.environ['NEMOCLAW_INFERENCE_BASE_URL']; \
inference_api = os.environ['NEMOCLAW_INFERENCE_API']; \
inference_compat = json.loads(base64.b64decode(os.environ['NEMOCLAW_INFERENCE_COMPAT_B64']).decode('utf-8')); \
parsed = urlparse(chat_ui_url); \
chat_origin = f'{parsed.scheme}://{parsed.netloc}' if parsed.scheme and parsed.netloc else 'http://127.0.0.1:18789'; \
origins = ['http://127.0.0.1:18789']; \
origins = list(dict.fromkeys(origins + [chat_origin])); \
providers = { \
provider_key: { \
'baseUrl': inference_base_url, \
'apiKey': 'unused', \
'api': inference_api, \
'models': [{**({'compat': inference_compat} if inference_compat else {}), 'id': model, 'name': primary_model_ref, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
} \
}; \
config = { \
'agents': {'defaults': {'model': {'primary': f'inference/{model}'}}}, \
'models': {'mode': 'merge', 'providers': { \
'nvidia': { \
'baseUrl': 'https://inference.local/v1', \
'apiKey': 'openshell-managed', \
'api': 'openai-completions', \
'models': [{'id': model.split('/')[-1], 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
}, \
'inference': { \
'baseUrl': 'https://inference.local/v1', \
'apiKey': 'unused', \
'api': 'openai-completions', \
'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
} \
}}, \
'agents': {'defaults': {'model': {'primary': primary_model_ref}}}, \
'models': {'mode': 'merge', 'providers': providers}, \
'channels': {'defaults': {'configWrites': False}}, \
'gateway': { \
'mode': 'local', \
Expand Down
32 changes: 23 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,14 @@ curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/refs/heads/main/uni

## How It Works

NemoClaw installs the NVIDIA OpenShell runtime and Nemotron models, then uses a versioned blueprint to create a sandboxed environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy.
NemoClaw installs the NVIDIA OpenShell runtime, then creates a sandboxed OpenClaw environment where every network request, file access, and inference call is governed by declarative policy. The `nemoclaw` CLI orchestrates the full stack: OpenShell gateway, sandbox, inference provider, and network policy.

| Component | Role |
|------------------|-------------------------------------------------------------------------------------------|
| **Plugin** | TypeScript CLI commands for launch, connect, status, and logs. |
| **Blueprint** | Versioned Python artifact that orchestrates sandbox creation, policy, and inference setup. |
| **Sandbox** | Isolated OpenShell container running OpenClaw with policy-enforced egress and filesystem. |
| **Inference** | NVIDIA Endpoint model calls, routed through the OpenShell gateway, transparent to the agent. |
| **Inference**    | Model calls routed to your selected provider through the OpenShell gateway, transparent to the agent. |

The blueprint lifecycle follows four stages: resolve the artifact, verify its digest, plan the resources, and apply through the OpenShell CLI.

Expand All @@ -179,15 +179,29 @@ When something goes wrong, errors may originate from either NemoClaw or the Open

## Inference

Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the NVIDIA Endpoint provider.
Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the provider you selected during onboarding.

| Provider | Model | Use Case |
|--------------|--------------------------------------|-------------------------------------------------|
| NVIDIA Endpoint | `nvidia/nemotron-3-super-120b-a12b` | Production. Requires an NVIDIA API key. |
Supported non-experimental onboarding paths:

Get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.
| Provider | Notes |
|---|---|
| NVIDIA Endpoints | Curated hosted models on `integrate.api.nvidia.com`. |
| OpenAI | Curated GPT models plus `Other...` for manual model entry. |
| Other OpenAI-compatible endpoint | For proxies and compatible gateways. |
| Anthropic | Curated Claude models plus `Other...` for manual model entry. |
| Other Anthropic-compatible endpoint | For Claude proxies and compatible gateways. |
| Google Gemini | Google's OpenAI-compatible endpoint. |
| Local Ollama | Local model serving through Ollama with pull, warmup, and validation in onboarding. |

Local inference options such as Ollama and vLLM are still experimental. On macOS, they also depend on OpenShell host-routing support in addition to the local service itself being reachable on the host.
During onboarding, NemoClaw validates the selected provider and model before it creates the sandbox:

- OpenAI-compatible providers: tries `/responses` first, then `/chat/completions`
- Anthropic-compatible providers: tries `/v1/messages`
- If validation fails, the wizard prompts you to fix the selection before continuing

Credentials stay on the host in `~/.nemoclaw/credentials.json`. The sandbox only sees the routed `inference.local` endpoint, not your raw provider key.

Local Ollama is supported in the standard onboarding flow. Local vLLM remains experimental. On macOS, host-routed local inference additionally requires OpenShell host-routing support, and the local service itself must be reachable on the host.

---

Expand Down Expand Up @@ -252,7 +266,7 @@ Refer to the documentation for more information on NemoClaw.
- [Overview](https://docs.nvidia.com/nemoclaw/latest/about/overview.html): Learn what NemoClaw does and how it fits together.
- [How It Works](https://docs.nvidia.com/nemoclaw/latest/about/how-it-works.html): Learn about the plugin, blueprint, and sandbox lifecycle.
- [Architecture](https://docs.nvidia.com/nemoclaw/latest/reference/architecture.html): Learn about the plugin structure, blueprint lifecycle, and sandbox environment.
- [Inference Profiles](https://docs.nvidia.com/nemoclaw/latest/reference/inference-profiles.html): Learn about the NVIDIA Endpoint inference configuration.
- [Inference Profiles](https://docs.nvidia.com/nemoclaw/latest/reference/inference-profiles.html): Learn how NemoClaw configures routed inference providers.
- [Network Policies](https://docs.nvidia.com/nemoclaw/latest/reference/network-policies.html): Learn about egress control and policy customization.
- [CLI Commands](https://docs.nvidia.com/nemoclaw/latest/reference/commands.html): Learn about the full command reference.
- [Troubleshooting](https://docs.nvidia.com/nemoclaw/latest/reference/troubleshooting.html): Troubleshoot common issues and resolution steps.
Expand Down
95 changes: 92 additions & 3 deletions bin/lib/credentials.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,97 @@ function getCredential(key) {
return creds[key] || null;
}

function prompt(question) {
/**
 * Reads a secret from stdin without echoing any characters to the terminal.
 *
 * Writes `question` to stderr, switches stdin to raw mode, and buffers
 * keystrokes until Enter is pressed. Resolves with the trimmed answer;
 * rejects with an Error carrying `code: "SIGINT"` when the user hits Ctrl+C.
 *
 * @param {string} question - Prompt text written to stderr.
 * @returns {Promise<string>} The entered secret, trimmed of surrounding whitespace.
 */
function promptSecret(question) {
  return new Promise((resolve, reject) => {
    const input = process.stdin;
    // Prompt on stderr so stdout stays clean for machine-readable output.
    const output = process.stderr;
    let answer = "";
    let rawModeEnabled = false;
    let finished = false;

    // Detach the data listener and restore the terminal to cooked mode.
    function cleanup() {
      input.removeListener("data", onData);
      if (rawModeEnabled && typeof input.setRawMode === "function") {
        input.setRawMode(false);
      }
      if (typeof input.pause === "function") {
        input.pause();
      }
    }

    // Settle the promise exactly once, always restoring the terminal first.
    function finish(fn, value) {
      if (finished) return;
      finished = true;
      cleanup();
      output.write("\n");
      fn(value);
    }

    // Raw-mode keystroke handler: a chunk may contain several characters.
    function onData(chunk) {
      const text = chunk.toString("utf8");
      for (let i = 0; i < text.length; i += 1) {
        const ch = text[i];

        // Ctrl+C (ETX): reject so the caller decides how to handle interrupt.
        if (ch === "\u0003") {
          finish(reject, Object.assign(new Error("Prompt interrupted"), { code: "SIGINT" }));
          return;
        }

        // Enter (CR or LF) submits the buffered secret.
        if (ch === "\r" || ch === "\n") {
          finish(resolve, answer.trim());
          return;
        }

        // Backspace / DEL removes the last buffered character.
        if (ch === "\u0008" || ch === "\u007f") {
          answer = answer.slice(0, -1);
          continue;
        }

        if (ch === "\u001b") {
          // Ignore terminal escape/control sequences such as Delete, arrows,
          // Home/End, etc. while leaving the buffered secret untouched.
          const rest = text.slice(i);
          const match = rest.match(/^\u001b(?:\[[0-9;?]*[~A-Za-z]|\][^\u0007]*\u0007|.)/);
          if (match) {
            // Skip over the whole sequence (loop increment adds the final 1).
            i += match[0].length - 1;
          }
          continue;
        }

        // Buffer printable characters only; remaining control chars are dropped.
        if (ch >= " ") {
          answer += ch;
        }
      }
    }

    output.write(question);
    input.setEncoding("utf8");
    if (typeof input.resume === "function") {
      input.resume();
    }
    // Raw mode delivers keystrokes immediately and disables terminal echo,
    // which is what keeps the secret off the screen.
    if (typeof input.setRawMode === "function") {
      input.setRawMode(true);
      rawModeEnabled = true;
    }
    input.on("data", onData);
  });
}

function prompt(question, opts = {}) {
return new Promise((resolve) => {
const silent = opts.secret === true && process.stdin.isTTY && process.stderr.isTTY;
if (silent) {
promptSecret(question)
.then(resolve)
.catch((err) => {
if (err && err.code === "SIGINT") {
process.kill(process.pid, "SIGINT");
return;
}
throw err;
});
return;
}
Comment on lines +110 to +124
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify unresolved Promise paths in prompt() silent branch (read-only).
awk 'NR>=110 && NR<=124 {printf "%4d  %s\n", NR, $0}' bin/lib/credentials.js
rg -nP 'new Promise\(\(resolve\)\s*=>\s*\{' bin/lib/credentials.js -n -A12 -B2

Repository: NVIDIA/NemoClaw

Length of output: 1081


🏁 Script executed:

sed -n '110,200p' bin/lib/credentials.js | cat -n

Repository: NVIDIA/NemoClaw

Length of output: 3624


Settle the outer prompt() Promise on silent-branch failures.

The Promise at line 111 only accepts resolve, but the silent branch (lines 114–123) has error paths that never settle it. When promptSecret() rejects with SIGINT (line 118), process.kill() executes and returns, leaving the outer Promise unresolved. For other errors (line 121), throw creates an unhandled rejection while the outer Promise still hangs. Callers awaiting prompt() can hang indefinitely.

Add reject to the Promise constructor and call reject(err) in both error cases:

Suggested fix
-function prompt(question, opts = {}) {
-  return new Promise((resolve) => {
+function prompt(question, opts = {}) {
+  return new Promise((resolve, reject) => {
     const silent = opts.secret === true && process.stdin.isTTY && process.stderr.isTTY;
     if (silent) {
       promptSecret(question)
         .then(resolve)
         .catch((err) => {
           if (err && err.code === "SIGINT") {
             process.kill(process.pid, "SIGINT");
-            return;
+            reject(err);
+            return;
           }
-          throw err;
+          reject(err);
         });
       return;
     }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
function prompt(question, opts = {}) {
return new Promise((resolve) => {
const silent = opts.secret === true && process.stdin.isTTY && process.stderr.isTTY;
if (silent) {
promptSecret(question)
.then(resolve)
.catch((err) => {
if (err && err.code === "SIGINT") {
process.kill(process.pid, "SIGINT");
return;
}
throw err;
});
return;
}
function prompt(question, opts = {}) {
return new Promise((resolve, reject) => {
const silent = opts.secret === true && process.stdin.isTTY && process.stderr.isTTY;
if (silent) {
promptSecret(question)
.then(resolve)
.catch((err) => {
if (err && err.code === "SIGINT") {
process.kill(process.pid, "SIGINT");
reject(err);
return;
}
reject(err);
});
return;
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@bin/lib/credentials.js` around lines 110 - 124, The outer Promise in prompt()
only accepts resolve so the silent branch's catch never settles callers; update
the Promise constructor to (resolve, reject) and in the promptSecret().catch
handler call reject(err) for both error paths instead of throwing (and for the
SIGINT path call reject(err) then process.kill(process.pid, "SIGINT") to ensure
the Promise is settled before exiting). This change should be made in the prompt
function and touches the promptSecret error handling block so callers awaiting
prompt() won't hang.

const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
rl.question(question, (answer) => {
rl.close();
Expand Down Expand Up @@ -67,7 +156,7 @@ async function ensureApiKey() {
console.log(" └─────────────────────────────────────────────────────────────────┘");
console.log("");

key = await prompt(" NVIDIA API Key: ");
key = await prompt(" NVIDIA API Key: ", { secret: true });

if (!key || !key.startsWith("nvapi-")) {
console.error(" Invalid key. Must start with nvapi-");
Expand Down Expand Up @@ -114,7 +203,7 @@ async function ensureGithubToken() {
console.log(" └──────────────────────────────────────────────────┘");
console.log("");

token = await prompt(" GitHub Token: ");
token = await prompt(" GitHub Token: ", { secret: true });

if (!token) {
console.error(" Token required for deploy (repo is private).");
Expand Down
58 changes: 57 additions & 1 deletion bin/lib/inference-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const { DEFAULT_OLLAMA_MODEL } = require("./local-inference");

function getProviderSelectionConfig(provider, model) {
switch (provider) {
case "nvidia-prod":
case "nvidia-nim":
return {
endpointType: "custom",
Expand All @@ -27,7 +28,62 @@ function getProviderSelectionConfig(provider, model) {
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
provider,
providerLabel: "NVIDIA Endpoint API",
providerLabel: "NVIDIA Endpoints",
};
case "openai-api":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "gpt-5.4",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "OPENAI_API_KEY",
provider,
providerLabel: "OpenAI",
};
case "anthropic-prod":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "claude-sonnet-4-6",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "ANTHROPIC_API_KEY",
provider,
providerLabel: "Anthropic",
};
case "compatible-anthropic-endpoint":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "custom-anthropic-model",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "COMPATIBLE_ANTHROPIC_API_KEY",
provider,
providerLabel: "Other Anthropic-compatible endpoint",
};
case "gemini-api":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "gemini-2.5-flash",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "GEMINI_API_KEY",
provider,
providerLabel: "Google Gemini",
};
case "compatible-endpoint":
return {
endpointType: "custom",
endpointUrl: INFERENCE_ROUTE_URL,
ncpPartner: null,
model: model || "custom-model",
profile: DEFAULT_ROUTE_PROFILE,
credentialEnv: "COMPATIBLE_API_KEY",
provider,
providerLabel: "Other OpenAI-compatible endpoint",
};
case "vllm-local":
return {
Expand Down
34 changes: 30 additions & 4 deletions bin/lib/local-inference.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ const { shellQuote } = require("./runner");
const HOST_GATEWAY_URL = "http://host.openshell.internal";
const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1";
const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b";
const SMALL_OLLAMA_MODEL = "qwen2.5:7b";
const LARGE_OLLAMA_MIN_MEMORY_MB = 32768;

function getLocalProviderBaseUrl(provider) {
switch (provider) {
Expand All @@ -18,6 +20,17 @@ function getLocalProviderBaseUrl(provider) {
}
}

/**
 * Returns the host-side base URL used to validate a local inference
 * provider before onboarding continues, or null for unknown providers.
 *
 * @param {string} provider - Provider key, e.g. "vllm-local" or "ollama-local".
 * @returns {string|null} OpenAI-compatible base URL reachable from the host.
 */
function getLocalProviderValidationBaseUrl(provider) {
  const validationUrls = Object.freeze({
    "vllm-local": "http://localhost:8000/v1",
    "ollama-local": "http://localhost:11434/v1",
  });
  return Object.hasOwn(validationUrls, provider) ? validationUrls[provider] : null;
}

function getLocalProviderHealthCheck(provider) {
switch (provider) {
case "vllm-local":
Expand Down Expand Up @@ -105,14 +118,23 @@ function parseOllamaList(output) {
/**
 * Lists locally installed Ollama models.
 *
 * Defect fixed: this span contained interleaved removed-diff lines (a stray
 * `if (parsed.length > 0) {` with no closing brace and a duplicated
 * `return parsed;`), which is not valid JavaScript. Reconstructed to the
 * intended new-side implementation: return whatever `ollama list` reports,
 * including an empty array when nothing is installed.
 *
 * @param {(cmd: string, opts?: object) => string} runCapture - Shell runner
 *   returning captured stdout; `ignoreError` keeps a missing/failing
 *   `ollama` binary from aborting onboarding.
 * @returns {string[]} Installed model identifiers (possibly empty).
 */
function getOllamaModelOptions(runCapture) {
  const output = runCapture("ollama list 2>/dev/null", { ignoreError: true });
  const parsed = parseOllamaList(output);
  return parsed;
}

/**
 * Models to offer when no Ollama models are installed yet.
 *
 * Defect fixed: a stray removed-diff line (`return [DEFAULT_OLLAMA_MODEL];`)
 * sat before the real return, making the function unconditionally return the
 * large model and leaving the intended logic unreachable. Reconstructed to
 * the intended behavior: always offer the small model, and add the large
 * default only when the GPU has enough memory for it.
 *
 * @param {{ totalMemoryMB: number }|null|undefined} gpu - Detected GPU info,
 *   or a falsy value when no GPU was detected.
 * @returns {string[]} Candidate model identifiers, small model first.
 */
function getBootstrapOllamaModelOptions(gpu) {
  const options = [SMALL_OLLAMA_MODEL];
  if (gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB) {
    options.push(DEFAULT_OLLAMA_MODEL);
  }
  return options;
}

function getDefaultOllamaModel(runCapture) {
/**
 * Picks the default Ollama model to preselect during onboarding.
 *
 * Prefers an already-installed copy of DEFAULT_OLLAMA_MODEL, then the first
 * installed model; when nothing is installed, falls back to the first
 * bootstrap candidate sized for the detected GPU.
 *
 * @param {(cmd: string, opts?: object) => string} runCapture - Shell runner used to query `ollama list`.
 * @param {{ totalMemoryMB: number }|null} [gpu] - Detected GPU info, if any.
 * @returns {string} Model identifier to use as the default.
 */
function getDefaultOllamaModel(runCapture, gpu = null) {
  const installed = getOllamaModelOptions(runCapture);
  if (installed.length > 0) {
    if (installed.includes(DEFAULT_OLLAMA_MODEL)) {
      return DEFAULT_OLLAMA_MODEL;
    }
    return installed[0];
  }
  const [firstBootstrapModel] = getBootstrapOllamaModelOptions(gpu);
  return firstBootstrapModel;
}

Expand Down Expand Up @@ -164,8 +186,12 @@ module.exports = {
CONTAINER_REACHABILITY_IMAGE,
DEFAULT_OLLAMA_MODEL,
HOST_GATEWAY_URL,
LARGE_OLLAMA_MIN_MEMORY_MB,
SMALL_OLLAMA_MODEL,
getDefaultOllamaModel,
getBootstrapOllamaModelOptions,
getLocalProviderBaseUrl,
getLocalProviderValidationBaseUrl,
getLocalProviderContainerReachabilityCheck,
getLocalProviderHealthCheck,
getOllamaModelOptions,
Expand Down
Loading
Loading