Commit 30723a3

feat(onboard): add custom OpenAI-compatible provider option
Add a "Custom OpenAI-compatible endpoint" option to the onboarding wizard, allowing users to bring any provider that exposes an OpenAI-compatible /v1/chat/completions endpoint (e.g. Google Gemini via AI Studio, OpenRouter, Together AI, LiteLLM). The custom provider follows the same gateway-routed architecture as existing providers: the sandbox talks to inference.local, and the OpenShell gateway proxies to the user's endpoint with credential injection and model rewriting. Non-NVIDIA endpoints may reject OpenAI-specific parameters like "store". Set supportsStore: false in the default openclaw.json model compat to prevent 400 rejections from strict endpoints. This is safe for all providers — NVIDIA and Ollama ignore the flag. Interactive mode prompts for base URL, API key, and model name. Non-interactive mode reads NEMOCLAW_CUSTOM_BASE_URL, NEMOCLAW_CUSTOM_API_KEY, and NEMOCLAW_MODEL. Tested with Google Gemini (gemini-2.5-flash) and local Ollama (llama3.2) to verify backward compatibility.
1 parent 1dbf82f commit 30723a3
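To make the routing concrete, here is a minimal sketch of what an OpenAI-style chat call from inside the sandbox looks like under this architecture. It is illustrative only: the `inference.local` base URL and the `unused` placeholder key come from the Dockerfile defaults in this commit, the credential-injection and model-rewriting behavior from the commit description, and `demoChatCall` is a hypothetical name.

```js
// Minimal sketch of an in-sandbox request under the gateway-routed flow.
// The agent only ever talks to inference.local with a placeholder key; the
// OpenShell gateway injects the real credential and rewrites the model name
// before forwarding to the configured custom base URL.
async function demoChatCall() {
  const response = await fetch("https://inference.local/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: "Bearer unused", // placeholder; the real key never enters the sandbox
    },
    body: JSON.stringify({
      model: "gemini-2.5-flash", // illustrative; whatever model was chosen at onboarding
      messages: [{ role: "user", content: "hello" }],
    }),
  });
  const data = await response.json();
  console.log(data.choices[0].message.content);
}

demoChatCall();
```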

7 files changed

Lines changed: 186 additions & 10 deletions

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -113,7 +113,7 @@ config = { \
     'baseUrl': 'https://inference.local/v1', \
     'apiKey': 'unused', \
     'api': 'openai-completions', \
-    'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096}] \
+    'models': [{'id': model, 'name': model, 'reasoning': False, 'input': ['text'], 'cost': {'input': 0, 'output': 0, 'cacheRead': 0, 'cacheWrite': 0}, 'contextWindow': 131072, 'maxTokens': 4096, 'compat': {'supportsStore': False}}] \
   } \
 }}, \
 'channels': {'defaults': {'configWrites': False}}, \
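Rendered out of the single-line Python dict above, the model entry written into the default openclaw.json looks roughly like the following sketch (the surrounding provider object is omitted and the id is illustrative). The only change in this commit is the `compat` block.

```js
// Sketch of one entry in the default openclaw.json "models" list.
const modelEntry = {
  id: "gemini-2.5-flash", // illustrative; set to the model chosen at onboarding
  name: "gemini-2.5-flash",
  reasoning: false,
  input: ["text"],
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
  contextWindow: 131072,
  maxTokens: 4096,
  compat: { supportsStore: false }, // new: keep the OpenAI-only "store" param off for strict endpoints
};
```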

README.md

Lines changed: 5 additions & 2 deletions
@@ -179,13 +179,16 @@ When something goes wrong, errors may originate from either NemoClaw or the Open
 
 ## Inference
 
-Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it to the NVIDIA Endpoint provider.
+Inference requests from the agent never leave the sandbox directly. OpenShell intercepts every call and routes it through the gateway proxy.
 
 | Provider | Model | Use Case |
 |--------------|--------------------------------------|-------------------------------------------------|
 | NVIDIA Endpoint | `nvidia/nemotron-3-super-120b-a12b` | Production. Requires an NVIDIA API key. |
+| Custom OpenAI-compatible | User-specified | Any provider with an OpenAI-compatible `/v1/chat/completions` endpoint. |
 
-Get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.
+For the NVIDIA endpoint, get an API key from [build.nvidia.com](https://build.nvidia.com). The `nemoclaw onboard` command prompts for this key during setup.
+
+For custom providers, select "Custom OpenAI-compatible endpoint" during `nemoclaw onboard` and provide the base URL, API key, and model name. Any provider that exposes an OpenAI-compatible `/v1/chat/completions` endpoint will work. For non-interactive mode, set `NEMOCLAW_PROVIDER=custom`, `NEMOCLAW_CUSTOM_BASE_URL`, `NEMOCLAW_CUSTOM_API_KEY`, and `NEMOCLAW_MODEL`.
 
 Local inference options such as Ollama and vLLM are still experimental. On macOS, they also depend on OpenShell host-routing support in addition to the local service itself being reachable on the host.

bin/lib/inference-config.js

Lines changed: 11 additions & 0 deletions
@@ -51,6 +51,17 @@ function getProviderSelectionConfig(provider, model) {
         provider,
         providerLabel: "Local Ollama",
       };
+    case "custom":
+      return {
+        endpointType: "custom",
+        endpointUrl: INFERENCE_ROUTE_URL,
+        ncpPartner: null,
+        model: model || null,
+        profile: DEFAULT_ROUTE_PROFILE,
+        credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
+        provider,
+        providerLabel: "Custom Provider",
+      };
     default:
       return null;
   }

bin/lib/onboard.js

Lines changed: 104 additions & 7 deletions
@@ -27,7 +27,7 @@ const {
   isUnsupportedMacosRuntime,
   shouldPatchCoredns,
 } = require("./platform");
-const { prompt, ensureApiKey, getCredential } = require("./credentials");
+const { prompt, ensureApiKey, getCredential, saveCredential } = require("./credentials");
 const registry = require("./registry");
 const nim = require("./nim");
 const policies = require("./policies");
@@ -209,10 +209,10 @@ function getNonInteractiveProvider() {
   const providerKey = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase();
   if (!providerKey) return null;
 
-  const validProviders = new Set(["cloud", "ollama", "vllm", "nim"]);
+  const validProviders = new Set(["cloud", "ollama", "vllm", "nim", "custom"]);
   if (!validProviders.has(providerKey)) {
     console.error(` Unsupported NEMOCLAW_PROVIDER: ${providerKey}`);
-    console.error(" Valid values: cloud, ollama, vllm, nim");
+    console.error(" Valid values: cloud, ollama, vllm, nim, custom");
     process.exit(1);
   }
 
@@ -532,6 +532,7 @@ async function setupNim(sandboxName, gpu) {
   let model = null;
   let provider = "nvidia-nim";
   let nimContainer = null;
+  let customCreds = null;
 
   // Detect local inference options
   const hasOllama = !!runCapture("command -v ollama", { ignoreError: true });
@@ -570,6 +571,8 @@ async function setupNim(sandboxName, gpu) {
     options.push({ key: "install-ollama", label: "Install Ollama (macOS)" });
   }
 
+  options.push({ key: "custom", label: "Custom OpenAI-compatible endpoint (bring your own)" });
+
   if (options.length > 1) {
     let selected;
 
@@ -681,6 +684,83 @@ async function setupNim(sandboxName, gpu) {
       console.log(" ✓ Using existing vLLM on localhost:8000");
       provider = "vllm-local";
       model = "vllm-local";
+    } else if (selected.key === "custom") {
+      provider = "custom";
+      let customBaseUrl;
+      let customApiKey;
+      if (isNonInteractive()) {
+        customBaseUrl = (process.env.NEMOCLAW_CUSTOM_BASE_URL || "").trim();
+        customApiKey = (process.env.NEMOCLAW_CUSTOM_API_KEY || "").trim();
+        model = requestedModel;
+        if (!customBaseUrl || !customApiKey || !model) {
+          console.error(" Custom provider requires NEMOCLAW_CUSTOM_BASE_URL, NEMOCLAW_CUSTOM_API_KEY, and NEMOCLAW_MODEL.");
+          process.exit(1);
+        }
+      } else {
+        console.log("");
+        console.log(" ┌─────────────────────────────────────────────────────────────────┐");
+        console.log(" │ Custom OpenAI-compatible provider │");
+        console.log(" │ │");
+        console.log(" │ Provide a base URL and API key for any provider that │");
+        console.log(" │ exposes an OpenAI-compatible /v1/chat/completions endpoint. │");
+        console.log(" │ │");
+        console.log(" │ Examples: │");
+        console.log(" │ Google Gemini https://generativelanguage.googleapis.com/v1beta/openai │");
+        console.log(" │ OpenRouter https://openrouter.ai/api/v1 │");
+        console.log(" │ Together AI https://api.together.xyz/v1 │");
+        console.log(" │ LiteLLM http://localhost:4000/v1 │");
+        console.log(" └─────────────────────────────────────────────────────────────────┘");
+        console.log("");
+
+        customBaseUrl = (await prompt(" Base URL: ")).trim();
+        if (!customBaseUrl) {
+          console.error(" Base URL is required.");
+          process.exit(1);
+        }
+
+        const previousBaseUrl = getCredential("CUSTOM_PROVIDER_BASE_URL");
+        saveCredential("CUSTOM_PROVIDER_BASE_URL", customBaseUrl);
+
+        customApiKey = previousBaseUrl === customBaseUrl
+          ? getCredential("CUSTOM_PROVIDER_API_KEY")
+          : null;
+        if (!customApiKey) {
+          if (previousBaseUrl && previousBaseUrl !== customBaseUrl) {
+            console.log(" Base URL changed — please enter a new API key.");
+          }
+          customApiKey = (await prompt(" API Key: ")).trim();
+          if (!customApiKey) {
+            console.error(" API key is required.");
+            process.exit(1);
+          }
+          saveCredential("CUSTOM_PROVIDER_API_KEY", customApiKey);
+          console.log(" Key saved to ~/.nemoclaw/credentials.json");
+        } else {
+          console.log(" Using saved API key from credentials.");
+        }
+
+        model = await prompt(" Model name (e.g. gemini-2.5-flash): ");
+        if (!model) {
+          console.error(" Model name is required.");
+          process.exit(1);
+        }
+      }
+
+      // Validate base URL
+      try {
+        const parsed = new URL(customBaseUrl);
+        if (parsed.protocol === "http:" && !["localhost", "127.0.0.1", "::1"].includes(parsed.hostname)) {
+          console.error(" Insecure http:// URLs are only allowed for localhost. Use https:// for remote endpoints.");
+          process.exit(1);
+        }
+      } catch {
+        console.error(` Invalid URL: ${customBaseUrl}`);
+        process.exit(1);
+      }
+
+      // Store credentials for setupInference to use
+      customCreds = { baseUrl: customBaseUrl, apiKey: customApiKey };
+      console.log(` ✓ Using custom provider with model: ${model}`);
     }
     // else: cloud — fall through to default below
   }
@@ -703,12 +783,12 @@ async function setupNim(sandboxName, gpu) {
 
   registry.updateSandbox(sandboxName, { model, provider, nimContainer });
 
-  return { model, provider };
+  return { model, provider, customCreds };
 }
 
 // ── Step 5: Inference provider ───────────────────────────────────
 
-async function setupInference(sandboxName, model, provider) {
+async function setupInference(sandboxName, model, provider, customCreds) {
   step(5, 7, "Setting up inference provider");
 
   if (provider === "nvidia-nim") {
@@ -769,6 +849,22 @@ async function setupInference(sandboxName, model, provider) {
       console.error(` ${probe.message}`);
       process.exit(1);
     }
+  } else if (provider === "custom") {
+    const baseUrl = customCreds?.baseUrl || getCredential("CUSTOM_PROVIDER_BASE_URL");
+    const apiKey = customCreds?.apiKey || getCredential("CUSTOM_PROVIDER_API_KEY");
+    run(
+      `openshell provider create --name custom-provider --type openai ` +
+        `--credential ${shellQuote("OPENAI_API_KEY=" + apiKey)} ` +
+        `--config ${shellQuote("OPENAI_BASE_URL=" + baseUrl)} 2>&1 || ` +
+        `openshell provider update custom-provider ` +
+        `--credential ${shellQuote("OPENAI_API_KEY=" + apiKey)} ` +
+        `--config ${shellQuote("OPENAI_BASE_URL=" + baseUrl)} 2>&1 || true`,
+      { ignoreError: true }
+    );
+    run(
+      `openshell inference set --no-verify --provider custom-provider --model ${shellQuote(model)} 2>/dev/null || true`,
+      { ignoreError: true }
+    );
   }
 
   registry.updateSandbox(sandboxName, { model, provider });
@@ -921,6 +1017,7 @@ function printDashboard(sandboxName, model, provider) {
   if (provider === "nvidia-nim") providerLabel = "NVIDIA Endpoint API";
   else if (provider === "vllm-local") providerLabel = "Local vLLM";
   else if (provider === "ollama-local") providerLabel = "Local Ollama";
+  else if (provider === "custom") providerLabel = "Custom Provider";
 
   console.log("");
   console.log(` ${"─".repeat(50)}`);
@@ -949,8 +1046,8 @@ async function onboard(opts = {}) {
   const gpu = await preflight();
   await startGateway(gpu);
   const sandboxName = await createSandbox(gpu);
-  const { model, provider } = await setupNim(sandboxName, gpu);
-  await setupInference(sandboxName, model, provider);
+  const { model, provider, customCreds } = await setupNim(sandboxName, gpu);
+  await setupInference(sandboxName, model, provider, customCreds);
   await setupOpenclaw(sandboxName, model, provider);
   await setupPolicies(sandboxName);
   printDashboard(sandboxName, model, provider);
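The base-URL validation added to `setupNim` above boils down to a small predicate: unparseable input is rejected, and plain `http://` is accepted only for loopback hosts. Here it is as a standalone sketch; the helper name is invented for illustration, and the checks mirror the diff.

```js
// Hypothetical helper mirroring the validation in bin/lib/onboard.js.
function isAcceptableBaseUrl(input) {
  let parsed;
  try {
    parsed = new URL(input);
  } catch {
    return false; // not a URL at all; onboarding exits with "Invalid URL"
  }
  const loopback = ["localhost", "127.0.0.1", "::1"];
  if (parsed.protocol === "http:" && !loopback.includes(parsed.hostname)) {
    return false; // insecure http:// is only allowed for localhost
  }
  return true;
}

// How the onboarding flow would treat typical inputs:
console.log(isAcceptableBaseUrl("http://localhost:4000/v1"));     // true  (local LiteLLM)
console.log(isAcceptableBaseUrl("https://openrouter.ai/api/v1")); // true
console.log(isAcceptableBaseUrl("http://example.com/v1"));        // false (insecure remote)
console.log(isAcceptableBaseUrl("not a url"));                    // false
```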

docs/inference/switch-inference-providers.md

Lines changed: 38 additions & 0 deletions
@@ -67,6 +67,44 @@ You can switch to any of these models at runtime.
 | `nvidia/llama-3.3-nemotron-super-49b-v1.5` | Nemotron Super 49B v1.5 | 131,072 | 4,096 |
 | `nvidia/nemotron-3-nano-30b-a3b` | Nemotron 3 Nano 30B | 131,072 | 4,096 |
 
+## Custom OpenAI-Compatible Providers
+
+You can use any provider that exposes an OpenAI-compatible `/v1/chat/completions` endpoint.
+
+During `nemoclaw onboard`, select **"Custom OpenAI-compatible endpoint"** and provide:
+
+- **Base URL** — the provider's API base (e.g. `https://generativelanguage.googleapis.com/v1beta/openai`)
+- **API key** — your provider credential
+- **Model name** — the model identifier (e.g. `gemini-2.5-flash`)
+
+Examples of compatible providers:
+
+| Provider | Base URL |
+|---|---|
+| Google AI Studio (Gemini) | `https://generativelanguage.googleapis.com/v1beta/openai` |
+| OpenRouter | `https://openrouter.ai/api/v1` |
+| Together AI | `https://api.together.xyz/v1` |
+| LiteLLM (local) | `http://localhost:4000/v1` |
+
+To switch to a custom provider at runtime:
+
+```console
+$ openshell provider create --name custom-provider --type openai \
+    --credential "OPENAI_API_KEY=<your-key>" \
+    --config "OPENAI_BASE_URL=<base-url>"
+$ openshell inference set --no-verify --provider custom-provider --model <model-name>
+```
+
+For non-interactive onboarding:
+
+```console
+$ NEMOCLAW_PROVIDER=custom \
+    NEMOCLAW_CUSTOM_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai \
+    NEMOCLAW_CUSTOM_API_KEY=AIza... \
+    NEMOCLAW_MODEL=gemini-2.5-flash \
+    nemoclaw onboard --non-interactive
+```
+
 ## Related Topics
 
 - [Inference Profiles](../reference/inference-profiles.md) for full profile configuration details.

nemoclaw-blueprint/blueprint.yaml

Lines changed: 8 additions & 0 deletions
@@ -11,6 +11,7 @@ profiles:
   - ncp
   - nim-local
   - vllm
+  - custom
 
 description: |
   NemoClaw blueprint: orchestrates OpenClaw sandbox creation, migration,
@@ -54,6 +55,13 @@ components:
     credential_env: "OPENAI_API_KEY"
     credential_default: "dummy"
 
+  custom:
+    provider_type: "openai"
+    provider_name: "custom-provider"
+    endpoint: ""
+    model: ""
+    credential_env: "OPENAI_API_KEY"
+
   policy:
     base: "sandboxes/openclaw/policy.yaml"
     additions:

test/inference-config.test.js

Lines changed: 19 additions & 0 deletions
@@ -56,6 +56,25 @@ describe("inference selection config", () => {
     });
   });
 
+  it("maps custom to the sandbox inference route with user-specified model", () => {
+    assert.deepEqual(getProviderSelectionConfig("custom", "gemini-2.5-flash"), {
+      endpointType: "custom",
+      endpointUrl: INFERENCE_ROUTE_URL,
+      ncpPartner: null,
+      model: "gemini-2.5-flash",
+      profile: DEFAULT_ROUTE_PROFILE,
+      credentialEnv: DEFAULT_ROUTE_CREDENTIAL_ENV,
+      provider: "custom",
+      providerLabel: "Custom Provider",
+    });
+  });
+
+  it("returns null model for custom provider when no model specified", () => {
+    const config = getProviderSelectionConfig("custom");
+    assert.equal(config.model, null);
+    assert.equal(config.providerLabel, "Custom Provider");
+  });
+
   it("builds a qualified OpenClaw primary model for ollama-local", () => {
     assert.equal(
       getOpenClawPrimaryModel("ollama-local", "nemotron-3-nano:30b"),
