diff --git a/Dockerfile b/Dockerfile index 4967deb62..2ab24d782 100644 --- a/Dockerfile +++ b/Dockerfile @@ -92,14 +92,15 @@ ENV NEMOCLAW_MODEL=${NEMOCLAW_MODEL} \ CHAT_UI_URL=${CHAT_UI_URL} WORKDIR /sandbox -USER sandbox -# Write the COMPLETE openclaw.json including gateway config and auth token. -# This file is immutable at runtime (Landlock read-only on /sandbox/.openclaw). -# No runtime writes to openclaw.json are needed or possible. -# Build args (NEMOCLAW_MODEL, CHAT_UI_URL) customize per deployment. +# Write the COMPLETE openclaw.json to /etc/openclaw/ (immutable config dir). +# This keeps the config outside the agent-writable zone entirely. # Auth token is generated per build so each image has a unique token. -RUN python3 -c "\ +# At runtime, nemoclaw-start.sh copies this template to /tmp/openclaw/ for +# any runtime mutations (e.g. auth profile injection). +# Ref: #514, #516, #654 +USER root +RUN mkdir -p /etc/openclaw && python3 -c "\ import json, os, secrets; \ from urllib.parse import urlparse; \ model = os.environ['NEMOCLAW_MODEL']; \ @@ -136,9 +137,15 @@ config = { \ 'auth': {'token': secrets.token_hex(32)} \ } \ }; \ -path = os.path.expanduser('~/.openclaw/openclaw.json'); \ -json.dump(config, open(path, 'w'), indent=2); \ -os.chmod(path, 0o600)" +json.dump(config, open('/etc/openclaw/openclaw.json', 'w'), indent=2); \ +os.chmod('/etc/openclaw/openclaw.json', 0o444)" \ + && chmod 555 /etc/openclaw +USER sandbox + +# Symlink so OpenClaw finds config at the expected path. +# At runtime, nemoclaw-start.sh replaces this with a link to /tmp/openclaw/ +# (mutable runtime copy of the immutable /etc template). +RUN ln -sf /etc/openclaw/openclaw.json /sandbox/.openclaw/openclaw.json # Install NemoClaw plugin into OpenClaw RUN openclaw doctor --fix > /dev/null 2>&1 || true \ @@ -146,14 +153,14 @@ RUN openclaw doctor --fix > /dev/null 2>&1 || true \ # Lock openclaw.json via DAC: chown to root so the sandbox user cannot modify # it at runtime. 
This works regardless of Landlock enforcement status. -# The Landlock policy (/sandbox/.openclaw in read_only) provides defense-in-depth -# once OpenShell enables enforcement. # Ref: https://github.com/NVIDIA/NemoClaw/issues/514 USER root RUN chown root:root /sandbox/.openclaw \ && find /sandbox/.openclaw -mindepth 1 -maxdepth 1 -exec chown -h root:root {} + \ - && chmod 1777 /sandbox/.openclaw \ - && chmod 444 /sandbox/.openclaw/openclaw.json + && chmod 0555 /sandbox/.openclaw \ + && chmod 444 /sandbox/.openclaw/openclaw.json \ + && chown -R sandbox:sandbox /sandbox/.openclaw-data/workspace \ + /sandbox/.openclaw-data/agents USER sandbox ENTRYPOINT ["/bin/bash"] diff --git a/docs/index.md b/docs/index.md index d4a411124..96a3d438d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -170,7 +170,7 @@ Egress control, operator approval flow, and policy configuration. :link: workspace/workspace-files :link-type: doc -Understand agent identity, memory, and configuration files that persist in the sandbox. +Understand `SOUL.md`, `USER.md`, and other workspace files, plus backup and restore. +++ {bdg-secondary}`Concept` @@ -234,18 +234,18 @@ Set Up the Telegram Bridge ``` ```{toctree} -:caption: Monitoring +:caption: Workspace :hidden: -Monitor Sandbox Activity +Workspace Files +Backup & Restore ``` ```{toctree} -:caption: Workspace +:caption: Monitoring :hidden: -Workspace Files -Back Up and Restore +Monitor Sandbox Activity ``` ```{toctree} diff --git a/docs/reference/commands.md b/docs/reference/commands.md index dc3a6e1d7..5c05f029b 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -105,9 +105,9 @@ Stop the NIM container and delete the sandbox. This removes the sandbox from the registry. :::{warning} -Destroying a sandbox permanently deletes all files inside it, including -[workspace files](../workspace/workspace-files.md) (SOUL.md, USER.md, IDENTITY.md, AGENTS.md, MEMORY.md, and daily memory notes). 
-Back up your workspace first by following the instructions at [Back Up and Restore](../workspace/backup-restore.md). +This command permanently deletes the sandbox **and its persistent volume**. +All [workspace files](../workspace/workspace-files.md) (SOUL.md, USER.md, IDENTITY.md, AGENTS.md, MEMORY.md, and daily memory notes) are lost. +Back up your workspace first — see [Backup and Restore](../workspace/backup-restore.md). ::: ```console diff --git a/docs/workspace/backup-restore.md b/docs/workspace/backup-restore.md index 79d9695ad..665f85143 100644 --- a/docs/workspace/backup-restore.md +++ b/docs/workspace/backup-restore.md @@ -1,11 +1,11 @@ --- title: - page: "Back Up and Restore Workspace Files" - nav: "Back Up & Restore" + page: "Backup and Restore Workspace Files" + nav: "Backup & Restore" description: "How to back up and restore OpenClaw workspace files before destructive operations." keywords: ["nemoclaw backup", "nemoclaw restore", "workspace backup", "openshell sandbox download upload"] topics: ["generative_ai", "ai_agents"] -tags: ["openclaw", "openshell", "sandboxing", "workspace", "backup", "nemoclaw"] +tags: ["openclaw", "openshell", "sandboxing", "workspace", "backup"] content: type: how_to difficulty: technical_beginner @@ -14,28 +14,22 @@ status: published --- -# Back Up and Restore Workspace Files +# Backup and Restore Workspace Files Workspace files define your agent's personality, memory, and user context. They persist across sandbox restarts but are **permanently deleted** when you run `nemoclaw destroy`. This guide covers manual backup with CLI commands and an automated script. -## Prerequisites - -- A running NemoClaw sandbox (for backup) or a freshly created sandbox (for restore). -- The OpenShell CLI on your `PATH`. -- The sandbox name (shown by `nemoclaw list`). - ## When to Back Up -- Before running `nemoclaw destroy`. -- Before major NemoClaw version upgrades. -- Periodically, if you have invested time customizing your agent. 
+- **Before running `nemoclaw destroy`** +- Before major NemoClaw version upgrades +- Periodically, if you've invested time customizing your agent ## Manual Backup @@ -101,7 +95,7 @@ $ ./scripts/backup-workspace.sh restore my-assistant 20260320-120000 List backed-up files to confirm completeness: ```console -$ ls ~/.nemoclaw/backups/20260320-120000/ +$ ls -la ~/.nemoclaw/backups/20260320-120000/ AGENTS.md IDENTITY.md MEMORY.md @@ -110,17 +104,7 @@ USER.md memory/ ``` -## Inspecting Files Inside the Sandbox - -Connect to the sandbox to list or view workspace files directly: - -```console -$ openshell sandbox connect my-assistant -$ ls -la /sandbox/.openclaw/workspace/ -``` - ## Next Steps - [Workspace Files overview](workspace-files.md) — learn what each file does - [Commands reference](../reference/commands.md) -- [Monitor Sandbox Activity](../monitoring/monitor-sandbox-activity.md) diff --git a/docs/workspace/workspace-files.md b/docs/workspace/workspace-files.md index 1fcbc5bbc..1c95f13df 100644 --- a/docs/workspace/workspace-files.md +++ b/docs/workspace/workspace-files.md @@ -2,10 +2,10 @@ title: page: "Workspace Files" nav: "Workspace Files" -description: "What workspace files are, where they live, and how they persist across sandbox restarts." -keywords: ["nemoclaw workspace files", "soul.md", "user.md", "identity.md", "agents.md", "memory.md", "sandbox persistence"] +description: "What workspace personality and configuration files are, where they live, and how they persist across sandbox restarts." 
+keywords: ["nemoclaw workspace files", "soul.md", "user.md", "identity.md", "agents.md", "sandbox persistence"] topics: ["generative_ai", "ai_agents"] -tags: ["openclaw", "openshell", "sandboxing", "workspace", "persistence", "nemoclaw"] +tags: ["openclaw", "openshell", "sandboxing", "workspace", "persistence"] content: type: concept difficulty: technical_beginner @@ -14,27 +14,25 @@ status: published --- # Workspace Files -OpenClaw stores agent identity, behavior, and memory in a set of Markdown files inside the sandbox. -These files live at `/sandbox/.openclaw/workspace/` and are read by the agent at the start of every session. +OpenClaw stores the agent's personality, user context, memory, and behavioral configuration in a set of Markdown files inside the sandbox. +These files live at `/sandbox/.openclaw/workspace/` and are collectively called **workspace files**. ## File Reference -Each file controls a distinct aspect of the agent's behavior and memory. - -| File | Purpose | Upstream Docs | -|---|---|---| -| `SOUL.md` | Core personality, tone, and behavioral rules. | [SOUL template](https://docs.openclaw.ai/reference/templates/SOUL) | -| `USER.md` | Preferences, context, and facts the agent learns about you. | [USER template](https://docs.openclaw.ai/reference/templates/USER) | -| `IDENTITY.md` | Agent name, creature type, emoji, and self-presentation. | [IDENTITY template](https://docs.openclaw.ai/reference/templates/IDENTITY) | -| `AGENTS.md` | Multi-agent coordination, memory conventions, and safety guidelines. | [AGENTS template](https://docs.openclaw.ai/reference/templates/AGENTS) | -| `MEMORY.md` | Curated long-term memory distilled from daily notes. | — | -| `memory/` | Directory of daily note files (`YYYY-MM-DD.md`) for session continuity. | — | +| File | Purpose | +|---|---| +| `SOUL.md` | Defines the agent's persona, tone, and communication style. | +| `USER.md` | Stores information about the human the agent assists. 
| +| `IDENTITY.md` | Short identity card — name, language, emoji, creature type. | +| `AGENTS.md` | Behavioral rules, memory conventions, safety guidelines, and session workflow. | +| `MEMORY.md` | Curated long-term memory distilled from daily notes. | +| `memory/` | Directory of daily note files (`YYYY-MM-DD.md`) for session continuity. | ## Where They Live @@ -52,23 +50,23 @@ All workspace files reside inside the sandbox filesystem: └── 2026-03-19.md ``` -:::{note} -The workspace directory is hidden (`.openclaw`). -The files are not at `/sandbox/SOUL.md` — use the full path when downloading or uploading. -::: - ## Persistence Behavior Understanding when these files persist and when they are lost is critical. -| Event | Workspace files | -|---|---| -| Sandbox restart | **Preserved** — the sandbox PVC retains its data. | -| `nemoclaw destroy` | **Lost** — the sandbox and its PVC are deleted. | +### Survives: Sandbox Restart + +Sandbox restarts (`openshell sandbox restart`) preserve workspace files. +The sandbox uses a **Persistent Volume Claim (PVC)** that outlives individual container restarts. + +### Lost: Sandbox Destroy + +Running `nemoclaw destroy` **deletes the sandbox and its PVC**. +All workspace files are permanently lost unless you back them up first. :::{warning} Always back up your workspace files before running `nemoclaw destroy`. -See [Back Up and Restore](backup-restore.md) for instructions. +See [Backup and Restore](backup-restore.md) for instructions. ::: ## Editing Workspace Files @@ -76,10 +74,10 @@ See [Back Up and Restore](backup-restore.md) for instructions. The agent reads these files at the start of every session. You can edit them in two ways: -1. **Let the agent do it** — Ask your agent to update its persona, memory, or user context during a session. -2. **Edit manually** — Use `openshell sandbox connect` to open a terminal inside the sandbox and edit files directly, or use `openshell sandbox upload` to push edited files from your host. 
+1. **Let the agent do it** — Ask your agent to update its persona, memory, or user context. +2. **Edit manually** — Use `openshell sandbox connect` to open a terminal inside the sandbox and edit files directly, or use `openshell sandbox upload` to push edited files from your host. ## Next Steps -- [Back Up and Restore workspace files](backup-restore.md) +- [Backup and Restore workspace files](backup-restore.md) - [Commands reference](../reference/commands.md) diff --git a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml index 3e3d1cd92..119155488 100644 --- a/nemoclaw-blueprint/policies/openclaw-sandbox.yaml +++ b/nemoclaw-blueprint/policies/openclaw-sandbox.yaml @@ -36,9 +36,30 @@ filesystem_policy: - /dev/null - /sandbox/.openclaw-data # Writable agent/plugin state (symlinked from .openclaw) +# TODO: evaluate landlock enforce mode — currently best_effort because +# enforce can break things in unpredictable ways. Needs thorough testing +# across different kernel versions before we flip this. (ref #516 item 3) landlock: compatibility: best_effort +# Prevent agents from tampering with their own config through the write/edit tools. +# This is defense-in-depth on top of the filesystem permission lock from #514. +tool_policy: + deny: + - tool: write + paths: + - "**/.openclaw/openclaw.json" + - "/etc/openclaw/openclaw.json" + - "/tmp/openclaw/openclaw.json" + - tool: edit + paths: + - "**/.openclaw/openclaw.json" + - "/etc/openclaw/openclaw.json" + - "/tmp/openclaw/openclaw.json" + # NOTE: exec command restrictions are handled at the OpenShell/sandbox level + # via security=allowlist mode, not through tool_policy (which only gates tool + # names, not individual commands). 
+ process: run_as_user: sandbox run_as_group: sandbox diff --git a/scripts/backup-workspace.sh b/scripts/backup-workspace.sh index 573823443..5b55c8ce8 100755 --- a/scripts/backup-workspace.sh +++ b/scripts/backup-workspace.sh @@ -37,6 +37,10 @@ EOF exit 1 } +require_cmd() { + command -v "$1" >/dev/null 2>&1 || fail "'$1' is required but not found in PATH." +} + do_backup() { local sandbox="$1" local ts @@ -53,7 +57,7 @@ do_backup() { local count=0 for f in "${FILES[@]}"; do - if openshell sandbox download "$sandbox" "${WORKSPACE_PATH}/${f}" "${dest}/"; then + if openshell sandbox download "$sandbox" "${WORKSPACE_PATH}/${f}" "${dest}/" 2>/dev/null; then count=$((count + 1)) else warn "Skipped ${f} (not found or download failed)" @@ -61,7 +65,7 @@ do_backup() { done for d in "${DIRS[@]}"; do - if openshell sandbox download "$sandbox" "${WORKSPACE_PATH}/${d}/" "${dest}/${d}/"; then + if openshell sandbox download "$sandbox" "${WORKSPACE_PATH}/${d}/" "${dest}/${d}/" 2>/dev/null; then count=$((count + 1)) else warn "Skipped ${d}/ (not found or download failed)" @@ -69,6 +73,7 @@ do_backup() { done if [ "$count" -eq 0 ]; then + rmdir "$dest" 2>/dev/null || true fail "No files were backed up. Check that the sandbox '${sandbox}' exists and has workspace files." fi @@ -121,7 +126,7 @@ do_restore() { # --- Main --- [ $# -ge 2 ] || usage -command -v openshell >/dev/null 2>&1 || fail "'openshell' is required but not found in PATH." +require_cmd openshell action="$1" sandbox="$2" diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh index 053506425..3b1043897 100755 --- a/scripts/nemoclaw-start.sh +++ b/scripts/nemoclaw-start.sh @@ -15,6 +15,69 @@ NEMOCLAW_CMD=("$@") CHAT_UI_URL="${CHAT_UI_URL:-http://127.0.0.1:18789}" PUBLIC_PORT=18789 +fix_openclaw_config() { + python3 - <<'PYCFG' +import json +import os +import shutil +from urllib.parse import urlparse + +# /etc/openclaw/openclaw.json is the immutable template (baked into image). 
+# We copy it to /tmp/openclaw/ for runtime mutations. The build-time symlink at +# ~/.openclaw/openclaw.json targets the /etc template; it is re-pointed to the runtime copy below. +template_path = '/etc/openclaw/openclaw.json' +runtime_dir = '/tmp/openclaw' +runtime_path = os.path.join(runtime_dir, 'openclaw.json') + +# Prevent TOCTOU: wipe any pre-existing /tmp/openclaw/ that the sandboxed +# agent might have planted before this (root-owned) entrypoint runs. +import shutil as _shutil +try: + _shutil.rmtree(runtime_dir) +except FileNotFoundError: + pass +os.makedirs(runtime_dir, mode=0o700) + +cfg = {} +if os.path.exists(template_path): + with open(template_path) as f: + cfg = json.load(f) + +default_model = os.environ.get('NEMOCLAW_MODEL') +if default_model: + cfg.setdefault('agents', {}).setdefault('defaults', {}).setdefault('model', {})['primary'] = default_model + +chat_ui_url = os.environ.get('CHAT_UI_URL', 'http://127.0.0.1:18789') +parsed = urlparse(chat_ui_url) +chat_origin = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else 'http://127.0.0.1:18789' +local_origin = f'http://127.0.0.1:{os.environ.get("PUBLIC_PORT", "18789")}' +origins = [local_origin] +if chat_origin not in origins: + origins.append(chat_origin) + +gateway = cfg.setdefault('gateway', {}) +gateway['mode'] = 'local' +gateway['controlUi'] = { + 'allowInsecureAuth': True, + 'dangerouslyDisableDeviceAuth': True, + 'allowedOrigins': origins, +} +gateway['trustedProxies'] = ['127.0.0.1', '::1'] + +# Update symlink to point to the mutable runtime copy +symlink = os.path.expanduser('~/.openclaw/openclaw.json') +try: + os.unlink(symlink) +except OSError: + pass +os.symlink(runtime_path, symlink) + +with open(runtime_path, 'w') as f: + json.dump(cfg, f, indent=2) +os.chmod(runtime_path, 0o600) +PYCFG +} + write_auth_profile() { if [ -z "${NVIDIA_API_KEY:-}" ]; then return @@ -44,7 +107,7 @@ print_dashboard_urls() { python3 - <<'PYTOKEN' import json import os -path = 
os.path.expanduser('~/.openclaw/openclaw.json') +path = '/tmp/openclaw/openclaw.json' try: cfg = json.load(open(path)) except Exception: @@ -129,9 +192,11 @@ PYAUTOPAIR echo 'Setting up NemoClaw...' # openclaw doctor --fix and openclaw plugins install already ran at build time -# (Dockerfile Step 28). At runtime they fail with EPERM against the locked +# (Dockerfile). At runtime they fail with EPERM against the locked # /sandbox/.openclaw directory and accomplish nothing. write_auth_profile +export CHAT_UI_URL PUBLIC_PORT +fix_openclaw_config if [ ${#NEMOCLAW_CMD[@]} -gt 0 ]; then exec "${NEMOCLAW_CMD[@]}"