garniergeorges · garniergeorges · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/README.fr.md b/README.fr.md
@@ -37,7 +37,9 @@ Le builder est la seule surface conversationnelle. Les sous-agents sont créés
 | **P3** | Le builder écrit l'`AGENT.md`, demande la permission, lance l'agent dans un container neuf, streame la sortie | ✅ fait |
 | **P4** | Six tools natifs sandboxés sous `/workspace` : Bash, FileWrite, FileRead, FileEdit, Grep, Glob ; tool-loop runtime avec `maxTurns` | ✅ fait |
 | **P6** | Couche skills : format `SKILL.md`, catalogue (built-in + `~/.agent-forge/skills/`), matching des triggers côté serveur, runner à 2 appels (un pour AGENT.md, un pour le run prompt) | ✅ fait |
-| P5 | Sandbox durci + agents persistants (`docker exec`) + extraction d'artefacts vers le host | suivant |
+| **P5.1** | Sandbox Docker durci : user non-root, racine read-only + tmpfs `/tmp`, `--cap-drop=ALL`, `--security-opt=no-new-privileges`, `--network=none` par défaut, caps ressources (mémoire / cpus / pids). Le dialog de permission signale toute relaxation déclarée dans l'AGENT.md. | ✅ fait |
+| P5.2 | Extraction d'artefacts vers le host (`~/.agent-forge/artifacts/<session>/<agent>/`) | suivant |
+| P5.3 | Agents persistants via `docker exec`, slash commands de cycle de vie | |
 | P7 | `TEAM.md` — exécutions multi-agents coordonnées | |
 | P8 | Dashboard pixel art (activité agents en direct) | |
 | P9 | ★ POC validé : démo Next.js + Laravel + QA de bout en bout | |
@@ -201,6 +203,41 @@ La skill `scaffold-and-run` est livrée par défaut : elle se déclenche sur des
 - `↑↓ / PgUp / PgDn / g / G` — scroll dans la vue détail
 - `Ctrl+E` — retour live dans le transcript
 
+## Réseau de la sandbox
+
+Deux profils, choisis automatiquement au premier run :
+
+- **proxy** — `--network=none` dans le container ; le host fait tourner un proxy LLM par run sur une socket Unix bind-mountée à `/run/forge/llm.sock`. La clé API n'entre jamais dans le container. **C'est le profil strict que l'on veut.**
+- **bridge** — `--network=bridge` ; le runtime parle directement à l'upstream. La clé API doit être passée en env du container. Moins idéal, mais c'est la seule chose qui marche sous Docker Desktop sur macOS (la couche FUSE des bind-mounts ne supporte pas les sockets Unix).
+
+Le détecteur fait un test au premier run avec un container jetable. Sous Linux, profil `proxy` ; sous Docker Desktop Mac, profil `bridge`. Override possible avec `FORGE_SANDBOX_NETWORK=proxy|bridge`.
+
+Les autres flags de hardening restent actifs quel que soit le profil : `--cap-drop=ALL`, `--security-opt=no-new-privileges`, `--read-only`, `--user=agent`, caps mémoire / cpus / pids.
+
+## Debug
+
+La TUI possède stdout, donc pas de `console.log` possible — Forge écrit dans un fichier de log structuré.
+
+```bash
+# Désactivé par défaut. Active via une de ces variables :
+FORGE_DEBUG=1 bun run forge                       # niveau debug → ~/.agent-forge/logs/forge-<pid>-<ts>.log
+FORGE_DEBUG=trace bun run forge                   # plus fin (system prompts, réponses LLM complètes)
+FORGE_LOG_FILE=/tmp/forge.log bun run forge       # chemin explicite
+
+# Dans le REPL :
+/log                                              # affiche le chemin du log courant
+```
+
+Le log est en JSON-lines, une entrée par ligne :
+
+```json
+{"t":"2026-04-27T22:30:00.000Z","level":"info","source":"useChat","msg":"send","data":{"prompt":"…"}}
+{"t":"2026-04-27T22:30:01.523Z","level":"info","source":"skillRunner","msg":"runScaffoldAndRun start"}
+{"t":"2026-04-27T22:30:04.812Z","level":"info","source":"dockerLaunch","msg":"launching","data":{"agent":"…","sandboxCfg":{…}}}
+```
+
+Greps utiles : `jq -r 'select(.level=="error")' forge-*.log`, ou `grep '"source":"dockerLaunch"' forge-*.log | jq`.
+
 ## Architecture
 
 ```

diff --git a/README.md b/README.md
@@ -37,7 +37,9 @@ The builder is the only conversational surface. Sub-agents are spawned on demand
 | **P3** | Builder writes `AGENT.md`, asks for permission, launches the agent in a fresh container, streams its output | ✅ done |
 | **P4** | Six native tools sandboxed under `/workspace` : Bash, FileWrite, FileRead, FileEdit, Grep, Glob ; runtime tool-loop with `maxTurns` | ✅ done |
 | **P6** | Skill layer : `SKILL.md` format, catalog (built-in + `~/.agent-forge/skills/`), server-side trigger matching, two-call runner (one for AGENT.md, one for the run prompt) | ✅ done |
-| P5 | Hardened sandbox + persistent agents (`docker exec`) + artifact extraction back to host | next |
+| **P5.1** | Hardened Docker sandbox : non-root user, read-only root + tmpfs `/tmp`, `--cap-drop=ALL`, `--security-opt=no-new-privileges`, `--network=none` by default, resource caps (memory / cpus / pids). Permission dialog flags any AGENT.md relaxation. | ✅ done |
+| P5.2 | Artifact extraction back to host (`~/.agent-forge/artifacts/<session>/<agent>/`) | next |
+| P5.3 | Persistent agents via `docker exec`, lifecycle slash commands | |
 | P7 | `TEAM.md` — coordinated multi-agent runs | |
 | P8 | Pixel-art dashboard (live agent activity) | |
 | P9 | ★ POC validated : Next.js + Laravel + QA demo end-to-end | |
@@ -201,6 +203,41 @@ Built-in `scaffold-and-run` ships today : it triggers on words like `audite`, `t
 - `↑↓ / PgUp / PgDn / g / G` — scroll inside the detail view
 - `Ctrl+E` — return the chat transcript to live mode
 
+## Sandbox networking
+
+Two profiles, picked automatically at first run :
+
+- **proxy** — `--network=none` inside the container ; the host runs a per-run LLM proxy on a Unix socket bind-mounted at `/run/forge/llm.sock`. The container never sees the API key. **This is the strict, secure profile we want.**
+- **bridge** — `--network=bridge` ; the runtime talks to the upstream directly. The API key has to be forwarded into the container env. Less ideal, but it's the only thing that works under Docker Desktop on macOS (the FUSE bind-mount layer doesn't support Unix sockets).
+
+The detector probes once at startup with a tiny throwaway container. Runs on Linux pick `proxy` ; runs on Docker Desktop Mac pick `bridge`. Override with `FORGE_SANDBOX_NETWORK=proxy|bridge`.
+
+The other hardening flags stay on regardless of profile : `--cap-drop=ALL`, `--security-opt=no-new-privileges`, `--read-only`, `--user=agent`, memory / cpus / pids caps.
+
+## Debugging
+
+The TUI owns stdout, so we never `console.log` — instead Forge ships a structured file logger.
+
+```bash
+# Off by default. Either environment variable turns it on :
+FORGE_DEBUG=1 bun run forge                       # debug level → ~/.agent-forge/logs/forge-<pid>-<ts>.log
+FORGE_DEBUG=trace bun run forge                   # finer (system prompts, full LLM replies)
+FORGE_LOG_FILE=/tmp/forge.log bun run forge       # explicit path
+
+# Inside the REPL :
+/log                                              # prints the current log path
+```
+
+The log is JSON-lines, one entry per line :
+
+```json
+{"t":"2026-04-27T22:30:00.000Z","level":"info","source":"useChat","msg":"send","data":{"prompt":"…"}}
+{"t":"2026-04-27T22:30:01.523Z","level":"info","source":"skillRunner","msg":"runScaffoldAndRun start"}
+{"t":"2026-04-27T22:30:04.812Z","level":"info","source":"dockerLaunch","msg":"launching","data":{"agent":"…","sandboxCfg":{…}}}
+```
+
+Useful greps : `jq -r 'select(.level=="error")' forge-*.log`, or `grep '"source":"dockerLaunch"' forge-*.log | jq`.
+
 ## Architecture
 
 ```

diff --git a/docker/base.Dockerfile b/docker/base.Dockerfile
@@ -1,7 +1,18 @@
 # Agent Forge — base image
 # Minimal sandbox for simple agents (read, edit, run shell).
 #
-# Status : POC, not built yet. This file is a sketch of the target.
+# Hardening (P5) :
+#   - non-root user `agent` (uid 1000) is the default at runtime
+#   - /workspace is the only writable dir owned by `agent`
+#   - DockerLaunch passes --read-only on the root FS, plus a tmpfs
+#     mount on /tmp so package installers and test runners that
+#     write under /tmp keep working without granting write to the
+#     image
+#   - --cap-drop=ALL --security-opt=no-new-privileges
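+#   - --network=none under the default `proxy` profile — agents that
+#     need an LLM call go through the host's per-run LLM proxy,
+#     bind-mounted as a Unix socket at /run/forge/llm.sock. The host
+#     injects the API key and forwards only /v1/chat/completions to
+#     the real upstream. Where Unix-socket bind mounts do not work
+#     (Docker Desktop on macOS), the `bridge` profile is used instead.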
 
 FROM debian:bookworm-slim
 
@@ -24,8 +35,8 @@ RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
 
 # ─── Non-root user ───────────────────────────────────────────────
 RUN useradd -m -s /bin/bash agent \
-    && mkdir -p /workspace \
-    && chown agent:agent /workspace
+    && mkdir -p /workspace /run/forge \
+    && chown agent:agent /workspace /run/forge
 USER agent
 WORKDIR /workspace
 

diff --git a/packages/cli/src/commands.ts b/packages/cli/src/commands.ts
@@ -8,6 +8,7 @@ import {
   loadSkillCatalog,
   setProviderOverride,
 } from '@agent-forge/core/builder'
+import { currentLogPath, isLoggingEnabled } from '@agent-forge/core/log'
 import {
   type ForgeConfig,
   type Lang,
@@ -61,6 +62,11 @@ function helpLines(lang: Lang): string[] {
         ? 'liste les skills disponibles'
         : 'list available skills'
     }`,
+    `  /log                ${
+      lang === 'fr'
+        ? "affiche le chemin du fichier de log courant (FORGE_DEBUG=1 pour activer)"
+        : 'show the current log file path (FORGE_DEBUG=1 to enable)'
+    }`,
   ]
 }
 
@@ -187,6 +193,24 @@ export function runCommand(
       return { lines }
     }
 
+    case '/log': {
+      if (!isLoggingEnabled()) {
+        return {
+          lines: [
+            lang === 'fr'
+              ? 'logging désactivé — relance avec FORGE_DEBUG=1 (ou FORGE_LOG_FILE=/path)'
+              : 'logging disabled — restart with FORGE_DEBUG=1 (or FORGE_LOG_FILE=/path)',
+          ],
+        }
+      }
+      const path = currentLogPath()
+      return {
+        lines: [
+          lang === 'fr' ? `log courant : ${path ?? '?'}` : `current log : ${path ?? '?'}`,
+        ],
+      }
+    }
+
     case '/skills': {
       const catalog = loadSkillCatalog()
       if (catalog.skills.length === 0) {

diff --git a/packages/cli/src/components/ConfirmAction.tsx b/packages/cli/src/components/ConfirmAction.tsx
@@ -5,15 +5,120 @@
 // Style : framed (double orange border), distinct from the chat flow, with
 // an explicit question, a metadata block, a preview, and three "button-
 // like" choices.
+//
+// P5 : when the action involves an AGENT.md (a write that creates it,
+// or a run of an agent already persisted under ~/.agent-forge) whose
+// sandbox section deviates from the strict defaults (network=bridge,
+// readOnlyRoot=false, elevated resources), we surface a list of
+// warnings between the metadata and the preview so the user notices
+// before approving.
 
+import { existsSync, readFileSync } from 'node:fs'
+import { homedir } from 'node:os'
+import { join } from 'node:path'
 import { Box, Text, useInput, useStdin } from 'ink'
 import React, { useState } from 'react'
+import {
+  type AppliedSandboxConfig,
+  SANDBOX_DEFAULTS,
+  applySandboxDefaults,
+  parseAgentMd,
+} from '@agent-forge/core/types'
 import type { Action } from '../actions/types.ts'
 import { useLanguage } from '../i18n/LanguageContext.tsx'
 import { C } from '../theme/colors.ts'
 
 const PREVIEW_LINES = 6
 
+type SandboxWarning = { // one deviation from SANDBOX_DEFAULTS, rendered in the permission dialog
+  field: string // name of the sandbox key that differs; also used as the React list key
+  detail: string // localized, human-readable description of the deviation
+}
+
+/**
+ * Lists every field of the applied sandbox config that differs from
+ * `SANDBOX_DEFAULTS`, with a message localized for `lang`.
+ *
+ * The comparison is plain inequality, so any deviation is reported —
+ * including one that is stricter than the default (e.g. a lower
+ * memory cap).
+ *
+ * @param cfg - Sandbox config after defaults have been applied.
+ * @param lang - Language of the `detail` messages.
+ * @returns One warning per differing field, in a fixed order
+ *   (network, readOnlyRoot, user, memory, cpus, pidsLimit) ; empty
+ *   when `cfg` matches the defaults.
+ */
+function diffAgainstDefaults(
+  cfg: AppliedSandboxConfig,
+  lang: 'en' | 'fr',
+): SandboxWarning[] {
+  const t = lang === 'fr'
+  const out: SandboxWarning[] = []
+  if (cfg.network !== SANDBOX_DEFAULTS.network) {
+    out.push({
+      field: 'network',
+      detail: t
+        ? `réseau ouvert (${cfg.network}) — l'agent pourra accéder à internet`
+        : `network open (${cfg.network}) — agent will have internet access`,
+    })
+  }
+  if (cfg.readOnlyRoot !== SANDBOX_DEFAULTS.readOnlyRoot) {
+    out.push({
+      field: 'readOnlyRoot',
+      detail: t
+        ? "racine en écriture — l'agent peut modifier le système de fichiers de l'image"
+        : 'writable root — agent can mutate the image filesystem',
+    })
+  }
+  if (cfg.user !== SANDBOX_DEFAULTS.user) {
+    out.push({
+      field: 'user',
+      detail: t
+        ? `utilisateur "${cfg.user}" au lieu de "${SANDBOX_DEFAULTS.user}"`
+        : `user "${cfg.user}" instead of "${SANDBOX_DEFAULTS.user}"`,
+    })
+  }
+  if (cfg.memory !== SANDBOX_DEFAULTS.memory) {
+    out.push({
+      field: 'memory',
+      detail: t
+        ? `mémoire ${cfg.memory} (défaut ${SANDBOX_DEFAULTS.memory})`
+        : `memory ${cfg.memory} (default ${SANDBOX_DEFAULTS.memory})`,
+    })
+  }
+  if (cfg.cpus !== SANDBOX_DEFAULTS.cpus) {
+    out.push({
+      field: 'cpus',
+      // Numeric fields are stringified explicitly in both languages,
+      // matching the pidsLimit messages below.
+      detail: t
+        ? `CPU ${cfg.cpus.toString()} (défaut ${SANDBOX_DEFAULTS.cpus.toString()})`
+        : `cpus ${cfg.cpus.toString()} (default ${SANDBOX_DEFAULTS.cpus.toString()})`,
+    })
+  }
+  if (cfg.pidsLimit !== SANDBOX_DEFAULTS.pidsLimit) {
+    out.push({
+      field: 'pidsLimit',
+      detail: t
+        ? `pids ${cfg.pidsLimit.toString()} (défaut ${SANDBOX_DEFAULTS.pidsLimit.toString()})`
+        : `pids ${cfg.pidsLimit.toString()} (default ${SANDBOX_DEFAULTS.pidsLimit.toString()})`,
+    })
+  }
+  return out
+}
+
+// Resolve the sandbox config from whichever source the action carries :
+// - write action whose target file is named exactly AGENT.md → parse
+//   the proposed content directly (the file may not exist on disk yet)
+// - run action → read the persisted AGENT.md from
+//   ~/.agent-forge/agents/<agent>/
+// Returns null if no AGENT.md is involved, the persisted file is
+// missing, or reading / parsing throws (best-effort : the dialog then
+// shows no sandbox warnings).
+function sandboxFor(action: Action): AppliedSandboxConfig | null {
+  try {
+    // Match on the last path segment (either separator) so that a file
+    // such as `MY_AGENT.md` is not mistaken for an agent definition.
+    if (
+      action.kind === 'write' &&
+      /(?:^|[\\/])AGENT\.md$/.test(action.path)
+    ) {
+      const parsed = parseAgentMd(action.content)
+      return applySandboxDefaults(parsed.meta.sandbox)
+    }
+    if (action.kind === 'run') {
+      const path = join(
+        homedir(),
+        '.agent-forge',
+        'agents',
+        action.agent,
+        'AGENT.md',
+      )
+      if (!existsSync(path)) return null
+      const parsed = parseAgentMd(readFileSync(path, 'utf8'))
+      return applySandboxDefaults(parsed.meta.sandbox)
+    }
+  } catch {
+    return null
+  }
+  return null
+}
+
 type Strings = {
   title: string
   questionWrite: string
@@ -29,6 +134,7 @@ type Strings = {
   collapse: string
   actionWrite: string
   actionRun: string
+  warningHeader: string
 }
 
 const STRINGS: Record<'en' | 'fr', Strings> = {
@@ -47,6 +153,7 @@ const STRINGS: Record<'en' | 'fr', Strings> = {
     collapse: 'Collapse preview',
     actionWrite: 'create file',
     actionRun: 'launch agent',
+    warningHeader: 'Sandbox relaxations applied to this agent :',
   },
   fr: {
     title: 'AUTORISATION REQUISE',
@@ -63,6 +170,7 @@ const STRINGS: Record<'en' | 'fr', Strings> = {
     collapse: "Réduire l’aperçu",
     actionWrite: 'créer un fichier',
     actionRun: 'lancer un agent',
+    warningHeader: 'Relaxations sandbox appliquées à cet agent :',
   },
 }
 
@@ -186,6 +294,30 @@ export function ConfirmAction({
         )}
       </Box>
 
+      {/* Sandbox warnings : surface every relaxation vs the strict
+          defaults so the user can spot e.g. network=bridge before
+          approving. Only renders when at least one relaxation is
+          declared in the AGENT.md sandbox section. */}
+      {(() => {
+        const cfg = sandboxFor(action)
+        if (!cfg) return null
+        const warnings = diffAgainstDefaults(cfg, lang ?? 'en')
+        if (warnings.length === 0) return null
+        return (
+          <Box flexDirection="column" marginTop={1}>
+            <Text color={C.red} bold>
+              {`  ▲ ${s.warningHeader}`}
+            </Text>
+            {warnings.map((w) => (
+              <Box key={w.field}>
+                <Text color={C.red}>{`    · ${w.field}`}</Text>
+                <Text color={C.greyLight}>{` — ${w.detail}`}</Text>
+              </Box>
+            ))}
+          </Box>
+        )
+      })()}
+
       {/* Preview */}
       <Box flexDirection="column" marginTop={1}>
         <Text color={C.grey} dimColor>