From 3391e84abee8ac666eedeadfaf32b1c16bf47261 Mon Sep 17 00:00:00 2001 From: Prince Date: Wed, 24 Jun 2026 18:33:33 +0530 Subject: [PATCH 1/3] fix - Preserve unmapped Hermes tool args in intent hermes_args for validation --- .../adapter/src/hermes_adapter/mapper.py | 117 +++++++++++------- .../hermes/adapter/tests/test_adapter.py | 111 +++++++++++++++++ 2 files changed, 182 insertions(+), 46 deletions(-) diff --git a/integrations/hermes/adapter/src/hermes_adapter/mapper.py b/integrations/hermes/adapter/src/hermes_adapter/mapper.py index 537d1e5..3b38a40 100644 --- a/integrations/hermes/adapter/src/hermes_adapter/mapper.py +++ b/integrations/hermes/adapter/src/hermes_adapter/mapper.py @@ -52,6 +52,30 @@ def _require_str(args: dict[str, Any], key: str) -> str: return value.strip() +def hermes_args_remainder( + args: dict[str, Any], + mapped_keys: frozenset[str], +) -> dict[str, Any]: + """Hermes args not already promoted or absorbed by the dedicated mapper.""" + skip = mapped_keys | {"reason"} + return { + key: value + for key, value in args.items() + if key not in skip and value is not None + } + + +def _attach_hermes_args( + intent: IntentDict, + args: dict[str, Any], + mapped_keys: frozenset[str], +) -> IntentDict: + remainder = hermes_args_remainder(args, mapped_keys) + if remainder: + intent["hermes_args"] = remainder + return intent + + def _host_file_intent( *, action: str, @@ -85,14 +109,13 @@ def _host_file_intent( def map_terminal(args: dict[str, Any]) -> list[IntentDict]: command = _require_str(args, "command") reason = validate_reason(args.get("reason")) - return [ - { - "action": "RUN_COMMAND", - "command": command, - "reason": reason, - "target": command[:200], - } - ] + intent = { + "action": "RUN_COMMAND", + "command": command, + "reason": reason, + "target": command[:200], + } + return [_attach_hermes_args(intent, args, frozenset({"command"}))] def map_process(args: dict[str, Any]) -> list[IntentDict]: @@ -103,14 +126,14 @@ def map_process(args: 
dict[str, Any]) -> list[IntentDict]: data = args.get("data") data_part = f" data={data!r}" if data is not None else "" command = f"process:{action}{session_part}{data_part}" - return [ - { - "action": "RUN_COMMAND", - "command": command, - "reason": reason, - "target": command[:200], - } - ] + intent = { + "action": "RUN_COMMAND", + "command": command, + "reason": reason, + "target": command[:200], + } + absorbed = frozenset({"action", "session_id", "data"}) + return [_attach_hermes_args(intent, args, absorbed)] def map_write_file(args: dict[str, Any]) -> list[IntentDict]: @@ -119,14 +142,13 @@ def map_write_file(args: dict[str, Any]) -> list[IntentDict]: if not isinstance(content, str): raise ValidationError("Missing or invalid content") reason = validate_reason(args.get("reason")) - return [ - _host_file_intent( - action="WRITE_HOST_FILE", - path=path, - reason=reason, - content=content, - ) - ] + intent = _host_file_intent( + action="WRITE_HOST_FILE", + path=path, + reason=reason, + content=content, + ) + return [_attach_hermes_args(intent, args, frozenset({"path", "content"}))] def _parse_v4a_operations(patch_content: str) -> list[PatchOperation]: @@ -220,9 +242,18 @@ def _patch_operations_manifest(operations: list[PatchOperation]) -> list[dict[st return [{"kind": op.kind, "path": op.path} for op in operations] +def _patch_mapped_arg_keys(mode: str) -> frozenset[str]: + if mode == "patch": + return frozenset({"mode", "patch"}) + return frozenset({"mode", "path", "old_string", "new_string"}) + + def map_patch(args: dict[str, Any]) -> list[IntentDict]: reason = validate_reason(args.get("reason")) mode = args.get("mode", "replace") + if not isinstance(mode, str): + raise ValidationError("Missing or invalid mode") + mapped_arg_keys = _patch_mapped_arg_keys(mode) operations = _extract_patch_operations(args) total = len(operations) is_v4a = mode == "patch" @@ -243,40 +274,34 @@ def map_patch(args: dict[str, Any]) -> list[IntentDict]: } if operation.kind == "delete": 
- intents.append( - _host_file_intent( - action="DELETE_HOST_FILE", - path=operation.path, - reason=op_reason, - irreversible=True, - **patch_context, - ) + intent = _host_file_intent( + action="DELETE_HOST_FILE", + path=operation.path, + reason=op_reason, + irreversible=True, + **patch_context, ) - continue - - intents.append( - _host_file_intent( + else: + intent = _host_file_intent( action="WRITE_HOST_FILE", path=operation.path, reason=op_reason, content=_patch_content_for_operation(args, operation), **patch_context, ) - ) + intents.append(_attach_hermes_args(intent, args, mapped_arg_keys)) return intents def map_generic(tool: str, args: dict[str, Any], *, action: str) -> list[IntentDict]: reason = validate_reason(args.get("reason")) - return [ - { - "action": action, - "reason": reason, - "target": tool, - "hermes_tool": tool, - "hermes_args": {key: value for key, value in args.items() if key != "reason"}, - } - ] + intent: IntentDict = { + "action": action, + "reason": reason, + "target": tool, + "hermes_tool": tool, + } + return [_attach_hermes_args(intent, args, frozenset())] MAPPERS: dict[str, MapperFn | GenericMapperFn] = { diff --git a/integrations/hermes/adapter/tests/test_adapter.py b/integrations/hermes/adapter/tests/test_adapter.py index beb6a36..960dea1 100644 --- a/integrations/hermes/adapter/tests/test_adapter.py +++ b/integrations/hermes/adapter/tests/test_adapter.py @@ -49,6 +49,26 @@ def test_map_terminal(self) -> None: self.assertEqual(intents[0]["action"], "RUN_COMMAND") self.assertEqual(intents[0]["command"], "echo hi") self.assertEqual(intents[0]["reason"], "List files") + self.assertNotIn("hermes_args", intents[0]) + + def test_map_terminal_hermes_args_remainder(self) -> None: + from hermes_adapter.mapper import map_terminal + + intents = map_terminal( + { + "command": "pytest -v", + "background": True, + "timeout": 600, + "reason": "Run tests in background", + } + ) + intent = intents[0] + self.assertEqual(intent["command"], "pytest -v") + 
self.assertNotIn("command", intent["hermes_args"]) + self.assertEqual( + intent["hermes_args"], + {"background": True, "timeout": 600}, + ) def test_map_process(self) -> None: from hermes_adapter.mapper import map_process @@ -58,6 +78,23 @@ def test_map_process(self) -> None: ) self.assertEqual(intents[0]["action"], "RUN_COMMAND") self.assertIn("process:kill", intents[0]["command"]) + self.assertNotIn("hermes_args", intents[0]) + + def test_map_process_hermes_args_remainder(self) -> None: + from hermes_adapter.mapper import map_process + + intents = map_process( + { + "action": "wait", + "session_id": "proc_abc", + "timeout": 120, + "reason": "Wait for job", + } + ) + intent = intents[0] + self.assertNotIn("action", intent.get("hermes_args", {})) + self.assertNotIn("session_id", intent.get("hermes_args", {})) + self.assertEqual(intent["hermes_args"], {"timeout": 120}) def test_map_write_file(self) -> None: from hermes_adapter.mapper import map_write_file @@ -68,6 +105,23 @@ def test_map_write_file(self) -> None: self.assertEqual(intents[0]["action"], "WRITE_HOST_FILE") self.assertEqual(intents[0]["path"], "~/notes.txt") self.assertEqual(intents[0]["content"], "hello") + self.assertNotIn("hermes_args", intents[0]) + + def test_map_write_file_hermes_args_remainder(self) -> None: + from hermes_adapter.mapper import map_write_file + + intents = map_write_file( + { + "path": "~/notes.txt", + "content": "hello", + "cross_profile": True, + "reason": "Save notes", + } + ) + intent = intents[0] + self.assertNotIn("path", intent["hermes_args"]) + self.assertNotIn("content", intent["hermes_args"]) + self.assertEqual(intent["hermes_args"], {"cross_profile": True}) def test_map_patch_replace(self) -> None: from hermes_adapter.mapper import map_patch @@ -86,6 +140,28 @@ def test_map_patch_replace(self) -> None: self.assertEqual(intents[0]["path"], "~/x.py") self.assertNotIn("patch_op_index", intents[0]) self.assertNotIn("patch_operations", intents[0]) + 
self.assertNotIn("hermes_args", intents[0]) + + def test_map_patch_replace_hermes_args_excludes_mapped(self) -> None: + from hermes_adapter.mapper import map_patch + + intents = map_patch( + { + "mode": "replace", + "path": "~/x.py", + "old_string": "a", + "new_string": "b", + "replace_all": True, + "cross_profile": True, + "reason": "Fix typo", + } + ) + remainder = intents[0]["hermes_args"] + self.assertNotIn("path", remainder) + self.assertNotIn("old_string", remainder) + self.assertNotIn("new_string", remainder) + self.assertNotIn("mode", remainder) + self.assertEqual(remainder, {"replace_all": True, "cross_profile": True}) def test_map_patch_v4a_multi_file(self) -> None: from hermes_adapter.mapper import map_patch @@ -119,6 +195,32 @@ def test_map_patch_v4a_multi_file(self) -> None: self.assertIn("~/b.py", intents[1]["content"]) self.assertNotIn("~/a.py", intents[1]["content"]) self.assertIsNot(intents[0]["patch_operations"], intents[1]["patch_operations"]) + self.assertNotIn("hermes_args", intents[0]) + + def test_map_patch_v4a_hermes_args_excludes_patch_blob(self) -> None: + from hermes_adapter.mapper import map_patch + + patch = ( + "*** Begin Patch\n" + "*** Update File: ~/a.py\n" + "@@\n" + "-old\n" + "+new\n" + "*** End Patch" + ) + intents = map_patch( + { + "mode": "patch", + "patch": patch, + "cross_profile": True, + "reason": "Bulk edit", + } + ) + for intent in intents: + remainder = intent["hermes_args"] + self.assertNotIn("patch", remainder) + self.assertNotIn("mode", remainder) + self.assertEqual(remainder, {"cross_profile": True}) def test_map_patch_v4a_scoped_content_excludes_siblings(self) -> None: from hermes_adapter.mapper import map_patch @@ -199,6 +301,15 @@ def test_unknown_tool(self) -> None: with self.assertRaises(ValidationError): map_tool("read_file", {"reason": "noop"}) + def test_hermes_args_remainder_helper(self) -> None: + from hermes_adapter.mapper import hermes_args_remainder + + remainder = hermes_args_remainder( + {"command": 
"x", "background": True, "reason": "why", "timeout": None}, + frozenset({"command"}), + ) + self.assertEqual(remainder, {"background": True}) + def test_map_generic_cronjob(self) -> None: from hermes_adapter.mapper import map_generic, map_tool From 5eec1ef5a54a7ec37326ad40787af384906e945c Mon Sep 17 00:00:00 2001 From: Prince Date: Wed, 24 Jun 2026 18:46:29 +0530 Subject: [PATCH 2/3] Remove process from governed Hermes tool catalog and integration --- docs/NATIVE_KIT_INTEGRATION.md | 9 +- docs/agent-tool-gating.md | 8 +- docs/hermes-intentframe-integration-guide.md | 13 +-- docs/hermes-intentframe-state-report.md | 10 +- docs/hermes-plugin-registration-order.md | 2 +- integrations/hermes/README.md | 8 +- .../adapter/src/hermes_adapter/mapper.py | 19 ---- .../hermes/adapter/tests/test_adapter.py | 26 ------ integrations/hermes/governance/tools.yaml | 8 -- .../hermes/plugin/intentframe-gate/README.md | 2 +- .../intentframe-gate/governance_loader.py | 2 +- .../shared/src/hermes_governance/loader.py | 2 +- .../hermes/shared/tests/test_governance.py | 1 - tests/hermes_adapter/live_fixtures.py | 4 - tests/hermes_adapter/test_live.py | 11 --- tests/hermes_gateway/README.md | 11 +-- tests/hermes_gateway/api_client.py | 91 ------------------- tests/hermes_gateway/test_api_client.py | 18 ---- tests/hermes_gateway/test_gateway_e2e.py | 10 -- .../test_provider_request_contract.py | 7 +- tests/hermes_governance_fixtures.py | 1 - tests/hermes_plugin/test_bridge_gate_live.py | 10 -- tests/hermes_tool_probes.py | 8 -- .../test_scoped_governance_yaml.py | 22 ++--- tests/scripts/test-hermes-gateway-e2e.sh | 2 +- 25 files changed, 44 insertions(+), 261 deletions(-) diff --git a/docs/NATIVE_KIT_INTEGRATION.md b/docs/NATIVE_KIT_INTEGRATION.md index 2dbc7db..344e092 100644 --- a/docs/NATIVE_KIT_INTEGRATION.md +++ b/docs/NATIVE_KIT_INTEGRATION.md @@ -68,7 +68,6 @@ Configured in [`governance/tools.yaml`](../integrations/hermes/governance/tools. 
| Hermes tool | IntentFrame action(s) | Mapper kind | Notes | |-------------|----------------------|-------------|-------| | `terminal` | `RUN_COMMAND` | `terminal` | Full `command_shield` + capability analysis | -| `process` | `RUN_COMMAND` | `process` | Synthetic `process:…` command string | | `write_file` | `WRITE_HOST_FILE` | `write_file` | Path + content | | `patch` | `WRITE_HOST_FILE`, `DELETE_HOST_FILE` | `patch` | Multi-intent from V4A diff | | `cronjob` | `HERMES_CRONJOB` | `generic` | Semantic-only via dynamic bundle (AE + Guardian) | @@ -220,8 +219,6 @@ map into `IntentFrame.data` on the backend. **Hermes mappers:** - `terminal` — passes real shell command → full terminal pipeline -- `process` — builds `process:{action} session_id=…` → structurally valid but **weaker** - semantic analysis than a real shell command ### `WRITE_HOST_FILE` / `DELETE_HOST_FILE` — `HostFilesActionBundle` @@ -423,8 +420,8 @@ Hermes handler so native tools unchanged. ### Mapper quality = security quality -Weak synthetic commands (`process:…`) bypass rich `command_shield` analysis. Prefer -mapping to intents that exercise the bundle’s real evidence pipeline. +Mapping to intents that exercise the bundle’s real evidence pipeline improves policy +effectiveness (e.g. real shell strings for `RUN_COMMAND`). ### Passive reads in bundles @@ -454,7 +451,7 @@ Hermes hybrid. 
Do not conflate “bundle registers on macOS” with “needs mac | Gap | Impact | Mitigation | |-----|--------|------------| -| `process` mapper uses synthetic command | Thinner terminal analysis | Map to richer command representation if Hermes exposes it | +| Ungoverned Hermes `process` tool | Background job manager runs without IF gate | Govern via `terminal` for shell execution; leave `process` ungoverned until a faithful mapper exists | | `patch` validates diff hunks, not merged file | Weaker write content gates | Prefer `write_file` for sensitive paths; tighten path policy | | E2E mostly terminal BLOCK | Host-file regressions possible | Add probes for `/etc` write, `~` delete | | ValidateOnlyAdapter action list | New actions fail silently at executor | Extend `executor.yaml` with each new action | diff --git a/docs/agent-tool-gating.md b/docs/agent-tool-gating.md index 71291c4..4e70e07 100644 --- a/docs/agent-tool-gating.md +++ b/docs/agent-tool-gating.md @@ -213,7 +213,7 @@ coarse. (Grounded in the Hermes registry — see Section 5.) 
| Risk class | Govern (examples) | |------------|-------------------| -| Local shell / process | `terminal`, `process` | +| Local shell | `terminal` | | Local file writes | `write_file`, `patch` | | Code execution | `execute_code` | | Computer / device control | `computer_use`, `ha_call_service` | @@ -254,7 +254,7 @@ tools (reads stay ungoverned): | Hermes tool | IntentFrame action(s) | |-------------|----------------------| -| `terminal`, `process` | `RUN_COMMAND` | +| `terminal` | `RUN_COMMAND` | | `write_file`, `patch` (update/add) | `WRITE_HOST_FILE` | | `patch` (V4A delete) | `DELETE_HOST_FILE` | @@ -298,7 +298,7 @@ Hermes' own toolsets mix reads and writes — proof that you must select by name [`adapter/src/hermes_adapter/mapper.py`](../integrations/hermes/adapter/src/hermes_adapter/mapper.py) dispatches through the governance contract — each entry names a `mapper` kind -(`terminal`, `process`, `write_file`, `patch`): +(`terminal`, `write_file`, `patch`): ```python def map_tool(tool, args): @@ -374,7 +374,7 @@ seeding as `start hermes`. Explicit shell exports (e.g. test harness **Tests:** catalog-wide integration tests generate a throwaway all-governed yaml from the default template via `HERMES_GOVERNANCE_YAML`; they never mutate runtime -user config. Gateway E2E accepts `HERMES_E2E_GOVERNED_TOOLS=terminal,process` to +user config. Gateway E2E accepts `HERMES_E2E_GOVERNED_TOOLS=terminal,write_file` to scope which governed tools get LLM probes (plugin gate only — not Hermes toolsets). 
Before adapter and gateway start, E2E asserts that `os.environ["HERMES_GOVERNANCE_YAML"]` matches the governance snapshot and agrees with `build_gateway_env()` / diff --git a/docs/hermes-intentframe-integration-guide.md b/docs/hermes-intentframe-integration-guide.md index ba29b91..bb22bce 100644 --- a/docs/hermes-intentframe-integration-guide.md +++ b/docs/hermes-intentframe-integration-guide.md @@ -231,15 +231,12 @@ Three independent knobs: Governance template (v1 catalog — four Hermes tools): -```6:34:integrations/hermes/governance/tools.yaml +```6:26:integrations/hermes/governance/tools.yaml tools: terminal: enabled: true action: RUN_COMMAND ... - process: - enabled: true - ... write_file: enabled: true ... @@ -444,7 +441,7 @@ Add entry to `integrations/hermes/governance/tools.yaml`: enabled: true action: WRITE_HOST_FILE # or RUN_COMMAND, DELETE_HOST_FILE, … risk: local_write - mapper: write_file # terminal | process | write_file | patch + mapper: write_file # terminal | write_file | patch blocked_response: generic_json ``` @@ -465,7 +462,7 @@ Runtime copy: `~/.intentframe/integrations/hermes/governance/tools.yaml` (user t Valid mapper kinds (plugin loader): ```python -VALID_MAPPER_KINDS = frozenset({"terminal", "process", "write_file", "patch", "generic"}) +VALID_MAPPER_KINDS = frozenset({"terminal", "write_file", "patch", "generic"}) ``` For `mapper: generic`, no new mapper function is needed — `map_generic` handles all @@ -534,7 +531,7 @@ Every catalog tool must appear in the probe contract (`test_governed_tool_covera | Mapper | Registry | Live adapter + plugin | Gateway LLM E2E | |--------|----------|----------------------|-----------------| -| native (`terminal`, `process`, …) | `GATEWAY_E2E_PROBE_SYMBOLS` | deterministic ALLOW/BLOCK (+ patch semantic) | yes (`test_gateway_e2e.py`) | +| native (`terminal`, …) | `GATEWAY_E2E_PROBE_SYMBOLS` | deterministic ALLOW/BLOCK (+ patch semantic) | yes (`test_gateway_e2e.py`) | | `generic` | derived via `mapper: 
generic` | semantic smoke (ALLOW or BLOCK) | no | Add native probe functions to `tests/hermes_gateway/test_gateway_e2e.py` and register symbols @@ -703,7 +700,7 @@ Expect: `POST /v1/responses ALLOW (attempt 1/3)`, passes 1/2a/2b. RUN_HERMES_GATEWAY_E2E=1 ./tests/scripts/test-hermes-gateway-e2e.sh ``` -Runs ALLOW/BLOCK/semantic probes for `terminal`, `process`, `write_file`, `patch` +Runs ALLOW/BLOCK/semantic probes for `terminal`, `write_file`, `patch` (including V4A delete via `patch`) across greenfield, idempotent, and external-`HERMES_BIN` paths. Generic catalog tools (e.g. `cronjob`) are live-tested only — no gateway LLM probe. diff --git a/docs/hermes-intentframe-state-report.md b/docs/hermes-intentframe-state-report.md index 9f39eb3..0df54cf 100644 --- a/docs/hermes-intentframe-state-report.md +++ b/docs/hermes-intentframe-state-report.md @@ -9,10 +9,10 @@ | Area | Status | |------|--------| -| Governed tool catalog | **5 tools**: `terminal`, `process`, `write_file`, `patch`, `cronjob` (generic) | +| Governed tool catalog | **4 tools**: `terminal`, `write_file`, `patch`, `cronjob` (generic) | | Standalone `delete_file` Hermes tool | **Removed** — delete via `patch` V4A `*** Delete File:` → `DELETE_HOST_FILE` | | Plugin gateway registration | **Fixed** — selective `builtin_preload` before registry snapshot | -| Full gateway E2E (pass 1, 2a, 2b) | **Green** — all four governed tools, probes typically attempt 1 | +| Full gateway E2E (pass 1, 2a, 2b) | **Green** — all governed catalog tools, probes typically attempt 1 | | Hermes version tested | **0.17.0** | The integration is production-shaped: CLI install → start → integrate → gateway, with @@ -65,7 +65,6 @@ IntentFrame gate active; **`enabled: false`** means native Hermes handler withou | Hermes tool | IntentFrame action(s) | Mapper kind | Notes | |-------------|----------------------|-------------|-------| | `terminal` | `RUN_COMMAND` | `terminal` | `terminal_json` blocked shape | -| `process` | `RUN_COMMAND` 
| `process` | Maps `action: run` + `data` to shell command | | `write_file` | `WRITE_HOST_FILE` | `write_file` | Path + content | | `patch` | `WRITE_HOST_FILE`, `DELETE_HOST_FILE` | `patch` | Replace mode + V4A multi-intent | | `cronjob` | `HERMES_CRONJOB` | `generic` | Semantic-only via dynamic bundle; live smoke, no gateway LLM E2E | @@ -91,7 +90,6 @@ At `register()`: 2. **`preload_governed_builtins(governed)`** — selective import from ``builtin_module`` per tool in [`builtin_preload.py`](../integrations/hermes/plugin/intentframe-gate/builtin_preload.py) (from repo ``tools.yaml``): - `terminal` → `tools.terminal_tool` - - `process` → `tools.process_registry` - `write_file`, `patch` → `tools.file_tools` - `cronjob` → `tools.cronjob_tools` 3. **Snapshot loop** — wrap governed entries with `inject_reason()` + `gate_tool_call()`. @@ -153,7 +151,7 @@ or restore defaults. Policy commands apply `agent.json` env via `load_and_activa | **2b** | External `HERMES_BIN` symlink, first-time integrate | With default temp governance yaml, each pass runs ALLOW/BLOCK/semantic probes for native -catalog tools (`terminal`, `process`, `write_file`, `patch`). Generic tools (e.g. `cronjob`) +catalog tools (`terminal`, `write_file`, `patch`). Generic tools (e.g. `cronjob`) are live-tested via adapter/plugin semantic smoke only — no gateway LLM probe. ### E2E harness determinism (2026-06) @@ -175,7 +173,7 @@ See [`tests/hermes_gateway/README.md`](../tests/hermes_gateway/README.md). 
| Change | Rationale | |--------|-----------| -| `builtin_preload` + registry snapshot order | Fix missing `terminal`/`process` in OpenAI Tools | +| `builtin_preload` + registry snapshot order | Fix missing governed tools in OpenAI Tools | | Remove invented `delete_file` catalog entry | Hermes 0.17 has no standalone delete tool; use `patch` V4A | | Patch replace seed + pass markers | Fix flaky ALLOW and Pass 2a overwrite BLOCK | | Hardened block probe prompts | Fix LLM rewriting `/etc/` to sandbox paths | diff --git a/docs/hermes-plugin-registration-order.md b/docs/hermes-plugin-registration-order.md index 93bce65..6b92406 100644 --- a/docs/hermes-plugin-registration-order.md +++ b/docs/hermes-plugin-registration-order.md @@ -421,7 +421,7 @@ repo catalog template (see [`test_builtin_preload.py`](../tests/hermes_plugin/te | Tool | Gateway E2E | Registration note | |------|-------------|-------------------| -| `terminal`, `process`, `write_file`, `patch`, `cronjob` | Probed when in scoped yaml | ``builtin_module`` in repo ``tools.yaml`` — preload + snapshot | +| `terminal`, `write_file`, `patch`, `cronjob` | Probed when in scoped yaml | ``builtin_module`` in repo ``tools.yaml`` — preload + snapshot | Delete coverage uses `patch` V4A `*** Delete File:` ops (maps to `DELETE_HOST_FILE`). 
diff --git a/integrations/hermes/README.md b/integrations/hermes/README.md index 0b8eef1..414d68a 100644 --- a/integrations/hermes/README.md +++ b/integrations/hermes/README.md @@ -59,7 +59,7 @@ Configured in runtime `~/.intentframe/integrations/hermes/governance/tools.yaml` | Hermes tool | IntentFrame action | `enabled: false` effect | |-------------|-------------------|-------------------------| -| `terminal`, `process` | `RUN_COMMAND` | Native Hermes handler, no IF gate | +| `terminal` | `RUN_COMMAND` | Native Hermes handler, no IF gate | | `write_file`, `patch` (update/add) | `WRITE_HOST_FILE` | same | | `patch` (V4A delete) | `DELETE_HOST_FILE` | same | | `cronjob` | `HERMES_CRONJOB` | same (semantic-only via dynamic bundle) | @@ -197,7 +197,7 @@ Export env from `agent.json` (or set in the shell before `start` / `gateway star See [`governance/README.md`](governance/README.md) for dev vs user ownership. -**Native mapper** (terminal, process, write_file, patch): +**Native mapper** (terminal, write_file, patch): 1. Add an entry to `governance/tools.yaml`. 2. Add or reuse a mapper in `adapter/src/hermes_adapter/mapper.py`. @@ -246,7 +246,7 @@ Deterministic adapter + plugin gate probes (no LLM) against a running Hermes sta ./tests/scripts/test-hermes-integration.sh ``` -Covers all catalog tools: native tools (`terminal`, `process`, `write_file`, `patch`) +Covers all catalog tools: native tools (`terminal`, `write_file`, `patch`) including V4A `patch` multi-intent write+delete, plus generic tools (e.g. `cronjob`) via semantic smoke. Also runs `policy show` + `policy reload` (live registry smoke — validates generic action IDs via `agent.json` manifest defaults without exporting @@ -266,7 +266,7 @@ RUN_HERMES_GATEWAY_E2E=1 \ python tests/hermes_gateway/test_gateway_e2e.py ``` -Requires `OPENAI_API_KEY`. Covers ALLOW/BLOCK for native-mapper catalog tools (`terminal`, `process`, +Requires `OPENAI_API_KEY`. 
Covers ALLOW/BLOCK for native-mapper catalog tools (`terminal`, `write_file`, `patch`), including V4A mixed write+delete multi-intent `patch` probes. Generic tools are not exercised via gateway LLM E2E. diff --git a/integrations/hermes/adapter/src/hermes_adapter/mapper.py b/integrations/hermes/adapter/src/hermes_adapter/mapper.py index 3b38a40..b5bf7e4 100644 --- a/integrations/hermes/adapter/src/hermes_adapter/mapper.py +++ b/integrations/hermes/adapter/src/hermes_adapter/mapper.py @@ -118,24 +118,6 @@ def map_terminal(args: dict[str, Any]) -> list[IntentDict]: return [_attach_hermes_args(intent, args, frozenset({"command"}))] -def map_process(args: dict[str, Any]) -> list[IntentDict]: - action = _require_str(args, "action") - reason = validate_reason(args.get("reason")) - session_id = args.get("session_id") - session_part = f" session_id={session_id}" if session_id is not None else "" - data = args.get("data") - data_part = f" data={data!r}" if data is not None else "" - command = f"process:{action}{session_part}{data_part}" - intent = { - "action": "RUN_COMMAND", - "command": command, - "reason": reason, - "target": command[:200], - } - absorbed = frozenset({"action", "session_id", "data"}) - return [_attach_hermes_args(intent, args, absorbed)] - - def map_write_file(args: dict[str, Any]) -> list[IntentDict]: path = _require_str(args, "path") content = args.get("content") @@ -306,7 +288,6 @@ def map_generic(tool: str, args: dict[str, Any], *, action: str) -> list[IntentD MAPPERS: dict[str, MapperFn | GenericMapperFn] = { "terminal": map_terminal, - "process": map_process, "write_file": map_write_file, "patch": map_patch, } diff --git a/integrations/hermes/adapter/tests/test_adapter.py b/integrations/hermes/adapter/tests/test_adapter.py index 960dea1..d344623 100644 --- a/integrations/hermes/adapter/tests/test_adapter.py +++ b/integrations/hermes/adapter/tests/test_adapter.py @@ -70,32 +70,6 @@ def test_map_terminal_hermes_args_remainder(self) -> None: 
{"background": True, "timeout": 600}, ) - def test_map_process(self) -> None: - from hermes_adapter.mapper import map_process - - intents = map_process( - {"action": "kill", "session_id": "abc", "reason": "Stop runaway"} - ) - self.assertEqual(intents[0]["action"], "RUN_COMMAND") - self.assertIn("process:kill", intents[0]["command"]) - self.assertNotIn("hermes_args", intents[0]) - - def test_map_process_hermes_args_remainder(self) -> None: - from hermes_adapter.mapper import map_process - - intents = map_process( - { - "action": "wait", - "session_id": "proc_abc", - "timeout": 120, - "reason": "Wait for job", - } - ) - intent = intents[0] - self.assertNotIn("action", intent.get("hermes_args", {})) - self.assertNotIn("session_id", intent.get("hermes_args", {})) - self.assertEqual(intent["hermes_args"], {"timeout": 120}) - def test_map_write_file(self) -> None: from hermes_adapter.mapper import map_write_file diff --git a/integrations/hermes/governance/tools.yaml b/integrations/hermes/governance/tools.yaml index 2b133d9..8a0b923 100644 --- a/integrations/hermes/governance/tools.yaml +++ b/integrations/hermes/governance/tools.yaml @@ -27,14 +27,6 @@ tools: blocked_response: terminal_json builtin_module: tools.terminal_tool - process: - enabled: true - action: RUN_COMMAND - risk: local_process - mapper: process - blocked_response: generic_json - builtin_module: tools.process_registry - write_file: enabled: true action: WRITE_HOST_FILE diff --git a/integrations/hermes/plugin/intentframe-gate/README.md b/integrations/hermes/plugin/intentframe-gate/README.md index e766b46..8d383e2 100644 --- a/integrations/hermes/plugin/intentframe-gate/README.md +++ b/integrations/hermes/plugin/intentframe-gate/README.md @@ -23,7 +23,7 @@ Gateway startup / preload: [`docs/hermes-plugin-registration-order.md`](../../.. 
Configured in `integrations/hermes/governance/tools.yaml` (runtime copy under `~/.intentframe/integrations/hermes/governance/tools.yaml`): -- `terminal`, `process` → `RUN_COMMAND` +- `terminal` → `RUN_COMMAND` - `write_file`, `patch` (update/add) → `WRITE_HOST_FILE` - `patch` (V4A delete) → `DELETE_HOST_FILE` diff --git a/integrations/hermes/plugin/intentframe-gate/governance_loader.py b/integrations/hermes/plugin/intentframe-gate/governance_loader.py index 0c8ff1d..a3eb67f 100644 --- a/integrations/hermes/plugin/intentframe-gate/governance_loader.py +++ b/integrations/hermes/plugin/intentframe-gate/governance_loader.py @@ -11,7 +11,7 @@ import yaml VALID_BLOCKED_RESPONSES = frozenset({"terminal_json", "generic_json"}) -VALID_MAPPER_KINDS = frozenset({"terminal", "process", "write_file", "patch", "generic"}) +VALID_MAPPER_KINDS = frozenset({"terminal", "write_file", "patch", "generic"}) BUILTIN_MODULE_PREFIX = "tools." diff --git a/integrations/hermes/shared/src/hermes_governance/loader.py b/integrations/hermes/shared/src/hermes_governance/loader.py index b8a5224..981bf05 100644 --- a/integrations/hermes/shared/src/hermes_governance/loader.py +++ b/integrations/hermes/shared/src/hermes_governance/loader.py @@ -12,7 +12,7 @@ import yaml VALID_BLOCKED_RESPONSES = frozenset({"terminal_json", "generic_json"}) -VALID_MAPPER_KINDS = frozenset({"terminal", "process", "write_file", "patch", "generic"}) +VALID_MAPPER_KINDS = frozenset({"terminal", "write_file", "patch", "generic"}) BUILTIN_MODULE_PREFIX = "tools." 
diff --git a/integrations/hermes/shared/tests/test_governance.py b/integrations/hermes/shared/tests/test_governance.py index eacafe9..af7d186 100644 --- a/integrations/hermes/shared/tests/test_governance.py +++ b/integrations/hermes/shared/tests/test_governance.py @@ -47,7 +47,6 @@ def test_builtin_module_on_catalog_tools(self) -> None: with governance_env(): catalog = load_tool_catalog() self.assertEqual(catalog["terminal"].builtin_module, "tools.terminal_tool") - self.assertEqual(catalog["process"].builtin_module, "tools.process_registry") self.assertEqual(catalog["write_file"].builtin_module, "tools.file_tools") self.assertEqual(catalog["patch"].builtin_module, "tools.file_tools") self.assertEqual(catalog["cronjob"].builtin_module, "tools.cronjob_tools") diff --git a/tests/hermes_adapter/live_fixtures.py b/tests/hermes_adapter/live_fixtures.py index ce2efce..a8eb378 100644 --- a/tests/hermes_adapter/live_fixtures.py +++ b/tests/hermes_adapter/live_fixtures.py @@ -15,16 +15,12 @@ patch_replace_block_args, patch_v4a_block_args, patch_v4a_mixed_home_delete_args, - process_allow_args, - process_block_args, write_allow_args, write_block_args, ) _LIVE_MARKER = "live" -PROCESS_ALLOW_ARGS = process_allow_args() -PROCESS_BLOCK_ARGS = process_block_args() WRITE_ALLOW_ARGS = write_allow_args(marker=_LIVE_MARKER) WRITE_BLOCK_ARGS = write_block_args() PATCH_ALLOW_REPLACE_ARGS = patch_replace_allow_args(marker=_LIVE_MARKER) diff --git a/tests/hermes_adapter/test_live.py b/tests/hermes_adapter/test_live.py index 0c1c2e5..8ddc8e2 100644 --- a/tests/hermes_adapter/test_live.py +++ b/tests/hermes_adapter/test_live.py @@ -23,8 +23,6 @@ PATCH_BLOCK_REPLACE_ARGS, PATCH_V4A_BLOCK_ARGS, PATCH_V4A_MIXED_HOME_DELETE_ARGS, - PROCESS_ALLOW_ARGS, - PROCESS_BLOCK_ARGS, WRITE_ALLOW_ARGS, WRITE_BLOCK_ARGS, ) @@ -78,15 +76,6 @@ def test_block_terminal(self) -> None: self.assertEqual(agent_response.get("exit_code"), -1) self.assertEqual(agent_response.get("status"), "blocked") - def 
test_allow_process(self) -> None: - body = self._validate_tool("process", PROCESS_ALLOW_ARGS) - self.assertTrue(body["allowed"]) - - def test_block_process(self) -> None: - body = self._validate_tool("process", PROCESS_BLOCK_ARGS) - self.assertFalse(body["allowed"]) - self.assertIn("agent_response", body) - def test_allow_write_file(self) -> None: body = self._validate_tool("write_file", WRITE_ALLOW_ARGS) self.assertTrue(body["allowed"]) diff --git a/tests/hermes_gateway/README.md b/tests/hermes_gateway/README.md index 8d17de1..a454c92 100644 --- a/tests/hermes_gateway/README.md +++ b/tests/hermes_gateway/README.md @@ -48,7 +48,7 @@ the validate-only gate). It does **not** enable/disable Hermes native tools on | Variable | Effect | |----------|--------| | *(unset)* | Temp yaml with **all** catalog tools IntentFrame-governed | -| `HERMES_E2E_GOVERNED_TOOLS=terminal,process` | Temp yaml with only those tools governed; LLM probes run for that subset | +| `HERMES_E2E_GOVERNED_TOOLS=terminal,write_file` | Temp yaml with only those tools governed; LLM probes run for that subset | | `HERMES_GOVERNANCE_YAML=/path/to/tools.yaml` | Use your yaml as-is; skip auto-generation | Examples: @@ -57,8 +57,8 @@ Examples: # All governed tools (default) RUN_HERMES_GATEWAY_E2E=1 ./tests/scripts/test-hermes-gateway-e2e.sh -# Only terminal + process LLM probes (IntentFrame-governed subset) -HERMES_E2E_GOVERNED_TOOLS=terminal,process \ +# Only terminal + write_file LLM probes (IntentFrame-governed subset) +HERMES_E2E_GOVERNED_TOOLS=terminal,write_file \ RUN_HERMES_GATEWAY_E2E=1 ./tests/scripts/test-hermes-gateway-e2e.sh ``` @@ -84,7 +84,6 @@ yaml all catalog tools are governed; use `HERMES_E2E_GOVERNED_TOOLS` to scope LL | Tool | Deterministic ALLOW probe | Deterministic BLOCK probe | Semantic (ALLOW or BLOCK) | |------|---------------------------|---------------------------|---------------------------| | `terminal` | `printf ''` | `sudo echo …` | — | -| `process` | `action: list` | 
`action: run`, `data` contains `sudo` | — | | `write_file` | path under `~/…` | path under `/etc/…` | — | | `patch` (replace) | replace under `~/…` (harness seeds file with `"a"` first) | replace under `/etc/…` | — | | `patch` (V4A mixed) | — | Update `~/…` + Delete `/etc/…` (fail-closed batch) | Update `~/…` + Delete `~/…` (per-intent AE/Guardian; batch fails if any op BLOCKs) | @@ -102,7 +101,7 @@ IntentFrame** chain, not open-ended agent behavior. Harness setup (not policy we |-----------|-------|-----| | **`seed_patch_replace_target()`** | `tests/hermes_tool_probes.py`; called from `run_patch_replace_allow_with_retries` | `patch replace` requires an existing file with `old_string`; harness writes `"a"` before each attempt | | **Pass-unique markers** | `_pass_marker_slug()` in `test_gateway_e2e.py` → suffix `-p1`, `-p2a`, `-p2b` | Pass 2a reuses Pass 1 sandbox; unique markers avoid overwrite BLOCK on stale files | -| **Explicit block prompts** | `run_write_file_block_once`, `run_process_block_once`, `run_patch_replace_block_once`, `run_patch_v4a_mixed_block_once` in `api_client.py` | Keep `/etc/…`, `sudo`, and V4A delete paths verbatim; one tool call; no rewrite to `~/` or `/tmp` | +| **Explicit block prompts** | `run_write_file_block_once`, `run_patch_replace_block_once`, `run_patch_v4a_mixed_block_once` in `api_client.py` | Keep `/etc/…`, `sudo`, and V4A delete paths verbatim; one tool call; no rewrite to `~/` or `/tmp` | Block assertions still require blocked tool output and the expected path/command shape. Allow assertions still fail on blocked output. @@ -151,7 +150,7 @@ When `/v1/responses` reaches the plugin and adapter: 1. Adapter handshake on first validate 2. 
One or more intent evaluations per governed tool call (multi-intent `patch` emits several) -With the default all-governed yaml, a full pass runs many probes (terminal, process, +With the default all-governed yaml, a full pass runs many probes (terminal, write_file, patch replace + block + V4A semantic + V4A block), so expect **multiple** intent blocks in `intentframe-server.log` — not a fixed count of four. diff --git a/tests/hermes_gateway/api_client.py b/tests/hermes_gateway/api_client.py index 0258f8a..37cce95 100644 --- a/tests/hermes_gateway/api_client.py +++ b/tests/hermes_gateway/api_client.py @@ -20,8 +20,6 @@ patch_replace_block_args, patch_v4a_block_args, patch_v4a_mixed_home_delete_args, - process_allow_args, - process_block_args, seed_patch_replace_target, write_block_args, ) @@ -449,95 +447,6 @@ def _format_tool_prompt(tool_name: str, args: dict[str, str], *, attempt: int | return "\n".join(lines) -def assert_process_allow(body: dict[str, Any]) -> ToolCall: - calls = extract_tool_calls(body, tool_name="process") - if not calls: - raise AssertionError(f"No process function_call in response: {json.dumps(body)[:2000]}") - - call = calls[-1] - action = str(call.arguments.get("action", "")).lower() - if action != "list": - raise AssertionError(f"Expected process action 'list', got: {call.arguments!r}") - if _looks_blocked(call.output): - raise AssertionError(f"Expected allow but got blocked output: {call.output!r}") - return call - - -def assert_process_block(body: dict[str, Any]) -> ToolCall: - calls = extract_tool_calls(body, tool_name="process") - if not calls: - raise AssertionError(f"No process function_call in block response: {json.dumps(body)[:2000]}") - - call = calls[-1] - action = str(call.arguments.get("action", "")).lower() - data = str(call.arguments.get("data", "")).lower() - if action != "run" or "sudo" not in data: - raise AssertionError(f"Block test process args unexpected: {call.arguments!r}") - if not _looks_blocked(call.output): - raise 
AssertionError(f"Expected blocked tool output, got: {call.output!r}") - return call - - -def run_process_allow_with_retries( - *, - host: str, - port: int, - api_key: str, - attempts: int = 3, -) -> ToolCall: - from cli_runner import step - - instructions = _single_tool_instructions("process") - last_error: Exception | None = None - for attempt in range(1, attempts + 1): - step(f"POST /v1/responses process ALLOW (attempt {attempt}/{attempts})") - args = process_allow_args(reason=f"IntentFrame E2E process allow {attempt}") - prompt = _format_tool_prompt("process", args, attempt=attempt) - try: - body = post_responses( - host=host, - port=port, - api_key=api_key, - prompt=prompt, - instructions=instructions, - ) - return assert_process_allow(body) - except (AssertionError, RuntimeError) as exc: - last_error = exc - raise AssertionError(f"process allow test failed after {attempts} attempts: {last_error}") from last_error - - -def run_process_block_once(*, host: str, port: int, api_key: str) -> ToolCall: - from cli_runner import step - - step("POST /v1/responses process BLOCK (sudo in mapped RUN_COMMAND)") - args = process_block_args(reason="IntentFrame E2E process block test") - data = args["data"] - instructions = ( - "You are running an automated integration test. " - "You MUST call the process tool exactly once and no other tools. " - f"The data argument MUST remain exactly {data!r}. " - "Do NOT remove or substitute the sudo command. " - "Do NOT make a second process call if the first is blocked." - ) - prompt = ( - "Call the process tool exactly once with these arguments (copy data verbatim):\n" - f"- action: {args['action']!r}\n" - f"- data: {data}\n" - f"- reason: {args['reason']!r}\n" - f"\nThe data MUST stay {data!r}. Do not change it.\n" - "Do not explain; just run the tool once." 
- ) - body = post_responses( - host=host, - port=port, - api_key=api_key, - prompt=prompt, - instructions=instructions, - ) - return assert_process_block(body) - - def _patch_text(call: ToolCall) -> str: return str(call.arguments.get("patch", "")) diff --git a/tests/hermes_gateway/test_api_client.py b/tests/hermes_gateway/test_api_client.py index a34ee1f..debca3b 100644 --- a/tests/hermes_gateway/test_api_client.py +++ b/tests/hermes_gateway/test_api_client.py @@ -18,8 +18,6 @@ assert_patch_replace_block, assert_patch_v4a_mixed_block, assert_patch_v4a_mixed_home_delete_semantic, - assert_process_allow, - assert_process_block, ) @@ -50,22 +48,6 @@ def test_assert_terminal_block_rejects_generic_json(self) -> None: with self.assertRaises(AssertionError): assert_block_response(body) - def test_assert_process_allow(self) -> None: - body = _response( - tool="process", - arguments={"action": "list", "reason": "ok"}, - output='{"status": "ok"}', - ) - assert_process_allow(body) - - def test_assert_process_block(self) -> None: - body = _response( - tool="process", - arguments={"action": "run", "data": "sudo rm -rf /", "reason": "bad"}, - output='{"status": "blocked", "error": "policy"}', - ) - assert_process_block(body) - def test_assert_patch_replace_allow(self) -> None: body = _response( tool="patch", diff --git a/tests/hermes_gateway/test_gateway_e2e.py b/tests/hermes_gateway/test_gateway_e2e.py index 63c92df..571c2d2 100644 --- a/tests/hermes_gateway/test_gateway_e2e.py +++ b/tests/hermes_gateway/test_gateway_e2e.py @@ -40,8 +40,6 @@ run_patch_replace_block_once, run_patch_v4a_mixed_block_once, run_patch_v4a_mixed_home_delete_semantic_with_retries, - run_process_allow_with_retries, - run_process_block_once, run_write_file_allow_with_retries, run_write_file_block_once, wait_health, @@ -211,14 +209,6 @@ def _run_api_allow_block(env: IsolatedEnv, *, label: str) -> None: step(f"{label}: POST /v1/responses BLOCK (policy should deny sudo)") run_block_once(host=API_HOST, 
port=env.api_port, api_key=env.api_key) - if "process" in governed: - probes_ran.add("process") - step(f"{label}: POST /v1/responses process ALLOW") - run_process_allow_with_retries(host=API_HOST, port=env.api_port, api_key=env.api_key) - - step(f"{label}: POST /v1/responses process BLOCK (sudo in mapped RUN_COMMAND)") - run_process_block_once(host=API_HOST, port=env.api_port, api_key=env.api_key) - if "write_file" in governed: probes_ran.add("write_file") write_marker = f"intentframe-hermes-write-ok-{env.run_id}-{pass_slug}" diff --git a/tests/hermes_gateway/test_provider_request_contract.py b/tests/hermes_gateway/test_provider_request_contract.py index 92e89ff..8fe22d1 100644 --- a/tests/hermes_gateway/test_provider_request_contract.py +++ b/tests/hermes_gateway/test_provider_request_contract.py @@ -46,23 +46,22 @@ def _tool(name: str, *, reason_required: bool = True) -> dict[str, object]: class ProviderRequestContractTests(unittest.TestCase): def test_parse_provider_tools(self) -> None: - body = {"tools": [_tool("terminal"), _tool("process")]} + body = {"tools": [_tool("terminal"), _tool("write_file")]} by_name = parse_provider_tools(body) - self.assertEqual(set(by_name), {"terminal", "process"}) + self.assertEqual(set(by_name), {"terminal", "write_file"}) def test_assert_provider_tools_surface_passes(self) -> None: body = { "model": "gpt-4o-mini", "tools": [ _tool("terminal"), - _tool("process"), _tool("write_file"), _tool("patch"), ], } assert_provider_tools_surface( body, - frozenset({"terminal", "process", "write_file", "patch"}), + frozenset({"terminal", "write_file", "patch"}), expected_model="gpt-4o-mini", ) diff --git a/tests/hermes_governance_fixtures.py b/tests/hermes_governance_fixtures.py index c733f83..8ff8da3 100644 --- a/tests/hermes_governance_fixtures.py +++ b/tests/hermes_governance_fixtures.py @@ -27,7 +27,6 @@ GATEWAY_E2E_PROBE_SYMBOLS: dict[str, frozenset[str]] = { "terminal": frozenset({"run_allow_with_retries", "run_block_once"}), - 
"process": frozenset({"run_process_allow_with_retries", "run_process_block_once"}), "write_file": frozenset( {"run_write_file_allow_with_retries", "run_write_file_block_once"} ), diff --git a/tests/hermes_plugin/test_bridge_gate_live.py b/tests/hermes_plugin/test_bridge_gate_live.py index 2409ce9..31a3edc 100644 --- a/tests/hermes_plugin/test_bridge_gate_live.py +++ b/tests/hermes_plugin/test_bridge_gate_live.py @@ -22,8 +22,6 @@ PATCH_BLOCK_REPLACE_ARGS, PATCH_V4A_BLOCK_ARGS, PATCH_V4A_MIXED_HOME_DELETE_ARGS, - PROCESS_ALLOW_ARGS, - PROCESS_BLOCK_ARGS, WRITE_ALLOW_ARGS, WRITE_BLOCK_ARGS, ) @@ -88,14 +86,6 @@ def test_block_terminal(self) -> None: self.assertEqual(body["status"], "blocked") delegate.assert_not_called() - def test_allow_process(self) -> None: - delegate = MagicMock(return_value='{"status": "ok"}') - self._assert_allowed("process", PROCESS_ALLOW_ARGS, delegate=delegate) - - def test_block_process(self) -> None: - delegate = MagicMock() - self._assert_blocked("process", PROCESS_BLOCK_ARGS, delegate=delegate) - def test_allow_write_file(self) -> None: delegate = MagicMock(return_value='{"status": "ok"}') self._assert_allowed("write_file", WRITE_ALLOW_ARGS, delegate=delegate) diff --git a/tests/hermes_tool_probes.py b/tests/hermes_tool_probes.py index 5d60074..55be7be 100644 --- a/tests/hermes_tool_probes.py +++ b/tests/hermes_tool_probes.py @@ -5,14 +5,6 @@ from pathlib import Path -def process_allow_args(*, reason: str = "Live process allow test") -> dict[str, str]: - return {"action": "list", "reason": reason} - - -def process_block_args(*, reason: str = "E2E process run probe") -> dict[str, str]: - return {"action": "run", "data": "sudo rm -rf /", "reason": reason} - - def write_allow_args(*, marker: str, reason: str | None = None) -> dict[str, str]: return { "path": f"~/intentframe-e2e-{marker}.txt", diff --git a/tests/intentframe_integrations/test_scoped_governance_yaml.py b/tests/intentframe_integrations/test_scoped_governance_yaml.py index 
09310d4..6a84294 100644 --- a/tests/intentframe_integrations/test_scoped_governance_yaml.py +++ b/tests/intentframe_integrations/test_scoped_governance_yaml.py @@ -51,11 +51,11 @@ def test_all_governed_when_subset_unspecified(self) -> None: shutil.rmtree(path.parent, ignore_errors=True) def test_only_listed_tools_governed(self) -> None: - path = write_scoped_governance_yaml(governed_tools=frozenset({"terminal", "process"})) + path = write_scoped_governance_yaml(governed_tools=frozenset({"terminal", "patch"})) try: catalog = load_tool_catalog(str(path)) governed = load_governed_tools(str(path)) - self.assertEqual(set(governed), {"terminal", "process"}) + self.assertEqual(set(governed), {"terminal", "patch"}) self.assertFalse(catalog["write_file"].enabled) finally: shutil.rmtree(path.parent, ignore_errors=True) @@ -86,7 +86,7 @@ def test_setup_scoped_governed_tools(self) -> None: self.assertEqual(set(governed), {"terminal"}) def test_respects_existing_hermes_governance_yaml(self) -> None: - path = write_scoped_governance_yaml(governed_tools=frozenset({"process"})) + path = write_scoped_governance_yaml(governed_tools=frozenset({"patch"})) try: os.environ["HERMES_GOVERNANCE_YAML"] = str(path) result = setup_e2e_governance_yaml() @@ -96,13 +96,13 @@ def test_respects_existing_hermes_governance_yaml(self) -> None: def test_parse_governed_tools_env(self) -> None: self.assertEqual( - parse_governed_tools_env(" terminal , process "), - frozenset({"terminal", "process"}), + parse_governed_tools_env(" terminal , write_file "), + frozenset({"terminal", "write_file"}), ) def test_log_e2e_governance_reports_scoped_tools(self) -> None: - governed = frozenset({"terminal", "process"}) - os.environ["HERMES_E2E_GOVERNED_TOOLS"] = "terminal,process" + governed = frozenset({"terminal", "write_file"}) + os.environ["HERMES_E2E_GOVERNED_TOOLS"] = "terminal,write_file" setup_e2e_governance_yaml() messages: list[str] = [] @@ -113,14 +113,14 @@ def 
test_log_e2e_governance_reports_scoped_tools(self) -> None: joined = "\n".join(messages) self.assertIn("HERMES_E2E_GOVERNED_TOOLS", joined) self.assertIn("terminal: RUN", joined) - self.assertIn("write_file: SKIP", joined) + self.assertIn("patch: SKIP", joined) self.assertIn("cronjob: SKIP", joined) def test_assert_e2e_governance_snapshot_rejects_mismatch(self) -> None: path = write_scoped_governance_yaml(governed_tools=frozenset({"terminal"})) try: os.environ["HERMES_GOVERNANCE_YAML"] = str(path) - os.environ["HERMES_E2E_GOVERNED_TOOLS"] = "process" + os.environ["HERMES_E2E_GOVERNED_TOOLS"] = "write_file" snapshot = load_e2e_governance_snapshot() with self.assertRaises(AssertionError): assert_e2e_governance_snapshot(snapshot) @@ -133,14 +133,14 @@ def test_format_governance_snapshot_includes_ungoverned(self) -> None: os.environ["HERMES_GOVERNANCE_YAML"] = str(path) text = format_governance_snapshot(load_e2e_governance_snapshot()) self.assertIn("not governed", text) - self.assertIn("process", text) + self.assertIn("write_file", text) finally: shutil.rmtree(path.parent, ignore_errors=True) def test_format_gateway_probe_plan(self) -> None: text = format_gateway_probe_plan(frozenset({"terminal"})) self.assertIn("terminal: RUN", text) - self.assertIn("process: SKIP", text) + self.assertIn("write_file: SKIP", text) self.assertIn("cronjob: SKIP", text) def test_assert_governance_env_contract(self) -> None: diff --git a/tests/scripts/test-hermes-gateway-e2e.sh b/tests/scripts/test-hermes-gateway-e2e.sh index c0ae5b9..51cd0b3 100755 --- a/tests/scripts/test-hermes-gateway-e2e.sh +++ b/tests/scripts/test-hermes-gateway-e2e.sh @@ -7,7 +7,7 @@ # IntentFrame governance (temp throwaway yaml; does not touch real ~/.intentframe runtime): # Default: all catalog tools IntentFrame-governed (temp yaml via HERMES_GOVERNANCE_YAML). 
# Scoped LLM probes to a governed subset only (plugin gate — not Hermes toolsets): -# HERMES_E2E_GOVERNED_TOOLS=terminal,process RUN_HERMES_GATEWAY_E2E=1 ./tests/scripts/test-hermes-gateway-e2e.sh +# HERMES_E2E_GOVERNED_TOOLS=terminal,write_file RUN_HERMES_GATEWAY_E2E=1 ./tests/scripts/test-hermes-gateway-e2e.sh # Explicit yaml override (skip auto-generation): # HERMES_GOVERNANCE_YAML=/path/to/tools.yaml RUN_HERMES_GATEWAY_E2E=1 ... # From ebe448a621a25634046dd3439d3ad38c10628e74 Mon Sep 17 00:00:00 2001 From: Prince Date: Wed, 24 Jun 2026 20:54:49 +0530 Subject: [PATCH 3/3] add hermes execute code tool to governed tools in plugin --- ...overnance-execute-code-and-schema-hooks.md | 230 ++++++++++++++++++ docs/hermes-intentframe-integration-guide.md | 34 ++- docs/hermes-plugin-registration-order.md | 20 +- .../adapter/src/hermes_adapter/mapper.py | 21 ++ .../hermes/adapter/tests/test_adapter.py | 59 ++++- integrations/hermes/governance/tools.yaml | 8 + .../hermes/plugin/intentframe-gate/README.md | 55 +++-- .../plugin/intentframe-gate/__init__.py | 16 +- .../intentframe-gate/governance_loader.py | 2 +- .../plugin/intentframe-gate/registry_hook.py | 44 +++- .../intentframe-gate/tool_definitions_hook.py | 80 ++++++ .../shared/src/hermes_governance/loader.py | 2 +- .../hermes/shared/tests/test_governance.py | 1 + tests/hermes_adapter/live_fixtures.py | 4 + tests/hermes_adapter/test_live.py | 11 + tests/hermes_gateway/README.md | 5 +- tests/hermes_gateway/api_client.py | 99 ++++++++ .../probe_hermes_tool_schemas.py | 2 +- tests/hermes_gateway/test_gateway_e2e.py | 16 ++ .../test_provider_request_contract.py | 9 +- tests/hermes_gateway/toolsets_contract.py | 13 +- tests/hermes_governance_fixtures.py | 3 + tests/hermes_plugin/test_bridge_gate_live.py | 10 + tests/hermes_plugin/test_gate.py | 91 +++++++ tests/hermes_plugin/test_registry_hook.py | 72 +++++- tests/hermes_tool_probes.py | 15 ++ 26 files changed, 862 insertions(+), 60 deletions(-) create mode 100644
docs/hermes-governance-execute-code-and-schema-hooks.md create mode 100644 integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py diff --git a/docs/hermes-governance-execute-code-and-schema-hooks.md b/docs/hermes-governance-execute-code-and-schema-hooks.md new file mode 100644 index 0000000..8fa1932 --- /dev/null +++ b/docs/hermes-governance-execute-code-and-schema-hooks.md @@ -0,0 +1,230 @@ +# Hermes governance: `execute_code`, schema hooks, and `read_terminal` lessons + +> Session knowledge (June 2026): adding governed `execute_code`, fixing model-facing +> schema finalization, and why the toolsets contract must stay strict. +> +> Related: [`hermes-plugin-registration-order.md`](./hermes-plugin-registration-order.md), +> [`hermes-intentframe-integration-guide.md`](./hermes-intentframe-integration-guide.md), +> [`integrations/hermes/plugin/intentframe-gate/README.md`](../integrations/hermes/plugin/intentframe-gate/README.md). + +--- + +## TL;DR + +| Topic | Decision | +|-------|----------| +| **`execute_code` governance** | Reuse `RUN_COMMAND` + `map_execute_code()` — encode Python as `python -c …` so `command_shield` sees inline-edge + AST analysis | +| **Schema `reason` injection** | Two hooks at Hermes schema **composition** points — not at plugin `register()` via `import model_tools` | +| **`read_terminal` in toolsets** | Canary for accidental full `discover_builtin_tools()` — **do not relax** the contract | +| **What we rejected** | Wrapping `model_tools.get_tool_definitions` at plugin load; `importlib` meta_path hooks; relaxing toolsets assertions | + +--- + +## Part 1 — Governed `execute_code` + +### What Hermes `execute_code` is + +- **Python-only** sandbox tool (`tools/code_execution_tool.py`). +- Runs user Python in a child process with RPC access to a subset of Hermes tools. +- Different from `terminal` (shell) but both are high-risk execution surfaces. 
+ +### What we added + +**Governance yaml** (`integrations/hermes/governance/tools.yaml`): + +```yaml +execute_code: + enabled: true + action: RUN_COMMAND + risk: code_execution + mapper: execute_code + blocked_response: generic_json + builtin_module: tools.code_execution_tool +``` + +**Mapper** (`hermes_adapter/mapper.py` — `map_execute_code`): + +- Maps `{code, reason}` → `RUN_COMMAND` intent. +- Encodes Python as `python -c {shlex.quote(code)}` (not a fake `execute_code:python\n…` string) so Guardian **`command_shield`** applies inline-edge + AST rules to the script body. +- Truncates body at `_EXECUTE_CODE_MAX_BODY` (9500 chars) — below Hermes `command_shield` `max_command_length` (10k). + +**Why reuse `RUN_COMMAND`?** + +- No new action bundle, executor row, or policy shape. +- Same IntentFrame path as `terminal` for command analysis. +- BLOCK probes can use deterministic substrings (e.g. `sudo` in generated shell) mirroring terminal E2E. + +**Probes / tests** + +- `tests/hermes_tool_probes.py` — `execute_code_allow_args`, `execute_code_block_args` +- Gateway E2E helpers in `tests/hermes_gateway/api_client.py` +- Removed `execute_code` from `UNGATED_DISTRACTOR_TOOLS` in `toolsets_contract.py` (it is governed, not a distractor) + +### ALLOW vs BLOCK probe behavior + +- **BLOCK** via `sudo` substring in generated `python -c` command — deterministic (`CATASTROPHIC`). +- **ALLOW** for benign scripts — AE-dependent (`NEEDS_REVIEW` for many `python -c` scripts); not as deterministic as terminal ALLOW. + +--- + +## Part 2 — Schema finalization (`reason` in model-facing JSON) + +The model must see `reason` in `parameters.required` for every **governed** tool. Execution still strips `reason` before the native Hermes handler runs (`gate.py`). 
+ +### Two layers (do not conflate) + +| Layer | Responsibility | Files | +|-------|----------------|-------| +| **Handler gate** | Validate via adapter; strip `reason`; delegate | `gate.py`, snapshot loop, `registry.register` hook | +| **Schema finalization** | Inject `reason` into JSON schemas the LLM receives | `registry.get_definitions` hook + `build_execute_code_schema` hook | + +Registry-time `schema=entry.schema` in the snapshot loop intentionally does **not** inject `reason`. Schema finalization happens on the paths Hermes uses when building the OpenAI payload. + +### Why not wrap `model_tools.get_tool_definitions` at plugin load? + +We briefly added `install_tool_definitions_hook()` that did `import model_tools` during `register()`. + +**Problem:** Hermes runs full builtin discovery at `model_tools` import time: + +```python +# external-reference-only-libs/hermes-agent/model_tools.py (module level) +discover_builtin_tools() +``` + +That imports **every** self-registering tool module, including `read_terminal_tool.py`, which registers desktop-only `read_terminal` into the `terminal` toolset **before** the gateway’s intended lazy load order. + +**Symptom:** + +``` +Toolset 'terminal' tools mismatch. + expected: ['process', 'terminal'] + actual: ['process', 'read_terminal', 'terminal'] +``` + +**Fix (current):** Do **not** import `model_tools` during plugin registration. Finalize schemas at narrower composition points instead. + +### Current schema hooks (June 2026) + +**1. `registry.get_definitions`** (`registry_hook.py`) + +- Wraps every call to `registry.get_definitions`. +- Runs `finalize_governed_tool_schemas()` on the returned OpenAI-format tool list. +- Covers: `terminal`, `write_file`, `patch`, `cronjob`, and future governed tools with static registry schemas. + +**2. 
`build_execute_code_schema`** (`tool_definitions_hook.py`) + +- Hermes **rebuilds** `execute_code` schema **after** `get_definitions` inside `_compute_tool_definitions()` — listing only sandbox tools that actually passed `check_fn`. +- That rebuild **overwrites** any `reason` added by the `get_definitions` hook. +- Therefore we patch `tools.code_execution_tool.build_execute_code_schema` to call `inject_reason()` on its return value. +- Installed **after** `preload_governed_builtins()` (when `code_execution_tool` is already loaded for governed `execute_code`). + +**Plugin `register()` order:** + +``` +install_registry_hook() # register + get_definitions patches; NO model_tools +preload_governed_builtins() # selective imports from governance yaml only +install_execute_code_schema_hook() +snapshot loop # wrap handlers; schema=entry.schema (no inject here) +``` + +### Does patching `build_execute_code_schema` affect Hermes execution? + +**No.** That function returns a **schema dict** for the LLM. It does not run code. + +- `inject_reason()` deep-copies and adds `reason` to `parameters` + description suffix. +- `execute_code()` handler only reads `args.get("code")`. +- `wrap_handler` strips `reason` before delegation. + +Patching affects **what the model is told to send**, not how Hermes executes after the gate allows the call. 
+ +### Alternatives we considered and rejected + +| Approach | Why rejected | +|----------|----------------| +| **`import model_tools` at plugin load** | Triggers `discover_builtin_tools()` → `read_terminal` leak | +| **`importlib` meta_path lazy hook on `model_tools`** | Works but over-engineered for this codebase | +| **`wrapt` post-import hook (`model_tools?`)** | Industry-standard for APM; adds dependency we do not have | +| **Relax toolsets contract to allow extras** | Hides registration-order bugs; `read_terminal` is not intended api_server surface | +| **Inject `reason` only in `registry.register` hook** | Insufficient — `execute_code` schema is rebuilt later without `reason` | +| **Only wrap `get_definitions`** | Insufficient alone — `execute_code` dynamic rebuild wipes `reason` | + +--- + +## Part 3 — Three test surfaces (do not conflate) + +| Surface | What it proves | Harness | +|---------|----------------|---------| +| **`GET /v1/toolsets`** | Pinned api_server **tool names** per toolset (guardrail) | `toolsets_contract.py` | +| **Registry schemas** | Governed tools have `reason` + gated handlers | `probe_hermes_tool_schemas.py` | +| **OpenAI `tools=` payload** | What Hermes actually sends upstream | `provider_request_contract.py` / toolsets live test | + +A passing `/v1/toolsets` does **not** prove `terminal` is in the OpenAI request. Always verify the provider dump or Platform logs for the payload that matters. + +### Why `read_terminal` fails the contract but `vision_analyze` does not + +The contract is **not** “reject all ungoverned tools.” It is an **exact allowlist** per toolset for the intended `hermes-api-server` composite. + +| Tool | Ungoverned? | In contract? 
| Why | +|------|-------------|--------------|-----| +| `read_file`, `vision_analyze`, `skill_manage` | Yes | **Expected** | Part of api_server surface; some are E2E distractors | +| `read_terminal` | Yes | **Must not appear** | Desktop-only; appears only when full discovery runs too early | + +`read_terminal` is a **canary**: if it shows up at `GET /v1/toolsets` before first lazy `model_tools` load, the plugin imported too much at registration time. + +--- + +## Part 4 — Verification + +### Unit tests + +```bash +.venv/bin/python tests/hermes_plugin/test_gate.py +.venv/bin/python tests/hermes_plugin/test_registry_hook.py +.venv/bin/python tests/hermes_plugin/test_builtin_preload.py +``` + +Key regression tests added in this work: + +- `test_install_registry_hook_does_not_import_model_tools` +- `test_get_definitions_injects_reason_for_governed_tools` +- `test_execute_code_schema_hook_injects_reason` + +### Live toolsets + provider payload (passed June 2026) + +```bash +RUN_HERMES_GATEWAY_TOOLSETS=1 ./tests/scripts/test-hermes-gateway-toolsets.sh +``` + +Expected highlights: + +- `terminal: ['process', 'terminal']` — no `read_terminal` +- All governed tools: `reason_in_schema: true` in schema probe +- Provider dump: `cronjob`, `execute_code`, `patch`, `terminal`, `write_file` with `reason_required=True` + +--- + +## Adding a future governed tool — checklist + +1. Add entry to `integrations/hermes/governance/tools.yaml` with `builtin_module: tools.`. +2. Implement mapper in `hermes_adapter/mapper.py` (or use `generic`). +3. Add probe args in `tests/hermes_tool_probes.py` if gateway E2E will cover it. +4. Extend `test_builtin_preload.py` if new module path. +5. **Schema hook:** if Hermes rebuilds the tool schema **after** `get_definitions` (like `execute_code`), add a dedicated builder wrap — do not assume `get_definitions` alone is enough. +6. **Never** call `discover_builtin_tools()` or `import model_tools` in the plugin. +7. 
Run toolsets live test — confirm no unexpected tool names in pinned toolsets. + +--- + +## File index + +| File | Role | +|------|------| +| [`integrations/hermes/governance/tools.yaml`](../integrations/hermes/governance/tools.yaml) | Governed catalog + `builtin_module` preload map | +| [`integrations/hermes/adapter/src/hermes_adapter/mapper.py`](../integrations/hermes/adapter/src/hermes_adapter/mapper.py) | `map_execute_code` | +| [`integrations/hermes/plugin/intentframe-gate/__init__.py`](../integrations/hermes/plugin/intentframe-gate/__init__.py) | Plugin load order | +| [`integrations/hermes/plugin/intentframe-gate/registry_hook.py`](../integrations/hermes/plugin/intentframe-gate/registry_hook.py) | `register` + `get_definitions` hooks | +| [`integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py`](../integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py) | `finalize_governed_tool_schemas` + `execute_code` builder hook | +| [`integrations/hermes/plugin/intentframe-gate/schema.py`](../integrations/hermes/plugin/intentframe-gate/schema.py) | `inject_reason()` | +| [`tests/hermes_gateway/toolsets_contract.py`](../tests/hermes_gateway/toolsets_contract.py) | Strict api_server name surface | +| [`tests/hermes_gateway/probe_hermes_tool_schemas.py`](../tests/hermes_gateway/probe_hermes_tool_schemas.py) | Schema + gate marker probe | +| [`tests/hermes_gateway/provider_request_contract.py`](../tests/hermes_gateway/provider_request_contract.py) | OpenAI payload assertions | diff --git a/docs/hermes-intentframe-integration-guide.md b/docs/hermes-intentframe-integration-guide.md index bb22bce..df3d1b8 100644 --- a/docs/hermes-intentframe-integration-guide.md +++ b/docs/hermes-intentframe-integration-guide.md @@ -8,6 +8,7 @@ Related: - [`hermes-intentframe-state-report.md`](./hermes-intentframe-state-report.md) — current integration snapshot (catalog, tests, limitations) - 
[`hermes-plugin-registration-order.md`](./hermes-plugin-registration-order.md) — load-order bug, preload fix, evidence +- [`hermes-governance-execute-code-and-schema-hooks.md`](./hermes-governance-execute-code-and-schema-hooks.md) — `execute_code` governance, schema hooks, `read_terminal` lessons (June 2026) - [`agent-tool-gating.md`](./agent-tool-gating.md) — portable gating pattern - [`NATIVE_KIT_INTEGRATION.md`](./NATIVE_KIT_INTEGRATION.md) — native-kit bundles, policy alignment - [`integrations/hermes/README.md`](../integrations/hermes/README.md) — CLI quick start @@ -311,29 +312,35 @@ TOOLSET_TOOL_EXPECTATIONS: dict[str, frozenset[str]] = { Selective preload avoids that side effect while still populating the registry before snapshot. -### 2. Schema layer — `inject_reason()` +### 2. Schema layer — `inject_reason()` on composition paths -```22:55:integrations/hermes/plugin/intentframe-gate/schema.py +`inject_reason()` returns a deep copy with required ``reason`` (idempotent). It is applied on +**schema composition paths**, not at registry snapshot time: + +- **`registry.get_definitions`** — most governed tools (`registry_hook.py`) +- **`build_execute_code_schema`** — after Hermes dynamic rebuild (`tool_definitions_hook.py`) + +```22:22:integrations/hermes/plugin/intentframe-gate/schema.py def inject_reason(schema: dict[str, Any], *, tool_name: str) -> dict[str, Any]: - """Return a deep copy of *schema* with required ``reason`` (idempotent).""" - ... - if "reason" not in required: - required.append("reason") ``` Terminal gets slightly different reason copy; all governed tools require `reason` in the JSON schema the model sees. -### 3. Registry hook (MCP / late registration) +**Do not** `import model_tools` during plugin `register()` — that runs +`discover_builtin_tools()` and leaks `read_terminal`. See +[`hermes-governance-execute-code-and-schema-hooks.md`](./hermes-governance-execute-code-and-schema-hooks.md). + +### 3. 
Registry hook (MCP / late registration + schema finalization) -```40:43:integrations/hermes/plugin/intentframe-gate/registry_hook.py +```46:48:integrations/hermes/plugin/intentframe-gate/registry_hook.py if name in governed and not getattr(handler, GATED_MARKER, False): - schema = inject_reason(schema, tool_name=name) handler = wrap_handler(name, handler, is_async=is_async) ``` -The hook **complements** preload + snapshot; it must not be the **only** path for -Hermes builtins on gateway startup. +Schema injection at `register()` time was removed — `execute_code` and other dynamic +schemas are rebuilt later by Hermes. The hook **complements** preload + snapshot; it must +not be the **only** path for Hermes builtins on gateway startup. --- @@ -727,7 +734,8 @@ catalog tool (native ALLOW/BLOCK + generic semantic smoke for e.g. `cronjob`). | `/v1/toolsets` has tool, model doesn’t call it | Config surface ≠ registry surface | [`hermes-plugin-registration-order.md`](./hermes-plugin-registration-order.md) | | Model calls tool, IntentFrame logs empty | Ungoverned at runtime | `HERMES_GOVERNANCE_YAML` path; gateway stderr `Hermes governance config:` | | Validate always BLOCK | Mapper/policy mismatch | adapter log; `doctor hermes` contract lines | -| `read_terminal` in terminal toolset | Full builtin discovery in plugin | Use selective preload only | +| `read_terminal` in terminal toolset | Full builtin discovery in plugin (`discover_builtin_tools` or `import model_tools` at register) | Selective preload only; schema hooks via `get_definitions` — see [`hermes-governance-execute-code-and-schema-hooks.md`](./hermes-governance-execute-code-and-schema-hooks.md) | +| `execute_code` missing `reason` in schema probe | Only `get_definitions` hooked, not `build_execute_code_schema` | Hermes rebuilds `execute_code` after `get_definitions`; patch the builder | | Gateway health timeout | Crash on boot | sandbox `gateway.log` | | Stale governance after edit | Process not restarted | 
restart adapter + gateway | | `patch replace ALLOW` fails Pass 2a (overwrite BLOCK) | Same marker/file reused across passes | Pass-unique marker + `seed_patch_replace_target` | @@ -752,6 +760,8 @@ catalog tool (native ALLOW/BLOCK + generic semantic smoke for e.g. `cronjob`). | Preload map | [`integrations/hermes/plugin/intentframe-gate/builtin_preload.py`](../integrations/hermes/plugin/intentframe-gate/builtin_preload.py) | | Gate + wrap | [`integrations/hermes/plugin/intentframe-gate/gate.py`](../integrations/hermes/plugin/intentframe-gate/gate.py) | | Registry hook | [`integrations/hermes/plugin/intentframe-gate/registry_hook.py`](../integrations/hermes/plugin/intentframe-gate/registry_hook.py) | +| Schema hooks | [`integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py`](../integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py) | +| execute_code + schema lessons | [`docs/hermes-governance-execute-code-and-schema-hooks.md`](./hermes-governance-execute-code-and-schema-hooks.md) | | Adapter mapper | [`integrations/hermes/adapter/src/hermes_adapter/mapper.py`](../integrations/hermes/adapter/src/hermes_adapter/mapper.py) | | CLI integrate | [`intentframe-integrations-cli/.../hermes_integrate.py`](../intentframe-integrations-cli/src/intentframe_integrations/hermes_integrate.py) | | Gateway env | [`intentframe-integrations-cli/.../hermes_gateway.py`](../intentframe-integrations-cli/src/intentframe_integrations/hermes_gateway.py) | diff --git a/docs/hermes-plugin-registration-order.md b/docs/hermes-plugin-registration-order.md index 6b92406..f359026 100644 --- a/docs/hermes-plugin-registration-order.md +++ b/docs/hermes-plugin-registration-order.md @@ -21,6 +21,7 @@ Related: [`hermes-intentframe-integration-guide.md`](./hermes-intentframe-integr | Fix | **`preload_governed_builtins(governed)`** then generic snapshot loop with `ctx.register_tool(..., override=True)` for each governed name. 
See [`builtin_preload.py`](../integrations/hermes/plugin/intentframe-gate/builtin_preload.py). | | Not the cause | Wrong yaml, reason wording, or LLM flakiness (same model + Hermes passed on old plugin). **`/v1/toolsets` showing `terminal` is not proof the LLM received it.** | | Avoid | Full `discover_builtin_tools()` in the plugin — side effects like `read_terminal` break the toolsets contract. | +| Avoid | `import model_tools` during plugin `register()` — same side effect (module-level `discover_builtin_tools()`). | Full integration / add-tool checklist: [`hermes-intentframe-integration-guide.md`](./hermes-intentframe-integration-guide.md). @@ -400,20 +401,26 @@ Unit tests: [`tests/hermes_plugin/test_builtin_preload.py`](../tests/hermes_plug [`integrations/hermes/plugin/intentframe-gate/__init__.py`](../integrations/hermes/plugin/intentframe-gate/__init__.py): -1. **`install_registry_hook()`** — gate future `registry.register` calls (MCP refresh). -2. **`preload_governed_builtins(governed)`** — import governed Hermes builtin modules - before snapshot (gateway load-order fix). -3. **Snapshot loop** — generic wrap for all governed names with `override=True`. +1. **`install_registry_hook()`** — gate future `registry.register` calls (MCP refresh); inject `reason` on `registry.get_definitions`. +2. **`preload_governed_builtins(governed)`** — import governed Hermes builtin modules before snapshot (gateway load-order fix). +3. **`install_execute_code_schema_hook()`** — inject `reason` after Hermes dynamic `execute_code` schema rebuild. +4. **Snapshot loop** — generic handler wrap for all governed names with `override=True`. 
| File | Role | |------|------| | [`builtin_preload.py`](../integrations/hermes/plugin/intentframe-gate/builtin_preload.py) | Preload from yaml ``builtin_module`` + selective ``importlib.import_module`` | | [`schema.py`](../integrations/hermes/plugin/intentframe-gate/schema.py) | `inject_reason()` — terminal-specific reason text branch | | [`gate.py`](../integrations/hermes/plugin/intentframe-gate/gate.py) | Validate via adapter, strip `reason`, delegate | -| [`registry_hook.py`](../integrations/hermes/plugin/intentframe-gate/registry_hook.py) | Patch `registry.register` for dynamic tools | +| [`registry_hook.py`](../integrations/hermes/plugin/intentframe-gate/registry_hook.py) | Patch `registry.register` + `registry.get_definitions` | +| [`tool_definitions_hook.py`](../integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py) | `finalize_governed_tool_schemas` + `build_execute_code_schema` hook | + +**Schema finalization:** do **not** import `model_tools` at plugin load. See +[`hermes-governance-execute-code-and-schema-hooks.md`](./hermes-governance-execute-code-and-schema-hooks.md). When adding a governed Hermes **builtin**, set ``builtin_module: tools.<module>`` in the repo catalog template (see [`test_builtin_preload.py`](../tests/hermes_plugin/test_builtin_preload.py)). +If Hermes rebuilds the tool schema after `get_definitions` (like `execute_code`), add a +dedicated builder hook — `get_definitions` alone is not enough.
--- @@ -421,7 +428,7 @@ repo catalog template (see [`test_builtin_preload.py`](../tests/hermes_plugin/te | Tool | Gateway E2E | Registration note | |------|-------------|-------------------| -| `terminal`, `write_file`, `patch`, `cronjob` | Probed when in scoped yaml | ``builtin_module`` in repo ``tools.yaml`` — preload + snapshot | +| `terminal`, `write_file`, `patch`, `cronjob`, `execute_code` | Probed when in scoped yaml | ``builtin_module`` in repo ``tools.yaml`` — preload + snapshot | Delete coverage uses `patch` V4A `*** Delete File:` ops (maps to `DELETE_HOST_FILE`). @@ -467,6 +474,7 @@ attempt 1/3; that isolates the regression to plugin registration, not the LLM. ## References - **Integration guide (add/change tools):** [`hermes-intentframe-integration-guide.md`](./hermes-intentframe-integration-guide.md) +- **execute_code + schema hooks (June 2026):** [`hermes-governance-execute-code-and-schema-hooks.md`](./hermes-governance-execute-code-and-schema-hooks.md) - Plugin README: [`integrations/hermes/plugin/intentframe-gate/README.md`](../integrations/hermes/plugin/intentframe-gate/README.md) - Gating overview: [`docs/agent-tool-gating.md`](./agent-tool-gating.md) - E2E harness: [`tests/hermes_gateway/`](../tests/hermes_gateway/), diff --git a/integrations/hermes/adapter/src/hermes_adapter/mapper.py b/integrations/hermes/adapter/src/hermes_adapter/mapper.py index b5bf7e4..268c24d 100644 --- a/integrations/hermes/adapter/src/hermes_adapter/mapper.py +++ b/integrations/hermes/adapter/src/hermes_adapter/mapper.py @@ -3,6 +3,7 @@ from __future__ import annotations import re +import shlex from dataclasses import dataclass from typing import Any, Callable, Literal @@ -106,6 +107,25 @@ def _host_file_intent( return intent +# command_shield max_command_length is 10_000; leave room for ``python -c `` wrapper. 
+_EXECUTE_CODE_MAX_BODY = 9500 + + +def map_execute_code(args: dict[str, Any]) -> list[IntentDict]: + """Map Hermes execute_code (Python) to RUN_COMMAND for command_shield analysis.""" + code = _require_str(args, "code") + reason = validate_reason(args.get("reason")) + body = code[:_EXECUTE_CODE_MAX_BODY] + command = f"python -c {shlex.quote(body)}" + intent = { + "action": "RUN_COMMAND", + "command": command, + "reason": reason, + "target": f"execute_code ({len(code)} chars)"[:200], + } + return [_attach_hermes_args(intent, args, frozenset({"code"}))] + + def map_terminal(args: dict[str, Any]) -> list[IntentDict]: command = _require_str(args, "command") reason = validate_reason(args.get("reason")) @@ -288,6 +308,7 @@ def map_generic(tool: str, args: dict[str, Any], *, action: str) -> list[IntentD MAPPERS: dict[str, MapperFn | GenericMapperFn] = { "terminal": map_terminal, + "execute_code": map_execute_code, "write_file": map_write_file, "patch": map_patch, } diff --git a/integrations/hermes/adapter/tests/test_adapter.py b/integrations/hermes/adapter/tests/test_adapter.py index d344623..dd83a2a 100644 --- a/integrations/hermes/adapter/tests/test_adapter.py +++ b/integrations/hermes/adapter/tests/test_adapter.py @@ -70,6 +70,49 @@ def test_map_terminal_hermes_args_remainder(self) -> None: {"background": True, "timeout": 600}, ) + def test_map_execute_code(self) -> None: + import shlex + + from hermes_adapter.mapper import map_execute_code + + code = 'print("hi")' + intents = map_execute_code({"code": code, "reason": "Run probe script"}) + self.assertEqual(len(intents), 1) + self.assertEqual(intents[0]["action"], "RUN_COMMAND") + self.assertEqual(intents[0]["command"], f"python -c {shlex.quote(code)}") + self.assertEqual(intents[0]["reason"], "Run probe script") + self.assertEqual(intents[0]["target"], "execute_code (11 chars)") + self.assertNotIn("hermes_args", intents[0]) + + def test_map_execute_code_hermes_args_remainder(self) -> None: + from 
hermes_adapter.mapper import map_execute_code + + intents = map_execute_code( + { + "code": 'print("ok")', + "task_id": "abc", + "reason": "Run probe script", + } + ) + intent = intents[0] + self.assertNotIn("code", intent["hermes_args"]) + self.assertEqual(intent["hermes_args"], {"task_id": "abc"}) + + def test_map_execute_code_missing_code(self) -> None: + from hermes_adapter.mapper import ValidationError, map_execute_code + + with self.assertRaises(ValidationError): + map_execute_code({"reason": "noop"}) + + def test_map_execute_code_block_shaped_code(self) -> None: + import shlex + + from hermes_adapter.mapper import map_execute_code + + code = 'import subprocess\nsubprocess.run("sudo echo intentframe-e2e-block-probe")' + intents = map_execute_code({"code": code, "reason": "Block probe"}) + self.assertEqual(intents[0]["command"], f"python -c {shlex.quote(code)}") + def test_map_write_file(self) -> None: from hermes_adapter.mapper import map_write_file @@ -254,18 +297,22 @@ def test_map_patch_v4a_mixed_write_delete(self) -> None: self.assertEqual(intents[1]["reason"], "Mixed edit [patch op 2/2: delete ~/drop.py]") def test_missing_reason(self) -> None: - from hermes_adapter.mapper import ValidationError, map_terminal + from hermes_adapter.mapper import ValidationError, map_execute_code, map_terminal with self.assertRaises(ValidationError): map_terminal({"command": "echo hi"}) + with self.assertRaises(ValidationError): + map_execute_code({"code": 'print("hi")'}) def test_supported_tools(self) -> None: from hermes_adapter.mapper import supported_tools tools = supported_tools() self.assertIn("terminal", tools) + self.assertIn("execute_code", tools) self.assertIn("write_file", tools) self.assertIn("patch", tools) + self.assertEqual(tools["execute_code"], "RUN_COMMAND") self.assertEqual(tools["write_file"], "WRITE_HOST_FILE") self.assertEqual(tools["patch"], "WRITE_HOST_FILE") @@ -302,6 +349,16 @@ def test_map_generic_cronjob(self) -> None: ) self.assertEqual(mapped, 
intents) + def test_map_tool_execute_code(self) -> None: + import shlex + + from hermes_adapter.mapper import map_execute_code, map_tool + + args = {"code": 'print("ok")', "reason": "Run probe script"} + mapped = map_tool("execute_code", args) + self.assertEqual(mapped, map_execute_code(args)) + self.assertEqual(mapped[0]["command"], f'python -c {shlex.quote(args["code"])}') + class TestValidateService(unittest.TestCase): def test_allow(self) -> None: diff --git a/integrations/hermes/governance/tools.yaml b/integrations/hermes/governance/tools.yaml index 8a0b923..9bae58a 100644 --- a/integrations/hermes/governance/tools.yaml +++ b/integrations/hermes/governance/tools.yaml @@ -27,6 +27,14 @@ tools: blocked_response: terminal_json builtin_module: tools.terminal_tool + execute_code: + enabled: true + action: RUN_COMMAND + risk: code_execution + mapper: execute_code # Python -> python -c … for command_shield (see mapper.py) + blocked_response: generic_json + builtin_module: tools.code_execution_tool + write_file: enabled: true action: WRITE_HOST_FILE diff --git a/integrations/hermes/plugin/intentframe-gate/README.md b/integrations/hermes/plugin/intentframe-gate/README.md index 8d383e2..67c4ac0 100644 --- a/integrations/hermes/plugin/intentframe-gate/README.md +++ b/integrations/hermes/plugin/intentframe-gate/README.md @@ -2,12 +2,14 @@ Selective IntentFrame validate-only gate for **governed** Hermes tools. +**Deep dive (June 2026 session):** [`docs/hermes-governance-execute-code-and-schema-hooks.md`](../../../docs/hermes-governance-execute-code-and-schema-hooks.md) — `execute_code` governance, schema hook architecture, `read_terminal` lessons, what we did **not** do. + ## What “governed” means A tool is **governed** when it appears in `governance/tools.yaml` with `enabled: true`. The plugin then: -1. Injects required `reason` into the tool schema +1. Injects required `reason` into the final model-facing tool schema 2. 
Validates via adapter before delegating to Hermes `enabled: false` (or absent from the runtime governed set) means Hermes runs the @@ -24,34 +26,47 @@ Configured in `integrations/hermes/governance/tools.yaml` (runtime copy under `~/.intentframe/integrations/hermes/governance/tools.yaml`): - `terminal` → `RUN_COMMAND` +- `execute_code` → `RUN_COMMAND` (Python encoded as `python -c …` for `command_shield`) - `write_file`, `patch` (update/add) → `WRITE_HOST_FILE` - `patch` (V4A delete) → `DELETE_HOST_FILE` +- `cronjob` → `HERMES_CRONJOB` (generic mapper) Reads and helpers stay ungoverned unless added to the contract explicitly. ## Architecture -For each **governed** tool: +Two independent layers per governed tool: -1. Schema injects required `reason` (layer 1) -2. Handler validates via adapter before delegating to Hermes (layer 2) -3. Adapter maps tool args to IntentFrame action(s) and calls the bridge +| Layer | When | What | +|-------|------|------| +| **Schema** | `registry.get_definitions` (+ `build_execute_code_schema` for dynamic rebuild) | Model sees required `reason` | +| **Execution** | Snapshot wrap + `registry.register` hook | Validate, strip `reason`, delegate to Hermes | At plugin load (`register()`): -1. `install_registry_hook()` — gate future `registry.register` (MCP refresh) -2. `preload_governed_builtins(governed)` — selective Hermes module import -3. Snapshot loop — wrap governed registry entries with `override=True` +1. `install_registry_hook()` — wrap handlers on future `registry.register`; inject `reason` on `registry.get_definitions` +2. `preload_governed_builtins(governed)` — selective Hermes module import from yaml `builtin_module` +3. `install_execute_code_schema_hook()` — `reason` after Hermes dynamic `execute_code` schema rebuild +4. 
Snapshot loop — wrap governed handlers with `override=True` (schema stays `entry.schema`; finalization is on get_definitions path) + +### Critical: never `import model_tools` during `register()` + +Hermes runs `discover_builtin_tools()` at `model_tools` import time, which registers +extras like desktop-only `read_terminal` into the `terminal` toolset. That breaks +the pinned `GET /v1/toolsets` contract (`['process', 'terminal']` only). + +We **rejected** wrapping `model_tools.get_tool_definitions` at plugin load for this reason. +Schema finalization uses registry composition hooks instead. See the deep-dive doc above. On gateway startup, plugins load **before** Hermes builtins. [`builtin_preload.py`](builtin_preload.py) -imports ``builtin_module`` from each **enabled** governed tool in the dev-owned -``governance/tools.yaml`` so the snapshot loop can wrap them without calling full -``discover_builtin_tools()`` (which would pull in extras like ``read_terminal``). -Details: [`docs/hermes-plugin-registration-order.md`](../../../docs/hermes-plugin-registration-order.md). +imports ``builtin_module`` from each **enabled** governed tool only — not full +``discover_builtin_tools()``. + +When adding a governed Hermes builtin: -When adding a governed Hermes builtin, set ``builtin_module: tools.<module>`` in the -repo template and extend -[`tests/hermes_plugin/test_builtin_preload.py`](../../../tests/hermes_plugin/test_builtin_preload.py). +1. Set ``builtin_module: tools.<module>`` in the repo catalog template. +2. Extend [`tests/hermes_plugin/test_builtin_preload.py`](../../../tests/hermes_plugin/test_builtin_preload.py). +3. If Hermes rebuilds the tool schema after `get_definitions` (like `execute_code`), add a builder hook — do not rely on `get_definitions` alone. ## Env @@ -73,3 +88,13 @@ plugins: ``` This loads the plugin; per-tool governance is controlled in `governance/tools.yaml`.
+ +## Verification + +```bash +# Unit — includes test_install_registry_hook_does_not_import_model_tools +.venv/bin/python tests/hermes_plugin/test_registry_hook.py + +# Live — toolsets + schema probe + OpenAI tools= payload +RUN_HERMES_GATEWAY_TOOLSETS=1 ./tests/scripts/test-hermes-gateway-toolsets.sh +``` diff --git a/integrations/hermes/plugin/intentframe-gate/__init__.py b/integrations/hermes/plugin/intentframe-gate/__init__.py index af12ae7..f260306 100644 --- a/integrations/hermes/plugin/intentframe-gate/__init__.py +++ b/integrations/hermes/plugin/intentframe-gate/__init__.py @@ -6,13 +6,22 @@ from .gate import wrap_handler from .governance_loader import load_governed_tools from .registry_hook import install_registry_hook -from .schema import inject_reason +from .tool_definitions_hook import install_execute_code_schema_hook PLUGIN_NAME = "intentframe-gate" def register(ctx) -> None: - """Wrap governed tools and hook future registry registrations.""" + """Wrap governed tools and hook future registry registrations. + + Load order matters — see ``docs/hermes-governance-execute-code-and-schema-hooks.md``: + + 1. Registry hooks (handler gate + get_definitions schema finalization). + Must NOT import ``model_tools`` (triggers full ``discover_builtin_tools()``). + 2. Selective preload of governed ``builtin_module`` entries only. + 3. ``execute_code`` builder hook (dynamic schema rebuild path). + 4. Snapshot handler wrap — schemas finalized on get_definitions, not here. 
+ """ from tools.registry import registry install_registry_hook() @@ -20,6 +29,7 @@ def register(ctx) -> None: governed_tools = load_governed_tools() governed = frozenset(governed_tools) preload_governed_builtins(governed_tools) + install_execute_code_schema_hook() for entry in registry._snapshot_entries(): if entry.name not in governed: @@ -27,7 +37,7 @@ def register(ctx) -> None: ctx.register_tool( name=entry.name, toolset=entry.toolset, - schema=inject_reason(entry.schema, tool_name=entry.name), + schema=entry.schema, handler=wrap_handler(entry.name, entry.handler, is_async=entry.is_async), check_fn=entry.check_fn, is_async=entry.is_async, diff --git a/integrations/hermes/plugin/intentframe-gate/governance_loader.py b/integrations/hermes/plugin/intentframe-gate/governance_loader.py index a3eb67f..61e788d 100644 --- a/integrations/hermes/plugin/intentframe-gate/governance_loader.py +++ b/integrations/hermes/plugin/intentframe-gate/governance_loader.py @@ -11,7 +11,7 @@ import yaml VALID_BLOCKED_RESPONSES = frozenset({"terminal_json", "generic_json"}) -VALID_MAPPER_KINDS = frozenset({"terminal", "write_file", "patch", "generic"}) +VALID_MAPPER_KINDS = frozenset({"terminal", "write_file", "patch", "generic", "execute_code"}) BUILTIN_MODULE_PREFIX = "tools." diff --git a/integrations/hermes/plugin/intentframe-gate/registry_hook.py b/integrations/hermes/plugin/intentframe-gate/registry_hook.py index e6bd97a..08f462e 100644 --- a/integrations/hermes/plugin/intentframe-gate/registry_hook.py +++ b/integrations/hermes/plugin/intentframe-gate/registry_hook.py @@ -1,24 +1,40 @@ -"""Auto-gate governed tools on every registry registration (including MCP refresh).""" +"""Auto-gate governed tools on every registry registration (including MCP refresh). + +Also finalizes model-facing schemas on ``registry.get_definitions`` (injects required +``reason`` for governed tools). 
+ +Do **not** import ``model_tools`` here — Hermes runs ``discover_builtin_tools()`` at +``model_tools`` import time, which leaks desktop-only tools like ``read_terminal`` into +the terminal toolset before the gateway's intended lazy load order. + +See ``docs/hermes-governance-execute-code-and-schema-hooks.md``. +""" from __future__ import annotations import logging -from typing import Callable +from typing import Any, Callable from .gate import GATED_MARKER, wrap_handler from .governance_loader import governed_tool_names -from .schema import inject_reason +from .tool_definitions_hook import finalize_governed_tool_schemas logger = logging.getLogger(__name__) -_PATCHED_ATTR = "_intentframe_register_patched" +_REGISTER_PATCHED_ATTR = "_intentframe_register_patched" +_GET_DEFINITIONS_PATCHED_ATTR = "_intentframe_get_definitions_patched" def install_registry_hook() -> None: from tools.registry import registry + _install_register_hook(registry) + _install_get_definitions_hook(registry) + + +def _install_register_hook(registry: Any) -> None: original: Callable = registry.register - if getattr(original, _PATCHED_ATTR, False): + if getattr(original, _REGISTER_PATCHED_ATTR, False): return governed = governed_tool_names() @@ -37,8 +53,9 @@ def patched_register( dynamic_schema_overrides: Callable = None, override: bool = False, ): + # Schema finalization is on get_definitions / builder hooks — not here. + # execute_code schema is rebuilt by Hermes after get_definitions anyway. 
if name in governed and not getattr(handler, GATED_MARKER, False): - schema = inject_reason(schema, tool_name=name) handler = wrap_handler(name, handler, is_async=is_async) logger.debug("IntentFrame gate applied to registry registration: %s", name) return original( @@ -56,5 +73,18 @@ def patched_register( override=override, ) - setattr(patched_register, _PATCHED_ATTR, True) + setattr(patched_register, _REGISTER_PATCHED_ATTR, True) registry.register = patched_register # type: ignore[method-assign] + + +def _install_get_definitions_hook(registry: Any) -> None: + """Inject ``reason`` into governed tool schemas on the LLM payload path.""" + original: Callable[..., list[dict[str, Any]]] = registry.get_definitions + if getattr(original, _GET_DEFINITIONS_PATCHED_ATTR, False): + return + + def patched_get_definitions(*args: Any, **kwargs: Any) -> list[dict[str, Any]]: + return finalize_governed_tool_schemas(original(*args, **kwargs)) + + setattr(patched_get_definitions, _GET_DEFINITIONS_PATCHED_ATTR, True) + registry.get_definitions = patched_get_definitions # type: ignore[method-assign] diff --git a/integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py b/integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py new file mode 100644 index 0000000..7fd11f1 --- /dev/null +++ b/integrations/hermes/plugin/intentframe-gate/tool_definitions_hook.py @@ -0,0 +1,80 @@ +"""Finalize governed tool schemas on Hermes schema composition paths. + +Two hook points (see ``docs/hermes-governance-execute-code-and-schema-hooks.md``): + +1. ``finalize_governed_tool_schemas`` — called from patched ``registry.get_definitions`` + for terminal, write_file, patch, cronjob, etc. + +2. ``install_execute_code_schema_hook`` — patches ``build_execute_code_schema`` because + Hermes rebuilds ``execute_code`` *after* ``get_definitions`` inside + ``model_tools._compute_tool_definitions``, which would wipe registry-time ``reason``. 
+ +We intentionally do **not** wrap ``model_tools.get_tool_definitions`` at plugin load — +importing ``model_tools`` runs module-level ``discover_builtin_tools()`` and registers +extras like ``read_terminal``. +""" + +from __future__ import annotations + +import logging +from typing import Any, Callable + +from .governance_loader import governed_tool_names +from .schema import inject_reason + +logger = logging.getLogger(__name__) + +_EXECUTE_CODE_SCHEMA_PATCHED_ATTR = "_intentframe_execute_code_schema_patched" + + +def finalize_governed_tool_schemas(tool_defs: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Ensure every governed tool in *tool_defs* exposes required ``reason``.""" + governed = governed_tool_names() + if not governed: + return tool_defs + + finalized: list[dict[str, Any]] = [] + for tool_def in tool_defs: + if not isinstance(tool_def, dict) or tool_def.get("type") != "function": + finalized.append(tool_def) + continue + + function = tool_def.get("function") + if not isinstance(function, dict): + finalized.append(tool_def) + continue + + name = function.get("name") + if not isinstance(name, str) or name not in governed: + finalized.append(tool_def) + continue + + finalized.append({ + **tool_def, + "function": inject_reason(function, tool_name=name), + }) + logger.debug("IntentFrame gate finalized governed tool schema: %s", name) + return finalized + + +def install_execute_code_schema_hook() -> None: + """Inject ``reason`` after Hermes rebuilds the dynamic execute_code schema. + + Safe to patch this builder: it returns an LLM schema dict only; ``execute_code()`` + reads ``args['code']`` and ``wrap_handler`` strips ``reason`` before delegation. + Call after ``preload_governed_builtins`` so ``tools.code_execution_tool`` is loaded. 
+ """ + if "execute_code" not in governed_tool_names(): + return + + from tools import code_execution_tool + + original: Callable[..., dict[str, Any]] = code_execution_tool.build_execute_code_schema + if getattr(original, _EXECUTE_CODE_SCHEMA_PATCHED_ATTR, False): + return + + def patched_build_execute_code_schema(*args: Any, **kwargs: Any) -> dict[str, Any]: + return inject_reason(original(*args, **kwargs), tool_name="execute_code") + + setattr(patched_build_execute_code_schema, _EXECUTE_CODE_SCHEMA_PATCHED_ATTR, True) + code_execution_tool.build_execute_code_schema = patched_build_execute_code_schema # type: ignore[method-assign] diff --git a/integrations/hermes/shared/src/hermes_governance/loader.py b/integrations/hermes/shared/src/hermes_governance/loader.py index 981bf05..59187ae 100644 --- a/integrations/hermes/shared/src/hermes_governance/loader.py +++ b/integrations/hermes/shared/src/hermes_governance/loader.py @@ -12,7 +12,7 @@ import yaml VALID_BLOCKED_RESPONSES = frozenset({"terminal_json", "generic_json"}) -VALID_MAPPER_KINDS = frozenset({"terminal", "write_file", "patch", "generic"}) +VALID_MAPPER_KINDS = frozenset({"terminal", "write_file", "patch", "generic", "execute_code"}) BUILTIN_MODULE_PREFIX = "tools." 
diff --git a/integrations/hermes/shared/tests/test_governance.py b/integrations/hermes/shared/tests/test_governance.py index af7d186..a2e4d6a 100644 --- a/integrations/hermes/shared/tests/test_governance.py +++ b/integrations/hermes/shared/tests/test_governance.py @@ -50,6 +50,7 @@ def test_builtin_module_on_catalog_tools(self) -> None: self.assertEqual(catalog["write_file"].builtin_module, "tools.file_tools") self.assertEqual(catalog["patch"].builtin_module, "tools.file_tools") self.assertEqual(catalog["cronjob"].builtin_module, "tools.cronjob_tools") + self.assertEqual(catalog["execute_code"].builtin_module, "tools.code_execution_tool") def test_invalid_builtin_module_prefix_raises(self) -> None: from hermes_governance.loader import load_tool_catalog diff --git a/tests/hermes_adapter/live_fixtures.py b/tests/hermes_adapter/live_fixtures.py index a8eb378..e3000cc 100644 --- a/tests/hermes_adapter/live_fixtures.py +++ b/tests/hermes_adapter/live_fixtures.py @@ -11,6 +11,8 @@ from hermes_tool_probes import ( # noqa: E402 cronjob_semantic_args, + execute_code_allow_args, + execute_code_block_args, patch_replace_allow_args, patch_replace_block_args, patch_v4a_block_args, @@ -28,6 +30,8 @@ PATCH_V4A_MIXED_HOME_DELETE_ARGS = patch_v4a_mixed_home_delete_args(marker=_LIVE_MARKER) PATCH_V4A_BLOCK_ARGS = patch_v4a_block_args(marker=_LIVE_MARKER) CRONJOB_SEMANTIC_ARGS = cronjob_semantic_args() +EXECUTE_CODE_ALLOW_ARGS = execute_code_allow_args(marker=_LIVE_MARKER) +EXECUTE_CODE_BLOCK_ARGS = execute_code_block_args() # Back-compat aliases PATCH_V4A_MIXED_ALLOW_ARGS = PATCH_V4A_MIXED_HOME_DELETE_ARGS diff --git a/tests/hermes_adapter/test_live.py b/tests/hermes_adapter/test_live.py index 8ddc8e2..92ee3e5 100644 --- a/tests/hermes_adapter/test_live.py +++ b/tests/hermes_adapter/test_live.py @@ -19,6 +19,8 @@ from intentframe_validation_helpers import assert_adapter_semantic_validate # noqa: E402 from live_fixtures import ( # noqa: E402 CRONJOB_SEMANTIC_ARGS, + 
EXECUTE_CODE_ALLOW_ARGS, + EXECUTE_CODE_BLOCK_ARGS, PATCH_ALLOW_REPLACE_ARGS, PATCH_BLOCK_REPLACE_ARGS, PATCH_V4A_BLOCK_ARGS, @@ -76,6 +78,15 @@ def test_block_terminal(self) -> None: self.assertEqual(agent_response.get("exit_code"), -1) self.assertEqual(agent_response.get("status"), "blocked") + def test_execute_code_semantic(self) -> None: + body = self._validate_tool("execute_code", EXECUTE_CODE_ALLOW_ARGS) + assert_adapter_semantic_validate(body) + + def test_block_execute_code(self) -> None: + body = self._validate_tool("execute_code", EXECUTE_CODE_BLOCK_ARGS) + self.assertFalse(body["allowed"]) + self.assertIn("agent_response", body) + def test_allow_write_file(self) -> None: body = self._validate_tool("write_file", WRITE_ALLOW_ARGS) self.assertTrue(body["allowed"]) diff --git a/tests/hermes_gateway/README.md b/tests/hermes_gateway/README.md index a454c92..57ae84f 100644 --- a/tests/hermes_gateway/README.md +++ b/tests/hermes_gateway/README.md @@ -260,7 +260,8 @@ noise; still sends the full `tools=` list upstream). The registry count and toolsets count differ by design — not every listed toolset name becomes a registry definition on the LLM path. See -[`docs/hermes-intentframe-integration-guide.md`](../../docs/hermes-intentframe-integration-guide.md#two-different-tool-surfaces-do-not-conflate). +[`docs/hermes-intentframe-integration-guide.md`](../../docs/hermes-intentframe-integration-guide.md#two-different-tool-surfaces-do-not-conflate) +and [`docs/hermes-governance-execute-code-and-schema-hooks.md`](../../docs/hermes-governance-execute-code-and-schema-hooks.md). 
### Assertions @@ -311,5 +312,7 @@ These were gaps between production behavior and what the toolsets live harness a | **Partial governed coverage** | Toolsets test skipped `cronjob` while production governs it | Probe and provider dump used `gateway_e2e_probe_tool_names()` (native E2E tier only) | Probe uses `governed_tool_names()`; live test asserts `template_governed_tool_names()` on the request dump | | **Preload map drift** | Adding a governed builtin required editing a hardcoded Python dict | `GOVERNED_BUILTIN_MODULES` lived in `builtin_preload.py`, separate from `tools.yaml` | `builtin_module: tools.<module>` per tool in repo `tools.yaml`; plugin preload imports enabled specs; shared + plugin loaders validate `tools.` prefix | | **`cronjob` schema probe failure** | Probe reported `cronjob missing from get_tool_definitions()` despite yaml preload | Preload registered `cronjob`, but Hermes `check_cronjob_requirements()` filters it unless `HERMES_GATEWAY_SESSION=1` (or interactive/exec env); probe subprocess lacked that env while the gateway had it | `_run_schema_probe()` sets `probe_env["HERMES_GATEWAY_SESSION"] = "1"` to mirror the running gateway | +| **`read_terminal` in toolsets live test** | `terminal` toolset included `read_terminal` | Plugin imported `model_tools` at register → module-level `discover_builtin_tools()` | Schema hooks via `registry.get_definitions` + `build_execute_code_schema`; never import `model_tools` at plugin load — see [`hermes-governance-execute-code-and-schema-hooks.md`](../../docs/hermes-governance-execute-code-and-schema-hooks.md) | +| **`execute_code` missing `reason` in probe** | Dynamic schema rebuild after `get_definitions` | Hermes `_compute_tool_definitions` calls `build_execute_code_schema` after registry schemas | Patch `build_execute_code_schema` to run `inject_reason` on its return value | Guarded by `test_governed_tool_coverage.py` (`test_toolsets_live_verifies_full_governed_catalog`) and loader parity in
`tests/hermes_plugin/test_gate.py` (`builtin_module` must match between plugin and shared loaders). diff --git a/tests/hermes_gateway/api_client.py b/tests/hermes_gateway/api_client.py index 37cce95..155851e 100644 --- a/tests/hermes_gateway/api_client.py +++ b/tests/hermes_gateway/api_client.py @@ -16,6 +16,7 @@ sys.path.insert(0, str(_TESTS_DIR)) from hermes_tool_probes import ( # noqa: E402 + execute_code_block_args, patch_replace_allow_args, patch_replace_block_args, patch_v4a_block_args, @@ -327,6 +328,104 @@ def run_block_once(*, host: str, port: int, api_key: str) -> TerminalCall: return assert_block_response(body) +def assert_execute_code_allow(body: dict[str, Any], *, marker: str) -> ToolCall: + calls = extract_tool_calls(body, tool_name="execute_code") + if not calls: + raise AssertionError( + f"No execute_code function_call in response: {json.dumps(body)[:2000]}" + ) + + call = calls[-1] + code = str(call.arguments.get("code", "")) + if marker not in code and marker not in call.output: + raise AssertionError( + f"Allow test missing marker {marker!r} in code={code!r} output={call.output!r}" + ) + if _looks_blocked(call.output): + raise AssertionError(f"Expected allow but got blocked output: {call.output!r}") + return call + + +def assert_execute_code_block(body: dict[str, Any]) -> ToolCall: + calls = extract_tool_calls(body, tool_name="execute_code") + if not calls: + raise AssertionError( + f"No execute_code function_call in block response: {json.dumps(body)[:2000]}" + ) + + call = calls[-1] + code = str(call.arguments.get("code", "")).lower() + if "sudo" not in code: + raise AssertionError(f"Block test code unexpected: {code!r}") + if not _looks_blocked(call.output): + raise AssertionError(f"Expected blocked tool output, got: {call.output!r}") + return call + + +def run_execute_code_allow_with_retries( + *, + host: str, + port: int, + api_key: str, + marker: str, + attempts: int = 3, +) -> ToolCall: + from cli_runner import step + + code = 
f"print({marker!r})" + instructions = ( + "You are running an automated integration test. " + "You MUST call the execute_code tool exactly once and no other tools." + ) + last_error: Exception | None = None + for attempt in range(1, attempts + 1): + step(f"POST /v1/responses execute_code ALLOW (attempt {attempt}/{attempts})") + prompt = ( + f"Call the execute_code tool exactly once with code: {code} " + f"and reason: 'IntentFrame E2E execute_code allow {attempt}'. " + "Do not explain; just run the tool." + ) + try: + body = post_responses( + host=host, + port=port, + api_key=api_key, + prompt=prompt, + instructions=instructions, + ) + return assert_execute_code_allow(body, marker=marker) + except (AssertionError, RuntimeError) as exc: + last_error = exc + raise AssertionError( + f"execute_code allow test failed after {attempts} attempts: {last_error}" + ) from last_error + + +def run_execute_code_block_once(*, host: str, port: int, api_key: str) -> ToolCall: + from cli_runner import step + + step("POST /v1/responses execute_code BLOCK (policy should deny sudo in code)") + args = execute_code_block_args(reason="IntentFrame E2E execute_code block test") + code = args["code"] + prompt = ( + "Call the execute_code tool exactly once with this exact code and no other tools:\n" + f"{code}\n" + f"Use reason: '{args['reason']}'." + ) + instructions = ( + "You are running an automated integration test. " + "You MUST call the execute_code tool exactly once and no other tools." 
+ ) + body = post_responses( + host=host, + port=port, + api_key=api_key, + prompt=prompt, + instructions=instructions, + ) + return assert_execute_code_block(body) + + def assert_write_file_allow(body: dict[str, Any], *, marker: str) -> ToolCall: calls = extract_tool_calls(body, tool_name="write_file") if not calls: diff --git a/tests/hermes_gateway/probe_hermes_tool_schemas.py b/tests/hermes_gateway/probe_hermes_tool_schemas.py index ce15299..f4725d9 100644 --- a/tests/hermes_gateway/probe_hermes_tool_schemas.py +++ b/tests/hermes_gateway/probe_hermes_tool_schemas.py @@ -96,7 +96,7 @@ def main() -> int: if entry and not gated: errors.append(f"{tool_name}: registry handler missing intentframe gate marker") - for distractor in ("vision_analyze", "execute_code", "skill_manage"): + for distractor in ("vision_analyze", "skill_manage"): fn = by_name.get(distractor) if fn is None: errors.append(f"expected distractor {distractor!r} on api_server surface") diff --git a/tests/hermes_gateway/test_gateway_e2e.py b/tests/hermes_gateway/test_gateway_e2e.py index 571c2d2..179976f 100644 --- a/tests/hermes_gateway/test_gateway_e2e.py +++ b/tests/hermes_gateway/test_gateway_e2e.py @@ -36,6 +36,8 @@ get_toolsets, run_allow_with_retries, run_block_once, + run_execute_code_allow_with_retries, + run_execute_code_block_once, run_patch_replace_allow_with_retries, run_patch_replace_block_once, run_patch_v4a_mixed_block_once, @@ -209,6 +211,20 @@ def _run_api_allow_block(env: IsolatedEnv, *, label: str) -> None: step(f"{label}: POST /v1/responses BLOCK (policy should deny sudo)") run_block_once(host=API_HOST, port=env.api_port, api_key=env.api_key) + if "execute_code" in governed: + probes_ran.add("execute_code") + exec_marker = f"intentframe-hermes-exec-ok-{env.run_id}-{pass_slug}" + step(f"{label}: POST /v1/responses execute_code ALLOW") + run_execute_code_allow_with_retries( + host=API_HOST, + port=env.api_port, + api_key=env.api_key, + marker=exec_marker, + ) + + step(f"{label}: POST 
/v1/responses execute_code BLOCK (policy should deny sudo in code)") + run_execute_code_block_once(host=API_HOST, port=env.api_port, api_key=env.api_key) + if "write_file" in governed: probes_ran.add("write_file") write_marker = f"intentframe-hermes-write-ok-{env.run_id}-{pass_slug}" diff --git a/tests/hermes_gateway/test_provider_request_contract.py b/tests/hermes_gateway/test_provider_request_contract.py index 8fe22d1..fbf53d7 100644 --- a/tests/hermes_gateway/test_provider_request_contract.py +++ b/tests/hermes_gateway/test_provider_request_contract.py @@ -166,10 +166,10 @@ def test_format_provider_tools_snapshot(self) -> None: "model": "gpt-4o-mini", "tools": [ _tool("terminal"), - _tool("execute_code", reason_required=False), + _tool("execute_code"), ], } - governed = frozenset({"terminal"}) + governed = frozenset({"terminal", "execute_code"}) text = format_provider_tools_snapshot( body, governed, @@ -178,9 +178,8 @@ def test_format_provider_tools_snapshot(self) -> None: self.assertIn("model='gpt-4o-mini'", text) self.assertIn("request_dump=/tmp/dump.json", text) self.assertIn("terminal [governed, reason_required=True]", text) - self.assertIn("execute_code", text) - self.assertNotIn("execute_code [governed", text) - self.assertIn("['terminal']", text) + self.assertIn("execute_code [governed, reason_required=True]", text) + self.assertIn("['execute_code', 'terminal']", text) def test_tool_reason_required(self) -> None: fn = _tool("terminal")["function"] diff --git a/tests/hermes_gateway/toolsets_contract.py b/tests/hermes_gateway/toolsets_contract.py index f1da684..262515e 100644 --- a/tests/hermes_gateway/toolsets_contract.py +++ b/tests/hermes_gateway/toolsets_contract.py @@ -1,7 +1,14 @@ """Contract for GET /v1/toolsets after intentframe-gate integration. -Validates the Hermes api_server tool *name* surface the LLM can choose from. -Names only — full JSON schemas are probed separately via ``probe_hermes_tool_schemas.py``. 
+Validates the Hermes api_server tool *name* surface at gateway startup (before lazy +``model_tools`` import). Names only — full JSON schemas are probed separately via +``probe_hermes_tool_schemas.py``; OpenAI ``tools=`` payload via +``provider_request_contract.py``. + +Strict shape (e.g. ``terminal: {terminal, process}`` without ``read_terminal``) is a +canary for accidental full ``discover_builtin_tools()`` during plugin load. Ungoverned +tools like ``vision_analyze`` are *expected* on this surface; ``read_terminal`` is not. +See ``docs/hermes-governance-execute-code-and-schema-hooks.md``. """ from __future__ import annotations @@ -23,7 +30,7 @@ # Ungoverned tools that explain LLM tool-selection noise in gateway E2E. UNGATED_DISTRACTOR_TOOLS = frozenset( - {"vision_analyze", "execute_code", "skill_manage"} + {"vision_analyze", "skill_manage"} ) # Individual toolsets that must be enabled when api_server uses the default composite. diff --git a/tests/hermes_governance_fixtures.py b/tests/hermes_governance_fixtures.py index 8ff8da3..6596421 100644 --- a/tests/hermes_governance_fixtures.py +++ b/tests/hermes_governance_fixtures.py @@ -27,6 +27,9 @@ GATEWAY_E2E_PROBE_SYMBOLS: dict[str, frozenset[str]] = { "terminal": frozenset({"run_allow_with_retries", "run_block_once"}), + "execute_code": frozenset( + {"run_execute_code_allow_with_retries", "run_execute_code_block_once"} + ), "write_file": frozenset( {"run_write_file_allow_with_retries", "run_write_file_block_once"} ), diff --git a/tests/hermes_plugin/test_bridge_gate_live.py b/tests/hermes_plugin/test_bridge_gate_live.py index 31a3edc..46f96a2 100644 --- a/tests/hermes_plugin/test_bridge_gate_live.py +++ b/tests/hermes_plugin/test_bridge_gate_live.py @@ -18,6 +18,8 @@ from _loader import load_plugin_module # noqa: E402 from live_fixtures import ( # noqa: E402 CRONJOB_SEMANTIC_ARGS, + EXECUTE_CODE_ALLOW_ARGS, + EXECUTE_CODE_BLOCK_ARGS, PATCH_ALLOW_REPLACE_ARGS, PATCH_BLOCK_REPLACE_ARGS, PATCH_V4A_BLOCK_ARGS, @@ 
-86,6 +88,14 @@ def test_block_terminal(self) -> None: self.assertEqual(body["status"], "blocked") delegate.assert_not_called() + def test_execute_code_semantic(self) -> None: + delegate = MagicMock(return_value='{"status": "ok"}') + self._assert_semantic_gate("execute_code", EXECUTE_CODE_ALLOW_ARGS, delegate=delegate) + + def test_block_execute_code(self) -> None: + delegate = MagicMock() + self._assert_blocked("execute_code", EXECUTE_CODE_BLOCK_ARGS, delegate=delegate) + def test_allow_write_file(self) -> None: delegate = MagicMock(return_value='{"status": "ok"}') self._assert_allowed("write_file", WRITE_ALLOW_ARGS, delegate=delegate) diff --git a/tests/hermes_plugin/test_gate.py b/tests/hermes_plugin/test_gate.py index 5ff3868..353cbae 100644 --- a/tests/hermes_plugin/test_gate.py +++ b/tests/hermes_plugin/test_gate.py @@ -5,6 +5,7 @@ import json import sys +import types import unittest from pathlib import Path from typing import Any @@ -22,6 +23,7 @@ schema_mod = load_plugin_module("schema") gate_mod = load_plugin_module("gate") governance_mod = load_plugin_module("governance_loader") +tool_defs_mod = load_plugin_module("tool_definitions_hook") class FakeValidator: @@ -75,6 +77,95 @@ def test_inject_reason_idempotent(self) -> None: ) +class TestToolDefinitionsHook(PluginGovernanceEnvMixin, unittest.TestCase): + def test_finalize_governed_tool_schemas_injects_reason_after_dynamic_rebuild(self) -> None: + tool_defs = [ + { + "type": "function", + "function": { + "name": "execute_code", + "description": "Run Python", + "parameters": { + "type": "object", + "properties": {"code": {"type": "string"}}, + "required": ["code"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "vision_analyze", + "parameters": { + "type": "object", + "properties": {"image": {"type": "string"}}, + "required": ["image"], + }, + }, + }, + ] + + finalized = tool_defs_mod.finalize_governed_tool_schemas(tool_defs) + + execute_fn = finalized[0]["function"] + 
self.assertIn("reason", execute_fn["parameters"]["properties"]) + self.assertIn("reason", execute_fn["parameters"]["required"]) + + distractor_fn = finalized[1]["function"] + self.assertNotIn("reason", distractor_fn["parameters"]["properties"]) + + def test_finalize_governed_tool_schemas_is_idempotent(self) -> None: + tool_defs = [ + { + "type": "function", + "function": { + "name": "terminal", + "parameters": { + "type": "object", + "properties": { + "command": {"type": "string"}, + "reason": {"type": "string"}, + }, + "required": ["command", "reason"], + }, + }, + } + ] + + once = tool_defs_mod.finalize_governed_tool_schemas(tool_defs) + twice = tool_defs_mod.finalize_governed_tool_schemas(once) + + required = twice[0]["function"]["parameters"]["required"] + self.assertEqual(required.count("reason"), 1) + + def test_execute_code_schema_hook_injects_reason(self) -> None: + code_exec_mod = types.ModuleType("tools.code_execution_tool") + + def original_build_execute_code_schema(*args: Any, **kwargs: Any) -> dict[str, Any]: + del args, kwargs + return { + "name": "execute_code", + "description": "Run Python", + "parameters": { + "type": "object", + "properties": {"code": {"type": "string"}}, + "required": ["code"], + }, + } + + code_exec_mod.build_execute_code_schema = original_build_execute_code_schema + tools_mod = types.ModuleType("tools") + tools_mod.code_execution_tool = code_exec_mod + sys.modules["tools"] = tools_mod + sys.modules["tools.code_execution_tool"] = code_exec_mod + + tool_defs_mod.install_execute_code_schema_hook() + built = code_exec_mod.build_execute_code_schema({"terminal"}) + + self.assertIn("reason", built["parameters"]["properties"]) + self.assertIn("reason", built["parameters"]["required"]) + + class TestPluginGovernance(PluginGovernanceEnvMixin, unittest.TestCase): def test_plugin_loader_matches_shared_template(self) -> None: ensure_shared_loader_importable() diff --git a/tests/hermes_plugin/test_registry_hook.py 
b/tests/hermes_plugin/test_registry_hook.py index 1e49552..78e99cc 100644 --- a/tests/hermes_plugin/test_registry_hook.py +++ b/tests/hermes_plugin/test_registry_hook.py @@ -18,8 +18,6 @@ registry_hook_mod = load_plugin_module("registry_hook") gate_mod = load_plugin_module("gate") -schema_mod = load_plugin_module("schema") -governance_mod = load_plugin_module("governance_loader") class RegistryEntry: @@ -44,6 +42,20 @@ class FakeRegistry: def __init__(self) -> None: self.entries: dict[str, RegistryEntry] = {} self.register_calls = 0 + self.get_definitions_calls = 0 + + def get_definitions(self, tool_names, *, quiet=False): + del quiet + self.get_definitions_calls += 1 + selected = tool_names if tool_names is not None else self.entries.keys() + return [ + { + "type": "function", + "function": self.entries[name].schema, + } + for name in selected + if name in self.entries + ] def register( self, @@ -101,7 +113,7 @@ def original(args: dict[str, Any], **kw: Any) -> str: ) entry = registry.entries["write_file"] - self.assertIn("reason", entry.schema["parameters"]["properties"]) + self.assertNotIn("reason", entry.schema["parameters"]["properties"]) self.assertTrue(getattr(entry.handler, gate_mod.GATED_MARKER, False)) def test_refresh_reregistration_stays_gated(self) -> None: @@ -123,11 +135,63 @@ def refreshed(args: dict[str, Any], **kw: Any) -> str: registry.register("write_file", "file", schema, refreshed, override=True) entry = registry.entries["write_file"] - self.assertIn("reason", entry.schema["parameters"]["properties"]) + self.assertNotIn("reason", entry.schema["parameters"]["properties"]) self.assertTrue(getattr(entry.handler, gate_mod.GATED_MARKER, False)) self.assertNotEqual(entry.handler, original) self.assertNotEqual(entry.handler, refreshed) + def test_install_registry_hook_does_not_import_model_tools(self) -> None: + registry = FakeRegistry() + registry_mod = types.ModuleType("tools.registry") + registry_mod.registry = registry + 
sys.modules["tools.registry"] = registry_mod + + self.assertNotIn("model_tools", sys.modules) + registry_hook_mod.install_registry_hook() + self.assertNotIn("model_tools", sys.modules) + + def test_get_definitions_injects_reason_for_governed_tools(self) -> None: + registry = FakeRegistry() + registry.entries["write_file"] = RegistryEntry( + name="write_file", + handler=lambda args, **kw: "ok", + schema={ + "name": "write_file", + "parameters": { + "type": "object", + "properties": {"path": {"type": "string"}}, + "required": ["path"], + }, + }, + ) + registry.entries["vision_analyze"] = RegistryEntry( + name="vision_analyze", + handler=lambda args, **kw: "ok", + schema={ + "name": "vision_analyze", + "parameters": { + "type": "object", + "properties": {"image": {"type": "string"}}, + "required": ["image"], + }, + }, + ) + + registry_mod = types.ModuleType("tools.registry") + registry_mod.registry = registry + sys.modules["tools.registry"] = registry_mod + + registry_hook_mod.install_registry_hook() + tool_defs = registry.get_definitions({"write_file", "vision_analyze"}) + by_name = {td["function"]["name"]: td["function"] for td in tool_defs} + + write_fn = by_name["write_file"] + self.assertIn("reason", write_fn["parameters"]["properties"]) + self.assertIn("reason", write_fn["parameters"]["required"]) + + vision_fn = by_name["vision_analyze"] + self.assertNotIn("reason", vision_fn["parameters"]["properties"]) + def main() -> int: loader = unittest.TestLoader() diff --git a/tests/hermes_tool_probes.py b/tests/hermes_tool_probes.py index 55be7be..244971c 100644 --- a/tests/hermes_tool_probes.py +++ b/tests/hermes_tool_probes.py @@ -108,3 +108,18 @@ def patch_v4a_block_args(*, marker: str, reason: str = "E2E V4A patch update hom def cronjob_semantic_args(*, reason: str = "List scheduled jobs for audit") -> dict[str, str]: """Low-risk cronjob intent — AE/Guardian outcome is semantic (ALLOW or BLOCK).""" return {"action": "list", "reason": reason} + + +def 
execute_code_allow_args(*, marker: str, reason: str | None = None) -> dict[str, str]: + return { + "code": f"print({marker!r})", + "reason": reason or "Live execute_code allow test", + } + + +def execute_code_block_args(*, reason: str = "E2E execute_code block probe") -> dict[str, str]: + # Harmless shell command blocked via the ``sudo`` pattern (mirrors terminal E2E). + return { + "code": 'import subprocess\nsubprocess.run("sudo echo intentframe-e2e-block-probe")', + "reason": reason, + }