From 9be3e3fefecf8bf5e9a260dc2fad389e0990bd23 Mon Sep 17 00:00:00 2001 From: supermario_leo Date: Fri, 22 May 2026 19:05:47 +0800 Subject: [PATCH 1/3] fix(tools): coerce bash tool timeout argument to int The bash tool computes `timeout = min(args.get("timeout") or DEFAULT_TIMEOUT, MAX_TIMEOUT)` outside the handler's try block. The tool schema declares `timeout` as an integer, but model providers occasionally serialize schema-typed integers as JSON strings (e.g. "120"). In that case `min("120", 36000)` raises an uncaught TypeError that aborts the whole agent turn instead of returning a recoverable tool error. Coerce the value with int() and fall back to DEFAULT_TIMEOUT on a non-numeric input, then clamp with MAX_TIMEOUT. Behavior for valid integer input is unchanged. Add tests/unit/test_local_tools.py with regression coverage for string, missing, integer, oversized, oversized-string, and non-numeric timeouts. --- agent/tools/local_tools.py | 8 ++- tests/unit/test_local_tools.py | 96 ++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_local_tools.py diff --git a/agent/tools/local_tools.py b/agent/tools/local_tools.py index 50cd5bd6..ac7efb82 100644 --- a/agent/tools/local_tools.py +++ b/agent/tools/local_tools.py @@ -108,7 +108,13 @@ async def _bash_handler( return "No command provided.", False command = wrap_shell_command_with_hub_artifact_bootstrap(command, session) work_dir = args.get("work_dir", ".") - timeout = min(args.get("timeout") or DEFAULT_TIMEOUT, MAX_TIMEOUT) + # Providers occasionally emit the schema-typed integer `timeout` as a + # string; coerce it so a string value can't raise an uncaught TypeError. + try: + timeout = int(args.get("timeout") or DEFAULT_TIMEOUT) + except (TypeError, ValueError): + timeout = DEFAULT_TIMEOUT + timeout = min(timeout, MAX_TIMEOUT) try: result = subprocess.run( command, diff --git a/tests/unit/test_local_tools.py b/tests/unit/test_local_tools.py new file mode 100644 index 00000000..c4e8fa8c --- /dev/null +++ b/tests/unit/test_local_tools.py @@ -0,0 +1,96 @@ +"""Regression tests for ``_bash_handler`` argument handling. + +The bash tool's JSON schema declares ``timeout`` as an integer, but model +providers occasionally serialize schema-typed integers as JSON strings +(``"120"``). The handler must coerce such values instead of letting +``min(...)`` raise an uncaught ``TypeError`` that kills the agent turn. +""" + +from types import SimpleNamespace + +from agent.tools import local_tools +from agent.tools.local_tools import DEFAULT_TIMEOUT, MAX_TIMEOUT + + +def _fake_run(seen: dict): + """Return a ``subprocess.run`` stand-in that records its kwargs.""" + + def run(command, **kwargs): + seen["command"] = command + seen["kwargs"] = kwargs + return SimpleNamespace(stdout="ok", stderr="", returncode=0) + + return run + + +async def test_bash_timeout_string_arg_does_not_crash(monkeypatch): + """A string ``timeout`` must be coerced, not crash the handler.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + + output, ok = await local_tools._bash_handler( + {"command": "echo hi", "timeout": "30"} + ) + + assert ok is True + assert output == "ok" + assert seen["kwargs"]["timeout"] == 30 + assert isinstance(seen["kwargs"]["timeout"], int) + + +async def test_bash_timeout_none_uses_default(monkeypatch): + """A missing ``timeout`` falls back to ``DEFAULT_TIMEOUT``.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + + output, ok = await local_tools._bash_handler({"command": "echo hi"}) + + assert ok is True + assert seen["kwargs"]["timeout"] == DEFAULT_TIMEOUT + + +async def test_bash_timeout_integer_arg_is_unchanged(monkeypatch): + """A valid integer ``timeout`` is passed through unchanged.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + + await local_tools._bash_handler({"command": "echo hi", "timeout": 300}) + + assert seen["kwargs"]["timeout"] == 300 + + +async def test_bash_timeout_oversized_is_clamped(monkeypatch): + """An oversized ``timeout`` is clamped to ``MAX_TIMEOUT``.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + + await local_tools._bash_handler( + {"command": "echo hi", "timeout": MAX_TIMEOUT + 100_000} + ) + + assert seen["kwargs"]["timeout"] == MAX_TIMEOUT + + +async def test_bash_timeout_oversized_string_is_clamped(monkeypatch): + """An oversized string ``timeout`` is coerced *and* clamped.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + + await local_tools._bash_handler( + {"command": "echo hi", "timeout": str(MAX_TIMEOUT + 100_000)} + ) + + assert seen["kwargs"]["timeout"] == MAX_TIMEOUT + + +async def test_bash_timeout_non_numeric_falls_back_to_default(monkeypatch): + """A non-numeric ``timeout`` falls back to ``DEFAULT_TIMEOUT``.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + + output, ok = await local_tools._bash_handler( + {"command": "echo hi", "timeout": "not-a-number"} + ) + + assert ok is True + assert seen["kwargs"]["timeout"] == DEFAULT_TIMEOUT From 5c654358c17ec8e84877e841865696127886cae4 Mon Sep 17 00:00:00 2001 From: supermario_leo Date: Fri, 22 May 2026 19:08:44 +0800 Subject: [PATCH 2/3] fix(tools): validate bash tool work_dir before running A non-existent `work_dir` reached `subprocess.run(cwd=...)` and surfaced a raw `FileNotFoundError` string. Check the directory up front and return an actionable tool error so the model can correct the path. Extends tests/unit/test_local_tools.py with work_dir coverage. --- agent/tools/local_tools.py | 2 ++ tests/unit/test_local_tools.py | 56 +++++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/agent/tools/local_tools.py b/agent/tools/local_tools.py index ac7efb82..6c6d095e 100644 --- a/agent/tools/local_tools.py +++ b/agent/tools/local_tools.py @@ -108,6 +108,8 @@ async def _bash_handler( return "No command provided.", False command = wrap_shell_command_with_hub_artifact_bootstrap(command, session) work_dir = args.get("work_dir", ".") + if not os.path.isdir(work_dir): + return f"work_dir is not an existing directory: {work_dir}", False # Providers occasionally emit the schema-typed integer `timeout` as a # string; coerce it so a string value can't raise an uncaught TypeError. try: diff --git a/tests/unit/test_local_tools.py b/tests/unit/test_local_tools.py index c4e8fa8c..2f775151 100644 --- a/tests/unit/test_local_tools.py +++ b/tests/unit/test_local_tools.py @@ -1,9 +1,13 @@ """Regression tests for ``_bash_handler`` argument handling. -The bash tool's JSON schema declares ``timeout`` as an integer, but model -providers occasionally serialize schema-typed integers as JSON strings -(``"120"``). The handler must coerce such values instead of letting -``min(...)`` raise an uncaught ``TypeError`` that kills the agent turn. +Two robustness fixes are covered: + +* ``timeout`` — the bash tool's JSON schema declares this as an integer, but + model providers occasionally serialize schema-typed integers as JSON + strings (``"120"``). The handler must coerce such values instead of + letting ``min(...)`` raise an uncaught ``TypeError`` that kills the turn. +* ``work_dir`` — a non-existent ``work_dir`` must produce an actionable tool + error rather than a raw ``OSError`` string. """ from types import SimpleNamespace @@ -94,3 +98,47 @@ async def test_bash_timeout_non_numeric_falls_back_to_default(monkeypatch): assert ok is True assert seen["kwargs"]["timeout"] == DEFAULT_TIMEOUT + + +async def test_bash_rejects_nonexistent_work_dir(monkeypatch): + """A missing ``work_dir`` returns an actionable error, not a raw OSError.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + + output, ok = await local_tools._bash_handler( + {"command": "echo hi", "work_dir": "/no/such/directory/xyz123"} + ) + + assert ok is False + assert "work_dir" in output + assert "/no/such/directory/xyz123" in output + assert "command" not in seen # subprocess.run never reached + + +async def test_bash_rejects_file_as_work_dir(monkeypatch, tmp_path): + """A regular file passed as ``work_dir`` is rejected.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + a_file = tmp_path / "not_a_dir.txt" + a_file.write_text("x") + + output, ok = await local_tools._bash_handler( + {"command": "echo hi", "work_dir": str(a_file)} + ) + + assert ok is False + assert "work_dir" in output + assert "command" not in seen + + +async def test_bash_accepts_valid_work_dir(monkeypatch, tmp_path): + """An existing ``work_dir`` is passed through to ``subprocess.run``.""" + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + + output, ok = await local_tools._bash_handler( + {"command": "echo hi", "work_dir": str(tmp_path)} + ) + + assert ok is True + assert seen["kwargs"]["cwd"] == str(tmp_path) From ebf93dfa5a006425f0c8af79796d5f1b1659e518 Mon Sep 17 00:00:00 2001 From: supermario_leo Date: Fri, 22 May 2026 19:12:19 +0800 Subject: [PATCH 3/3] fix(tools): floor non-positive bash timeout to the default A negative or zero `timeout` (e.g. "-5" or "0") survived coercion and was passed straight to `subprocess.run`, which then timed out instantly with a misleading message. Fall back to DEFAULT_TIMEOUT for any non-positive value. --- agent/tools/local_tools.py | 4 ++++ tests/unit/test_local_tools.py | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/agent/tools/local_tools.py b/agent/tools/local_tools.py index 6c6d095e..2a4a0dda 100644 --- a/agent/tools/local_tools.py +++ b/agent/tools/local_tools.py @@ -112,10 +112,14 @@ async def _bash_handler( return f"work_dir is not an existing directory: {work_dir}", False # Providers occasionally emit the schema-typed integer `timeout` as a # string; coerce it so a string value can't raise an uncaught TypeError. + # A non-positive value would make subprocess.run time out instantly, so + # fall back to the default in that case too. try: timeout = int(args.get("timeout") or DEFAULT_TIMEOUT) except (TypeError, ValueError): timeout = DEFAULT_TIMEOUT + if timeout <= 0: + timeout = DEFAULT_TIMEOUT timeout = min(timeout, MAX_TIMEOUT) try: result = subprocess.run( diff --git a/tests/unit/test_local_tools.py b/tests/unit/test_local_tools.py index 2f775151..9e6098df 100644 --- a/tests/unit/test_local_tools.py +++ b/tests/unit/test_local_tools.py @@ -100,6 +100,15 @@ async def test_bash_timeout_non_numeric_falls_back_to_default(monkeypatch): assert seen["kwargs"]["timeout"] == DEFAULT_TIMEOUT +async def test_bash_timeout_non_positive_falls_back_to_default(monkeypatch): + """A non-positive ``timeout`` must not be passed to ``subprocess.run``.""" + for bad in (-5, "-5", "0"): + seen: dict = {} + monkeypatch.setattr(local_tools.subprocess, "run", _fake_run(seen)) + await local_tools._bash_handler({"command": "echo hi", "timeout": bad}) + assert seen["kwargs"]["timeout"] == DEFAULT_TIMEOUT + + async def test_bash_rejects_nonexistent_work_dir(monkeypatch): """A missing ``work_dir`` returns an actionable error, not a raw OSError.""" seen: dict = {}