From 62f952887a4769ca2ca1d967a0d95d7d68c56eb7 Mon Sep 17 00:00:00 2001 From: Don Bloomfield Date: Wed, 27 May 2026 15:59:15 +1000 Subject: [PATCH] improve(mcp): unwrap ExceptionGroups so real connection errors are visible Errors raised inside streamable_http_client / sse_client / stdio_client get wrapped in one or more ExceptionGroup layers by anyio's task-group machinery. The _session_runner handler only logged '{exc}', which renders as 'unhandled errors in a TaskGroup (1 sub-exception)' - the actual cause is hidden behind the wrapper, sometimes through several nested layers. Add a small _leaf_exceptions() helper that recursively descends ExceptionGroup chains and returns the leaf-level error messages. Log them explicitly in the warning, and emit the full traceback (also at WARNING level so DEBUG logging doesn't need to be enabled) so anyone hitting an MCP connection failure can see the real cause without attaching a debugger. No behaviour change - purely a logging improvement; affects only the failure path. I believe this is worthwhile given the nature of the project for tinkering and MCP integration use-cases. Changes: - Add _leaf_exceptions(exc, depth, max_depth): recursively flattens ExceptionGroup trees to ": " leaf strings, depth-bounded. - _session_runner logs the unwrapped leaves plus the full traceback at WARNING. - Docstring on _leaf_exceptions (Args/Returns + depth-cap behaviour). - tests/test_mcp_error_visibility.py: flat exception, group flattening, nested-group flattening, and the depth cap. Merge note: if landing both MCP fixes, merge the streamable_http_client auth fix (#202, https://github.com/dnhkng/GLaDOS/pull/202) first. It touches the same import block and _session_runner, so there is a small overlap with this branch, but the two resolve manually without much trouble - this change is best applied on top of #202. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/glados/mcp/manager.py | 42 +++++++++++++++++++++++++++++- tests/test_mcp_error_visibility.py | 35 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 tests/test_mcp_error_visibility.py diff --git a/src/glados/mcp/manager.py b/src/glados/mcp/manager.py index 6c5b9751..9a87bb84 100644 --- a/src/glados/mcp/manager.py +++ b/src/glados/mcp/manager.py @@ -3,6 +3,7 @@ import subprocess import threading import time +import traceback from collections.abc import Iterable from concurrent.futures import TimeoutError as FuturesTimeoutError from dataclasses import dataclass @@ -26,6 +27,37 @@ streamable_http_client = None # type: ignore[assignment] +def _leaf_exceptions(exc: BaseException, depth: int = 0, max_depth: int = 8) -> list[str]: + """Recursively descend ExceptionGroup chains to surface leaf exceptions. + + anyio task groups wrap failures in one or more ``ExceptionGroup`` layers, + so logging the top-level exception only shows "unhandled errors in a + TaskGroup". This flattens that tree to the underlying causes. + + Args: + exc: The exception to unwrap. Any object exposing an ``exceptions`` + attribute (e.g. ``ExceptionGroup``) is treated as a group and + descended; anything else is treated as a leaf. + depth: Current recursion depth (internal; callers use the default). + max_depth: Safety bound on recursion. Beyond it, descent stops and the + current node is reported with an "(unwrap depth exceeded)" prefix + rather than recursing further. + + Returns: + A flat list of ``": "`` strings, one per leaf + exception (or one entry if ``exc`` is not a group). + """ + if depth > max_depth: + return [f"(unwrap depth exceeded) {type(exc).__name__}: {exc}"] + subs = getattr(exc, "exceptions", None) + if subs: + out: list[str] = [] + for sub in subs: + out.extend(_leaf_exceptions(sub, depth + 1, max_depth)) + return out + return [f"{type(exc).__name__}: {exc}"] + + class MCPError(RuntimeError): pass @@ -208,7 +240,15 @@ async def _session_runner(self, config: MCPServerConfig) -> None: except asyncio.CancelledError: break except Exception as exc: - logger.warning(f"MCP: server '{config.name}' connection failed: {exc}") + # Recursively unwrap nested ExceptionGroups (Python 3.11+) so the + # actual leaf exception(s) are visible. + leaves = _leaf_exceptions(exc) + logger.warning( + f"MCP: server '{config.name}' connection failed: leaves={leaves}" + ) + # Full traceback to WARNING so we don't need DEBUG level enabled. + tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__)) + logger.warning(f"MCP: full traceback for '{config.name}':\n{tb}") if self._observability_bus: self._observability_bus.emit( source="mcp", diff --git a/tests/test_mcp_error_visibility.py b/tests/test_mcp_error_visibility.py new file mode 100644 index 00000000..10e71858 --- /dev/null +++ b/tests/test_mcp_error_visibility.py @@ -0,0 +1,35 @@ +"""Unit tests for _leaf_exceptions ExceptionGroup unwrapping. + +These guard the error-visibility helper that flattens anyio's nested +ExceptionGroup wrappers down to the underlying leaf causes, so MCP +connection failures log the real error instead of "unhandled errors in a +TaskGroup". +""" + +from glados.mcp.manager import _leaf_exceptions + + +def test_leaf_exceptions_flat_exception(): + assert _leaf_exceptions(ValueError("boom")) == ["ValueError: boom"] + + +def test_leaf_exceptions_flattens_group_preserving_order(): + eg = ExceptionGroup("grp", [ValueError("a"), KeyError("b")]) + assert _leaf_exceptions(eg) == ["ValueError: a", "KeyError: 'b'"] + + +def test_leaf_exceptions_flattens_nested_groups(): + inner = ExceptionGroup("inner", [RuntimeError("deep")]) + outer = ExceptionGroup("outer", [inner, OSError("shallow")]) + assert _leaf_exceptions(outer) == ["RuntimeError: deep", "OSError: shallow"] + + +def test_leaf_exceptions_respects_depth_cap(): + # A group whose children sit deeper than max_depth must stop descending + # and report the offending node rather than recursing further. + eg = ExceptionGroup("grp", [ValueError("x")]) + out = _leaf_exceptions(eg, max_depth=0) + + assert len(out) == 1 + assert out[0].startswith("(unwrap depth exceeded)") + assert "ValueError" in out[0]