diff --git a/src/glados/mcp/manager.py b/src/glados/mcp/manager.py index 6c5b9751..9a87bb84 100644 --- a/src/glados/mcp/manager.py +++ b/src/glados/mcp/manager.py @@ -3,6 +3,7 @@ import subprocess import threading import time +import traceback from collections.abc import Iterable from concurrent.futures import TimeoutError as FuturesTimeoutError from dataclasses import dataclass @@ -26,6 +27,37 @@ streamable_http_client = None # type: ignore[assignment] +def _leaf_exceptions(exc: BaseException, depth: int = 0, max_depth: int = 8) -> list[str]: + """Recursively descend ExceptionGroup chains to surface leaf exceptions. + + anyio task groups wrap failures in one or more ``ExceptionGroup`` layers, + so logging the top-level exception only shows "unhandled errors in a + TaskGroup". This flattens that tree to the underlying causes. + + Args: + exc: The exception to unwrap. Any object exposing an ``exceptions`` + attribute (e.g. ``ExceptionGroup``) is treated as a group and + descended; anything else is treated as a leaf. + depth: Current recursion depth (internal; callers use the default). + max_depth: Safety bound on recursion. Beyond it, descent stops and the + current node is reported with an "(unwrap depth exceeded)" prefix + rather than recursing further. + + Returns: + A flat list of ``": "`` strings, one per leaf + exception (or one entry if ``exc`` is not a group). + """ + if depth > max_depth: + return [f"(unwrap depth exceeded) {type(exc).__name__}: {exc}"] + subs = getattr(exc, "exceptions", None) + if subs: + out: list[str] = [] + for sub in subs: + out.extend(_leaf_exceptions(sub, depth + 1, max_depth)) + return out + return [f"{type(exc).__name__}: {exc}"] + + class MCPError(RuntimeError): pass @@ -208,7 +240,15 @@ async def _session_runner(self, config: MCPServerConfig) -> None: except asyncio.CancelledError: break except Exception as exc: - logger.warning(f"MCP: server '{config.name}' connection failed: {exc}") + # Recursively unwrap nested ExceptionGroups (Python 3.11+) so the + # actual leaf exception(s) are visible. + leaves = _leaf_exceptions(exc) + logger.warning( + f"MCP: server '{config.name}' connection failed: leaves={leaves}" + ) + # Full traceback to WARNING so we don't need DEBUG level enabled. + tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__)) + logger.warning(f"MCP: full traceback for '{config.name}':\n{tb}") if self._observability_bus: self._observability_bus.emit( source="mcp", diff --git a/tests/test_mcp_error_visibility.py b/tests/test_mcp_error_visibility.py new file mode 100644 index 00000000..10e71858 --- /dev/null +++ b/tests/test_mcp_error_visibility.py @@ -0,0 +1,35 @@ +"""Unit tests for _leaf_exceptions ExceptionGroup unwrapping. + +These guard the error-visibility helper that flattens anyio's nested +ExceptionGroup wrappers down to the underlying leaf causes, so MCP +connection failures log the real error instead of "unhandled errors in a +TaskGroup". +""" + +from glados.mcp.manager import _leaf_exceptions + + +def test_leaf_exceptions_flat_exception(): + assert _leaf_exceptions(ValueError("boom")) == ["ValueError: boom"] + + +def test_leaf_exceptions_flattens_group_preserving_order(): + eg = ExceptionGroup("grp", [ValueError("a"), KeyError("b")]) + assert _leaf_exceptions(eg) == ["ValueError: a", "KeyError: 'b'"] + + +def test_leaf_exceptions_flattens_nested_groups(): + inner = ExceptionGroup("inner", [RuntimeError("deep")]) + outer = ExceptionGroup("outer", [inner, OSError("shallow")]) + assert _leaf_exceptions(outer) == ["RuntimeError: deep", "OSError: shallow"] + + +def test_leaf_exceptions_respects_depth_cap(): + # A group whose children sit deeper than max_depth must stop descending + # and report the offending node rather than recursing further. + eg = ExceptionGroup("grp", [ValueError("x")]) + out = _leaf_exceptions(eg, max_depth=0) + + assert len(out) == 1 + assert out[0].startswith("(unwrap depth exceeded)") + assert "ValueError" in out[0]