diff --git a/CHANGELOG.md b/CHANGELOG.md index 78caa298..498dcd03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Improvements - Build and publish Windows ARM64 `win_arm64` wheels for CPython 3.10 through 3.14, including the free-threaded 3.14 build. Closes [#785](https://github.com/ClickHouse/clickhouse-connect/issues/785). +- Server errors now expose structured fields on the raised exception. `DatabaseError` and `OperationalError` carry a numeric `code` attribute with the ClickHouse error code and a `name` attribute with the symbolic name such as `UNKNOWN_TABLE`, so callers can branch on `exc.code` instead of parsing the message string. `code` is set even when `show_clickhouse_errors` is disabled. `name` is only set when error detail is enabled. Both default to `None` when unavailable, such as on transport errors. Closes [#786](https://github.com/ClickHouse/clickhouse-connect/issues/786). ### Bug Fixes - Large query parameter payloads are now automatically sent as form data in the request body instead of the URL query string. Server-side bind parameters were urlencoded into the request URL, so a large `IN` list or a high-dimensional vector embedding could produce a URL that HTTP intermediaries such as nginx, AWS ALB, and CloudFront reject with HTTP 414. The client now routes parameters to the POST body once their encoded length passes a threshold, which keeps the URL small. Setting `form_encode_query_params=True` still forces form encoding for all queries. Queries using binary parameter binds are never promoted automatically and only use form encoding when the flag is set. This does not change the server's per-value size limit, which is governed by `http_max_field_value_size`. Applies to both sync and async clients. Closes [#740](https://github.com/ClickHouse/clickhouse-connect/issues/740). diff --git a/clickhouse_connect/driver/asyncclient.py b/clickhouse_connect/driver/asyncclient.py index 2d0813c4..b0301df8 100644 --- a/clickhouse_connect/driver/asyncclient.py +++ b/clickhouse_connect/driver/asyncclient.py @@ -41,7 +41,14 @@ from clickhouse_connect.driver.compression import available_compression from clickhouse_connect.driver.constants import CH_VERSION_WITH_PROTOCOL, PROTOCOL_VERSION_WITH_LOW_CARD from clickhouse_connect.driver.ctypes import RespBuffCls -from clickhouse_connect.driver.exceptions import DatabaseError, DataError, OperationalError, ProgrammingError +from clickhouse_connect.driver.exceptions import ( + DatabaseError, + DataError, + OperationalError, + ProgrammingError, + error_code_from_header, + error_name_from_body, +) from clickhouse_connect.driver.external import ExternalData from clickhouse_connect.driver.insert import InsertContext from clickhouse_connect.driver.models import ColumnDef, SettingDef @@ -1892,26 +1899,29 @@ async def _error_handler(self, response: aiohttp.ClientResponse, retried: bool = """ try: body = "" + full_body = "" try: raw_body = await response.read() encoding = response.headers.get("Content-Encoding") + loop = asyncio.get_running_loop() if encoding: - loop = asyncio.get_running_loop() def decompress_and_decode(): - decompressed = decompress_response(raw_body, encoding) - return common.format_error(decompressed.decode(errors="backslashreplace")).strip() + return decompress_response(raw_body, encoding).decode(errors="backslashreplace") - body = await loop.run_in_executor(None, decompress_and_decode) + full_body = await loop.run_in_executor(None, decompress_and_decode) else: - loop = asyncio.get_running_loop() - body = await loop.run_in_executor(None, lambda: common.format_error(raw_body.decode(errors="backslashreplace")).strip()) + full_body = await loop.run_in_executor(None, lambda: raw_body.decode(errors="backslashreplace")) + body = common.format_error(full_body).strip() except Exception: logger.warning("Failed to read error response body", exc_info=True) + err_code = response.headers.get(ex_header) + code = error_code_from_header(err_code) + name = error_name_from_body(full_body) if self.show_clickhouse_errors else None + if self.show_clickhouse_errors: - err_code = response.headers.get(ex_header) if err_code: err_str = f"Received ClickHouse exception, code: {err_code}" else: @@ -1927,7 +1937,8 @@ def decompress_and_decode(): finally: response.close() - raise OperationalError(err_str) if retried else DatabaseError(err_str) from None + err_type = OperationalError if retried else DatabaseError + raise err_type(err_str, code=code, name=name) from None async def _raw_request( self, diff --git a/clickhouse_connect/driver/exceptions.py b/clickhouse_connect/driver/exceptions.py index 84009a31..cddcac55 100644 --- a/clickhouse_connect/driver/exceptions.py +++ b/clickhouse_connect/driver/exceptions.py @@ -4,6 +4,26 @@ libraries. In most cases docstring are taken from the DBIApi 2.0 documentation """ +import re + +_error_name_re = re.compile(r"\(([A-Z][A-Z0-9_]+)\)") + + +def error_code_from_header(header_value: str | None) -> int | None: + """Parse the numeric ClickHouse error code from the exception header value.""" + if not header_value: + return None + try: + return int(header_value) + except (TypeError, ValueError): + return None + + +def error_name_from_body(body: str | None) -> str | None: + """Extract the symbolic ClickHouse error name (e.g. UNKNOWN_TABLE) from a response body.""" + matches = _error_name_re.findall(body or "") + return matches[-1] if matches else None + class ClickHouseError(Exception): """Exception related to operation with ClickHouse.""" @@ -16,7 +36,16 @@ class Warning(Warning, ClickHouseError): # noqa: N818 class Error(ClickHouseError): """Exception that is the base class of all other error exceptions - (not Warning).""" + (not Warning). + + `code` is the numeric ClickHouse server error code when known, `name` the symbolic + name. Both are None when unavailable, e.g. transport errors or suppressed error detail. + """ + + def __init__(self, *args, code: int | None = None, name: str | None = None): + super().__init__(*args) + self.code = code + self.name = name class InterfaceError(Error): diff --git a/clickhouse_connect/driver/httpclient.py b/clickhouse_connect/driver/httpclient.py index ebffd0b1..170c2ab6 100644 --- a/clickhouse_connect/driver/httpclient.py +++ b/clickhouse_connect/driver/httpclient.py @@ -24,7 +24,13 @@ from clickhouse_connect.driver.common import coerce_bool, coerce_int, dict_add, dict_copy from clickhouse_connect.driver.compression import available_compression from clickhouse_connect.driver.ctypes import RespBuffCls -from clickhouse_connect.driver.exceptions import DatabaseError, OperationalError, ProgrammingError +from clickhouse_connect.driver.exceptions import ( + DatabaseError, + OperationalError, + ProgrammingError, + error_code_from_header, + error_name_from_body, +) from clickhouse_connect.driver.external import ExternalData from clickhouse_connect.driver.httputil import ( ResponseSource, @@ -499,14 +505,19 @@ def _error_handler(self, response: HTTPResponse, retried: bool = False) -> None: """ try: body = "" + full_body = "" try: raw_body = get_response_data(response) - body = common.format_error(raw_body.decode(errors="backslashreplace")).strip() + full_body = raw_body.decode(errors="backslashreplace") + body = common.format_error(full_body).strip() except Exception: logger.warning("Failed to read error response body", exc_info=True) + err_code = response.headers.get(ex_header) + code = error_code_from_header(err_code) + name = error_name_from_body(full_body) if self.show_clickhouse_errors else None + if self.show_clickhouse_errors: - err_code = response.headers.get(ex_header) if err_code: err_str = f"Received ClickHouse exception, code: {err_code}" else: @@ -522,7 +533,8 @@ def _error_handler(self, response: HTTPResponse, retried: bool = False) -> None: finally: response.close() - raise OperationalError(err_str) if retried else DatabaseError(err_str) from None + err_type = OperationalError if retried else DatabaseError + raise err_type(err_str, code=code, name=name) from None def _raw_request( self, diff --git a/tests/integration_tests/test_client.py b/tests/integration_tests/test_client.py index 4999bce4..43b4a44f 100644 --- a/tests/integration_tests/test_client.py +++ b/tests/integration_tests/test_client.py @@ -45,6 +45,13 @@ def test_command(param_client, call): assert int(version.split(".")[0]) >= 19 +def test_query_error_exposes_structured_code(param_client, call): + with pytest.raises(DatabaseError) as excinfo: + call(param_client.query, "SELECT * FROM does_not_exist_tbl_xyz") + assert excinfo.value.code == 60 + assert excinfo.value.name == "UNKNOWN_TABLE" + + def test_client_name(param_client, client_mode): user_agent = param_client.headers["User-Agent"] assert "test" in user_agent or "param" in user_agent diff --git a/tests/unit_tests/test_driver/test_exceptions.py b/tests/unit_tests/test_driver/test_exceptions.py new file mode 100644 index 00000000..bf49497a --- /dev/null +++ b/tests/unit_tests/test_driver/test_exceptions.py @@ -0,0 +1,66 @@ +from clickhouse_connect.driver.exceptions import ( + DatabaseError, + Error, + OperationalError, + StreamClosedError, + error_code_from_header, + error_name_from_body, +) + +UNKNOWN_TABLE_BODY = ( + "Code: 60. DB::Exception: Unknown table expression identifier " + "'non_existent_table' in scope SELECT * FROM non_existent_table. " + "(UNKNOWN_TABLE) (version 26.2.4.23 (official build))" +) + + +class TestErrorCodeFromHeader: + def test_parses_numeric_code(self): + assert error_code_from_header("60") == 60 + + def test_none_header(self): + assert error_code_from_header(None) is None + + def test_empty_header(self): + assert error_code_from_header("") is None + + def test_non_numeric_header(self): + assert error_code_from_header("not-a-number") is None + + +class TestErrorNameFromBody: + def test_extracts_symbolic_name(self): + assert error_name_from_body(UNKNOWN_TABLE_BODY) == "UNKNOWN_TABLE" + + def test_picks_error_name_over_version_token(self): + body = "DB::Exception: limit reached (MEMORY_LIMIT_EXCEEDED) (version 26.2.4.23)" + assert error_name_from_body(body) == "MEMORY_LIMIT_EXCEEDED" + + def test_ignores_camelcase_type_tokens(self): + assert error_name_from_body("bad cast from (UInt64) value") is None + + def test_empty_body(self): + assert error_name_from_body("") is None + + def test_none_body(self): + assert error_name_from_body(None) is None + + +class TestErrorFields: + def test_carries_code_and_name(self): + exc = DatabaseError("boom", code=60, name="UNKNOWN_TABLE") + assert str(exc) == "boom" + assert exc.code == 60 + assert exc.name == "UNKNOWN_TABLE" + assert isinstance(exc, Error) + + def test_defaults_to_none(self): + exc = OperationalError("network down") + assert str(exc) == "network down" + assert exc.code is None + assert exc.name is None + + def test_subclass_with_custom_init(self): + exc = StreamClosedError() + assert exc.code is None + assert exc.name is None diff --git a/tests/unit_tests/test_driver/test_httpclient.py b/tests/unit_tests/test_driver/test_httpclient.py index 82bba37d..a9b12bbd 100644 --- a/tests/unit_tests/test_driver/test_httpclient.py +++ b/tests/unit_tests/test_driver/test_httpclient.py @@ -4,6 +4,7 @@ import pytest +from clickhouse_connect import common from clickhouse_connect.driver import create_async_client, create_client from clickhouse_connect.driver.asyncclient import AsyncClient from clickhouse_connect.driver.client import Client @@ -170,6 +171,77 @@ async def test_explicit_headers_override_dsn_headers_query_param(self): assert client.headers["X-Gateway"] == "cloudflare" +class TestAsyncClientErrorHandler: + """Test the error handling functionality of AsyncClient""" + + @staticmethod + def make_client(): + client = AsyncClient( + interface="http", + host="localhost", + port=8123, + username="default", + password="", + database="default", + ) + client.url = "http://localhost:8123" + client.show_clickhouse_errors = True + return client + + @staticmethod + def make_response(status=500, headers=None, data=b""): + response = Mock() + response.status = status + response.headers = headers or {} + response.read = AsyncMock(return_value=data) + response.close = Mock() + return response + + @pytest.mark.asyncio + async def test_error_handler_sets_structured_code_and_name(self): + client = self.make_client() + response = self.make_response( + status=404, + headers={ex_header: "60"}, + data=b"Code: 60. DB::Exception: Unknown table 'x'. (UNKNOWN_TABLE) (version 26.2.4.23)", + ) + + with pytest.raises(DatabaseError) as excinfo: + await client._error_handler(response) + + assert excinfo.value.code == 60 + assert excinfo.value.name == "UNKNOWN_TABLE" + assert "server response:" in str(excinfo.value) + response.close.assert_called_once() + + @pytest.mark.asyncio + async def test_error_handler_code_set_when_errors_disabled(self): + client = self.make_client() + client.show_clickhouse_errors = False + response = self.make_response( + status=404, + headers={ex_header: "60"}, + data=b"Code: 60. DB::Exception: Unknown table 'x'. (UNKNOWN_TABLE)", + ) + + with pytest.raises(DatabaseError) as excinfo: + await client._error_handler(response) + + assert excinfo.value.code == 60 + assert excinfo.value.name is None + assert "UNKNOWN_TABLE" not in str(excinfo.value) + + @pytest.mark.asyncio + async def test_error_handler_retried_raises_operational_error(self): + client = self.make_client() + response = self.make_response(status=503, headers={ex_header: "159"}, data=b"timeout") + + with pytest.raises(OperationalError) as excinfo: + await client._error_handler(response, retried=True) + + assert excinfo.value.code == 159 + + class TestHttpClientErrorHandler: """Test the error handling functionality of HttpClient""" @@ -207,8 +279,50 @@ def test_error_handler_with_exception_code(self): assert "Received ClickHouse exception, code: 99" in error_msg assert "server response: Error executing query" in error_msg assert self.client.url in error_msg + assert excinfo.value.code == 99 response.close.assert_called_once() + def test_error_handler_sets_structured_code_and_name(self): + """Code and name are exposed as attributes parsed from the header and body""" + response = create_mock_response( + status=404, + headers={ex_header: "60"}, + data=b"Code: 60. DB::Exception: Unknown table 'x'. (UNKNOWN_TABLE) (version 26.2.4.23)", + ) + + with pytest.raises(DatabaseError) as excinfo: + self.client._error_handler(response) + + assert excinfo.value.code == 60 + assert excinfo.value.name == "UNKNOWN_TABLE" + + def test_error_handler_code_none_without_header(self): + """Code is None when the server sends no exception header""" + response = create_mock_response(status=503, data=b"Service unavailable") + + with pytest.raises(DatabaseError) as excinfo: + self.client._error_handler(response) + + assert excinfo.value.code is None + assert excinfo.value.name is None + + def test_error_handler_name_parsed_before_truncation(self): + """name is parsed from the full body even when max_error_size truncates the message""" + long_body = "Code: 62. DB::Exception: " + ("x" * 400) + " (SYNTAX_ERROR) (version 26.2.4.23)" + response = create_mock_response(status=400, headers={ex_header: "62"}, data=long_body.encode()) + + original = common.get_setting("max_error_size") + common.set_setting("max_error_size", 100) + try: + with pytest.raises(DatabaseError) as excinfo: + self.client._error_handler(response) + finally: + common.set_setting("max_error_size", original) + + assert excinfo.value.code == 62 + assert excinfo.value.name == "SYNTAX_ERROR" + assert "SYNTAX_ERROR" not in str(excinfo.value) # truncated out of the displayed message + def test_error_handler_without_exception_code(self): """Test error handling when only HTTP status is available""" @@ -261,6 +375,9 @@ def test_error_handler_with_errors_disabled(self): assert "The ClickHouse server returned an error (for url http://localhost:8123)" in error_msg assert "Invalid query" not in error_msg # Should not include the body assert "99" not in error_msg # Should not include the exception code + # Numeric code is still exposed structurally, but the body-derived name is suppressed + assert excinfo.value.code == 99 + assert excinfo.value.name is None response.close.assert_called_once() def test_error_handler_with_unicode_decode_error(self):