Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### Improvements
- Build and publish Windows ARM64 `win_arm64` wheels for CPython 3.10 through 3.14, including the free-threaded 3.14 build. Closes [#785](https://github.com/ClickHouse/clickhouse-connect/issues/785).
- Server errors now expose structured fields on the raised exception. `DatabaseError` and `OperationalError` carry a numeric `code` attribute with the ClickHouse error code and a `name` attribute with the symbolic name such as `UNKNOWN_TABLE`, so callers can branch on `exc.code` instead of parsing the message string. `code` is set even when `show_clickhouse_errors` is disabled. `name` is only set when error detail is enabled. Both default to `None` when unavailable, such as on transport errors. Closes [#786](https://github.com/ClickHouse/clickhouse-connect/issues/786).

### Bug Fixes
- Large query parameter payloads are now automatically sent as form data in the request body instead of the URL query string. Server-side bind parameters were urlencoded into the request URL, so a large `IN` list or a high-dimensional vector embedding could produce a URL that HTTP intermediaries such as nginx, AWS ALB, and CloudFront reject with HTTP 414. The client now routes parameters to the POST body once their encoded length passes a threshold, which keeps the URL small. Setting `form_encode_query_params=True` still forces form encoding for all queries. Queries using binary parameter binds are never promoted automatically and only use form encoding when the flag is set. This does not change the server's per-value size limit, which is governed by `http_max_field_value_size`. Applies to both sync and async clients. Closes [#740](https://github.com/ClickHouse/clickhouse-connect/issues/740).
Expand Down
29 changes: 20 additions & 9 deletions clickhouse_connect/driver/asyncclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,14 @@
from clickhouse_connect.driver.compression import available_compression
from clickhouse_connect.driver.constants import CH_VERSION_WITH_PROTOCOL, PROTOCOL_VERSION_WITH_LOW_CARD
from clickhouse_connect.driver.ctypes import RespBuffCls
from clickhouse_connect.driver.exceptions import DatabaseError, DataError, OperationalError, ProgrammingError
from clickhouse_connect.driver.exceptions import (
DatabaseError,
DataError,
OperationalError,
ProgrammingError,
error_code_from_header,
error_name_from_body,
)
from clickhouse_connect.driver.external import ExternalData
from clickhouse_connect.driver.insert import InsertContext
from clickhouse_connect.driver.models import ColumnDef, SettingDef
Expand Down Expand Up @@ -1892,26 +1899,29 @@ async def _error_handler(self, response: aiohttp.ClientResponse, retried: bool =
"""
try:
body = ""
full_body = ""
try:
raw_body = await response.read()
encoding = response.headers.get("Content-Encoding")
loop = asyncio.get_running_loop()

if encoding:
loop = asyncio.get_running_loop()

def decompress_and_decode():
decompressed = decompress_response(raw_body, encoding)
return common.format_error(decompressed.decode(errors="backslashreplace")).strip()
return decompress_response(raw_body, encoding).decode(errors="backslashreplace")

body = await loop.run_in_executor(None, decompress_and_decode)
full_body = await loop.run_in_executor(None, decompress_and_decode)
else:
loop = asyncio.get_running_loop()
body = await loop.run_in_executor(None, lambda: common.format_error(raw_body.decode(errors="backslashreplace")).strip())
full_body = await loop.run_in_executor(None, lambda: raw_body.decode(errors="backslashreplace"))
body = common.format_error(full_body).strip()
except Exception:
logger.warning("Failed to read error response body", exc_info=True)

err_code = response.headers.get(ex_header)
code = error_code_from_header(err_code)
name = error_name_from_body(full_body) if self.show_clickhouse_errors else None

if self.show_clickhouse_errors:
err_code = response.headers.get(ex_header)
if err_code:
err_str = f"Received ClickHouse exception, code: {err_code}"
else:
Expand All @@ -1927,7 +1937,8 @@ def decompress_and_decode():
finally:
response.close()

raise OperationalError(err_str) if retried else DatabaseError(err_str) from None
err_type = OperationalError if retried else DatabaseError
raise err_type(err_str, code=code, name=name) from None

async def _raw_request(
self,
Expand Down
31 changes: 30 additions & 1 deletion clickhouse_connect/driver/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,26 @@
libraries. In most cases docstring are taken from the DBIApi 2.0 documentation
"""

import re

_error_name_re = re.compile(r"\(([A-Z][A-Z0-9_]+)\)")


def error_code_from_header(header_value: str | None) -> int | None:
"""Parse the numeric ClickHouse error code from the exception header value."""
if not header_value:
return None
try:
return int(header_value)
except (TypeError, ValueError):
return None


def error_name_from_body(body: str | None) -> str | None:
"""Extract the symbolic ClickHouse error name (e.g. UNKNOWN_TABLE) from a response body."""
matches = _error_name_re.findall(body or "")
return matches[-1] if matches else None


class ClickHouseError(Exception):
"""Exception related to operation with ClickHouse."""
Expand All @@ -16,7 +36,16 @@ class Warning(Warning, ClickHouseError): # noqa: N818

class Error(ClickHouseError):
"""Exception that is the base class of all other error exceptions
(not Warning)."""
(not Warning).

`code` is the numeric ClickHouse server error code when known, `name` the symbolic
name. Both are None when unavailable, e.g. transport errors or suppressed error detail.
"""

def __init__(self, *args, code: int | None = None, name: str | None = None):
super().__init__(*args)
self.code = code
self.name = name


class InterfaceError(Error):
Expand Down
20 changes: 16 additions & 4 deletions clickhouse_connect/driver/httpclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@
from clickhouse_connect.driver.common import coerce_bool, coerce_int, dict_add, dict_copy
from clickhouse_connect.driver.compression import available_compression
from clickhouse_connect.driver.ctypes import RespBuffCls
from clickhouse_connect.driver.exceptions import DatabaseError, OperationalError, ProgrammingError
from clickhouse_connect.driver.exceptions import (
DatabaseError,
OperationalError,
ProgrammingError,
error_code_from_header,
error_name_from_body,
)
from clickhouse_connect.driver.external import ExternalData
from clickhouse_connect.driver.httputil import (
ResponseSource,
Expand Down Expand Up @@ -499,14 +505,19 @@ def _error_handler(self, response: HTTPResponse, retried: bool = False) -> None:
"""
try:
body = ""
full_body = ""
try:
raw_body = get_response_data(response)
body = common.format_error(raw_body.decode(errors="backslashreplace")).strip()
full_body = raw_body.decode(errors="backslashreplace")
body = common.format_error(full_body).strip()
except Exception:
logger.warning("Failed to read error response body", exc_info=True)

err_code = response.headers.get(ex_header)
code = error_code_from_header(err_code)
name = error_name_from_body(full_body) if self.show_clickhouse_errors else None

if self.show_clickhouse_errors:
err_code = response.headers.get(ex_header)
if err_code:
err_str = f"Received ClickHouse exception, code: {err_code}"
else:
Expand All @@ -522,7 +533,8 @@ def _error_handler(self, response: HTTPResponse, retried: bool = False) -> None:
finally:
response.close()

raise OperationalError(err_str) if retried else DatabaseError(err_str) from None
err_type = OperationalError if retried else DatabaseError
raise err_type(err_str, code=code, name=name) from None

def _raw_request(
self,
Expand Down
7 changes: 7 additions & 0 deletions tests/integration_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ def test_command(param_client, call):
assert int(version.split(".")[0]) >= 19


def test_query_error_exposes_structured_code(param_client, call):
with pytest.raises(DatabaseError) as excinfo:
call(param_client.query, "SELECT * FROM does_not_exist_tbl_xyz")
assert excinfo.value.code == 60
assert excinfo.value.name == "UNKNOWN_TABLE"


def test_client_name(param_client, client_mode):
user_agent = param_client.headers["User-Agent"]
assert "test" in user_agent or "param" in user_agent
Expand Down
66 changes: 66 additions & 0 deletions tests/unit_tests/test_driver/test_exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from clickhouse_connect.driver.exceptions import (
DatabaseError,
Error,
OperationalError,
StreamClosedError,
error_code_from_header,
error_name_from_body,
)

UNKNOWN_TABLE_BODY = (
"Code: 60. DB::Exception: Unknown table expression identifier "
"'non_existent_table' in scope SELECT * FROM non_existent_table. "
"(UNKNOWN_TABLE) (version 26.2.4.23 (official build))"
)


class TestErrorCodeFromHeader:
def test_parses_numeric_code(self):
assert error_code_from_header("60") == 60

def test_none_header(self):
assert error_code_from_header(None) is None

def test_empty_header(self):
assert error_code_from_header("") is None

def test_non_numeric_header(self):
assert error_code_from_header("not-a-number") is None


class TestErrorNameFromBody:
def test_extracts_symbolic_name(self):
assert error_name_from_body(UNKNOWN_TABLE_BODY) == "UNKNOWN_TABLE"

def test_picks_error_name_over_version_token(self):
body = "DB::Exception: limit reached (MEMORY_LIMIT_EXCEEDED) (version 26.2.4.23)"
assert error_name_from_body(body) == "MEMORY_LIMIT_EXCEEDED"

def test_ignores_camelcase_type_tokens(self):
assert error_name_from_body("bad cast from (UInt64) value") is None

def test_empty_body(self):
assert error_name_from_body("") is None

def test_none_body(self):
assert error_name_from_body(None) is None


class TestErrorFields:
def test_carries_code_and_name(self):
exc = DatabaseError("boom", code=60, name="UNKNOWN_TABLE")
assert str(exc) == "boom"
assert exc.code == 60
assert exc.name == "UNKNOWN_TABLE"
assert isinstance(exc, Error)

def test_defaults_to_none(self):
exc = OperationalError("network down")
assert str(exc) == "network down"
assert exc.code is None
assert exc.name is None

def test_subclass_with_custom_init(self):
exc = StreamClosedError()
assert exc.code is None
assert exc.name is None
117 changes: 117 additions & 0 deletions tests/unit_tests/test_driver/test_httpclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pytest

from clickhouse_connect import common
from clickhouse_connect.driver import create_async_client, create_client
from clickhouse_connect.driver.asyncclient import AsyncClient
from clickhouse_connect.driver.client import Client
Expand Down Expand Up @@ -170,6 +171,77 @@ async def test_explicit_headers_override_dsn_headers_query_param(self):
assert client.headers["X-Gateway"] == "cloudflare"


class TestAsyncClientErrorHandler:
"""Test the error handling functionality of AsyncClient"""

@staticmethod
def make_client():
client = AsyncClient(
interface="http",
host="localhost",
port=8123,
username="default",
password="",
database="default",
)
client.url = "http://localhost:8123"
client.show_clickhouse_errors = True
return client

@staticmethod
def make_response(status=500, headers=None, data=b""):
response = Mock()
response.status = status
response.headers = headers or {}
response.read = AsyncMock(return_value=data)
response.close = Mock()
return response

@pytest.mark.asyncio
async def test_error_handler_sets_structured_code_and_name(self):
client = self.make_client()
response = self.make_response(
status=404,
headers={ex_header: "60"},
data=b"Code: 60. DB::Exception: Unknown table 'x'. (UNKNOWN_TABLE) (version 26.2.4.23)",
)

with pytest.raises(DatabaseError) as excinfo:
await client._error_handler(response)

assert excinfo.value.code == 60
assert excinfo.value.name == "UNKNOWN_TABLE"
assert "server response:" in str(excinfo.value)
response.close.assert_called_once()

@pytest.mark.asyncio
async def test_error_handler_code_set_when_errors_disabled(self):
client = self.make_client()
client.show_clickhouse_errors = False
response = self.make_response(
status=404,
headers={ex_header: "60"},
data=b"Code: 60. DB::Exception: Unknown table 'x'. (UNKNOWN_TABLE)",
)

with pytest.raises(DatabaseError) as excinfo:
await client._error_handler(response)

assert excinfo.value.code == 60
assert excinfo.value.name is None
assert "UNKNOWN_TABLE" not in str(excinfo.value)

@pytest.mark.asyncio
async def test_error_handler_retried_raises_operational_error(self):
client = self.make_client()
response = self.make_response(status=503, headers={ex_header: "159"}, data=b"timeout")

with pytest.raises(OperationalError) as excinfo:
await client._error_handler(response, retried=True)

assert excinfo.value.code == 159


class TestHttpClientErrorHandler:
"""Test the error handling functionality of HttpClient"""

Expand Down Expand Up @@ -207,8 +279,50 @@ def test_error_handler_with_exception_code(self):
assert "Received ClickHouse exception, code: 99" in error_msg
assert "server response: Error executing query" in error_msg
assert self.client.url in error_msg
assert excinfo.value.code == 99
response.close.assert_called_once()

def test_error_handler_sets_structured_code_and_name(self):
"""Code and name are exposed as attributes parsed from the header and body"""
response = create_mock_response(
status=404,
headers={ex_header: "60"},
data=b"Code: 60. DB::Exception: Unknown table 'x'. (UNKNOWN_TABLE) (version 26.2.4.23)",
)

with pytest.raises(DatabaseError) as excinfo:
self.client._error_handler(response)

assert excinfo.value.code == 60
assert excinfo.value.name == "UNKNOWN_TABLE"

def test_error_handler_code_none_without_header(self):
"""Code is None when the server sends no exception header"""
response = create_mock_response(status=503, data=b"Service unavailable")

with pytest.raises(DatabaseError) as excinfo:
self.client._error_handler(response)

assert excinfo.value.code is None
assert excinfo.value.name is None

def test_error_handler_name_parsed_before_truncation(self):
"""name is parsed from the full body even when max_error_size truncates the message"""
long_body = "Code: 62. DB::Exception: " + ("x" * 400) + " (SYNTAX_ERROR) (version 26.2.4.23)"
response = create_mock_response(status=400, headers={ex_header: "62"}, data=long_body.encode())

original = common.get_setting("max_error_size")
common.set_setting("max_error_size", 100)
try:
with pytest.raises(DatabaseError) as excinfo:
self.client._error_handler(response)
finally:
common.set_setting("max_error_size", original)

assert excinfo.value.code == 62
assert excinfo.value.name == "SYNTAX_ERROR"
assert "SYNTAX_ERROR" not in str(excinfo.value) # truncated out of the displayed message

def test_error_handler_without_exception_code(self):
"""Test error handling when only HTTP status is available"""

Expand Down Expand Up @@ -261,6 +375,9 @@ def test_error_handler_with_errors_disabled(self):
assert "The ClickHouse server returned an error (for url http://localhost:8123)" in error_msg
assert "Invalid query" not in error_msg # Should not include the body
assert "99" not in error_msg # Should not include the exception code
# Numeric code is still exposed structurally, but the body-derived name is suppressed
assert excinfo.value.code == 99
assert excinfo.value.name is None
response.close.assert_called_once()

def test_error_handler_with_unicode_decode_error(self):
Expand Down