Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
### Improvements
- Build and publish Windows ARM64 `win_arm64` wheels for CPython 3.10 through 3.14, including the free-threaded 3.14 build. Closes [#785](https://github.com/ClickHouse/clickhouse-connect/issues/785).

### Bug Fixes
- Large query parameter payloads are now automatically sent as form data in the request body instead of the URL query string. Server-side bind parameters were urlencoded into the request URL, so a large `IN` list or a high-dimensional vector embedding could produce a URL that HTTP intermediaries such as nginx, AWS ALB, and CloudFront reject with HTTP 414. The client now routes parameters to the POST body once their encoded length passes a threshold, which keeps the URL small. Setting `form_encode_query_params=True` still forces form encoding for all queries. Queries using binary parameter binds are never promoted automatically and only use form encoding when the flag is set. This does not change the server's per-value size limit, which is governed by `http_max_field_value_size`. Applies to both sync and async clients. Closes [#740](https://github.com/ClickHouse/clickhouse-connect/issues/740).

## 1.2.0, 2026-06-08

### Improvements
Expand Down
14 changes: 8 additions & 6 deletions clickhouse_connect/driver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,10 @@ def create_client(
match the server's column definition which means timezone-aware when the column defines a timezone and naive
for bare DateTime columns.
:param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting.
:param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body
instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length
limits. Only available for query operations (not inserts). Default: False
:param form_encode_query_params If True, always send query parameters as form-encoded data in the request body
instead of as URL parameters. When False, large parameter payloads are still automatically sent as form data to
avoid exceeding URL length limits, except for queries using binary parameter binds, which are only form-encoded
when this is True. Only available for query operations (not inserts). Default: False
:return: ClickHouse Connect Client instance
"""
host, username, password, port, database, interface = _parse_connection_params(
Expand Down Expand Up @@ -320,9 +321,10 @@ async def create_async_client(
match the server's column definition which means timezone-aware when the column defines a timezone and naive
for bare DateTime columns.
:param autogenerate_session_id If set, this will override the 'autogenerate_session_id' common setting.
:param form_encode_query_params If True, query parameters will be sent as form-encoded data in the request body
instead of as URL parameters. This is useful for queries with large parameter sets that might exceed URL length
limits. Only available for query operations (not inserts). Default: False
:param form_encode_query_params If True, always send query parameters as form-encoded data in the request body
instead of as URL parameters. When False, large parameter payloads are still automatically sent as form data to
avoid exceeding URL length limits, except for queries using binary parameter binds, which are only form-encoded
when this is True. Only available for query operations (not inserts). Default: False
:return: ClickHouse Connect AsyncClient instance
"""
try:
Expand Down
12 changes: 7 additions & 5 deletions clickhouse_connect/driver/asyncclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from clickhouse_connect.datatypes.registry import get_from_name
from clickhouse_connect.driver import httputil, options, tzutil
from clickhouse_connect.driver.asyncqueue import EOF_SENTINEL, AsyncSyncQueue
from clickhouse_connect.driver.binding import bind_query, quote_identifier
from clickhouse_connect.driver.binding import bind_query, quote_identifier, use_form_encoding
from clickhouse_connect.driver.client import Client, _apply_arrow_tz_policy
from clickhouse_connect.driver.common import StreamContext, coerce_bool, dict_copy
from clickhouse_connect.driver.compression import available_compression
Expand Down Expand Up @@ -580,13 +580,14 @@ async def _query_with_context(self, context: QueryContext) -> QueryResult: # ty
context.block_info = True
params.update(self._validate_settings(context.settings))
context.rename_response_column = self._rename_response_column
use_form = use_form_encoding(context.final_query, context.bind_params, self.form_encode_query_params)

if not context.is_insert and columns_only_re.search(context.uncommented_query):
fmt_json_query = f"{context.final_query}\n FORMAT JSON"
fields = {"query": fmt_json_query}
fields.update(context.bind_params)

if self.form_encode_query_params:
if use_form:
files = {}
if context.external_data:
params.update(context.external_data.query_params)
Expand Down Expand Up @@ -645,7 +646,7 @@ def decompress_and_parse_json():
files = None
data = None

if self.form_encode_query_params:
if use_form:
fields = {"query": final_query}
fields.update(context.bind_params)

Expand Down Expand Up @@ -1125,10 +1126,11 @@ def _prep_raw_query(self, query, parameters, settings, fmt, use_database, extern
files = None
body = None

if external_data and not self.form_encode_query_params and isinstance(final_query, bytes):
use_form = use_form_encoding(final_query, bind_params, self.form_encode_query_params)
if external_data and not use_form and isinstance(final_query, bytes):
raise ProgrammingError("Binary query cannot be placed in URL when using External Data; enable form encoding.")

if self.form_encode_query_params:
if use_form:
files = {}
files["query"] = (None, final_query if isinstance(final_query, str) else final_query.decode())
for k, v in bind_params.items():
Expand Down
25 changes: 25 additions & 0 deletions clickhouse_connect/driver/binding.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from datetime import date, datetime, timezone, tzinfo
from enum import Enum
from typing import Any
from urllib.parse import quote, urlencode

from clickhouse_connect import common
from clickhouse_connect.driver import tzutil
Expand Down Expand Up @@ -192,6 +193,30 @@ def bind_query(
return query, bound_params


# Server-side bind parameters are urlencoded into the request URL. Once the encoded length
# passes this budget the client routes them through multipart form data instead, keeping
# oversized payloads out of the URL where proxies (nginx, ALB, CloudFront) reject them with
# HTTP 414. The threshold leaves ample headroom under common request line limits.
MAX_URL_BIND_PARAM_LENGTH = 4096


def use_form_encoding(query: str | bytes, bind_params: dict[str, str], force_form: bool = False) -> bool:
if force_form:
return True
# Binary binds embed bytes into the query, which the form path cannot round-trip; leave
# those on the default path unless form encoding is explicitly requested.
if isinstance(query, bytes):
return False
if not bind_params:
return False
# Raw length is a lower bound on the encoded length, so large payloads short-circuit
# without materializing the encoded string.
if sum(len(k) + len(str(v)) for k, v in bind_params.items()) > MAX_URL_BIND_PARAM_LENGTH:
Comment thread
joe-clickhouse marked this conversation as resolved.
return True
# Measure with quote so spaces count as %20, matching the longer of the two client encodings.
return len(urlencode(bind_params, quote_via=quote)) > MAX_URL_BIND_PARAM_LENGTH


def format_str(value: str):
return f"'{escape_str(value)}'"

Expand Down
12 changes: 7 additions & 5 deletions clickhouse_connect/driver/httpclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from clickhouse_connect import common
from clickhouse_connect.datatypes import registry
from clickhouse_connect.datatypes.base import ClickHouseType
from clickhouse_connect.driver.binding import bind_query, quote_identifier
from clickhouse_connect.driver.binding import bind_query, quote_identifier, use_form_encoding
from clickhouse_connect.driver.client import Client
from clickhouse_connect.driver.common import coerce_bool, coerce_int, dict_add, dict_copy
from clickhouse_connect.driver.compression import available_compression
Expand Down Expand Up @@ -268,10 +268,11 @@ def _query_with_context(self, context: QueryContext) -> QueryResult:
context.block_info = True
params.update(self._validate_settings(context.settings))
context.rename_response_column = self._rename_response_column
use_form = use_form_encoding(context.final_query, context.bind_params, self.form_encode_query_params)
if not context.is_insert and columns_only_re.search(context.uncommented_query):
# Mirror normal query behavior for form encoding and external data
fmt_json_query = f"{context.final_query}\n FORMAT JSON"
if self.form_encode_query_params:
if use_form:
fields = {"query": fmt_json_query}
fields.update(context.bind_params)
if context.external_data: # Deal with form encoding + external data
Expand Down Expand Up @@ -311,7 +312,7 @@ def _query_with_context(self, context: QueryContext) -> QueryResult:
final_query = self._prep_query(context)
fields = {}
# Setup additional query parameters and body
if self.form_encode_query_params:
if use_form:
body = b""
fields["query"] = final_query
fields.update(context.bind_params)
Expand Down Expand Up @@ -687,11 +688,12 @@ def _prep_raw_query(
if use_database and self.database:
params["database"] = self.database
fields = {}
use_form = use_form_encoding(final_query, bind_params, self.form_encode_query_params)
# Setup query body
if external_data and not self.form_encode_query_params and isinstance(final_query, bytes):
if external_data and not use_form and isinstance(final_query, bytes):
raise ProgrammingError("Binary query cannot be placed in URL when using External Data; enable form encoding.")
# Setup additional query parameters and body
if self.form_encode_query_params:
if use_form:
body = b""
fields["query"] = final_query
fields.update(bind_params)
Expand Down
33 changes: 33 additions & 0 deletions tests/integration_tests/test_form_encode_query.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections.abc import Callable

from clickhouse_connect.driver import Client
from clickhouse_connect.driver.binding import bind_query, use_form_encoding


def test_form_encode_query_basic(client_factory, call, table_context: Callable):
Expand Down Expand Up @@ -126,3 +127,35 @@ def test_form_encode_schema_probe_query(client_factory, call, table_context: Cal
assert result.column_names == ("id", "name", "adjusted_value")
assert len(result.column_types) == 3
assert len(result.result_set) == 0


def test_auto_form_encode_large_params(client_factory, call):
    """A default client sends an oversized parameter payload as form data without the flag being set"""
    client = client_factory()
    assert client.form_encode_query_params is False

    id_values = list(range(3000))
    bound_query, bound_params = bind_query("SELECT 1 WHERE 0 IN {ids:Array(UInt64)}", {"ids": id_values})
    # The bound payload is over the URL budget, so the helper must pick the request body
    assert use_form_encoding(bound_query, bound_params) is True

    count_query = "SELECT count() FROM numbers(5000) WHERE number IN {ids:Array(UInt64)}"
    result = call(client.query, count_query, parameters={"ids": id_values})
    assert result.first_row[0] == 3000


def test_auto_form_encode_raw_query_large_params(client_factory, call):
    """raw_query on a default client still succeeds with an oversized parameter payload"""
    client = client_factory()

    id_values = list(range(3000))
    count_query = "SELECT count() FROM numbers(5000) WHERE number IN {ids:Array(UInt64)}"
    raw_result = call(client.raw_query, count_query, parameters={"ids": id_values})
    assert b"3000" in raw_result
39 changes: 38 additions & 1 deletion tests/unit_tests/test_driver/test_binding.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from clickhouse_connect.driver.binding import quote_identifier
from clickhouse_connect.driver.binding import MAX_URL_BIND_PARAM_LENGTH, quote_identifier, use_form_encoding


@pytest.mark.parametrize(
Expand Down Expand Up @@ -41,3 +41,40 @@ def test_quote_identifier_valid_prequoted_passthrough(identifier):
)
def test_quote_identifier_invalid_prequoted_escaped_as_raw(identifier, expected):
assert quote_identifier(identifier) == expected


def test_use_form_encoding_empty():
    """With no bind params, only the force flag selects form encoding"""
    no_params = {}
    assert use_form_encoding("SELECT 1", no_params) is False
    assert use_form_encoding("SELECT 1", no_params, force_form=True) is True


def test_use_form_encoding_force():
    """force_form=True selects form encoding even for a tiny parameter payload"""
    forced = use_form_encoding("SELECT {id:UInt32}", {"param_id": "1"}, force_form=True)
    assert forced is True


def test_use_form_encoding_small_params_stay_in_url():
    """A handful of short params is left in the URL"""
    small_params = {"param_id": "123", "param_name": "abc"}
    promoted = use_form_encoding("SELECT 1", small_params)
    assert promoted is False


def test_use_form_encoding_large_params_promote():
    """A single value one character over the budget is promoted to form data"""
    oversized_value = "x" * (MAX_URL_BIND_PARAM_LENGTH + 1)
    assert use_form_encoding("SELECT {big:String}", {"param_big": oversized_value}) is True


def test_use_form_encoding_total_across_params():
    """The budget applies to the combined payload, not to each parameter on its own"""
    # 40 params of 200 characters each: every one is small, but together they exceed the budget
    names = (f"param_{i}" for i in range(40))
    many_params = dict.fromkeys(names, "v" * 200)
    assert use_form_encoding("SELECT 1", many_params) is True


def test_use_form_encoding_percent_expansion_promotes():
    """A payload under the budget in raw form is promoted once percent-encoding inflates it"""
    # Each space becomes %20, so half a budget of spaces encodes to more than the budget
    spaces = " " * (MAX_URL_BIND_PARAM_LENGTH // 2)
    promoted = use_form_encoding("SELECT 1", {"param_s": spaces})
    assert promoted is True


def test_use_form_encoding_binary_query_not_promoted():
    """A bytes query (binary binds) is only form-encoded when explicitly forced"""
    binary_query = b"SELECT \xff"
    oversized_params = {"param_big": "x" * (MAX_URL_BIND_PARAM_LENGTH + 1)}
    # Auto-promotion must not apply to a bytes query, however large the params are
    assert use_form_encoding(binary_query, oversized_params) is False
    assert use_form_encoding(binary_query, oversized_params, force_form=True) is True
37 changes: 37 additions & 0 deletions tests/unit_tests/test_driver/test_httpclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,25 @@ def test_raw_query_with_form_encode(self, mock_raw_request):
assert "param_id" in fields
assert "param_id" not in params

@patch.object(HttpClient, "_raw_request")
def test_raw_query_auto_form_encode_large_params(self, mock_raw_request):
    """raw_query moves an oversized bind param into the form fields with form_encode_query_params=False"""
    self.client.form_encode_query_params = False

    fake_response = Mock()
    fake_response.data = b"test_result"
    mock_raw_request.return_value = fake_response

    # A 5000 character value is larger than the URL budget for bind parameters
    self.client.raw_query(
        "SELECT * FROM table WHERE name = {name:String}",
        parameters={"name": "x" * 5000},
    )

    sent_body, sent_params, sent_fields = self.extract_raw_request_params(mock_raw_request)
    assert sent_body == b""
    assert "param_name" in sent_fields
    assert "param_name" not in sent_params

@patch.object(HttpClient, "_raw_request")
def test_raw_query_with_external_data_only(self, mock_raw_request):
"""Test raw_query with external_data only (no form_encode)"""
Expand Down Expand Up @@ -671,6 +690,24 @@ def test_query_with_context_form_encode(self, mock_raw_request):
assert "param_id" in fields
assert "param_id" not in params

@patch.object(HttpClient, "_raw_request")
def test_query_with_context_auto_form_encode_large_params(self, mock_raw_request):
    """_query_with_context moves an oversized bind param into the form fields with form_encode_query_params=False"""
    self.client.form_encode_query_params = False

    mock_raw_request.return_value = self.setup_mock_raw_request()
    self.client._transform = Mock()
    self.client._transform.parse_response.return_value = Mock(summary=None)

    # A 5000 character value is larger than the URL budget for bind parameters
    oversized_params = {"param_name": "x" * 5000}
    query_context = self.create_mock_query_context(query="SELECT * FROM table WHERE name = 'x'", bind_params=oversized_params)

    self.client._query_with_context(query_context)

    sent_body, sent_params, sent_fields = self.extract_raw_request_params(mock_raw_request)
    assert sent_body == b""
    assert "param_name" in sent_fields
    assert "param_name" not in sent_params

@patch.object(HttpClient, "_raw_request")
def test_query_with_context_external_data(self, mock_raw_request):
"""Test _query_with_context with external_data only"""
Expand Down