From cb4f34662c88c31cfe99f1622ad06ac110473a46 Mon Sep 17 00:00:00 2001 From: Petr Date: Thu, 4 Jun 2026 22:24:16 +0200 Subject: [PATCH 1/3] feat(workspace): BigQuery support for `workspace query` (0.58.0) The Query Service backend now runs BigQuery (keboola-mcp-server already relies on this). The query execution path was always backend-agnostic -- submit + CSV export are identical for Snowflake and BigQuery -- so a SELECT against a BigQuery workspace already returned rows. The gaps were classification and error legibility: - qs_compatible is now keyed by (backend, loginType). BigQuery workspaces carry loginType `default`, which IS Query-Service-compatible, but `default` was off the Snowflake-only whitelist, so every BigQuery workspace was reported qs_compatible=false and hidden by `workspace list --qs-compatible`. The new QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES_BIGQUERY whitelist is kept separate because Snowflake's legacy `default` is rejected ('JWT token is invalid') -- the same string means compatible for BigQuery but incompatible for Snowflake. - workspace create on BigQuery now requests loginType `default` explicitly (matches keboola-mcp-server) instead of relying on the backend default. - BigQuery query errors are unwrapped: the Query Service serializes them as {Location: ...; Message: "..."; Reason: ...}; _unwrap_bigquery_error extracts the inner Message so the error reads like Snowflake plain text. Verified live against project 9621 (e2e-bigquery, connection.keboola.com): create/list/detail/load/query/delete, real-data query, qs_compatible surfacing, and clean error messages. Tests: TestBigQueryQueryServiceSupport, TestUnwrapBigQueryError + a BigQuery case in TestExtractQueryJobError; test_e2e workspace query is now backend-aware (BigQuery back-tick vs Snowflake double-quote quoting). 
--- .claude-plugin/marketplace.json | 2 +- plugins/kbagent/.claude-plugin/plugin.json | 2 +- .../skills/kbagent/references/gotchas.md | 17 +++- .../kbagent/references/workspace-workflow.md | 2 +- pyproject.toml | 2 +- src/keboola_agent_cli/changelog.py | 31 ++++++ src/keboola_agent_cli/client.py | 35 ++++++- src/keboola_agent_cli/constants.py | 21 ++++ .../services/workspace_service.py | 44 ++++++--- tests/test_client.py | 65 +++++++++++++ tests/test_e2e.py | 22 ++++- tests/test_workspace_service.py | 96 ++++++++++++++++++- uv.lock | 2 +- 13 files changed, 311 insertions(+), 30 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 81c16bcb..65f6cb2c 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -10,7 +10,7 @@ "plugins": [ { "name": "kbagent", - "version": "0.57.0", + "version": "0.58.0", "source": "./plugins/kbagent", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "category": "development" diff --git a/plugins/kbagent/.claude-plugin/plugin.json b/plugins/kbagent/.claude-plugin/plugin.json index f7884767..6a9fb2c7 100644 --- a/plugins/kbagent/.claude-plugin/plugin.json +++ b/plugins/kbagent/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "kbagent", - "version": "0.57.0", + "version": "0.58.0", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "author": { "name": "Keboola", diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index fa04d5d5..c2b8e511 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -260,7 +260,10 @@ plus a derived `qs_compatible: bool`. 
} ``` -**Compatibility whitelist (`constants.QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES`):** +**Compatibility is keyed by (backend, loginType) -- since v0.58.0.** The same +`default` string means opposite things per backend, so there are two whitelists. + +Snowflake (`constants.QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES`): - `snowflake-service-keypair` -- confirmed PASS - `snowflake-person-sso` -- confirmed PASS @@ -268,9 +271,19 @@ plus a derived `qs_compatible: bool`. - `snowflake-legacy-service` -- explicitly OFF the list (works on `connection.keboola.com` but FAILED on GCP us-east4 stack in the original #304 incident -- keep it off until cross-stack confirmation) -- `default` (legacy 2016 workspaces) -- confirmed FAIL +- `default` on Snowflake (legacy 2016 workspaces) -- confirmed FAIL (`JWT token is invalid`) +BigQuery (`constants.QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES_BIGQUERY`): + +- `default` on BigQuery -- confirmed PASS (since v0.58.0). Every BigQuery + workspace carries loginType `default` (the sandbox API exposes no + Snowflake-style variants for BigQuery), and the Query Service runs SELECTs + against it -- verified live against project 9621 on `connection.keboola.com`. + Before v0.58.0, kbagent's whitelist was Snowflake-only, so BigQuery + workspaces were mislabeled `qs_compatible: false` and hidden by + `workspace list --qs-compatible`, even though `workspace query` worked. + `qs_compatible: false` does NOT mean "broken"; it means "not on the confirmed-good whitelist". For an unknown loginType, `workspace list` renders it as `?` (yellow) in the QS column so callers know the policy diff --git a/plugins/kbagent/skills/kbagent/references/workspace-workflow.md b/plugins/kbagent/skills/kbagent/references/workspace-workflow.md index 521fac97..23448827 100644 --- a/plugins/kbagent/skills/kbagent/references/workspace-workflow.md +++ b/plugins/kbagent/skills/kbagent/references/workspace-workflow.md @@ -149,7 +149,7 @@ workspace. 
- **Snowflake**: converts unquoted identifiers to UPPERCASE. Always double-quote database, schema, and table names -- Keboola names are typically lowercase (e.g. `"sapi_901"."in.c-main"."users"`). - **BigQuery**: requires backticks (`` ` ``), not double quotes; the dataset name is normalized to underscores (e.g. `` `in_c_main`.`users` ``). - Easiest path: read `tables[].sql_path` from `bucket-detail` -- it is already correctly quoted for the bucket's backend (since v0.25.3). -- **Query Service**: uses Storage API token for auth -- no Snowflake credentials needed in the query command +- **Query Service**: uses Storage API token for auth -- no warehouse credentials needed in the query command. Backend-agnostic: runs SELECTs against **both Snowflake and BigQuery** workspaces (BigQuery since v0.58.0; the path was always identical, the gap was classification). BigQuery workspaces carry `login_type: "default"` and are `qs_compatible: true` from v0.58.0 -- earlier versions mislabeled them `false`. - **Transactional mode**: add `--transactional` to wrap SQL in a transaction ## Orphan detection + garbage collection (since v0.22.0) diff --git a/pyproject.toml b/pyproject.toml index 805b95a4..1c70639a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "keboola-agent-cli" -version = "0.57.0" +version = "0.58.0" description = "AI-friendly CLI for managing Keboola projects" readme = "README.md" requires-python = ">=3.12" diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index 2e9015af..4758c549 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -24,6 +24,37 @@ # Ordered newest-first. Each value is a list of brief one-line descriptions. CHANGELOG: dict[str, list[str]] = { + "0.58.0": [ + "BigQuery support for `kbagent workspace query` (the Query Service backend now runs BigQuery, " + "as keboola-mcp-server already uses). 
The query *execution* path was always backend-agnostic -- " + "`POST /api/v1/branches/{b}/workspaces/{w}/queries` plus the CSV export are identical for " + "Snowflake and BigQuery -- so a `SELECT` against a BigQuery workspace already returned rows. The " + "gaps were classification and error legibility, fixed here (verified live against project 9621 / " + "e2e-bigquery on connection.keboola.com). (1) `qs_compatible` is now keyed by (backend, " + "loginType), not loginType alone. BigQuery workspaces carry loginType `default`, which IS " + "Query-Service-compatible, but `default` was OFF the Snowflake-only whitelist, so `workspace " + "list`/`detail` reported every BigQuery workspace as `qs_compatible: false` and `workspace list " + "--qs-compatible` hid them all -- misleading data-app developers into thinking BigQuery can't run " + "Query-Service SELECTs. New `QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES_BIGQUERY` whitelist ({`default`}) " + "is kept deliberately separate from the Snowflake set because Snowflake ALSO mints a `default` " + "loginType (legacy 2016 workspaces) that the Query Service REJECTS ('JWT token is invalid') -- so " + "the same string means compatible for BigQuery and incompatible for Snowflake. (2) `workspace " + "create` on a BigQuery project now requests loginType `default` explicitly (the only BigQuery " + "loginType; matches keboola-mcp-server) instead of omitting it and relying on the backend default. " + "(3) BigQuery query errors are unwrapped: the Query Service serializes a failed BigQuery statement " + 'as `{Location: "query"; Message: "Syntax error: ..."; Reason: "invalidQuery"}`; the new ' + "`_unwrap_bigquery_error` extracts the inner `Message` so the red error box reads like Snowflake " + "plain text (Snowflake errors have no wrapper and pass through untouched). 
Layers: backend-aware " + "`_classify_qs_compatibility(login_type, backend)` + `_workspace_login_type_for_backend` " + "(`services/workspace_service.py`); `_unwrap_bigquery_error` + `_BQ_ERROR_MESSAGE_RE` " + "(`client.py`); two new constants (`constants.py`). Tests: " + "`test_workspace_service.py::TestBigQueryQueryServiceSupport` (backend-aware classification, " + "Snowflake-default regression guard, BigQuery login-type, qs-compatible filter) + updated " + "`TestAutoDetectBackend` BigQuery create asserts; `test_client.py::TestUnwrapBigQueryError` + a " + "BigQuery case in `TestExtractQueryJobError`; `test_e2e.py` workspace query is now backend-aware " + "(BigQuery back-tick vs Snowflake double-quote identifier quoting) and `detail` asserts " + "login_type/qs_compatible.", + ], "0.57.0": [ "BREAKING (flow / conditional flows): the `flow` command group now targets " "conditional flows (`keboola.flow`) ONLY; `keboola.orchestrator` support is " diff --git a/src/keboola_agent_cli/client.py b/src/keboola_agent_cli/client.py index 2504310c..1cef3682 100644 --- a/src/keboola_agent_cli/client.py +++ b/src/keboola_agent_cli/client.py @@ -9,6 +9,7 @@ import json import logging +import re import time from collections.abc import Iterator from pathlib import Path @@ -2743,6 +2744,25 @@ def wait_for_query_job(self, query_job_id: str) -> dict[str, Any]: ) +# The Query Service surfaces BigQuery errors as a serialized object string, e.g. +# {Location: "query"; Message: "Syntax error: Unexpected identifier ..."; Reason: "invalidQuery"} +# Pull out the human-readable `Message: "..."` part so a BigQuery failure reads +# like Snowflake's plain text instead of leaking the wrapper into the user's red +# error box. Mirrors keboola-mcp-server's `_BigQueryWorkspace._format_error_message`. +_BQ_ERROR_MESSAGE_RE = re.compile(r'Message:\s*"((?:[^"\\]|\\.)*)"') + + +def _unwrap_bigquery_error(message: str) -> str: + """Extract the inner message from a serialized BigQuery Query-Service error. 
+ + Snowflake errors are plain strings with no ``Message: "..."`` wrapper, so + they pass through unchanged. Only the BigQuery object shape is rewritten. + """ + if message and (match := _BQ_ERROR_MESSAGE_RE.search(message)): + return match.group(1).replace('\\"', '"') + return message + + def _extract_query_job_error(job: dict[str, Any]) -> str: """Pull the most useful warehouse error message out of a failed Query Service job. @@ -2772,14 +2792,19 @@ def _extract_query_job_error(job: dict[str, Any]) -> str: def _as_text(err: Any) -> str: if isinstance(err, str): - return err.strip() - if isinstance(err, dict): + raw = err.strip() + elif isinstance(err, dict): + raw = "" for key in ("message", "error", "detail"): val = err.get(key) if isinstance(val, str) and val.strip(): - return val.strip() - return "" - return str(err).strip() if err is not None else "" + raw = val.strip() + break + else: + raw = str(err).strip() if err is not None else "" + # BigQuery wraps the real message in a serialized object; Snowflake plain + # text passes through untouched. + return _unwrap_bigquery_error(raw) statement_errors: list[str] = [] for i, stmt in enumerate(job.get("statements") or []): diff --git a/src/keboola_agent_cli/constants.py b/src/keboola_agent_cli/constants.py index 6c3680b0..288c0fbe 100644 --- a/src/keboola_agent_cli/constants.py +++ b/src/keboola_agent_cli/constants.py @@ -368,6 +368,11 @@ # default (legacy 2016 ws): FAIL ('JWT token is invalid') # # Extend ONLY after empirical confirmation across at least one non-AWS stack. +# +# This whitelist is SNOWFLAKE-SCOPED. BigQuery compatibility lives in its own +# set below because the `default` loginType means opposite things per backend +# (see BIGQUERY_WORKSPACE_LOGIN_TYPE). Compatibility is therefore keyed by +# (backend, loginType) -- see `_classify_qs_compatibility`. 
SNOWFLAKE_WORKSPACE_LOGIN_TYPE: str = "snowflake-person-keypair" QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES: frozenset[str] = frozenset( { @@ -377,6 +382,22 @@ } ) +# --- BigQuery Query Service compatibility (since v0.58.0) --- +# BigQuery workspaces carry a single `default` loginType -- the sandbox API does +# not expose Snowflake-style variants for BigQuery. The Query Service accepts it: +# verified 2026-06-04 against project 9621 on connection.keboola.com, where a +# `SELECT` against a read-only `default` BigQuery workspace returns rows. +# +# CRITICAL: `default` is on the BigQuery whitelist but deliberately OFF the +# Snowflake one above. Snowflake ALSO mints a `default` loginType (legacy 2016 +# workspaces) which the Query Service REJECTS ('JWT token is invalid'). Keying +# compatibility on loginType alone would wrongly green-light those legacy +# Snowflake workspaces, so `_classify_qs_compatibility` dispatches on backend. +BIGQUERY_WORKSPACE_LOGIN_TYPE: str = "default" +QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES_BIGQUERY: frozenset[str] = frozenset( + {BIGQUERY_WORKSPACE_LOGIN_TYPE} +) + # --- Permission Exit Code --- EXIT_PERMISSION_DENIED: int = 6 # --- Job-timeout Exit Code --- diff --git a/src/keboola_agent_cli/services/workspace_service.py b/src/keboola_agent_cli/services/workspace_service.py index 39dec856..78122766 100644 --- a/src/keboola_agent_cli/services/workspace_service.py +++ b/src/keboola_agent_cli/services/workspace_service.py @@ -12,7 +12,12 @@ from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric import rsa -from ..constants import QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES, SNOWFLAKE_WORKSPACE_LOGIN_TYPE +from ..constants import ( + BIGQUERY_WORKSPACE_LOGIN_TYPE, + QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES, + QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES_BIGQUERY, + SNOWFLAKE_WORKSPACE_LOGIN_TYPE, +) from ..errors import ConfigError, ErrorCode, KeboolaApiError from ..models import ProjectConfig from .base import 
BaseService @@ -28,22 +33,37 @@ class SnowflakeWorkspaceKeyPair: public_pem: str -def _classify_qs_compatibility(login_type: str) -> bool: - """Map a Storage API workspace ``connection.loginType`` to Query-Service compat. +def _classify_qs_compatibility(login_type: str, backend: str) -> bool: + """Map a workspace ``(loginType, backend)`` pair to Query-Service compat. + + Compatibility is keyed by BOTH backend and loginType because the same + ``default`` string means opposite things per backend: a BigQuery workspace's + ``default`` loginType IS Query-Service-compatible (verified against project + 9621 on connection.keboola.com), whereas a Snowflake legacy ``default`` + workspace is NOT ('JWT token is invalid'). See the two whitelists in + ``constants`` for the empirical rationale. Conservative whitelist semantics: returns True only for ``loginType``s confirmed to work with POST /v2/storage/branch/{ID}/workspaces/{WS}/query. - See ``constants.QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES`` for the rationale - behind why ``snowflake-legacy-service`` (issue #304) stays off the list - even though it works on some stacks. + Unknown backends fall through to the Snowflake whitelist (false negatives + over false positives). """ + if backend.lower() == "bigquery": + return login_type in QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES_BIGQUERY return login_type in QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES def _workspace_login_type_for_backend(backend: str) -> str | None: """Return the loginType kbagent should request for newly created workspaces.""" - if backend.lower() == "snowflake": + normalized = backend.lower() + if normalized == "snowflake": return SNOWFLAKE_WORKSPACE_LOGIN_TYPE + if normalized == "bigquery": + # BigQuery's Query-Service-compatible loginType. Omitting it lets the + # backend default to the same value, but requesting it explicitly keeps + # parity with keboola-mcp-server and is robust to a server-side change + # of the implicit default. 
+ return BIGQUERY_WORKSPACE_LOGIN_TYPE return None @@ -478,12 +498,13 @@ def worker( config_id = ws.get("configurationId") or "" component_id = ws.get("component") or "" login_type = connection.get("loginType", "") or "" + backend = connection.get("backend", "") or "" read_only = bool(ws.get("readOnlyStorageAccess", False)) entry = { "project_alias": alias, "id": ws.get("id"), "name": config_names.get(str(config_id), ws.get("name", "")), - "backend": connection.get("backend", ""), + "backend": backend, "host": connection.get("host", ""), "database": connection.get("database", ""), "warehouse": connection.get("warehouse", ""), @@ -494,7 +515,7 @@ def worker( "config_id": config_id, "login_type": login_type, "read_only": read_only, - "qs_compatible": _classify_qs_compatibility(login_type), + "qs_compatible": _classify_qs_compatibility(login_type, backend), } if orphaned_only: if _is_orphaned_workspace(entry, config_names): @@ -647,10 +668,11 @@ def get_workspace( connection = ws_data.get("connection", {}) login_type = connection.get("loginType", "") or "" + backend = connection.get("backend", "") or "" return { "project_alias": alias, "workspace_id": ws_data.get("id"), - "backend": connection.get("backend", ""), + "backend": backend, "host": connection.get("host", ""), "warehouse": connection.get("warehouse", ""), "database": connection.get("database", ""), @@ -659,7 +681,7 @@ def get_workspace( "created": ws_data.get("created", ""), "login_type": login_type, "read_only": bool(ws_data.get("readOnlyStorageAccess", False)), - "qs_compatible": _classify_qs_compatibility(login_type), + "qs_compatible": _classify_qs_compatibility(login_type, backend), "component_id": ws_data.get("component", "") or "", "config_id": ws_data.get("configurationId", "") or "", } diff --git a/tests/test_client.py b/tests/test_client.py index 71ef7c89..2afc7208 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -3010,6 +3010,71 @@ def 
test_returns_explicit_fallback_when_nothing_is_set(self) -> None: extract = self._import() assert extract({"status": "failed"}).startswith("Query execution failed (no error details") + def test_unwraps_bigquery_serialized_error_object(self) -> None: + """BigQuery surfaces failures as a serialized object string; the helper + extracts the inner Message so the user's red error box reads like + Snowflake plain text instead of leaking the wrapper. + + Pinned against the real shape from project 9621 (connection.keboola.com): + ``{Location: "query"; Message: "Syntax error: ..."; Reason: "invalidQuery"}``. + """ + extract = self._import() + job = { + "status": "failed", + "statements": [ + { + "status": "failed", + "error": ( + '{Location: "query"; Message: "Syntax error: SELECT list ' + 'must not be empty at [1:8]"; Reason: "invalidQuery"}' + ), + } + ], + } + msg = extract(job) + assert msg == "Syntax error: SELECT list must not be empty at [1:8]" + assert "Location" not in msg + assert "Reason" not in msg + + +class TestUnwrapBigQueryError: + """Tests for _unwrap_bigquery_error -- pulls the inner ``Message: "..."`` out + of a serialized BigQuery Query-Service error; Snowflake plain text passes + through untouched. Added in v0.58.0 alongside BigQuery query support. 
+ """ + + def _import(self): + from keboola_agent_cli.client import _unwrap_bigquery_error + + return _unwrap_bigquery_error + + def test_extracts_inner_message(self) -> None: + unwrap = self._import() + raw = ( + '{Location: ""; Message: "Not found: Table sapi-9621:WORKSPACE_1.t was ' + 'not found in location US"; Reason: "notFound"}' + ) + assert ( + unwrap(raw) == "Not found: Table sapi-9621:WORKSPACE_1.t was not found in location US" + ) + + def test_snowflake_plain_text_passes_through(self) -> None: + """No ``Message: "..."`` wrapper -> returned verbatim.""" + unwrap = self._import() + raw = ( + "SQL compilation error:\nFunction DATE_TRUNC does not support VARCHAR(10) argument type" + ) + assert unwrap(raw) == raw + + def test_handles_escaped_quotes_in_message(self) -> None: + unwrap = self._import() + raw = '{Message: "Unrecognized name: \\"weird\\" column"; Reason: "invalidQuery"}' + assert unwrap(raw) == 'Unrecognized name: "weird" column' + + def test_empty_string_passes_through(self) -> None: + unwrap = self._import() + assert unwrap("") == "" + class TestWaitForQueueJob: """Tests for wait_for_queue_job -- strategy dispatch, deadline, failure.""" diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 4902a480..26398da6 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -1893,6 +1893,9 @@ def _test_workspace_detail(self, workspace_id: int) -> None: ) detail = data["data"] assert detail["workspace_id"] == workspace_id + # Issue #304 / BigQuery support: detail surfaces login_type + qs_compatible. + assert "login_type" in detail + assert "qs_compatible" in detail def _test_workspace_password(self, workspace_id: int) -> None: """Reset workspace password and verify a new password is returned. 
@@ -1931,10 +1934,25 @@ def _test_workspace_load(self, workspace_id: int, table_id: str) -> None: assert data["status"] == "ok" def _test_workspace_query(self, workspace_id: int, table_id: str) -> None: - """Run a SQL query in the workspace and verify result.""" + """Run a SQL query in the workspace and verify result. + + Identifier quoting is backend-specific: Snowflake uses double quotes, + BigQuery uses back-ticks. Running this E2E against the BigQuery project + (e2e-bigquery, #379+) with Snowflake quoting would fail because BigQuery + reads ``"name"`` as a string literal, not an identifier. + """ # Table name in workspace is the last segment of table_id ws_table_name = table_id.rsplit(".", 1)[-1] - sql = f'SELECT COUNT(*) AS cnt FROM "{ws_table_name}"' + detail = self._run_ok( + "workspace", + "detail", + "--project", + self.alias, + "--workspace-id", + str(workspace_id), + )["data"] + quote = "`" if detail.get("backend") == "bigquery" else '"' + sql = f"SELECT COUNT(*) AS cnt FROM {quote}{ws_table_name}{quote}" data = self._run_ok( "workspace", "query", diff --git a/tests/test_workspace_service.py b/tests/test_workspace_service.py index d9272bd5..dbfa02cb 100644 --- a/tests/test_workspace_service.py +++ b/tests/test_workspace_service.py @@ -258,8 +258,13 @@ def test_create_workspace_snowflake_uses_person_keypair_login_type( assert call_kwargs["login_type"] == "snowflake-person-keypair" assert call_kwargs["public_key"].startswith("-----BEGIN PUBLIC KEY-----") - def test_create_workspace_bigquery_keeps_default_login_type(self, tmp_config_dir: Path) -> None: - """BigQuery sandbox workspaces omit loginType so Storage uses its default.""" + def test_create_workspace_bigquery_requests_default_login_type( + self, tmp_config_dir: Path + ) -> None: + """BigQuery sandbox workspaces request loginType ``default`` -- the only + BigQuery loginType and the one the Query Service accepts (since v0.58.0, + matching keboola-mcp-server). 
No key pair: BigQuery uses service-account + credentials, not RSA keys.""" mock_client = MagicMock() mock_client.verify_token.return_value = SAMPLE_TOKEN_VERIFY_BIGQUERY mock_client.list_dev_branches.return_value = [{"id": 123, "isDefault": True}] @@ -286,7 +291,7 @@ def test_create_workspace_bigquery_keeps_default_login_type(self, tmp_config_dir component_id="keboola.sandboxes", config_id="cfg-1", backend="bigquery", - login_type=None, + login_type="default", public_key=None, ) @@ -349,7 +354,7 @@ def test_create_workspace_auto_detects_bigquery(self, tmp_config_dir: Path) -> N component_id="keboola.sandboxes", config_id="cfg-1", backend="bigquery", - login_type=None, + login_type="default", public_key=None, ) @@ -416,7 +421,7 @@ def test_from_transformation_auto_detects_bigquery(self, tmp_config_dir: Path) - component_id="keboola.snowflake-transformation", config_id="456", backend="bigquery", - login_type=None, + login_type="default", public_key=None, ) @@ -1678,3 +1683,84 @@ def test_detail_exposes_login_type_and_qs_compatible(self, tmp_config_dir: Path) assert result["qs_compatible"] is True assert result["component_id"] == "keboola.sandboxes" assert result["config_id"] == "cfg-42" + + +class TestBigQueryQueryServiceSupport: + """BigQuery Query-Service compatibility added in v0.58.0. + + Verified live against project 9621 on connection.keboola.com: BigQuery + workspaces carry loginType ``default`` and the Query Service runs SELECTs + against them. The fix makes qs_compatibility backend-aware so BigQuery's + ``default`` is whitelisted while Snowflake's legacy ``default`` stays off. + """ + + def test_classify_bigquery_default_is_compatible(self) -> None: + from keboola_agent_cli.services.workspace_service import _classify_qs_compatibility + + assert _classify_qs_compatibility("default", "bigquery") is True + # Backend match is case-insensitive. 
+ assert _classify_qs_compatibility("default", "BigQuery") is True + + def test_classify_snowflake_default_stays_incompatible(self) -> None: + """Regression guard: Snowflake legacy ``default`` must NOT inherit the + BigQuery whitelist -- it is rejected with 'JWT token is invalid'.""" + from keboola_agent_cli.services.workspace_service import _classify_qs_compatibility + + assert _classify_qs_compatibility("default", "snowflake") is False + + def test_classify_bigquery_rejects_snowflake_login_types(self) -> None: + """Backends do not share login types: a Snowflake loginType is not on + the BigQuery whitelist.""" + from keboola_agent_cli.services.workspace_service import _classify_qs_compatibility + + assert _classify_qs_compatibility("snowflake-person-sso", "bigquery") is False + + def test_login_type_for_bigquery_backend_is_default(self) -> None: + from keboola_agent_cli.services.workspace_service import ( + _workspace_login_type_for_backend, + ) + + assert _workspace_login_type_for_backend("bigquery") == "default" + assert _workspace_login_type_for_backend("BigQuery") == "default" + + def test_login_type_for_snowflake_and_unknown_backend(self) -> None: + from keboola_agent_cli.services.workspace_service import ( + _workspace_login_type_for_backend, + ) + + assert _workspace_login_type_for_backend("snowflake") == "snowflake-person-keypair" + assert _workspace_login_type_for_backend("exasol") is None + + def test_list_marks_bigquery_default_workspace_compatible(self, tmp_config_dir: Path) -> None: + """End-to-end through the service: a BigQuery ``default`` RO workspace + is qs_compatible (mirrors the real project-9621 shape).""" + mock_client = MagicMock() + mock_client.list_dev_branches.return_value = [{"id": 1, "isDefault": True}] + mock_client.list_workspaces.return_value = [ + { + "id": 7, + "name": "bq-ro", + "connection": { + "backend": "bigquery", + "schema": "WORKSPACE_7", + "user": '{"type":"service_account","project_id":"sapi-9621"}', + "loginType": 
"default", + }, + "readOnlyStorageAccess": True, + "component": "keboola.sandboxes", + "configurationId": "cfg-7", + }, + ] + mock_client.list_component_configs.return_value = [] + + store = setup_single_project(tmp_config_dir) + svc = WorkspaceService( + config_store=store, + client_factory=lambda url, token: mock_client, + ) + + result = svc.list_workspaces(aliases=["prod"], qs_compatible_only=True) + + ids = [w["id"] for w in result["workspaces"]] + assert ids == [7] + assert result["workspaces"][0]["qs_compatible"] is True diff --git a/uv.lock b/uv.lock index 6f6596f2..b5617c2a 100644 --- a/uv.lock +++ b/uv.lock @@ -580,7 +580,7 @@ wheels = [ [[package]] name = "keboola-agent-cli" -version = "0.57.0" +version = "0.58.0" source = { editable = "." } dependencies = [ { name = "croniter" }, From 4126aa853ae53e816cab886b5bfedfa43146cf72 Mon Sep 17 00:00:00 2001 From: Petr Date: Fri, 5 Jun 2026 01:02:32 +0200 Subject: [PATCH 2/3] docs(workspace): note BigQuery qs_compatible + backend-agnostic query (review NB-1) Addresses the kbagent-pr-reviewer NB-1 finding: the commands-reference.md workspace list/detail/query entries documented the #304 qs_compatible fields but not the v0.58.0 backend-aware change. Without it, an agent on an older install hitting a BigQuery project would get zero `--qs-compatible` results with no explanation. - workspace list / detail: note qs_compatible is keyed by (backend, loginType) and BigQuery `default` workspaces are now qs_compatible=true (Snowflake's own legacy `default` stays false). - workspace query: note it is backend-agnostic since v0.58.0 and the Snowflake double-quote vs BigQuery back-tick dialect difference. 
--- .../kbagent/skills/kbagent/references/commands-reference.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/kbagent/skills/kbagent/references/commands-reference.md b/plugins/kbagent/skills/kbagent/references/commands-reference.md index 486d3a70..e24da331 100644 --- a/plugins/kbagent/skills/kbagent/references/commands-reference.md +++ b/plugins/kbagent/skills/kbagent/references/commands-reference.md @@ -164,12 +164,12 @@ Bucket sharing + linking across projects in the same organization. `sharing edge ## Workspaces (SQL Debugging) - `workspace create --project ALIAS [--name NAME] [--ui] [--read-only]` -- create workspace (headless ~1s, `--ui` ~15s). Since v0.47.1: Snowflake headless workspaces return a `private_key` PEM field; `password` is empty. BigQuery workspaces keep the default password credential shape. -- `workspace list [--project NAME ...] [--orphaned] [--branch ID] [--qs-compatible]` -- list workspaces. `--project` repeatable; `--orphaned` filters to workspaces whose backing `keboola.sandboxes` config is missing. **Since v0.42.0 (#304)**: each entry carries `login_type`, `read_only`, `qs_compatible`, `database`, `warehouse`. New `Login Type` / `RO` / `QS` columns in human mode. `--qs-compatible` pre-filters to RO + whitelisted-loginType workspaces (the canonical data-app shape). `--branch` requires exactly one `--project`; without `--branch`, the command behaves like `storage buckets` and uses production with an `Info: Using production branch for read (active dev branch X ignored; pass --branch X to override)` banner when an alias is pinned to a dev branch -- `workspace detail --project ALIAS --workspace-id ID [--branch ID]` -- show connection details. **Since v0.42.0 (#304)**: response carries `login_type`, `read_only`, `qs_compatible`; human mode adds `Login type:` / `Read-only:` / `Query Service compatible:` rows. `--branch` opt-in mirrors `workspace list` +- `workspace list [--project NAME ...] 
[--orphaned] [--branch ID] [--qs-compatible]` -- list workspaces. `--project` repeatable; `--orphaned` filters to workspaces whose backing `keboola.sandboxes` config is missing. **Since v0.42.0 (#304)**: each entry carries `login_type`, `read_only`, `qs_compatible`, `database`, `warehouse`. New `Login Type` / `RO` / `QS` columns in human mode. `--qs-compatible` pre-filters to RO + whitelisted-loginType workspaces (the canonical data-app shape). **Updated v0.58.0**: `qs_compatible` is keyed by `(backend, loginType)` -- BigQuery workspaces (loginType `default`) now report `qs_compatible: true` and pass `--qs-compatible`; pre-0.58.0 every BigQuery workspace was wrongly excluded (Snowflake's own legacy `default` stays `false`). `--branch` requires exactly one `--project`; without `--branch`, the command behaves like `storage buckets` and uses production with an `Info: Using production branch for read (active dev branch X ignored; pass --branch X to override)` banner when an alias is pinned to a dev branch +- `workspace detail --project ALIAS --workspace-id ID [--branch ID]` -- show connection details. **Since v0.42.0 (#304)**: response carries `login_type`, `read_only`, `qs_compatible`; human mode adds `Login type:` / `Read-only:` / `Query Service compatible:` rows. **Updated v0.58.0**: BigQuery `default` workspaces now report `qs_compatible: true` (was `false`). `--branch` opt-in mirrors `workspace list` - `workspace delete --project ALIAS --workspace-id ID` -- delete workspace - `workspace password --project ALIAS --workspace-id ID` -- reset and return new password - `workspace load --project ALIAS --workspace-id ID --tables TABLE_ID [...] [--preserve]` -- load storage tables -- `workspace query --project ALIAS --workspace-id ID --sql "..." [--file F] [--transactional]` -- run SQL via Query Service +- `workspace query --project ALIAS --workspace-id ID --sql "..." [--file F] [--transactional]` -- run SQL via Query Service. 
**Backend-agnostic since v0.58.0**: runs against both Snowflake and BigQuery workspaces (the path was always identical; BigQuery just needed the classification fix). Mind the dialect: Snowflake quotes identifiers with `"..."`, BigQuery with backticks `` `...` `` - `workspace gc [--project NAME ...] [--dry-run] [--yes]` -- garbage-collect orphaned workspaces (and any lingering `keboola.sandboxes` configs). `--dry-run` previews without deleting; `--project` repeatable, omit to GC across all connected projects - `workspace from-transformation --project ALIAS --component-id ID --config-id ID [--row-id ID]` -- workspace from existing transform From 62c32c81c89a59f57d7ac068142d897465387aa2 Mon Sep 17 00:00:00 2001 From: Petr Date: Fri, 5 Jun 2026 01:31:21 +0200 Subject: [PATCH 3/3] docs(changelog): reformat 0.58.0 per #401 authoring contract + document #401 The 0.58.0 BigQuery note was a single wall-of-text bullet. #401 (merged to main) introduced the `kbagent changelog` one-line-summary view + an authoring contract (one logical change per prefixed bullet, self-contained first sentence). Reformat the entry into New:/Fix:/Change: bullets so the default `kbagent changelog` view stays scannable, and add a bullet documenting the #401 feature itself (it shipped to main without a version bump, so 0.58.0 is its release home). --- src/keboola_agent_cli/changelog.py | 57 +++++++++++++++--------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index 4758c549..a0e7dd1a 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -25,35 +25,34 @@ # Ordered newest-first. Each value is a list of brief one-line descriptions. CHANGELOG: dict[str, list[str]] = { "0.58.0": [ - "BigQuery support for `kbagent workspace query` (the Query Service backend now runs BigQuery, " - "as keboola-mcp-server already uses). 
The query *execution* path was always backend-agnostic -- " - "`POST /api/v1/branches/{b}/workspaces/{w}/queries` plus the CSV export are identical for " - "Snowflake and BigQuery -- so a `SELECT` against a BigQuery workspace already returned rows. The " - "gaps were classification and error legibility, fixed here (verified live against project 9621 / " - "e2e-bigquery on connection.keboola.com). (1) `qs_compatible` is now keyed by (backend, " - "loginType), not loginType alone. BigQuery workspaces carry loginType `default`, which IS " - "Query-Service-compatible, but `default` was OFF the Snowflake-only whitelist, so `workspace " - "list`/`detail` reported every BigQuery workspace as `qs_compatible: false` and `workspace list " - "--qs-compatible` hid them all -- misleading data-app developers into thinking BigQuery can't run " - "Query-Service SELECTs. New `QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES_BIGQUERY` whitelist ({`default`}) " - "is kept deliberately separate from the Snowflake set because Snowflake ALSO mints a `default` " - "loginType (legacy 2016 workspaces) that the Query Service REJECTS ('JWT token is invalid') -- so " - "the same string means compatible for BigQuery and incompatible for Snowflake. (2) `workspace " - "create` on a BigQuery project now requests loginType `default` explicitly (the only BigQuery " - "loginType; matches keboola-mcp-server) instead of omitting it and relying on the backend default. " - "(3) BigQuery query errors are unwrapped: the Query Service serializes a failed BigQuery statement " - 'as `{Location: "query"; Message: "Syntax error: ..."; Reason: "invalidQuery"}`; the new ' - "`_unwrap_bigquery_error` extracts the inner `Message` so the red error box reads like Snowflake " - "plain text (Snowflake errors have no wrapper and pass through untouched). 
Layers: backend-aware " - "`_classify_qs_compatibility(login_type, backend)` + `_workspace_login_type_for_backend` " - "(`services/workspace_service.py`); `_unwrap_bigquery_error` + `_BQ_ERROR_MESSAGE_RE` " - "(`client.py`); two new constants (`constants.py`). Tests: " - "`test_workspace_service.py::TestBigQueryQueryServiceSupport` (backend-aware classification, " - "Snowflake-default regression guard, BigQuery login-type, qs-compatible filter) + updated " - "`TestAutoDetectBackend` BigQuery create asserts; `test_client.py::TestUnwrapBigQueryError` + a " - "BigQuery case in `TestExtractQueryJobError`; `test_e2e.py` workspace query is now backend-aware " - "(BigQuery back-tick vs Snowflake double-quote identifier quoting) and `detail` asserts " - "login_type/qs_compatible.", + "New: `kbagent workspace query` runs SQL against BigQuery workspaces, not just Snowflake. " + "The Query Service path was always backend-agnostic (`POST " + "/api/v1/branches/{b}/workspaces/{w}/queries` + CSV export are identical for both backends), so " + "this was a classification + error-legibility fix rather than a new execution path -- verified " + "live against project 9621 (e2e-bigquery) on connection.keboola.com, including a real-data " + "`workspace load` + `query` round-trip. Mind the dialect: Snowflake quotes identifiers with " + '`"..."`, BigQuery with backticks `` `...` ``.', + "Fix: BigQuery workspaces are no longer mislabeled `qs_compatible: false`. `qs_compatible` is now " + "keyed by (backend, loginType): BigQuery's `default` loginType is whitelisted via the new " + "`QUERY_SERVICE_COMPATIBLE_LOGIN_TYPES_BIGQUERY`, kept separate from the Snowflake whitelist " + "because Snowflake's own legacy `default` is rejected by the Query Service ('JWT token is " + "invalid') -- the same string means compatible for BigQuery and incompatible for Snowflake. 
" + "Pre-0.58.0 every BigQuery workspace was wrongly hidden by `workspace list --qs-compatible` and " + "shown incompatible in `workspace detail`, even though queries ran fine.", + "Change: `workspace create` on a BigQuery project now requests loginType `default` explicitly. " + "It is the only BigQuery loginType and matches keboola-mcp-server, rather than omitting it and " + "relying on the backend default; Snowflake key-pair creation is unchanged.", + "Fix: BigQuery query errors now read as plain text instead of a serialized wrapper. The Query " + 'Service returns a failed BigQuery statement as `{Location: ...; Message: "..."; Reason: ...}`; ' + "the new `_unwrap_bigquery_error` (`client.py`) extracts the inner `Message` so the error box " + "matches Snowflake's plain text (Snowflake errors have no wrapper and pass through untouched). " + "Tests: `TestBigQueryQueryServiceSupport`, `TestUnwrapBigQueryError`, a BigQuery case in " + "`TestExtractQueryJobError`, and a backend-aware `test_e2e.py` workspace query.", + "New (#401): `kbagent changelog` now shows a one-line summary per version by default, with " + "`--full` / `-v` to expand every note. Entries follow an authoring contract -- one logical change " + "per prefixed bullet (`New:`/`Fix:`/`Change:`/...), leading with a self-contained first sentence " + "-- so the default view and the post-update 'What's new' banner stay scannable instead of " + "rendering a wall of text.", ], "0.57.0": [ "BREAKING (flow / conditional flows): the `flow` command group now targets "