diff --git a/src/keboola_agent_cli/commands/_data_app_runtime.py b/src/keboola_agent_cli/commands/_data_app_runtime.py index a4d1d067..c35dfb1d 100644 --- a/src/keboola_agent_cli/commands/_data_app_runtime.py +++ b/src/keboola_agent_cli/commands/_data_app_runtime.py @@ -209,6 +209,19 @@ def data_app_secrets_set( style="yellow", ) + # Plaintext-on-encrypt-failure fallback -- name the secret key-paths written + # in PLAINTEXT (keys only) so the leak is visible. JSON mode carries the same + # list on the envelope via plaintext_written, so warn only in human mode. + plaintext_written = result.get("plaintext_written") + if plaintext_written and not formatter.json_mode: + formatter.warning( + f"{len(plaintext_written)} secret(s) were written in PLAINTEXT (encryption " + f"failed and --allow-plaintext-on-encrypt-failure was set): " + f"{', '.join(plaintext_written)}. Rotate these credentials and re-encrypt " + f"once the Encryption API is reachable -- config version history retains the " + f"plaintext copy." + ) + if no_hint_next and isinstance(result, dict): result.pop("next_step", None) diff --git a/src/keboola_agent_cli/commands/config.py b/src/keboola_agent_cli/commands/config.py index 0df4d798..9a29bb3c 100644 --- a/src/keboola_agent_cli/commands/config.py +++ b/src/keboola_agent_cli/commands/config.py @@ -711,6 +711,7 @@ def config_update( f"{branch_info}" ) _emit_normalizations_warning(formatter, normalizations) + _emit_plaintext_written_warning(formatter, result) def _emit_normalizations_warning(formatter: Any, normalizations: list[dict[str, Any]]) -> None: @@ -735,6 +736,26 @@ def _emit_normalizations_warning(formatter: Any, normalizations: list[dict[str, ) +def _emit_plaintext_written_warning(formatter: Any, result: dict[str, Any]) -> None: + """Surface a plaintext-on-encrypt-failure fallback in human mode. + + When ``--allow-plaintext-on-encrypt-failure`` lets a write proceed despite a + failed encryption, the service result carries ``plaintext_written`` -- the + secret key-paths now stored in PLAINTEXT (key-paths only, never the values). + Name them and the remediation so the leak is visible and actionable. JSON + mode already exposes the same list on the envelope, so emit only here. + """ + leaked = result.get("plaintext_written") + if not leaked: + return + formatter.warning( + f"{len(leaked)} secret(s) were written in PLAINTEXT (encryption failed and " + f"--allow-plaintext-on-encrypt-failure was set): {', '.join(leaked)}. " + f"Rotate these credentials and re-encrypt once the Encryption API is reachable " + f"-- config version history retains the plaintext copy." + ) + + @config_app.command("set-default-bucket", rich_help_panel="Storage") def config_set_default_bucket( ctx: typer.Context, @@ -1417,6 +1438,7 @@ def _render_push_result_human( "[dim]Note: schema validation was skipped " "(empty shell, no schema available, or --no-validate).[/dim]" ) + _emit_plaintext_written_warning(formatter, result) # ── Config metadata commands ─────────────────────────────────────────── @@ -1771,6 +1793,7 @@ def config_variables_set( formatter.output(result) else: _format_variables_set(formatter, result) + _emit_plaintext_written_warning(formatter, result) @config_app.command("variables-get", rich_help_panel="Variables") @@ -2095,6 +2118,7 @@ def config_row_create( f"Created row '{escape(row_name)}' [{row_id}] " f"in {escape(component_id)}/{escape(config_id)}{branch_info}" ) + _emit_plaintext_written_warning(formatter, result) # ── config row-update ────────────────────────────────────────────────────────── @@ -2298,6 +2322,7 @@ def config_row_update( f"Updated row '{escape(updated_name)}' [{row_id}] " f"in {escape(component_id)}/{escape(config_id)}{branch_info}" ) + _emit_plaintext_written_warning(formatter, result) # ── config row-delete ────────────────────────────────────────────────────────── diff --git a/src/keboola_agent_cli/services/config_service.py b/src/keboola_agent_cli/services/config_service.py index b175904c..f4b9fa71 100644 --- a/src/keboola_agent_cli/services/config_service.py +++ b/src/keboola_agent_cli/services/config_service.py @@ -22,7 +22,7 @@ from ..sync.code_extraction import normalize_blocks_codes_script from ..sync.manifest import Manifest, load_manifest, save_manifest from ..sync.naming import sanitize_name -from ._encryption import collect_secrets, encrypt_secrets_in_config +from ._encryption import collect_secrets, encrypt_secrets_in_config, find_plaintext_secret_keys from .base import BaseService, ClientFactory, sanitize_unexpected_error from .workspace_service import find_storage_workspace_for_sandbox_config @@ -622,11 +622,16 @@ def _encrypt_secrets_before_write( # Secrets present but the Encryption API call cannot be scoped. # Fail closed rather than silently write plaintext. if allow_plaintext_fallback: + # GHSA-7jrf: name the exact secret key-paths written in PLAINTEXT + # (keys only -- `secrets` maps flattened path -> value -- never the + # values), consistent with the encryption-failure warning in + # `encrypt_secrets_in_config`. No plaintext-write path stays silent. logger.warning( - "Cannot resolve project_id for %s; writing %d secret(s) as " - "plaintext (allow_plaintext_fallback=True)", + "Cannot resolve project_id for %s; --allow-plaintext-on-encrypt-failure " + "is set, so %d secret value(s) are being written in PLAINTEXT: %s.", component_id, len(secrets), + ", ".join(sorted(secrets)) or "(unable to enumerate)", ) return configuration raise KeboolaApiError( @@ -785,6 +790,12 @@ def update_config( result["project_alias"] = alias result["branch_id"] = effective_branch_id result["normalizations"] = normalizations + # Surface a plaintext-on-encrypt-failure fallback structurally (not just + # via the stderr warning) so --json consumers see the leaked key-paths. + # find_plaintext_secret_keys returns [] when encryption succeeded. + result["plaintext_written"] = ( + find_plaintext_secret_keys(final_config) if final_config else [] + ) return result def _resolve_configuration( @@ -1601,6 +1612,8 @@ def create_config_row( result["project_alias"] = alias result["branch_id"] = effective_branch_id + # Structurally surface any plaintext-fallback leak (empty when encrypted). + result["plaintext_written"] = find_plaintext_secret_keys(row_config) if row_config else [] return result # ── config create (one-shot remote create via `config new --push`) ───────── @@ -1726,6 +1739,10 @@ def create_config( # empty -- but we annotate it anyway so JSON consumers can rely on # the key being present. result["validation_errors"] = validation_errors + # Structurally surface any plaintext-fallback leak (empty when encrypted). + result["plaintext_written"] = ( + find_plaintext_secret_keys(encrypted_config) if encrypted_config else [] + ) return result def _validate_config_body( @@ -1931,6 +1948,10 @@ def update_config_row( result["project_alias"] = alias result["branch_id"] = effective_branch_id + # Structurally surface any plaintext-fallback leak (empty when encrypted). + result["plaintext_written"] = ( + find_plaintext_secret_keys(final_config) if final_config else [] + ) return result def _resolve_row_configuration( diff --git a/src/keboola_agent_cli/services/data_app_service.py b/src/keboola_agent_cli/services/data_app_service.py index ed8d5d1c..59da9175 100644 --- a/src/keboola_agent_cli/services/data_app_service.py +++ b/src/keboola_agent_cli/services/data_app_service.py @@ -1359,6 +1359,11 @@ def set_data_app_secrets( "secrets_set": secrets_set, "secrets_unchanged": unchanged, "shadowed_by_runtime": shadowed, + # Keys whose ciphertext check failed but were written anyway under + # --allow-plaintext-on-encrypt-failure (empty on full encryption). + # Surfaces the plaintext leak structurally in --json, not just the + # stderr warning above. + "plaintext_written": sorted(problems), "config_version_before": old_version, "config_version_after": new_version, "deploy_required": True, diff --git a/src/keboola_agent_cli/services/variables_service.py b/src/keboola_agent_cli/services/variables_service.py index 972ddf97..8dce9100 100644 --- a/src/keboola_agent_cli/services/variables_service.py +++ b/src/keboola_agent_cli/services/variables_service.py @@ -15,10 +15,11 @@ import copy import logging +from dataclasses import dataclass from typing import Any from ..errors import ConfigError, KeboolaApiError -from ._encryption import encrypt_secrets_in_config +from ._encryption import encrypt_secrets_in_config, find_plaintext_secret_keys from .base import BaseService logger = logging.getLogger(__name__) @@ -26,6 +27,34 @@ VARIABLES_COMPONENT_ID = "keboola.variables" +@dataclass(frozen=True) +class _CreatedLinkedVariables: + """Result of the auto-create path -- a new ``keboola.variables`` config + row. + + Named fields replace a positional tuple (CONTRIBUTING.md: multi-value returns + use a dataclass, never a bare tuple beyond two values). + """ + + variables_id: str + values_id: str + values: dict[str, str] + plaintext_written: list[str] + + +@dataclass(frozen=True) +class _UpdatedLinkedVariables: + """Result of the update path -- an existing config's default values row. + + ``plaintext_written`` holds the secret key-paths left unencrypted by an + allowed plaintext-on-encrypt-failure fallback (``[]`` when encryption + succeeded, never the values). + """ + + values_id: str + values: dict[str, str] + plaintext_written: list[str] + + class VariablesService(BaseService): """Assign, read, and detach variables on any Keboola config. @@ -144,11 +173,7 @@ def set_variables( action = "updated" if not linked_vars_id: - ( - linked_vars_id, - linked_values_id, - final_values, - ) = self._create_linked_variables( + created = self._create_linked_variables( client=client, project_id=project_id, parent_name=parent_name, @@ -158,9 +183,13 @@ def set_variables( branch_id=branch_id, allow_plaintext_fallback=allow_plaintext_fallback, ) + linked_vars_id = created.variables_id + linked_values_id = created.values_id + final_values = created.values + plaintext_written = created.plaintext_written action = "created" else: - linked_values_id, final_values = self._update_linked_variables( + updated = self._update_linked_variables( client=client, project_id=project_id, variables_id=linked_vars_id, @@ -170,6 +199,9 @@ def set_variables( branch_id=branch_id, allow_plaintext_fallback=allow_plaintext_fallback, ) + linked_values_id = updated.values_id + final_values = updated.values + plaintext_written = updated.plaintext_written # Ensure the parent config carries the link. Existing-linked path # may no-op; auto-create path always writes. @@ -196,6 +228,9 @@ def set_variables( "action": action, "values": final_values, "encrypted_keys": encrypted_keys, + # Empty unless an allowed plaintext-on-encrypt-failure fallback + # left secret key-paths unencrypted in the row that was written. + "plaintext_written": plaintext_written, } finally: client.close() @@ -258,7 +293,7 @@ def _create_linked_variables( variables: dict[str, str], branch_id: int | None, allow_plaintext_fallback: bool, - ) -> tuple[str, str, dict[str, str]]: + ) -> _CreatedLinkedVariables: """Auto-create path: new variables config + default row, parent not yet linked.""" var_name = (parent_name or parent_config_id) + "-vars" schema = [{"name": k, "type": "string"} for k in variables] @@ -278,6 +313,7 @@ def _create_linked_variables( variables=variables, allow_plaintext_fallback=allow_plaintext_fallback, ) + plaintext_written = find_plaintext_secret_keys(row_config) new_row = client.create_config_row( component_id=VARIABLES_COMPONENT_ID, @@ -287,7 +323,12 @@ def _create_linked_variables( description="Auto-created default row by kbagent", branch_id=branch_id, ) - return variables_id, new_row["id"], dict(variables) + return _CreatedLinkedVariables( + variables_id=variables_id, + values_id=new_row["id"], + values=dict(variables), + plaintext_written=plaintext_written, + ) def _update_linked_variables( self, @@ -300,7 +341,7 @@ def _update_linked_variables( replace: bool, branch_id: int | None, allow_plaintext_fallback: bool, - ) -> tuple[str, dict[str, str]]: + ) -> _UpdatedLinkedVariables: """Update path: parent already linked (or explicit --variables-id). Merge or replace.""" vars_cfg = client.get_config_detail( VARIABLES_COMPONENT_ID, variables_id, branch_id=branch_id @@ -315,6 +356,7 @@ def _update_linked_variables( variables=variables, allow_plaintext_fallback=allow_plaintext_fallback, ) + plaintext_written = find_plaintext_secret_keys(row_config) new_row = client.create_config_row( component_id=VARIABLES_COMPONENT_ID, config_id=variables_id, @@ -329,7 +371,11 @@ def _update_linked_variables( variables=variables, branch_id=branch_id, ) - return new_row["id"], dict(variables) + return _UpdatedLinkedVariables( + values_id=new_row["id"], + values=dict(variables), + plaintext_written=plaintext_written, + ) existing_values = target_row.get("configuration", {}).get("values", []) existing_dict = {v["name"]: v["value"] for v in existing_values} @@ -348,6 +394,7 @@ def _update_linked_variables( variables=final_values, allow_plaintext_fallback=allow_plaintext_fallback, ) + plaintext_written = find_plaintext_secret_keys(row_config) client.update_config_row( component_id=VARIABLES_COMPONENT_ID, @@ -364,7 +411,11 @@ def _update_linked_variables( variables=final_values, branch_id=branch_id, ) - return target_row["id"], final_values + return _UpdatedLinkedVariables( + values_id=target_row["id"], + values=final_values, + plaintext_written=plaintext_written, + ) @staticmethod def _build_encrypted_row_configuration( @@ -380,6 +431,10 @@ def _build_encrypted_row_configuration( transformation runner both key off it). :func:`encrypt_secrets_in_config` recognizes the ``{name, value}`` list shape directly, so no pre-flatten dance is needed. + + Returns the encrypted row config. Callers surface any plaintext-fallback + leak by passing the returned config to :func:`find_plaintext_secret_keys` + (key-paths only, never the values). """ row_config: dict[str, Any] = { "values": [{"name": k, "value": v} for k, v in variables.items()], diff --git a/tests/test_config_encryption.py b/tests/test_config_encryption.py index cf47b01a..4d094678 100644 --- a/tests/test_config_encryption.py +++ b/tests/test_config_encryption.py @@ -262,6 +262,109 @@ def test_update_config_dry_run_keeps_plaintext(self, tmp_config_dir: Path) -> No assert result["new_configuration"]["parameters"]["#password"] == "plain-secret" +# --------------------------------------------------------------------------- +# plaintext_written: structured visibility of a plaintext-fallback leak +# --------------------------------------------------------------------------- + + +class TestPlaintextWrittenField: + """The service result must carry ``plaintext_written`` so --json consumers + see a plaintext-on-encrypt-failure fallback, not just the stderr warning. + + Empty list after a successful encryption; the leaked key-PATHS (never the + plaintext value) after an allowed fallback. + """ + + LEAKED_PATH = "#parameters.#password" + + def test_create_config_plaintext_written_empty_on_success(self, tmp_config_dir: Path) -> None: + service, _ = _make_service(tmp_config_dir) + result = service.create_config( + alias="prod", + component_id=COMPONENT_ID, + name="t", + configuration=_config_with_secret(), + validate=False, + ) + assert result["plaintext_written"] == [] + + def test_update_config_plaintext_written_empty_on_success(self, tmp_config_dir: Path) -> None: + service, _ = _make_service(tmp_config_dir) + result = service.update_config( + alias="prod", + component_id=COMPONENT_ID, + config_id=CONFIG_ID, + configuration=_config_with_secret(), + ) + assert result["plaintext_written"] == [] + + def test_update_config_no_secret_plaintext_written_empty(self, tmp_config_dir: Path) -> None: + service, _ = _make_service(tmp_config_dir) + result = service.update_config( + alias="prod", + component_id=COMPONENT_ID, + config_id=CONFIG_ID, + configuration=_config_without_secret(), + ) + assert result["plaintext_written"] == [] + + def test_update_config_lists_leaked_keys_on_fallback(self, tmp_config_dir: Path) -> None: + service, client = _make_service(tmp_config_dir) + client.encrypt_values.side_effect = RuntimeError("API unavailable") + result = service.update_config( + alias="prod", + component_id=COMPONENT_ID, + config_id=CONFIG_ID, + configuration=_config_with_secret(), + allow_plaintext_fallback=True, + ) + # Leaked key-PATH is surfaced; the plaintext value must never appear. + assert result["plaintext_written"] == [self.LEAKED_PATH] + assert "plain-secret" not in result["plaintext_written"] + # The plaintext write did happen (escape hatch), value left intact. + assert _written_secret(client.update_config.call_args) == "plain-secret" + + def test_create_config_lists_leaked_keys_on_fallback(self, tmp_config_dir: Path) -> None: + service, client = _make_service(tmp_config_dir) + client.encrypt_values.side_effect = RuntimeError("API unavailable") + result = service.create_config( + alias="prod", + component_id=COMPONENT_ID, + name="t", + configuration=_config_with_secret(), + validate=False, + allow_plaintext_fallback=True, + ) + assert result["plaintext_written"] == [self.LEAKED_PATH] + assert "plain-secret" not in result["plaintext_written"] + + def test_create_config_row_lists_leaked_keys_on_fallback(self, tmp_config_dir: Path) -> None: + service, client = _make_service(tmp_config_dir) + client.encrypt_values.side_effect = RuntimeError("API unavailable") + result = service.create_config_row( + alias="prod", + component_id=COMPONENT_ID, + config_id=CONFIG_ID, + name="r", + configuration=_config_with_secret(), + allow_plaintext_fallback=True, + ) + assert result["plaintext_written"] == [self.LEAKED_PATH] + + def test_update_config_row_lists_leaked_keys_on_fallback(self, tmp_config_dir: Path) -> None: + service, client = _make_service(tmp_config_dir) + client.encrypt_values.side_effect = RuntimeError("API unavailable") + result = service.update_config_row( + alias="prod", + component_id=COMPONENT_ID, + config_id=CONFIG_ID, + row_id=ROW_ID, + configuration=_config_with_secret(), + allow_plaintext_fallback=True, + ) + assert result["plaintext_written"] == [self.LEAKED_PATH] + + # --------------------------------------------------------------------------- # CLI wiring: --allow-plaintext-on-encrypt-failure reaches the service layer # --------------------------------------------------------------------------- diff --git a/tests/test_variables_cli.py b/tests/test_variables_cli.py index 0babf21e..2520790a 100644 --- a/tests/test_variables_cli.py +++ b/tests/test_variables_cli.py @@ -362,6 +362,64 @@ def test_human_output_shows_action_and_values(self, tmp_path: Path) -> None: assert "year_start" in output assert "2016" in output + def test_human_mode_warns_when_plaintext_written(self, tmp_path: Path) -> None: + """GHSA-7jrf: an allowed plaintext-on-encrypt-failure fallback must surface + a human-mode warning naming the leaked key-PATHS (never the values). + + ``--json`` carries ``plaintext_written`` on the envelope; this locks the + paired human-mode stderr warning so the leak is never silent. + """ + store = _setup_config(tmp_path / "config") + mock_vars = MagicMock() + mock_vars.set_variables.return_value = { + "project_alias": "prod", + "parent_component_id": "keboola.x", + "parent_config_id": "cfg-1", + "variables_id": "vars-1", + "values_id": "row-1", + "action": "updated", + "values": {"#api_token": "super-secret-value"}, + "encrypted_keys": ["#api_token"], + # Encryption API was down + --allow-plaintext-on-encrypt-failure set, + # so the secret landed in plaintext; the service reports the key-path. + "plaintext_written": ["#values.[0].#api_token"], + } + + with ( + patch("keboola_agent_cli.cli.ConfigStore") as MockStore, + patch("keboola_agent_cli.cli.ProjectService") as MockProj, + patch("keboola_agent_cli.cli.VariablesService") as MockVars, + ): + MockStore.return_value = store + MockProj.return_value = ProjectService(config_store=store) + MockVars.return_value = mock_vars + + result = runner.invoke( + app, + [ + "config", + "variables-set", + "--project", + "prod", + "--component-id", + "keboola.x", + "--config-id", + "cfg-1", + "--var", + "#api_token=super-secret-value", + "--allow-plaintext-on-encrypt-failure", + ], + ) + + assert result.exit_code == 0, result.output + # The warning lands on stderr; Rich soft-wraps at width 80, so collapse + # whitespace before substring matching. + combined = _strip_ansi(result.stdout + result.stderr) + normalized = re.sub(r"\s+", " ", combined) + assert "were written in PLAINTEXT" in normalized + assert "#values.[0].#api_token" in normalized # the key PATH is named + assert "super-secret-value" not in normalized # the value is NEVER shown + class TestVariablesGet: def test_json_returns_linked_payload(self, tmp_path: Path) -> None: diff --git a/tests/test_variables_service.py b/tests/test_variables_service.py index da39e1aa..cb5e1570 100644 --- a/tests/test_variables_service.py +++ b/tests/test_variables_service.py @@ -402,6 +402,105 @@ def test_encryption_failure_fail_closed(self, tmp_config_dir: Path) -> None: # best_practices.md §5: try/finally close() must still fire on error. client.close.assert_called_once() + def test_plaintext_written_empty_on_successful_encryption(self, tmp_config_dir: Path) -> None: + """A successful encryption leaves plaintext_written empty.""" + client = _mock_client() + client.get_config_detail.side_effect = [ + { + "id": "cfg-1", + "name": "my-transform", + "configuration": { + "variables_id": "vars-1", + "variables_values_id": "row-1", + }, + }, + { + "id": "vars-1", + "configuration": {"variables": []}, + "rows": [{"id": "row-1", "configuration": {"values": []}}], + }, + ] + client.update_config_row.return_value = {"id": "row-1"} + client.encrypt_values.side_effect = lambda *, project_id, component_id, data: { + k: "KBC::ComponentSecure::cipher" for k in data + } + svc = _service(tmp_config_dir, client) + + result = svc.set_variables( + alias="prod", + component_id="keboola.x", + config_id="cfg-1", + variables={"#api_token": "plain-secret"}, + ) + + assert result["plaintext_written"] == [] + + def test_plaintext_written_lists_leaked_keys_on_fallback(self, tmp_config_dir: Path) -> None: + """An allowed plaintext fallback surfaces the leaked key-PATH, never the value.""" + client = _mock_client() + client.get_config_detail.side_effect = [ + { + "id": "cfg-1", + "name": "my-transform", + "configuration": { + "variables_id": "vars-1", + "variables_values_id": "row-1", + }, + }, + { + "id": "vars-1", + "configuration": {"variables": []}, + "rows": [{"id": "row-1", "configuration": {"values": []}}], + }, + ] + client.update_config_row.return_value = {"id": "row-1"} + client.encrypt_values.side_effect = Exception("encryption unavailable") + svc = _service(tmp_config_dir, client) + + result = svc.set_variables( + alias="prod", + component_id="keboola.x", + config_id="cfg-1", + variables={"#api_token": "plain-secret"}, + allow_plaintext_fallback=True, + ) + + # The row-hoisted secret flattens to this path; the plaintext value + # must never appear in the structured field. + assert result["plaintext_written"] == ["#values.[0].#api_token"] + assert "plain-secret" not in result["plaintext_written"] + # The escape hatch wrote the row with plaintext intact. + put_kwargs = client.update_config_row.call_args.kwargs + values_sent = {v["name"]: v["value"] for v in put_kwargs["configuration"]["values"]} + assert values_sent["#api_token"] == "plain-secret" + + def test_plaintext_written_lists_leaked_keys_on_fallback_auto_create( + self, tmp_config_dir: Path + ) -> None: + """Auto-create path also threads the leaked key-PATH out on fallback.""" + client = _mock_client() + client.get_config_detail.return_value = { + "id": "cfg-1", + "name": "my-transform", + "configuration": {"parameters": {}}, + } + client.create_config.return_value = {"id": "vars-auto-1"} + client.create_config_row.return_value = {"id": "row-auto-1"} + client.encrypt_values.side_effect = Exception("encryption unavailable") + svc = _service(tmp_config_dir, client) + + result = svc.set_variables( + alias="prod", + component_id="keboola.snowflake-transformation", + config_id="cfg-1", + variables={"#api_token": "plain-secret"}, + allow_plaintext_fallback=True, + ) + + assert result["action"] == "created" + assert result["plaintext_written"] == ["#values.[0].#api_token"] + assert "plain-secret" not in result["plaintext_written"] + class TestSetVariablesValidation: def test_empty_variables_dict_raises(self, tmp_config_dir: Path) -> None: