From 438823571ffdc6126c61c7392ec4ed23a92c78e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Jodas?= <12143866+ondrajodas@users.noreply.github.com> Date: Thu, 4 Jun 2026 15:47:42 +0200 Subject: [PATCH 1/8] feat(flow)!: conditional flows support, drop orchestrator (0.56.0) --component-id removed from all flow subcommands and the /flows REST surface; orchestrator configs dropped from flow list (legacy count warning). Payload validation runs against the live conditional-flow JSON Schema fetched at runtime from the AI Service component registry (never bundled), with semantic-only graceful degradation when the fetch fails. New flow validate [--project] and flow schema --full. Detail rendering rewritten for conditional flows. INVALID_FLOW_DAG renamed to INVALID_FLOW_DEFINITION. Docs and plugin surfaces synced. --- CLAUDE.md | 25 +- README.md | 4 +- .../2026-06-04-conditional-flow-support.md | 2079 +++++++++++++++++ ...6-06-04-conditional-flow-support-design.md | 328 +++ plugins/kbagent/agents/keboola-expert.md | 63 +- plugins/kbagent/skills/kbagent/SKILL.md | 11 +- .../kbagent/references/commands-reference.md | 20 +- .../kbagent/references/flow-workflow.md | 230 +- .../skills/kbagent/references/gotchas.md | 71 +- src/keboola_agent_cli/changelog.py | 55 + src/keboola_agent_cli/commands/context.py | 49 +- src/keboola_agent_cli/commands/flow.py | 404 +++- src/keboola_agent_cli/errors.py | 3 +- src/keboola_agent_cli/models.py | 9 + src/keboola_agent_cli/permissions.py | 1 + src/keboola_agent_cli/server/routers/flows.py | 26 +- .../services/component_service.py | 50 +- .../services/flow_service.py | 344 +-- .../services/flow_validation.py | 188 ++ src/keboola_agent_cli/sync/config_format.py | 3 - tests/test_agent_prompt.py | 8 +- tests/test_component_service.py | 22 +- tests/test_e2e.py | 435 +--- tests/test_flow_cli.py | 859 +++---- tests/test_flow_service.py | 720 +++--- tests/test_flow_validation.py | 295 +++ tests/test_schedule_service.py | 14 +- 27 files changed, 
4720 insertions(+), 1596 deletions(-) create mode 100644 docs/superpowers/plans/2026-06-04-conditional-flow-support.md create mode 100644 docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md create mode 100644 src/keboola_agent_cli/services/flow_validation.py create mode 100644 tests/test_flow_validation.py diff --git a/CLAUDE.md b/CLAUDE.md index 214a424b..a23b4480 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -534,13 +534,24 @@ kbagent kai chat-detail --chat-id ID [--project NAME] kbagent kai history [--project NAME] [--limit N] kbagent flow list [--project NAME] [--branch ID] [--with-schedules] -kbagent flow detail --project NAME --flow-id ID [--component-id keboola.orchestrator|keboola.flow] [--branch ID] -kbagent flow schema -kbagent flow new --project NAME --name NAME [--component-id keboola.orchestrator|keboola.flow] [--description D] [--file @path.yaml|-|JSON] [--branch ID] -kbagent flow update --project NAME --flow-id ID [--component-id ID] [--name N] [--description D] [--file @path.yaml|-|JSON] [--branch ID] -kbagent flow delete --project NAME --flow-id ID [--component-id ID] [--branch ID] [--yes] -kbagent flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--component-id ID] [--timezone TZ] [--disabled] [--branch ID] -kbagent flow schedule-remove --project NAME --flow-id ID [--component-id ID] [--branch ID] [--yes] +kbagent flow detail --project NAME --flow-id ID [--branch ID] +kbagent flow schema [--full --project NAME] +kbagent flow validate --file @flow.yaml|- [--project NAME] +kbagent flow new --project NAME --name NAME [--description D] [--file @path.yaml|-|JSON] [--branch ID] +kbagent flow update --project NAME --flow-id ID [--name N] [--description D] [--file @path.yaml|-|JSON] [--branch ID] +kbagent flow delete --project NAME --flow-id ID [--branch ID] [--yes] +kbagent flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--timezone TZ] [--disabled] [--branch ID] +kbagent flow schedule-remove --project NAME --flow-id 
ID [--branch ID] [--yes] +# Flows are conditional flows (keboola.flow). keboola.orchestrator is NOT supported (dropped 0.56.0). +# IDs are strings; phases use next[].goto + conditions; tasks are typed (job/notification/variable). +# flow new/update validate against the live CF schema fetched from the stack (AI Service +# configurationSchema for keboola.flow; NOT bundled) -> INVALID_FLOW_DEFINITION on failure. +# Schema-fetch failure (network/empty) does NOT block the write: structural check skipped, +# semantic checks still run, a "structural schema validation skipped" warning is surfaced. +# flow validate: with --project fetches the live schema (full validation; fetch failure -> +# semantic-only + note); without --project runs semantic-only + a note. flow schema --full +# requires --project (fetches live schema); plain flow schema is the offline YAML template. +# Execute a flow with: kbagent job run --project NAME --component-id keboola.flow --config-id ID kbagent schedule list [--project NAME ...] [--enabled-only] [--branch ID] kbagent schedule detail --project NAME --schedule-id ID [--branch ID] diff --git a/README.md b/README.md index dcd16948..e8f3b33d 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ kbagent workspace query --project prod --workspace-id WS_ID \ | **Search** | `kbagent search "QUERY"` -- find tables, configs, flows, data apps across every connected project in one call (since 0.30.0). Backed by Storage `global-search`; falls back to per-project body scan with `--search-type config-based`. | | **Configurations** | List, search, inspect, scaffold, update, delete configs. Full-text search across all config bodies (incl. rows). Row CRUD (`row-create / row-update / row-delete`) with `--merge`, `--set`, `--dry-run`, `--is-disabled / --is-enabled` (since 0.30.0). OAuth wizard URL minting with short-lived child tokens (`config oauth-url`, since 0.30.0). Variables management (`variables-set / -get / -clear`). Metadata CRUD + folder grouping. 
Output-bucket override (`set-default-bucket`). String-script auto-normalize for SQL transformations (closes the silent runtime crash from #245, since 0.28.0). | | **Jobs** | List, inspect, run with `--wait` polling (exponential curve), `--timeout` auto-kill, log tail on failure. Row-level execution for multi-row configs. Bulk terminate by ID list or filter (`job terminate --status processing` -- since 0.20.2). | -| **Flows** | Create, update, delete orchestrator/flow configs with phase/task DAG validation. Attach cron schedules (timezone + enabled/disabled state). | +| **Flows** | Create, update, delete **conditional flows** (`keboola.flow`) with schema-backed validation (`next[].goto` transitions + conditions; typed `job`/`notification`/`variable` tasks; string ids). `flow validate` (semantic-only offline; full schema validation with `--project`) and `flow schema --full --project NAME` (fetches the live schema). Attach cron schedules (timezone + enabled/disabled state). `keboola.orchestrator` is not supported (dropped in 0.56.0). | | **Storage** | Buckets, tables, files -- full CRUD. Upload CSV (auto-creates bucket+table). Download by file ID or by tag. Descriptions on buckets/tables/columns (batch-applicable from YAML). Native column types (`VARCHAR(40)`, `NUMBER(18,2)`, `TIMESTAMP_TZ`, `VARIANT`, ...) with per-column `--not-null` and `--default` flags; dev branches auto-materialize target buckets on first write. **`storage swap-tables`** -- atomically swap a typed rebuild back into the original table name in a dev branch without touching downstream config references (since 0.28.0; closes the typify migration footgun). Streamed downloads cap memory at ~1 MiB regardless of table size. Parquet export via `unload-table --file-type parquet`. BigQuery dialect-aware paths in `bucket-detail`. | | **Dev branches** | Create a branch, activate it, and every command auto-targets it. Storage writes, MCP, sync -- everything follows. Storage reads default to production (safer). | | **Sync & GitOps** | Pull configs as YAML, edit in IDE, push back. 
SQL/Python extracted as real files. Diff and status tracking. Adopt existing kbc Go CLI checkouts (`sync init --adopt-existing`). | @@ -192,7 +192,7 @@ kbagent config list | detail | search | update | set-default-bucket | renam row-create | row-update | row-delete oauth-url kbagent job list | detail | run | terminate -kbagent flow list | detail | schema | new | update | delete | schedule | schedule-remove +kbagent flow list | detail | schema | validate | new | update | delete | schedule | schedule-remove kbagent storage buckets | bucket-detail | create-bucket | delete-bucket tables | table-detail | create-table | upload-table | download-table delete-table | truncate-table | delete-column | swap-tables | clone-table diff --git a/docs/superpowers/plans/2026-06-04-conditional-flow-support.md b/docs/superpowers/plans/2026-06-04-conditional-flow-support.md new file mode 100644 index 00000000..55844c09 --- /dev/null +++ b/docs/superpowers/plans/2026-06-04-conditional-flow-support.md @@ -0,0 +1,2079 @@ +# Conditional Flow (`keboola.flow`) Support Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Rewrite the `kbagent flow` surface to support `keboola.flow` (Conditional Flows) with correct schema-backed validation, and drop `keboola.orchestrator` support entirely, shipping as a single breaking release 0.56.0. + +**Architecture:** Follows the repo's 3-layer design (commands → services → client). A new pure-function module `services/flow_validation.py` loads a bundled copy of the upstream CF JSON Schema (`resources/conditional-flow-schema.json`) and performs structural (jsonschema Draft7) + semantic validation. `FlowService` hardcodes the single component `keboola.flow`, drops every `component_id` parameter, and calls the validator on create/update. 
The REST router mirror drops `component_id`. + +**Tech Stack:** Python 3.12, Typer, Pydantic 2.x, `jsonschema>=4.20` (already a dependency), `importlib.resources`, hatchling packaging, pytest + `typer.testing.CliRunner`. + +**Design spec:** `docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md` + +> **Design revision (2026-06-04) — schema fetched live, not bundled.** Supersedes +> the "bundled schema" approach above (spec decision D3). The CF JSON Schema is +> **not** vendored into the wheel; it is fetched at runtime from the stack's +> component registry via `AiServiceClient` → `ComponentDetail.configuration_schema` +> (AI Service `/docs/components/keboola.flow`), the same path `config new --push` +> uses. Verified live that the stack serves the full schema. `flow_validation.py` +> stays a pure module: `validate_conditional_flow(phases, tasks, schema=None)` +> runs structural Draft7 checks only when a schema is passed; the semantic checks +> always run. `FlowService` fetches the live schema before validating and degrades +> gracefully on fetch failure (skip structural, keep semantic, surface a +> `structural schema validation skipped` warning — never block the write, since +> Storage does not validate flow configs server-side). `flow validate` gains +> `--project` (live schema → full validation; otherwise semantic-only + a note); +> `flow schema --full` now requires `--project`. The bundle +> (`resources/conditional-flow-schema.json`, `load_conditional_flow_schema()`), +> the `importlib.resources` loader, and any hatchling packaging for it are +> removed. Where tasks below say "bundled"/"vendored"/"pinned SHA", apply this +> revision instead. + +**Pinned upstream schema:** `keboola/job-queue-daemon` `docs/flow-schema.json` @ commit `24176de2ec1098e0a4be278815e0ca57a93cc93d` (2026-05-26). Private repo — fetch via `gh api`. 
+ +> **CRITICAL GROUNDING NOTE — string ids.** Every id in the CF schema +> (`phase.id`, `task.id`, `next.id`, `task.phase`, `goto`) is a JSON **string** +> (`goto` is `string | null`). The original issue text assumed integer phase +> ids; that is WRONG. All templates, fixtures, and validation in this plan use +> string ids. Using ints will fail Draft7 validation. + +--- + +## File Structure + +| File | Action | Responsibility | +|------|--------|----------------| +| `src/keboola_agent_cli/resources/__init__.py` | Create | Make `resources` an importable package for `importlib.resources`. | +| `src/keboola_agent_cli/resources/conditional-flow-schema.json` | Create | Verbatim copy of upstream CF JSON Schema (pinned SHA). | +| `src/keboola_agent_cli/services/flow_validation.py` | Create | Pure validation: schema loader + structural + semantic + reachability. No HTTP. | +| `src/keboola_agent_cli/errors.py` | Modify | Replace `INVALID_FLOW_DAG` with `INVALID_FLOW_DEFINITION`. | +| `src/keboola_agent_cli/services/flow_service.py` | Modify | Single component; drop `component_id` params; call new validator. | +| `src/keboola_agent_cli/commands/flow.py` | Modify | Drop `--component-id`; new CF template; `flow schema --full`; new `flow validate`; detail rewrite; legacy-count warning. | +| `src/keboola_agent_cli/permissions.py` | Modify | Add `flow.validate: read`. | +| `src/keboola_agent_cli/server/routers/flows.py` | Modify | Drop `component_id` from models + query params. | +| `src/keboola_agent_cli/sync/config_format.py` | Modify | Delete dead `ORCHESTRATOR_COMPONENTS`. | +| `src/keboola_agent_cli/services/component_service.py` | Modify | CF scaffold for `config new`; drop orchestrator default. | +| `src/keboola_agent_cli/commands/context.py` | Modify | Update `AGENT_CONTEXT` flow section. | +| `tests/test_flow_validation.py` | Create | Validator unit tests. | +| `tests/test_flow_service.py` | Rewrite | CF service tests; remove `dependsOn`. 
| +| `tests/test_flow_cli.py` | Rewrite | CF CLI tests; `validate`, `schema --full`. | +| `tests/test_e2e.py` | Modify | CF round-trip + `flow validate`; skip if CF disabled. | +| `CLAUDE.md`, `pyproject.toml`, `changelog.py`, plugin docs | Modify | Docs/version/changelog sync. | + +--- + +## Phase 1 — Bundle the schema + +### Task 1: Vendor the upstream CF schema into the package + +**Files:** +- Create: `src/keboola_agent_cli/resources/__init__.py` +- Create: `src/keboola_agent_cli/resources/conditional-flow-schema.json` + +- [ ] **Step 1: Create the resources package marker** + +Create `src/keboola_agent_cli/resources/__init__.py` with a single docstring line: + +```python +"""Bundled static resources (JSON schemas) shipped inside the wheel.""" +``` + +- [ ] **Step 2: Fetch the pinned schema verbatim** + +Run (the repo is private; `gh` has credentials): + +```bash +gh api "repos/keboola/job-queue-daemon/contents/docs/flow-schema.json?ref=24176de2ec1098e0a4be278815e0ca57a93cc93d" \ + --jq '.content' | base64 -d > src/keboola_agent_cli/resources/conditional-flow-schema.json +``` + +Expected: a ~21 KB JSON file whose top-level keys are `$schema`, `type`, +`required`, `description`, `properties` (`phases`, `tasks`), `definitions`. 
+ +- [ ] **Step 3: Verify it parses and has the expected shape** + +Run: + +```bash +python3 -c "import json; s=json.load(open('src/keboola_agent_cli/resources/conditional-flow-schema.json')); assert s['required']==['phases','tasks']; assert s['properties']['phases']['items']['properties']['id']['type']=='string'; print('OK', len(open('src/keboola_agent_cli/resources/conditional-flow-schema.json').read()), 'bytes')" +``` + +Expected: `OK <N> bytes` (where `<N>` is the file size, roughly 21000) + +- [ ] **Step 4: Commit** + +```bash +git add src/keboola_agent_cli/resources/ +git commit -m "feat(flow): vendor conditional-flow JSON schema (pinned SHA 24176de)" +``` + +--- + +### Task 2: Schema loader (`flow_validation.load_conditional_flow_schema`) + +**Files:** +- Create: `src/keboola_agent_cli/services/flow_validation.py` +- Test: `tests/test_flow_validation.py` + +- [ ] **Step 1: Write the failing test** + +Create `tests/test_flow_validation.py`: + +```python +"""Unit tests for conditional-flow validation (services/flow_validation.py). + +Pure functions, no HTTP, no ConfigStore. +""" + +from __future__ import annotations + +from keboola_agent_cli.services.flow_validation import load_conditional_flow_schema + + +def test_load_schema_ships_in_package(): + schema = load_conditional_flow_schema() + assert schema["required"] == ["phases", "tasks"] + # ids are strings, not integers (grounding guard) + assert schema["properties"]["phases"]["items"]["properties"]["id"]["type"] == "string" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `uv run pytest tests/test_flow_validation.py::test_load_schema_ships_in_package -v` +Expected: FAIL — `ModuleNotFoundError: keboola_agent_cli.services.flow_validation` + +- [ ] **Step 3: Write the loader** + +Create `src/keboola_agent_cli/services/flow_validation.py`: + +```python +"""Conditional-flow (keboola.flow) validation. + +Pure functions: no HTTP, no ConfigStore -- trivially unit-testable. 
+ +Schema source of truth: keboola/job-queue-daemon docs/flow-schema.json +Pinned commit SHA: 24176de2ec1098e0a4be278815e0ca57a93cc93d (2026-05-26). +The bundled copy lives in keboola_agent_cli/resources/conditional-flow-schema.json. +When the upstream schema changes, re-vendor the file and bump the SHA above and +in references/gotchas.md (no CI freshness check in v1 -- upstream repo is private). +""" + +from __future__ import annotations + +import json +from functools import lru_cache +from importlib import resources +from typing import Any + + +@lru_cache(maxsize=1) +def load_conditional_flow_schema() -> dict[str, Any]: + """Load the bundled conditional-flow JSON Schema (draft-07).""" + text = ( + resources.files("keboola_agent_cli.resources") + .joinpath("conditional-flow-schema.json") + .read_text(encoding="utf-8") + ) + return json.loads(text) +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `uv run pytest tests/test_flow_validation.py::test_load_schema_ships_in_package -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/keboola_agent_cli/services/flow_validation.py tests/test_flow_validation.py +git commit -m "feat(flow): add conditional-flow schema loader" +``` + +--- + +### Task 3: Verify the JSON ships in the built wheel + +**Files:** none (packaging verification only) + +- [ ] **Step 1: Build the wheel** + +Run: + +```bash +uv build --wheel 2>&1 | tail -5 +``` + +Expected: a `dist/keboola_agent_cli-0.55.0-*.whl` (version bumps later). + +- [ ] **Step 2: Assert the schema is inside the wheel** + +Run: + +```bash +unzip -l dist/*.whl | grep conditional-flow-schema.json +``` + +Expected: one line listing `keboola_agent_cli/resources/conditional-flow-schema.json`. +If MISSING: the file lives inside the package tree and is not gitignored, so the +hatchling default wheel target (`packages = ["src/keboola_agent_cli"]`) should +include it. 
If it is absent, add a `force-include` entry in `pyproject.toml` +under `[tool.hatch.build.targets.wheel.force-include]`: +`"src/keboola_agent_cli/resources" = "keboola_agent_cli/resources"`, rebuild, +and re-verify. (The unit test in Task 2 already guards the runtime path.) + +- [ ] **Step 3: Clean up build artifacts** + +Run: `rm -rf dist build` + +--- + +## Phase 2 — Error code + +### Task 4: Replace `INVALID_FLOW_DAG` with `INVALID_FLOW_DEFINITION` + +**Files:** +- Modify: `src/keboola_agent_cli/errors.py:99-101` + +- [ ] **Step 1: Confirm there are no external wire consumers** + +Run: + +```bash +grep -rn "INVALID_FLOW_DAG" src/ tests/ plugins/ docs/ +``` + +Expected: references only in `errors.py`, `services/flow_service.py`, +`changelog.py` (historical), `tests/test_flow_service.py`, and docs — all +in-repo. These are all updated by later tasks. + +- [ ] **Step 2: Edit the enum** + +In `src/keboola_agent_cli/errors.py`, replace: + +```python + # Flow (new in 0.22.0) + INVALID_FLOW_DAG = "INVALID_FLOW_DAG" + SCHEDULE_DELETE_FAILED = "SCHEDULE_DELETE_FAILED" +``` + +with: + +```python + # Flow (new in 0.22.0) + SCHEDULE_DELETE_FAILED = "SCHEDULE_DELETE_FAILED" + # Conditional-flow validation (replaces INVALID_FLOW_DAG; since 0.56.0) + INVALID_FLOW_DEFINITION = "INVALID_FLOW_DEFINITION" +``` + +- [ ] **Step 3: Verify import resolves** + +Run: `uv run python -c "from keboola_agent_cli.errors import ErrorCode; print(ErrorCode.INVALID_FLOW_DEFINITION)"` +Expected: `INVALID_FLOW_DEFINITION` + +- [ ] **Step 4: Commit** + +```bash +git add src/keboola_agent_cli/errors.py +git commit -m "feat(flow): add INVALID_FLOW_DEFINITION error code (replaces INVALID_FLOW_DAG)" +``` + +--- + +## Phase 3 — Validation logic (TDD) + +> All tasks in this phase add tests + code to `tests/test_flow_validation.py` and +> `src/keboola_agent_cli/services/flow_validation.py`. Define a shared valid +> fixture first (Task 5), then layer one rule per task. 
+ +### Task 5: Valid CF fixture + structural validation entrypoint + +**Files:** +- Modify: `src/keboola_agent_cli/services/flow_validation.py` +- Test: `tests/test_flow_validation.py` + +- [ ] **Step 1: Write the failing tests** + +Append to `tests/test_flow_validation.py`: + +```python +from keboola_agent_cli.services.flow_validation import validate_conditional_flow + + +def _valid_phases(): + return [ + { + "id": "extract", + "name": "Extract", + "next": [ + { + "id": "t1", + "goto": "transform", + "condition": { + "type": "operator", + "operator": "ANY_TASKS_IN_PHASE", + "phase": "extract", + "operands": [], + }, + }, + {"id": "t2", "goto": None}, # default transition (no condition) + ], + }, + {"id": "transform", "name": "Transform"}, + ] + + +def _valid_tasks(): + return [ + { + "id": "task-1", + "name": "Run extractor", + "phase": "extract", + "enabled": True, + "task": { + "type": "job", + "componentId": "keboola.ex-http", + "configId": "123", + "mode": "run", + }, + }, + { + "id": "task-2", + "name": "Run transform", + "phase": "transform", + "task": { + "type": "job", + "componentId": "keboola.snowflake-transformation", + "configId": "456", + "mode": "run", + }, + }, + ] + + +def test_valid_flow_has_no_errors(): + assert validate_conditional_flow(_valid_phases(), _valid_tasks()) == [] + + +def test_structural_error_bad_task_type(): + tasks = _valid_tasks() + tasks[0]["task"]["type"] = "nonsense" + errors = validate_conditional_flow(_valid_phases(), tasks) + assert errors # at least one structural error reported + assert any("task" in e.lower() for e in errors) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run pytest tests/test_flow_validation.py -k "valid_flow or structural_error_bad_task" -v` +Expected: FAIL — `validate_conditional_flow` not defined. 
+ +- [ ] **Step 3: Implement structural validation + semantic dispatch shell** + +Append to `src/keboola_agent_cli/services/flow_validation.py`: + +```python +import jsonschema + + +def _structural_errors(phases: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> list[str]: + """Run Draft7 validation against the bundled schema, collecting ALL errors.""" + schema = load_conditional_flow_schema() + document = {"phases": phases, "tasks": tasks} + validator = jsonschema.Draft7Validator(schema) + errors: list[str] = [] + for err in sorted(validator.iter_errors(document), key=lambda e: list(e.path)): + path = "/".join(str(p) for p in err.path) or "(root)" + errors.append(f"Schema error at {path}: {err.message}") + return errors + + +def validate_conditional_flow( + phases: list[dict[str, Any]], tasks: list[dict[str, Any]] +) -> list[str]: + """Validate a conditional-flow body. Returns a flat list of error strings + (empty == valid). Reachability is computed separately as a warning -- call + ``find_unreachable_phases`` for that. NO cycle detection: goto loops are + legal at runtime. + + Structural (Draft7) errors are returned first; semantic checks only run when + the structure is sound (avoids cascade noise from a malformed document). 
+ """ + structural = _structural_errors(phases, tasks) + if structural: + return structural + return _semantic_errors(phases, tasks) + + +def _semantic_errors(phases: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> list[str]: + """Placeholder; rules added incrementally in later tasks.""" + return [] +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `uv run pytest tests/test_flow_validation.py -k "valid_flow or structural_error_bad_task" -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/keboola_agent_cli/services/flow_validation.py tests/test_flow_validation.py +git commit -m "feat(flow): structural Draft7 validation for conditional flows" +``` + +--- + +### Task 6: Semantic — unique ids, task→phase refs, goto refs + +**Files:** +- Modify: `src/keboola_agent_cli/services/flow_validation.py` (`_semantic_errors`) +- Test: `tests/test_flow_validation.py` + +- [ ] **Step 1: Write the failing tests** + +Append to `tests/test_flow_validation.py`: + +```python +def test_duplicate_phase_ids(): + phases = _valid_phases() + phases[1]["id"] = "extract" # collide with phase[0] + errors = validate_conditional_flow(phases, _valid_tasks()) + assert any("duplicate phase id" in e.lower() for e in errors) + + +def test_duplicate_task_ids(): + tasks = _valid_tasks() + tasks[1]["id"] = "task-1" + errors = validate_conditional_flow(_valid_phases(), tasks) + assert any("duplicate task id" in e.lower() for e in errors) + + +def test_task_references_missing_phase(): + tasks = _valid_tasks() + tasks[0]["phase"] = "ghost" + errors = validate_conditional_flow(_valid_phases(), tasks) + assert any("ghost" in e and "phase" in e.lower() for e in errors) + + +def test_goto_references_missing_phase(): + phases = _valid_phases() + phases[0]["next"][0]["goto"] = "ghost" + errors = validate_conditional_flow(phases, _valid_tasks()) + assert any("ghost" in e and "goto" in e.lower() for e in errors) + + +def test_goto_null_is_allowed(): + phases = _valid_phases() 
+ phases[0]["next"] = [{"id": "x", "goto": None}] + assert validate_conditional_flow(phases, _valid_tasks()) == [] +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run pytest tests/test_flow_validation.py -k "duplicate or missing_phase or goto" -v` +Expected: FAIL — `_semantic_errors` returns `[]`. + +- [ ] **Step 3: Implement the rules** + +Replace `_semantic_errors` in `src/keboola_agent_cli/services/flow_validation.py`: + +```python +def _semantic_errors(phases: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> list[str]: + errors: list[str] = [] + + # Unique phase ids + phase_ids: list[str] = [str(p.get("id")) for p in phases] + seen: set[str] = set() + for pid in phase_ids: + if pid in seen: + errors.append(f"Duplicate phase id '{pid}'") + seen.add(pid) + valid_phase_ids = set(phase_ids) + + # Unique task ids + seen_tasks: set[str] = set() + for task in tasks: + tid = str(task.get("id")) + if tid in seen_tasks: + errors.append(f"Duplicate task id '{tid}'") + seen_tasks.add(tid) + + # task.phase references an existing phase + for task in tasks: + ref = str(task.get("phase")) + if ref not in valid_phase_ids: + errors.append( + f"Task '{task.get('id', '?')}' references unknown phase '{ref}'" + ) + + # next[].goto is an existing phase id or null + for phase in phases: + for transition in phase.get("next", []): + goto = transition.get("goto") + if goto is not None and str(goto) not in valid_phase_ids: + errors.append( + f"Phase '{phase.get('id', '?')}' transition goto '{goto}' " + f"is not an existing phase id (use null to end the flow)" + ) + + return errors +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `uv run pytest tests/test_flow_validation.py -k "duplicate or missing_phase or goto" -v` +Expected: PASS (all 5) + +- [ ] **Step 5: Commit** + +```bash +git add src/keboola_agent_cli/services/flow_validation.py tests/test_flow_validation.py +git commit -m "feat(flow): semantic id/reference validation" +``` + +--- + +### Task 
7: Semantic — default transition + enabled-task-per-phase + +**Files:** +- Modify: `src/keboola_agent_cli/services/flow_validation.py` (`_semantic_errors`) +- Test: `tests/test_flow_validation.py` + +- [ ] **Step 1: Write the failing tests** + +Append to `tests/test_flow_validation.py`: + +```python +def test_conditional_transitions_need_default_last(): + phases = _valid_phases() + # remove the default (last, condition-less) transition, leaving only conditional + phases[0]["next"] = [phases[0]["next"][0]] + errors = validate_conditional_flow(phases, _valid_tasks()) + assert any("default" in e.lower() and "transition" in e.lower() for e in errors) + + +def test_phase_without_enabled_task(): + tasks = _valid_tasks() + tasks[1]["enabled"] = False # transform phase now has zero enabled tasks + errors = validate_conditional_flow(_valid_phases(), tasks) + assert any("transform" in e and "enabled task" in e.lower() for e in errors) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run pytest tests/test_flow_validation.py -k "default_last or without_enabled" -v` +Expected: FAIL + +- [ ] **Step 3: Implement the rules** + +Append inside `_semantic_errors` (before `return errors`): + +```python + # A phase with conditional transitions must end with a default + # (condition-less) transition. + for phase in phases: + nexts = phase.get("next", []) + if not nexts: + continue + has_conditional = any("condition" in t for t in nexts) + last_is_default = "condition" not in nexts[-1] + if has_conditional and not last_is_default: + errors.append( + f"Phase '{phase.get('id', '?')}' has conditional transitions but " + f"no default (condition-less) transition as the last next[] item" + ) + + # Every phase must have at least one enabled task. 
+ enabled_by_phase: dict[str, int] = {str(p.get("id")): 0 for p in phases} + for task in tasks: + if task.get("enabled", True): + enabled_by_phase[str(task.get("phase"))] = ( + enabled_by_phase.get(str(task.get("phase")), 0) + 1 + ) + for phase in phases: + pid = str(phase.get("id")) + if enabled_by_phase.get(pid, 0) == 0: + errors.append(f"Phase '{pid}' has no enabled task") +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `uv run pytest tests/test_flow_validation.py -k "default_last or without_enabled" -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/keboola_agent_cli/services/flow_validation.py tests/test_flow_validation.py +git commit -m "feat(flow): default-transition + enabled-task semantic checks" +``` + +--- + +### Task 8: Semantic — operator/function operand arity + +**Files:** +- Modify: `src/keboola_agent_cli/services/flow_validation.py` +- Test: `tests/test_flow_validation.py` + +- [ ] **Step 1: Write the failing tests** + +Append to `tests/test_flow_validation.py`: + +```python +def _phase_with_condition(condition): + return [ + { + "id": "p1", + "name": "P1", + "next": [ + {"id": "c", "goto": "p2", "condition": condition}, + {"id": "d", "goto": None}, + ], + }, + {"id": "p2", "name": "P2"}, + ] + + +def _tasks_two_phases(): + return [ + {"id": "a", "name": "A", "phase": "p1", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}}, + {"id": "b", "name": "B", "phase": "p2", + "task": {"type": "job", "componentId": "c", "configId": "2", "mode": "run"}}, + ] + + +def _const(v): + return {"type": "const", "value": v} + + +def test_equals_requires_two_operands(): + cond = {"type": "operator", "operator": "EQUALS", "operands": [_const("x")]} + errors = validate_conditional_flow(_phase_with_condition(cond), _tasks_two_phases()) + assert any("EQUALS" in e and "2 operand" in e for e in errors) + + +def test_and_requires_at_least_one_operand(): + cond = {"type": "operator", "operator": "AND", 
"operands": []} + errors = validate_conditional_flow(_phase_with_condition(cond), _tasks_two_phases()) + assert any("AND" in e and "at least 1" in e for e in errors) + + +def test_function_count_requires_one_operand(): + cond = {"type": "function", "function": "COUNT", "operands": [_const("a"), _const("b")]} + errors = validate_conditional_flow(_phase_with_condition(cond), _tasks_two_phases()) + assert any("COUNT" in e and "1 operand" in e for e in errors) + + +def test_valid_equals_two_operands_ok(): + cond = {"type": "operator", "operator": "EQUALS", "operands": [_const("x"), _const("y")]} + assert validate_conditional_flow(_phase_with_condition(cond), _tasks_two_phases()) == [] +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run pytest tests/test_flow_validation.py -k "operand or two_operands or one_operand or at_least_one" -v` +Expected: FAIL + +- [ ] **Step 3: Implement arity checks** + +Append to `src/keboola_agent_cli/services/flow_validation.py` (module-level +constants + a walker), and call it from `_semantic_errors`: + +```python +# Operand arity per operator (semantic; the schema cannot express these counts). 
+_BINARY_OPERATORS = frozenset( + {"EQUALS", "NOT_EQUALS", "GREATER_THAN", "LESS_THAN", "INCLUDES", "CONTAINS"} +) +_VARIADIC_MIN1_OPERATORS = frozenset({"AND", "OR"}) +_PHASE_SCOPED_OPERATORS = frozenset({"ALL_TASKS_IN_PHASE", "ANY_TASKS_IN_PHASE"}) +_UNARY_FUNCTIONS = frozenset({"COUNT", "DATE"}) + + +def _condition_arity_errors(condition: Any) -> list[str]: + """Recursively check operator/function operand arity.""" + if not isinstance(condition, dict): + return [] + errors: list[str] = [] + ctype = condition.get("type") + operands = condition.get("operands", []) + + if ctype == "operator": + op = condition.get("operator") + if op in _BINARY_OPERATORS and len(operands) != 2: + errors.append(f"Operator '{op}' requires exactly 2 operands, got {len(operands)}") + elif op in _VARIADIC_MIN1_OPERATORS and len(operands) < 1: + errors.append(f"Operator '{op}' requires at least 1 operand, got {len(operands)}") + elif op in _PHASE_SCOPED_OPERATORS and not condition.get("phase"): + errors.append(f"Operator '{op}' requires a 'phase' field") + elif ctype == "function": + fn = condition.get("function") + if fn in _UNARY_FUNCTIONS and len(operands) != 1: + errors.append(f"Function '{fn}' requires exactly 1 operand, got {len(operands)}") + + for child in operands: + errors.extend(_condition_arity_errors(child)) + return errors +``` + +Then add inside `_semantic_errors` (before `return errors`): + +```python + # Condition operand arity (recursive). 
+ for phase in phases: + for transition in phase.get("next", []): + cond = transition.get("condition") + if cond is not None: + errors.extend(_condition_arity_errors(cond)) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `uv run pytest tests/test_flow_validation.py -k "operand or two_operands or one_operand or at_least_one" -v` +Expected: PASS (all 4) + +- [ ] **Step 5: Commit** + +```bash +git add src/keboola_agent_cli/services/flow_validation.py tests/test_flow_validation.py +git commit -m "feat(flow): condition operand-arity validation" +``` + +--- + +### Task 9: Reachability warnings (`find_unreachable_phases`) + +**Files:** +- Modify: `src/keboola_agent_cli/services/flow_validation.py` +- Test: `tests/test_flow_validation.py` + +- [ ] **Step 1: Write the failing tests** + +Append to `tests/test_flow_validation.py`: + +```python +from keboola_agent_cli.services.flow_validation import find_unreachable_phases + + +def test_all_phases_reachable(): + assert find_unreachable_phases(_valid_phases()) == [] + + +def test_unreachable_phase_reported(): + phases = [ + {"id": "start", "name": "Start", "next": [{"id": "x", "goto": None}]}, + {"id": "island", "name": "Island"}, # never targeted + ] + assert find_unreachable_phases(phases) == ["island"] + + +def test_goto_loop_is_not_an_error(): + # start -> loop -> start ... 
legal at runtime, must NOT be flagged + phases = [ + {"id": "start", "name": "Start", "next": [{"id": "a", "goto": "loop"}]}, + {"id": "loop", "name": "Loop", "next": [{"id": "b", "goto": "start"}]}, + ] + assert find_unreachable_phases(phases) == [] + assert validate_conditional_flow(phases, [ + {"id": "t", "name": "T", "phase": "start", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}}, + {"id": "u", "name": "U", "phase": "loop", + "task": {"type": "job", "componentId": "c", "configId": "2", "mode": "run"}}, + ]) == [] +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run pytest tests/test_flow_validation.py -k "reachable or unreachable or goto_loop" -v` +Expected: FAIL — `find_unreachable_phases` not defined. + +- [ ] **Step 3: Implement reachability (forward BFS from first phase)** + +Append to `src/keboola_agent_cli/services/flow_validation.py`: + +```python +from collections import deque + + +def find_unreachable_phases(phases: list[dict[str, Any]]) -> list[str]: + """Return ids of phases not reachable from the entry phase (first in the + list) by following next[].goto edges. WARNING-level only -- never blocks a + write. Returns ids in the order they appear in ``phases``. 
+ """ + if not phases: + return [] + by_id = {str(p.get("id")): p for p in phases} + entry = str(phases[0].get("id")) + reachable: set[str] = set() + queue: deque[str] = deque([entry]) + while queue: + pid = queue.popleft() + if pid in reachable or pid not in by_id: + continue + reachable.add(pid) + for transition in by_id[pid].get("next", []): + goto = transition.get("goto") + if goto is not None: + queue.append(str(goto)) + return [str(p.get("id")) for p in phases if str(p.get("id")) not in reachable] +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `uv run pytest tests/test_flow_validation.py -k "reachable or unreachable or goto_loop" -v` +Expected: PASS (all 3) + +- [ ] **Step 5: Run the whole validation suite** + +Run: `uv run pytest tests/test_flow_validation.py -v` +Expected: all PASS. + +- [ ] **Step 6: Commit** + +```bash +git add src/keboola_agent_cli/services/flow_validation.py tests/test_flow_validation.py +git commit -m "feat(flow): unreachable-phase reachability warnings (no cycle detection)" +``` + +--- + +## Phase 4 — Service layer + +### Task 10: Rewrite `FlowService` to single-component + new validation + +**Files:** +- Modify: `src/keboola_agent_cli/services/flow_service.py` +- Test: `tests/test_flow_service.py` (rewrite) + +> This task replaces `_validate_dag`, removes all `component_id` params, hardcodes +> `FLOW_COMPONENT_ID`, adds `legacy_orchestrator_count` to `list_flows`, and wires +> in `validate_conditional_flow` + `find_unreachable_phases`. + +- [ ] **Step 1: Rewrite the service test file (failing)** + +Replace `tests/test_flow_service.py` entirely. 
Key tests (full file — keep the +`_mock_config_store` / `_make_flow_service` helpers from the existing file, which +do NOT change): + +```python +"""Unit tests for FlowService (conditional flows only).""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from keboola_agent_cli.errors import ErrorCode, KeboolaApiError +from keboola_agent_cli.services.flow_service import FLOW_COMPONENT_ID, FlowService + + +def _mock_config_store(projects: dict) -> MagicMock: + cs = MagicMock() + config = MagicMock() + config.projects = { + alias: MagicMock(stack_url=v["url"], token=v["token"], active_branch_id=None) + for alias, v in projects.items() + } + config.max_parallel_workers = 10 + cs.load.return_value = config + cs.get_project.side_effect = lambda alias: config.projects.get(alias) + return cs + + +def _make_flow_service(mock_client: MagicMock, projects: dict | None = None) -> FlowService: + if projects is None: + projects = {"prod": {"url": "https://connection.keboola.com", "token": "tok"}} + cs = _mock_config_store(projects) + return FlowService(config_store=cs, client_factory=lambda url, token: mock_client) + + +def _valid_body(): + phases = [ + {"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}, + ] + tasks = [ + {"id": "t1", "name": "T1", "phase": "p1", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}}, + ] + return phases, tasks + + +def test_component_id_constant(): + assert FLOW_COMPONENT_ID == "keboola.flow" + + +def test_create_flow_rejects_invalid_definition(): + client = MagicMock() + svc = _make_flow_service(client) + # task references a phase that does not exist -> semantic error + phases = [{"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}] + tasks = [{"id": "t1", "name": "T1", "phase": "ghost", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}}] + with pytest.raises(KeboolaApiError) as exc: + svc.create_flow(alias="prod", 
name="F", phases=phases, tasks=tasks) + assert exc.value.error_code == ErrorCode.INVALID_FLOW_DEFINITION + + +def test_create_flow_uses_keboola_flow_component(): + client = MagicMock() + client.create_config.return_value = {"id": "999", "name": "F"} + svc = _make_flow_service(client) + phases, tasks = _valid_body() + result = svc.create_flow(alias="prod", name="F", phases=phases, tasks=tasks) + assert client.create_config.call_args.kwargs["component_id"] == "keboola.flow" + assert result["id"] == "999" + + +def test_list_flows_reports_legacy_orchestrator_count(): + client = MagicMock() + + def list_configs(component_id, branch_id=None): + if component_id == "keboola.flow": + return [{"id": "1", "name": "CF"}] + if component_id == "keboola.orchestrator": + return [{"id": "9", "name": "Old"}, {"id": "10", "name": "Old2"}] + return [] + + client.list_component_configs.side_effect = list_configs + svc = _make_flow_service(client) + result = svc.list_flows(aliases=["prod"]) + assert result["legacy_orchestrator_count"] == 2 + assert all(f["component_id"] == "keboola.flow" for f in result["flows"]) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `uv run pytest tests/test_flow_service.py -v` +Expected: FAIL — `FLOW_COMPONENT_ID` not importable / signatures still take `component_id`. + +- [ ] **Step 3: Edit `flow_service.py` — constants + imports + delete `_validate_dag`** + +In `src/keboola_agent_cli/services/flow_service.py`: + +Replace the import block and the `FLOW_COMPONENT_IDS` line: + +```python +from ..errors import ErrorCode, KeboolaApiError +from ..models import ProjectConfig +from .base import BaseService +from .flow_validation import find_unreachable_phases, validate_conditional_flow +``` + +```python +FLOW_COMPONENT_ID = "keboola.flow" +LEGACY_FLOW_COMPONENT_ID = "keboola.orchestrator" +SCHEDULER_COMPONENT_ID = "keboola.scheduler" +``` + +Delete the entire `_validate_dag` function (services/flow_service.py:47-96). 
+ +- [ ] **Step 4: Edit `list_flows` — single component + legacy count** + +Replace the `worker` body's flow-collection loop and the return assembly. The +worker now lists ONLY `keboola.flow`, plus a separate count of +`keboola.orchestrator` configs: + +```python + def worker(alias: str, project: ProjectConfig) -> tuple[Any, ...]: + client = self._client_factory(project.stack_url, project.token) + effective_branch = branch_id or project.active_branch_id + try: + flows: list[dict[str, Any]] = [] + try: + configs = client.list_component_configs( + FLOW_COMPONENT_ID, branch_id=effective_branch + ) + except KeboolaApiError as exc: + if exc.error_code == "NOT_FOUND": + configs = [] + else: + raise + for cfg in configs: + flow_row: dict[str, Any] = { + "project_alias": alias, + "component_id": FLOW_COMPONENT_ID, + "config_id": str(cfg.get("id", "")), + "name": cfg.get("name", ""), + "description": cfg.get("description", ""), + "is_disabled": cfg.get("isDisabled", False), + } + if with_schedules: + flow_row["schedules"] = [] + flows.append(flow_row) + + # Count (do not list) legacy orchestrator configs so the CLI can warn. 
+ try: + legacy = client.list_component_configs( + LEGACY_FLOW_COMPONENT_ID, branch_id=effective_branch + ) + legacy_count = len(legacy) + except KeboolaApiError as exc: + if exc.error_code == "NOT_FOUND": + legacy_count = 0 + else: + raise + + if with_schedules and flows: + schedules_by_parent = _collect_schedules_by_parent(client, effective_branch) + for flow_row in flows: + key = (flow_row["component_id"], flow_row["config_id"]) + flow_row["schedules"] = schedules_by_parent.get(key, []) + + return (alias, flows, legacy_count) + except KeboolaApiError as exc: + return ( + alias, + {"project_alias": alias, "error_code": exc.error_code, "message": exc.message}, + ) + except Exception as exc: + return ( + alias, + {"project_alias": alias, "error_code": "UNEXPECTED_ERROR", "message": str(exc)}, + ) + finally: + client.close() + + successes, errors = self._run_parallel(projects, worker) + + all_flows: list[dict[str, Any]] = [] + legacy_total = 0 + for _, flows, legacy_count in successes: + all_flows.extend(flows) + legacy_total += legacy_count + all_flows.sort(key=lambda f: (f["project_alias"], f["name"].lower())) + errors.sort(key=lambda e: e.get("project_alias", "")) + + return { + "flows": all_flows, + "errors": errors, + "legacy_orchestrator_count": legacy_total, + } +``` + +Update the `list_flows` docstring Returns section to mention +`legacy_orchestrator_count` and that only `keboola.flow` rows are returned. + +- [ ] **Step 5: Edit the remaining methods — drop `component_id`, hardcode** + +For each method below, remove the `component_id` parameter and replace every use +of `component_id` with `FLOW_COMPONENT_ID`: + +- `get_flow_detail(self, alias, config_id, branch_id=None)` — call + `client.get_config_detail(FLOW_COMPONENT_ID, config_id, ...)`; set + `detail["component_id"] = FLOW_COMPONENT_ID`. 
+- `create_flow(self, alias, name, description="", phases=None, tasks=None, branch_id=None)`: + replace the `if phases: dag_errors = _validate_dag(...)` block with: + + ```python + phases = phases or [] + tasks = tasks or [] + + definition_errors = validate_conditional_flow(phases, tasks) + if definition_errors: + raise KeboolaApiError( + message="Flow definition is invalid: " + "; ".join(definition_errors), + status_code=400, + error_code=ErrorCode.INVALID_FLOW_DEFINITION, + retryable=False, + ) + warnings = [ + f"Phase '{pid}' is unreachable from the entry phase" + for pid in find_unreachable_phases(phases) + ] + + configuration: dict[str, Any] = {"phases": phases, "tasks": tasks} + ``` + + Call `client.create_config(component_id=FLOW_COMPONENT_ID, ...)`; add + `result["warnings"] = warnings` before returning. + +- `update_flow(self, alias, config_id, name=None, description=None, phases=None, tasks=None, branch_id=None)`: + use `FLOW_COMPONENT_ID` in `get_config_detail` / `update_config`. Replace the + `_validate_dag` block with validation on the merged body: + + ```python + merged_phases = phases if phases is not None else current_body.get("phases", []) + merged_tasks = tasks if tasks is not None else current_body.get("tasks", []) + definition_errors = validate_conditional_flow(merged_phases, merged_tasks) + if definition_errors: + raise KeboolaApiError( + message="Flow definition is invalid: " + "; ".join(definition_errors), + status_code=400, + error_code=ErrorCode.INVALID_FLOW_DEFINITION, + retryable=False, + ) + configuration = dict(current_body) + configuration["phases"] = merged_phases + configuration["tasks"] = merged_tasks + ``` + +- `delete_flow(self, alias, config_id, branch_id=None)` — `FLOW_COMPONENT_ID` in + `delete_config`; set `"component_id": FLOW_COMPONENT_ID` in the return dict. +- `list_flow_schedules(self, alias, config_id, branch_id=None)` — filter + `target.componentId == FLOW_COMPONENT_ID`; return `"component_id": FLOW_COMPONENT_ID`. 
+- `set_flow_schedule(self, alias, config_id, cron_tab, timezone="UTC", enabled=True, schedule_name=None, branch_id=None)` + — `get_config_detail(FLOW_COMPONENT_ID, ...)` for the name; the scheduler + `target.componentId` is `FLOW_COMPONENT_ID`; match existing by + `FLOW_COMPONENT_ID`; return `"component_id": FLOW_COMPONENT_ID`. +- `remove_flow_schedule(self, alias, config_id, branch_id=None)` — filter by + `FLOW_COMPONENT_ID`; return `"component_id": FLOW_COMPONENT_ID`. + +- [ ] **Step 6: Run the service tests** + +Run: `uv run pytest tests/test_flow_service.py -v` +Expected: PASS (all). If any reference `_validate_dag` or `_count_phases_tasks`, +keep `_count_phases_tasks` (still used) and ensure no test imports `_validate_dag`. + +- [ ] **Step 7: Commit** + +```bash +git add src/keboola_agent_cli/services/flow_service.py tests/test_flow_service.py +git commit -m "feat(flow): single-component FlowService with conditional-flow validation" +``` + +--- + +## Phase 5 — CLI layer + +### Task 11: CF template + `flow schema --full` + +**Files:** +- Modify: `src/keboola_agent_cli/commands/flow.py:30-85` (`_FLOW_SCHEMA`, `_FLOW_COMPONENT_CHOICES`), `flow_schema` command +- Test: `tests/test_flow_cli.py` + +- [ ] **Step 1: Write the failing CLI tests** + +In the rewritten `tests/test_flow_cli.py` (see Task 12 for the file header / +runner fixture), add: + +```python +def test_flow_schema_default_is_conditional_template(runner, app): + result = runner.invoke(app, ["flow", "schema"]) + assert result.exit_code == 0 + assert "next:" in result.stdout + assert "goto" in result.stdout + assert "dependsOn" not in result.stdout # legacy template gone + + +def test_flow_schema_full_dumps_json_schema(runner, app): + result = runner.invoke(app, ["flow", "schema", "--full"]) + assert result.exit_code == 0 + assert "$schema" in result.stdout or "draft-07" in result.stdout + + +def test_flow_schema_full_json_mode(runner, app): + result = runner.invoke(app, ["--json", "flow", "schema", 
"--full"]) + assert result.exit_code == 0 + import json as _json + payload = _json.loads(result.stdout) + assert payload["schema"]["required"] == ["phases", "tasks"] +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `uv run pytest tests/test_flow_cli.py -k "schema" -v` +Expected: FAIL. + +- [ ] **Step 3: Replace the template and `flow schema` command** + +In `src/keboola_agent_cli/commands/flow.py`: + +Delete `_FLOW_COMPONENT_CHOICES = [...]`. + +Replace `_FLOW_SCHEMA` with the CF template (string ids): + +```python +_FLOW_SCHEMA = """\ +# kbagent flow schema -- keboola.flow (Conditional Flow) configuration format +# +# Create with: kbagent flow new --project ALIAS --name "My Flow" --file @flow.yaml +# Update with: kbagent flow update --project ALIAS --flow-id ID --file @flow.yaml +# Validate offline: kbagent flow validate --file @flow.yaml +# Full JSON schema: kbagent flow schema --full +# +# IDs are STRINGS. goto is a phase id or null (= end the flow). + +phases: + - id: "extract" + name: "Extract" + next: + # Conditional transition: if any task in 'extract' failed, go to 'notify'. + - id: "on-failure" + goto: "notify" + condition: + type: operator + operator: ANY_TASKS_IN_PHASE + phase: "extract" + operands: [] + # Default transition (NO condition) -- MUST be last. 
+ - id: "default" + goto: "transform" + - id: "transform" + name: "Transform" + retry: + strategy: linear + strategyParams: + delaySeconds: 60 + retryOn: ["error"] + next: + - id: "done" + goto: null + - id: "notify" + name: "Notify on failure" + +tasks: + - id: "task-extract" + name: "Run HTTP extractor" + phase: "extract" + enabled: true + task: + type: job + componentId: "keboola.ex-http" + configId: "123456789" + mode: run + retry: + strategy: linear + strategyParams: + delaySeconds: 30 + retryOn: ["error"] + - id: "task-transform" + name: "Run transformation" + phase: "transform" + enabled: true + task: + type: job + componentId: "keboola.snowflake-transformation" + configId: "987654321" + mode: run + - id: "task-notify" + name: "Email the team" + phase: "notify" + enabled: true + task: + type: notification + title: "Flow failed" + message: "The extract phase reported a failure." + recipients: + - channel: email + address: "team@example.com" + - id: "task-setvar" + name: "Set a flow variable" + phase: "extract" + enabled: true + task: + type: variable + name: "run_date" + value: "2026-01-01" +""" +``` + +Replace the `flow_schema` command: + +```python +@flow_app.command("schema") +def flow_schema( + ctx: typer.Context, + full: bool = typer.Option( + False, "--full", help="Dump the full bundled JSON Schema verbatim instead of the template." 
+ ), +) -> None: + """Print the conditional-flow YAML template, or --full for the JSON Schema.""" + formatter = get_formatter(ctx) + if full: + from ..services.flow_validation import load_conditional_flow_schema + + schema = load_conditional_flow_schema() + if formatter.json_mode: + formatter.output({"format": "json-schema", "schema": schema}) + else: + import json as _json + + from rich.syntax import Syntax + + formatter.console.print( + Syntax(_json.dumps(schema, indent=2), "json", theme="monokai", line_numbers=False) + ) + return + + if formatter.json_mode: + formatter.output( + { + "format": "yaml", + "description": "keboola.flow (Conditional Flow) configuration schema", + "schema": _FLOW_SCHEMA, + } + ) + else: + from rich.syntax import Syntax + + formatter.console.print(Syntax(_FLOW_SCHEMA, "yaml", theme="monokai", line_numbers=False)) +``` + +- [ ] **Step 4: Run to verify pass** + +Run: `uv run pytest tests/test_flow_cli.py -k "schema" -v` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/keboola_agent_cli/commands/flow.py tests/test_flow_cli.py +git commit -m "feat(flow): conditional-flow template + 'flow schema --full'" +``` + +--- + +### Task 12: `flow validate` command + drop `--component-id` everywhere + +**Files:** +- Modify: `src/keboola_agent_cli/commands/flow.py` (all 8 subcommands + new `validate`) +- Modify: `src/keboola_agent_cli/permissions.py:278-286` +- Test: `tests/test_flow_cli.py` (rewrite) + +- [ ] **Step 1: Write the rewritten CLI test file header + validate tests (failing)** + +Replace `tests/test_flow_cli.py`. 
File header / fixtures: + +```python +"""CLI tests for the flow command group (conditional flows).""" + +from __future__ import annotations + +import json + +import pytest +from typer.testing import CliRunner + +from keboola_agent_cli.cli import app as _app + + +@pytest.fixture +def runner(): + return CliRunner() + + +@pytest.fixture +def app(): + return _app +``` + +Add validate tests (offline, no project needed): + +```python +_VALID_FLOW_YAML = """ +phases: + - id: "p1" + name: "P1" + next: + - id: "n" + goto: null +tasks: + - id: "t1" + name: "T1" + phase: "p1" + enabled: true + task: + type: job + componentId: "keboola.ex-http" + configId: "1" + mode: run +""" + + +def test_flow_validate_valid(runner, app, tmp_path): + f = tmp_path / "flow.yaml" + f.write_text(_VALID_FLOW_YAML) + result = runner.invoke(app, ["flow", "validate", "--file", f"@{f}"]) + assert result.exit_code == 0 + + +def test_flow_validate_invalid_exit_2(runner, app, tmp_path): + bad = _VALID_FLOW_YAML.replace('phase: "p1"', 'phase: "ghost"') + f = tmp_path / "bad.yaml" + f.write_text(bad) + result = runner.invoke(app, ["--json", "flow", "validate", "--file", f"@{f}"]) + assert result.exit_code == 2 + payload = json.loads(result.stdout) + assert payload["valid"] is False + assert payload["errors"] + + +def test_flow_validate_json_valid_lists_warnings(runner, app, tmp_path): + f = tmp_path / "flow.yaml" + f.write_text(_VALID_FLOW_YAML) + result = runner.invoke(app, ["--json", "flow", "validate", "--file", f"@{f}"]) + assert result.exit_code == 0 + payload = json.loads(result.stdout) + assert payload["valid"] is True + assert payload["errors"] == [] + assert "warnings" in payload +``` + +- [ ] **Step 2: Run to verify failure** + +Run: `uv run pytest tests/test_flow_cli.py -k "validate" -v` +Expected: FAIL — no `validate` command. 
+ +- [ ] **Step 3: Add the `flow validate` command** + +In `src/keboola_agent_cli/commands/flow.py`, add after `flow_schema`: + +```python +@flow_app.command("validate") +def flow_validate( + ctx: typer.Context, + file: str = typer.Option( + ..., + "--file", + help="YAML/JSON flow definition to validate (@file, -, or inline). Offline -- no API call.", + ), +) -> None: + """Validate a conditional-flow definition offline (schema + semantic checks). + + Exit 0 when valid (warnings still printed), exit 2 when there are errors. + """ + formatter = get_formatter(ctx) + from ..services.flow_validation import find_unreachable_phases, validate_conditional_flow + + try: + flow_def = _load_flow_yaml(file) + except (OSError, yaml.YAMLError, ValueError) as exc: + formatter.error( + message=f"Cannot load flow definition: {exc}", error_code=ErrorCode.VALIDATION_ERROR + ) + raise typer.Exit(code=2) from None + + phases = flow_def.get("phases", []) + tasks = flow_def.get("tasks", []) + errors = validate_conditional_flow(phases, tasks) + warnings = [ + f"Phase '{pid}' is unreachable from the entry phase" + for pid in find_unreachable_phases(phases) + ] + valid = not errors + + if formatter.json_mode: + formatter.output({"valid": valid, "errors": errors, "warnings": warnings}) + else: + for w in warnings: + formatter.warning(w) + if valid: + formatter.success("Flow definition is valid.") + else: + for e in errors: + formatter.console.print(f"[red]✗[/red] {escape(e)}") + if not valid: + raise typer.Exit(code=2) +``` + +- [ ] **Step 4: Remove `--component-id` from all 8 subcommands** + +Delete the `component_id: str = typer.Option(...)` parameter from `flow_detail`, +`flow_new`, `flow_update`, `flow_delete`, `flow_schedule`, `flow_schedule_remove` +(`flow_list` and `flow_schema` never had it). Update each service call to drop +the `component_id=` kwarg. Replace human-output strings that interpolate +`component_id` with the literal `keboola.flow` (or just the flow id). 
Examples: + +- `flow_detail`: `service.get_flow_detail(alias=project, config_id=flow_id, branch_id=effective_branch)`. +- `flow_new`: `service.create_flow(alias=project, name=name, description=description, phases=phases, tasks=tasks, branch_id=branch)`; success line: `f"Created flow '{...}' [keboola.flow/{result.get('id','')}]{branch_info}"`. Print any `result.get("warnings")` via `formatter.warning`. +- `flow_update`: `service.update_flow(alias=project, config_id=flow_id, name=name, description=description, phases=phases, tasks=tasks, branch_id=branch)`; success line uses `keboola.flow`. +- `flow_delete`: drop `component_id` from `would_delete`, the confirm prompt (`f"Delete flow keboola.flow/{flow_id}?"`), and `service.delete_flow(alias=project, config_id=flow_id, branch_id=branch)`. +- `flow_schedule`: `service.set_flow_schedule(alias=project, config_id=flow_id, cron_tab=cron, ...)`. +- `flow_schedule_remove`: both `list_flow_schedules` and `remove_flow_schedule` calls drop `component_id`; drop it from `would_delete`. + +- [ ] **Step 5: Update the `flow list` legacy warning** + +In `_format_flows_table`, after the errors loop, add: + +```python + legacy = result.get("legacy_orchestrator_count", 0) + if legacy: + formatter.warning( + f"{legacy} legacy keboola.orchestrator flow(s) are not shown " + f"(orchestrator support was dropped in 0.56.0; migrate to keboola.flow)." + ) +``` + +The JSON path already passes `result` straight through, so +`legacy_orchestrator_count` appears in `--json` automatically. + +Also drop the `"Component"` column from the table (every row is `keboola.flow`): +remove `"Component"` from `columns` and remove the `f.get("component_id", "")` +cell from each row. 
+ +- [ ] **Step 6: Add the permission entry** + +In `src/keboola_agent_cli/permissions.py`, in the `# Flow operations` block, +after `"flow.schema": "read",` add: + +```python + "flow.validate": "read", +``` + +- [ ] **Step 7: Update `flow_app` help string** + +Change `flow_app = typer.Typer(help="Manage flows (keboola.orchestrator + keboola.flow)")` +to `flow_app = typer.Typer(help="Manage conditional flows (keboola.flow)")`. + +- [ ] **Step 8: Add a CLI test for the dropped `--component-id` flag (detail rendering is tested in Task 13)** + +Append to `tests/test_flow_cli.py`: + +```python +def test_component_id_flag_removed(runner, app): + # --component-id is no longer a recognized option on flow detail + result = runner.invoke( + app, ["flow", "detail", "--project", "x", "--flow-id", "1", "--component-id", "keboola.flow"] + ) + assert result.exit_code == 2 + assert "no such option" in result.output.lower() +``` + +- [ ] **Step 9: Run the CLI suite** + +Run: `uv run pytest tests/test_flow_cli.py -v` +Expected: PASS. + +- [ ] **Step 10: Commit** + +```bash +git add src/keboola_agent_cli/commands/flow.py src/keboola_agent_cli/permissions.py tests/test_flow_cli.py +git commit -m "feat(flow): add 'flow validate', drop --component-id, legacy-count warning" +``` + +--- + +### Task 13: `flow detail` human rendering rewrite + +**Files:** +- Modify: `src/keboola_agent_cli/commands/flow.py` (`_format_flow_detail`) +- Test: `tests/test_flow_cli.py` + +- [ ] **Step 1: Write the failing test** + +Append to `tests/test_flow_cli.py` (mocking the service by monkeypatching the +service factory is heavy, so test the pure formatter as a small unit instead). 
+Add a direct unit test of the renderer: + +```python +def test_format_flow_detail_renders_transitions_and_badges(capsys): + from keboola_agent_cli.commands.flow import _format_flow_detail + from keboola_agent_cli.output import OutputFormatter + + formatter = OutputFormatter(json_mode=False) + detail = { + "name": "My CF", + "id": "100", + "phases": [ + {"id": "p1", "name": "Extract", + "next": [{"id": "c", "goto": "p2", "condition": {"type": "operator", "operator": "ANY_TASKS_IN_PHASE", "phase": "p1", "operands": []}}, + {"id": "d", "goto": None}]}, + {"id": "p2", "name": "Transform"}, + ], + "tasks": [ + {"id": "t1", "name": "Run", "phase": "p1", "enabled": True, + "task": {"type": "job", "componentId": "keboola.ex-http", "configId": "9", "mode": "run"}}, + {"id": "t2", "name": "Notify", "phase": "p2", + "task": {"type": "notification", "title": "x", "recipients": []}}, + ], + } + _format_flow_detail(formatter, detail) + out = capsys.readouterr().out + assert "Extract" in out and "Transform" in out + assert "→" in out # transition arrow + assert "default" in out.lower() # condition-less transition labeled + assert "job" in out and "notification" in out # task type badges +``` + +> NOTE: confirm `OutputFormatter(json_mode=False)` is the correct constructor by +> checking `src/keboola_agent_cli/output.py`; adapt the kwarg if the signature +> differs (e.g. positional). The existing `conftest.py` has a formatter fixture +> you may reuse instead. + +- [ ] **Step 2: Run to verify failure** + +Run: `uv run pytest tests/test_flow_cli.py -k "format_flow_detail" -v` +Expected: FAIL (current renderer prints `dependsOn`, no arrows/badges). 
+ +- [ ] **Step 3: Rewrite `_format_flow_detail`** + +Replace `_format_flow_detail` in `src/keboola_agent_cli/commands/flow.py`: + +```python +def _summarize_condition(condition: dict[str, Any] | None) -> str: + """One-line human summary of a transition condition.""" + if not condition: + return "default" + ctype = condition.get("type") + if ctype == "operator": + op = condition.get("operator", "?") + phase = condition.get("phase") + return f"{op}({phase})" if phase else f"{op}(...)" + if ctype == "function": + return f"{condition.get('function', '?')}(...)" + if ctype in ("const", "constant"): + return f"const={condition.get('value')!r}" + return str(ctype) + + +def _format_flow_detail(formatter: Any, result: dict[str, Any]) -> None: + formatter.console.print( + f"\n[bold]{escape(result.get('name', ''))}[/bold]" + f" [dim](keboola.flow / {escape(str(result.get('id', '')))})[/dim]" + ) + if result.get("description"): + formatter.console.print(f"[dim]{escape(result['description'])}[/dim]") + if result.get("branch_id"): + formatter.console.print(f"[dim]Branch: {result['branch_id']}[/dim]") + + phases = result.get("phases", []) + tasks = result.get("tasks", []) + if not phases and not tasks: + formatter.console.print("\n[dim]No phases or tasks defined.[/dim]") + return + + formatter.console.print( + f"\n[bold]Phases[/bold] ({len(phases)}) [bold]Tasks[/bold] ({len(tasks)})" + ) + + tasks_by_phase: dict[Any, list[dict[str, Any]]] = {} + for task in tasks: + tasks_by_phase.setdefault(str(task.get("phase")), []).append(task) + + _TYPE_COLORS = {"job": "green", "notification": "yellow", "variable": "magenta"} + + for phase in phases: + pid = str(phase.get("id")) + retry = " [dim](retry)[/dim]" if phase.get("retry") else "" + formatter.console.print( + f"\n [cyan bold]Phase {escape(pid)}: {escape(phase.get('name', ''))}[/cyan bold]{retry}" + ) + for transition in phase.get("next", []): + goto = transition.get("goto") + target = "END" if goto is None else str(goto) + summary 
= _summarize_condition(transition.get("condition")) + formatter.console.print( + f" [dim]→ {escape(target)} \\[{escape(summary)}][/dim]" + ) + for task in tasks_by_phase.get(pid, []): + t_info = task.get("task") or {} + ttype = t_info.get("type", "?") + color = _TYPE_COLORS.get(ttype, "white") + badge = f"[{color}]{escape(ttype)}[/{color}]" + detail_str = "" + if ttype == "job": + detail_str = f" {escape(str(t_info.get('componentId', '')))}/{escape(str(t_info.get('configId', '')))}" + elif ttype == "variable": + detail_str = f" {escape(str(t_info.get('name', '')))}" + t_retry = " [dim](retry)[/dim]" if t_info.get("retry") else "" + enabled = "" if task.get("enabled", True) else " [dim](disabled)[/dim]" + formatter.console.print( + f" \\[{escape(str(task.get('id', '?')))}] {badge} " + f"{escape(task.get('name', ''))}[dim]{detail_str}[/dim]{enabled}{t_retry}" + ) + + orphan_keys = set(tasks_by_phase.keys()) - {str(p.get("id")) for p in phases} + for key in sorted(orphan_keys): + formatter.console.print(f"\n [yellow]Phase '{key}' (not in phases list)[/yellow]") + for task in tasks_by_phase.get(key, []): + formatter.console.print(f" {escape(task.get('name', str(task)))}") +``` + +- [ ] **Step 4: Run to verify pass** + +Run: `uv run pytest tests/test_flow_cli.py -k "format_flow_detail" -v` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/keboola_agent_cli/commands/flow.py tests/test_flow_cli.py +git commit -m "feat(flow): rewrite detail rendering for conditional flows" +``` + +--- + +## Phase 6 — REST mirror + +### Task 14: Drop `component_id` from `server/routers/flows.py` + +**Files:** +- Modify: `src/keboola_agent_cli/server/routers/flows.py` +- Test: existing server tests (run to confirm green) — locate with grep below + +- [ ] **Step 1: Find server flow tests** + +Run: `grep -rln "flows\|/flows\|FlowCreate" tests/ | grep -i serv` +Expected: a server test file (e.g. `tests/test_server*.py`). 
If a flow-route test +exists, read it to learn the expected request shape before editing. + +- [ ] **Step 2: Edit the router** + +In `src/keboola_agent_cli/server/routers/flows.py`: + +- Delete the `DEFAULT_FLOW_COMPONENT = "keboola.flow"` constant. +- Remove `component_id` from `FlowCreate`, `FlowUpdate`, `FlowSchedule`. +- Remove the `component_id: str = DEFAULT_FLOW_COMPONENT` query param from + `detail`, `delete`, `list_schedules`, `remove_schedule`. +- Drop the `component_id=...` kwarg from every `registry.flow.*` call (matching + the new service signatures from Task 10). + +Example for `create`: + +```python +@router.post("/{project}", summary="Create a new flow") +def create( + project: str, body: FlowCreate, registry: ServiceRegistry = Depends(get_registry) +) -> dict[str, Any]: + """Create a new flow configuration. Mirrors `kbagent flow new`.""" + return registry.flow.create_flow( + alias=project, + name=body.name, + description=body.description, + phases=body.phases, + tasks=body.tasks, + branch_id=body.branch_id, + ) +``` + +Apply the analogous edit to `detail`, `update`, `delete`, `list_schedules`, +`set_schedule`, `remove_schedule`. + +- [ ] **Step 3: Run server tests** + +Run: `uv run pytest tests/ -k "flow and serv" -v` (and any file found in Step 1) +Expected: PASS. If a server test posted `component_id`, update it to omit the +field. 
+ +- [ ] **Step 4: Commit** + +```bash +git add src/keboola_agent_cli/server/routers/flows.py tests/ +git commit -m "feat(flow): drop component_id from /flows REST surface" +``` + +--- + +## Phase 7 — Cleanup sweep + +### Task 15: Remove dead `ORCHESTRATOR_COMPONENTS`; CF scaffold for `config new` + +**Files:** +- Modify: `src/keboola_agent_cli/sync/config_format.py:70` +- Modify: `src/keboola_agent_cli/services/component_service.py:43,292-298,645-650` +- Test: `tests/test_component_service.py` + +- [ ] **Step 1: Confirm `ORCHESTRATOR_COMPONENTS` is dead** + +Run: `grep -rn "ORCHESTRATOR_COMPONENTS" src/ tests/` +Expected: only the definition at `config_format.py:70`. + +- [ ] **Step 2: Delete it** + +Remove the line `ORCHESTRATOR_COMPONENTS: set[str] = {"keboola.orchestrator", "keboola.flow"}` +from `src/keboola_agent_cli/sync/config_format.py` (and its preceding comment if +standalone). + +- [ ] **Step 3: Update the flow scaffold in `component_service.py`** + +Read `tests/test_component_service.py` first to see what the flow scaffold test +asserts. Then: + +- Change `_FLOW_COMPONENT_IDS = ("keboola.orchestrator", "keboola.flow")` to + `_FLOW_COMPONENT_IDS = ("keboola.flow",)` (or inline `"keboola.flow"` if it + only feeds `_classify`). 
+- Rewrite `_build_flow_config_yml` to emit a CF skeleton (string ids): + +```python +def _build_flow_config_yml(name: str, component_id: str = "keboola.flow") -> str: + """Generate a conditional-flow (keboola.flow) configuration YAML skeleton.""" + lines = [ + f'name: "{name}"', + "description: |", + " TODO: describe this flow", + "phases:", + ' - id: "phase-1"', + ' name: "Phase 1"', + " next:", + ' - id: "default"', + " goto: null", + "tasks:", + ' - id: "task-1"', + ' name: "Task 1"', + ' phase: "phase-1"', + " enabled: true", + " task:", + " type: job", + ' componentId: "keboola.ex-http"', + ' configId: "TODO"', + " mode: run", + ] + return "\n".join(lines) + "\n" +``` + +- In the `config new` builder (`component_service.py:645-650`), update the flow + branch description to `"Conditional flow (keboola.flow) configuration"` and + pass `detail.component_id` (now always `keboola.flow` for flow components). + +- [ ] **Step 4: Run component service tests** + +Run: `uv run pytest tests/test_component_service.py -v` +Expected: PASS. Update any assertion that expected `dependsOn` / orchestrator +output to expect the new CF skeleton. + +- [ ] **Step 5: Commit** + +```bash +git add src/keboola_agent_cli/sync/config_format.py src/keboola_agent_cli/services/component_service.py tests/test_component_service.py +git commit -m "refactor(flow): drop orchestrator constants; CF scaffold for 'config new'" +``` + +--- + +## Phase 8 — E2E + +### Task 16: CF round-trip E2E + skip when CF disabled + +**Files:** +- Modify: `tests/test_e2e.py` (`TestE2EFlowOperations`, ~line 5101-5200) + +- [ ] **Step 1: Read the existing flow E2E block** + +Run: `sed -n '5101,5230p' tests/test_e2e.py` — note the `self._run`, `_step`, +and cleanup helpers and the existing assertions. 
+ +- [ ] **Step 2: Rewrite the flow E2E to use a CF payload** + +Replace the body that creates/updates the flow so it writes a valid CF +definition (string ids, a `job` task) via a temp YAML file and `--file`, and +drops every `--component-id` argument. Add a `flow validate` step (offline). At +the start of `test_flow_crud_and_schedule`, detect CF support and skip cleanly: + +```python + def test_flow_crud_and_schedule(self, tmp_path: Path) -> None: + # Skip if the project has conditional flows disabled. + probe = self._run("flow", "new", "--project", self.alias, "--name", "cf-probe", + "--file", "@" + str(self._write_cf(tmp_path))) + if probe.exit_code != 0 and "conditional" in (probe.stdout + probe.stderr).lower(): + pytest.skip("Project reports conditional_flows=false; skipping CF E2E") + ... +``` + +Add a helper on the test class: + +```python + @staticmethod + def _write_cf(tmp_path: Path) -> Path: + body = ( + 'phases:\n' + ' - id: "p1"\n' + ' name: "P1"\n' + ' next:\n' + ' - id: "n"\n' + ' goto: null\n' + 'tasks:\n' + ' - id: "t1"\n' + ' name: "T1"\n' + ' phase: "p1"\n' + ' enabled: true\n' + ' task:\n' + ' type: job\n' + ' componentId: "keboola.ex-http"\n' + ' configId: "1"\n' + ' mode: run\n' + ) + path = tmp_path / "cf.yaml" + path.write_text(body, encoding="utf-8") + return path +``` + +Ensure the steps cover: schema → validate → new → detail → update → schedule → +schedule-remove → delete. Cleanup tracks created flow ids as before (now always +`keboola.flow`). + +- [ ] **Step 3: Run E2E (requires credentials)** + +Run: `make test-e2e` (needs `E2E_API_TOKEN` + `E2E_URL`). +Expected: the flow E2E passes against a CF-enabled project, or skips with the +clear reason on a CF-disabled one. If credentials are unavailable in this +environment, note it and defer to CI; do NOT mark the task done without a run.
+ +- [ ] **Step 4: Commit** + +```bash +git add tests/test_e2e.py +git commit -m "test(flow): conditional-flow E2E round-trip + CF-disabled skip" +``` + +--- + +## Phase 9 — Docs, plugin sync, release + +### Task 17: Update `CLAUDE.md` + `AGENT_CONTEXT` flow sections + +**Files:** +- Modify: `CLAUDE.md` (`## All CLI Commands` flow block) +- Modify: `src/keboola_agent_cli/commands/context.py:569-598` + +- [ ] **Step 1: Update `CLAUDE.md` flow block** + +Replace the flow command lines in `## All CLI Commands` to drop `--component-id`, +add `flow validate` and `flow schema --full`: + +``` +kbagent flow list [--project NAME] [--branch ID] [--with-schedules] +kbagent flow detail --project NAME --flow-id ID [--branch ID] +kbagent flow schema [--full] +kbagent flow validate --file @flow.yaml|- +kbagent flow new --project NAME --name NAME [--description D] [--file @path.yaml|-|JSON] [--branch ID] +kbagent flow update --project NAME --flow-id ID [--name N] [--description D] [--file @path.yaml|-|JSON] [--branch ID] +kbagent flow delete --project NAME --flow-id ID [--branch ID] [--yes] +kbagent flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--timezone TZ] [--disabled] [--branch ID] +kbagent flow schedule-remove --project NAME --flow-id ID [--branch ID] [--yes] +# Flows are conditional flows (keboola.flow). keboola.orchestrator is NOT supported (dropped 0.56.0). +# Execute a flow with: kbagent job run --project NAME --component-id keboola.flow --config-id ID +``` + +- [ ] **Step 2: Update `context.py` `AGENT_CONTEXT`** + +In `src/keboola_agent_cli/commands/context.py:569-598`, mirror the same changes: +remove `--component-id`, remove "defaults to keboola.orchestrator" notes, replace +"DAG re-validated on write" with "validated against the conditional-flow schema +(INVALID_FLOW_DEFINITION on failure)", add the `flow validate` and +`flow schema --full` entries, and add a note that orchestrator is dropped. 
+ +- [ ] **Step 3: Verify context renders** + +Run: `uv run kbagent context | grep -A2 "flow validate"` +Expected: the new command appears; no `--component-id` in the flow section. + +- [ ] **Step 4: Commit** + +```bash +git add CLAUDE.md src/keboola_agent_cli/commands/context.py +git commit -m "docs(flow): refresh CLAUDE.md + AGENT_CONTEXT for conditional flows" +``` + +--- + +### Task 18: Plugin sync — keboola-expert, SKILL, references + +**Files:** +- Modify: `plugins/kbagent/agents/keboola-expert.md` +- Modify: `plugins/kbagent/skills/kbagent/SKILL.md` +- Modify: `plugins/kbagent/skills/kbagent/references/commands-reference.md` +- Rewrite: `plugins/kbagent/skills/kbagent/references/flow-workflow.md` +- Modify: `plugins/kbagent/skills/kbagent/references/gotchas.md` + +- [ ] **Step 1: keboola-expert.md** + +Update the tool-selection matrix / version gate: flows are conditional flows; +`--component-id` removed; add `flow validate`; reference the validate-before-push +loop; note orchestrator dropped in 0.56.0. + +- [ ] **Step 2: SKILL.md + commands-reference.md** + +Update the flow rows: drop `--component-id`, add `flow validate` and +`flow schema --full`. If the SKILL.md decision table is CI-generated, regenerate +per the repo convention; otherwise hand-edit. + +- [ ] **Step 3: Rewrite flow-workflow.md** + +Full rewrite around conditional flows: the CF template, a conditions cookbook +(operator/function/phase/task examples with string ids), the +validate-before-push loop (`flow validate` → fix → `flow new`/`flow update`), +and execution via `kbagent job run --component-id keboola.flow --config-id ID`. +Remove all `dependsOn` content. + +- [ ] **Step 4: gotchas.md — new entries** + +Add, each tagged `(since v0.56.0)`: +- orchestrator support dropped; `flow list` hides legacy flows (shows a count). +- `--component-id` removed from all flow subcommands. +- old `dependsOn` template is invalid; use `phases[].next[].goto` + conditions. 
+- `INVALID_FLOW_DAG` renamed to `INVALID_FLOW_DEFINITION`. +- **IDs are strings**, not integers. +- bundled schema pinned to job-queue-daemon SHA `24176de…` (re-vendor to update). +Mark the old "flow default-component differs between subcommands" gotcha as +**resolved**. + +- [ ] **Step 5: Commit** + +```bash +git add plugins/kbagent/ +git commit -m "docs(plugin): sync flow surface to conditional flows (0.56.0)" +``` + +--- + +### Task 19: README + version bump + changelog + release checks + +**Files:** +- Modify: `README.md` (if flows mentioned) +- Modify: `pyproject.toml:3` +- Modify: `src/keboola_agent_cli/changelog.py` + +- [ ] **Step 1: README scan** + +Run: `grep -n "orchestrator\|flow" README.md` +Update any flow mention to conditional flows; drop `--component-id`. + +- [ ] **Step 2: Bump version** + +In `pyproject.toml`, change `version = "0.55.0"` to `version = "0.56.0"`. + +- [ ] **Step 3: Add changelog entry** + +In `src/keboola_agent_cli/changelog.py`, add a `"0.56.0"` key at the TOP of +`CHANGELOG` (newest-first) with a breaking-change callout, e.g.: + +```python + "0.56.0": [ + "BREAKING: `flow` command group now targets conditional flows " + "(`keboola.flow`) only; `keboola.orchestrator` support is dropped. " + "`--component-id` removed from every `flow` subcommand and from the " + "`/flows` REST surface. `flow new`/`flow update` validate payloads " + "against the bundled conditional-flow JSON schema (phases[].next[].goto " + "transitions + conditions; job/notification/variable tasks; string ids) " + "and reject invalid bodies with `INVALID_FLOW_DEFINITION` (replaces " + "`INVALID_FLOW_DAG`). New `flow validate --file` does offline schema + " + "semantic checks. `flow schema --full` dumps the JSON schema. `flow list` " + "hides legacy orchestrator configs and reports `legacy_orchestrator_count`. 
" + "Schema pinned to job-queue-daemon@24176de.", + ], +``` + +- [ ] **Step 4: Sync plugin version** + +Run: `make version-sync` +Expected: `plugin.json` / `marketplace.json` updated to 0.56.0. + +- [ ] **Step 5: Full check suite** + +Run: `make check` +Expected: ruff lint + format-check + changelog-check + tests all pass. +Fix any `ruff check --fix` / `ruff format` / `ty check` findings inline. + +- [ ] **Step 6: E2E (if creds available)** + +Run: `make test-e2e` +Expected: PASS or clean CF-disabled skip. + +- [ ] **Step 7: Commit** + +```bash +git add README.md pyproject.toml src/keboola_agent_cli/changelog.py plugins/kbagent/.claude-plugin/ .claude-plugin/ +git commit -m "release: 0.56.0 -- conditional flow support, drop orchestrator" +``` + +--- + +## Final verification + +- [ ] Run the full unit suite: `uv run pytest tests/ -m "not e2e" -v` — all green. +- [ ] `grep -rn "INVALID_FLOW_DAG\|_validate_dag\|dependsOn\|ORCHESTRATOR_COMPONENTS\|FLOW_COMPONENT_IDS" src/ tests/ plugins/ docs/` returns **no production references** (historical changelog text is acceptable). +- [ ] `grep -rn "\-\-component-id" src/keboola_agent_cli/commands/flow.py src/keboola_agent_cli/server/routers/flows.py` returns nothing. +- [ ] `uv run kbagent flow schema | grep goto` and `uv run kbagent flow schema --full | grep '\$schema'` both succeed. +- [ ] `make check` passes. 
diff --git a/docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md b/docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md new file mode 100644 index 00000000..77e3bae1 --- /dev/null +++ b/docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md @@ -0,0 +1,328 @@ +# Design: Conditional Flow (`keboola.flow`) support in kbagent — drop `keboola.orchestrator` + +**Linear issue:** AJDA-2813 "CF: add support in new CLI" +**Target release:** 0.56.0 (one breaking release) +**Status:** design approved (subagent-driven; decisions recorded below) +**Date:** 2026-06-04 + +## Design revision (2026-06-04) — schema is fetched live, not bundled + +This supersedes decision **D3** (and the related D12 / §4.1 / §4.6 "bundled +schema" wording). The conditional-flow JSON Schema is **no longer +bundled/vendored** in the CLI. It is fetched at **runtime** from the stack's +component registry. + +- **Source:** AI Service `/docs/components/keboola.flow` → + `ComponentDetail.configuration_schema`, via the existing `AiServiceClient` + (the same path `config new --push` already uses for schema validation). + Verified live that both the Storage API component index and the AI Service + serve the full CF schema; the AI Service path was chosen for DI symmetry with + `component_service` / `config_service`. +- **Why:** a bundled schema drifts the moment upstream changes and forces a + re-vendor + SHA bump (the original D3). Fetching live guarantees the validator + always matches the stack the user is actually talking to, removes the private + upstream repo from the loop, and eliminates the wheel-packaging surface. +- **`flow_validation.py` stays pure:** `validate_conditional_flow(phases, tasks, + schema=None)` takes the schema as an explicit optional parameter. Structural + (Draft7) validation runs only when a schema is supplied; the semantic checks + always run. No network calls in this module. 
+- **Graceful degradation:** `FlowService.create_flow`/`update_flow` fetch the + live schema before validating. On fetch failure (network, `KeboolaApiError`, + or empty/missing schema) the write is **not** blocked — structural validation + is skipped, semantic checks still run (Storage does not validate flow configs + server-side), and a `structural schema validation skipped: <reason>` warning is + surfaced. A real validation error still rejects with `INVALID_FLOW_DEFINITION`. +- **CLI surface:** `flow validate` gains optional `--project ALIAS` (fetch live + schema → full validation; without it, semantic-only + an explicit note). + `flow schema --full` now **requires `--project`** (fetches/dumps the live + schema); plain `flow schema` stays the offline YAML template. + +The sections below are kept for historical context; where they say "bundled", +"vendored", "pinned SHA", or `resources/conditional-flow-schema.json`, read the +revision above instead. + +## 1. Problem + +The `kbagent flow` command group treats `keboola.flow` as if it shared the legacy +`keboola.orchestrator` shape (a `dependsOn`-based phase DAG). In reality +`keboola.flow` **is** the Conditional Flow (CF) component, with a completely +different configuration schema: + +- `phases[]` carry `next[]` transitions with optional `condition` objects and a + `goto` target (another phase id or `null` = end the flow); plus optional + per-phase `retry`. +- `tasks[]` are typed: `task.type` ∈ {`job`, `notification`, `variable`}; each + task references its phase via `task.phase`. +- Conditions form a recursive grammar (`const`/`phase`/`task`/`variable`/ + `operator`/`function`/`array`). + +Today the CLI: + +- `flow schema` prints a `dependsOn` template **labeled** `keboola.flow` — wrong. +- `_validate_dag` (services/flow_service.py:47) validates a `dependsOn` graph + that does not exist in CF; it passes trivially and checks nothing real + (`next`/`goto` never validated).
+- Defaults are inconsistent: `flow new` defaults to `keboola.flow`, while + `detail`/`update`/`delete`/`schedule`/`schedule-remove` default to + `keboola.orchestrator` (a documented gotcha). +- Tests and `flow-workflow.md` cement the wrong contract. + +**Goal:** rewrite the flow surface for `keboola.flow` (Conditional Flows) with +the correct schema and validation, and **drop `keboola.orchestrator` support +entirely** (the old CLI is being deprecated). + +## 2. Source of truth (grounding) + +The CF JSON Schema lives in the **private** repo `keboola/job-queue-daemon`, +file `docs/flow-schema.json` (JSON Schema draft-07, maintained by the engine +that executes conditional flows). + +- The public raw URL 404s (private repo); the file is reachable via + `gh api repos/keboola/job-queue-daemon/contents/docs/flow-schema.json` with + the maintainer's GitHub credentials. +- **Pinned commit SHA:** `24176de2ec1098e0a4be278815e0ca57a93cc93d` + (committed 2026-05-26). This SHA is recorded in the loader header comment and + in the gotchas log. + +### Verified schema shapes (read from the pinned schema, 21 KB) + +- Top-level `required`: `["phases", "tasks"]`. +- **`phases[]`**: required `["id", "name"]`; props `id, name, retry?, next?, description?`. + - `next[]`: required `["id", "goto"]`; props `id, name?, condition?, goto`. + - `goto`: `type: ["string", "null"]` — target phase id or `null` (end flow). +- **`tasks[]`**: required `["id", "name", "task", "phase"]`; props `id, name, phase, task, enabled?`. + - `task` is a `oneOf` over three typed shapes: + - `job`: required `type=job, componentId, mode` + `anyOf(configId|configData)`. + - `notification`: notification task shape. + - `variable`: variable task shape. +- **IDs are STRINGS, not integers.** `phase.id`, `task.id`, `next.id`, + `task.phase`, `goto` are all `type: "string"` (or `["string","null"]` for + `goto`). 
**This contradicts the issue text and the current code, which assume + integer phase ids.** The new code, template, fixtures, and docs MUST use + string ids. +- **Conditions** (`#/definitions/conditionObject` = `oneOf` of): + - `constantCondition`: `type ∈ {const, constant}`, `value`. + - `phaseCondition`: `type=phase`, `phase`, `value`. + - `taskCondition`: `type=task`, `task`, `value`. + - `variableCondition`: `type=variable`, `value`. + - `operatorCondition`: `type=operator`, `operator`, `operands`. Two variants: + - relational/logical: `operator ∈ {AND, OR, EQUALS, NOT_EQUALS, GREATER_THAN, LESS_THAN, INCLUDES, CONTAINS}`. + - phase-scoped: requires `operator + phase + operands`, `operator ∈ {ALL_TASKS_IN_PHASE, ANY_TASKS_IN_PHASE}`. + - `functionCondition`: `type=function`, `function ∈ {COUNT, DATE}`, `operands`. + - `arrayCondition`: `type=array`, `operands`. +- `retryConfiguration`: `retryOn?`, `strategy ∈ {linear}`, `strategyParams?`. + +## 3. Decisions (open questions + judgment calls) + +These were decided by the implementing subagent (no interactive user); the user +should review them. + +| # | Question | Decision | Rationale | +|---|----------|----------|-----------| +| D1 | Feature-gate preflight for CF-disabled projects? | **Error-mapping only** (adopt issue proposal 1). No proactive `conditional_flows` feature check. | YAGNI; one fewer API round-trip; the Storage API already rejects with a clear-enough error we can map. A proactive check would duplicate engine logic and drift. | +| D2 | Legacy-flow UX in `flow list`? | **Warning only** via `legacy_orchestrator_count` (adopt proposal 2). No `--legacy` escape hatch. | YAGNI; orchestrator is deprecated. Surfacing a count tells users why a flow "disappeared" without re-introducing legacy plumbing. | +| D3 | Schema drift vs upstream? | **Pin SHA + gotcha note for v1** (adopt proposal 3). No CI freshness check. | The upstream repo is private, so a CI fetch would need a token in CI; not worth it for v1. 
A follow-up issue can add a freshness job if drift bites. | +| D4 | **ID type: integer or string?** | **String ids everywhere** (overrides the issue's integer assumption). | Grounded in the pinned schema: all ids are `type: "string"`. Using ints would fail Draft7 validation and produce configs the engine rejects. Recorded as a deviation from the issue text. | +| D5 | Cycle detection over `goto` edges? | **No cycle detection.** Unreachable phases = **warning**, not error. | The issue is explicit: `goto` loops are legal at runtime. Reachability is computed by a forward graph walk from the entry phase. | +| D6 | What is the "entry phase" for reachability? | **The first phase in `phases[]`** (document this). | The schema has no explicit entry marker; engine convention is array order. Keep it simple and documented; reachability is a warning only, so a wrong guess is non-fatal. | +| D7 | `flow validate` exit code on validation failure? | **Exit 2** (usage/validation), matching the issue and existing `VALIDATION_ERROR` convention in flow.py. Exit 0 on success. | Consistent with how `_load_flow_yaml` failures already exit 2. | +| D8 | Does `flow validate` hit the network? | **No.** Pure offline: parse YAML/JSON → schema + semantic validation. No project/branch required, no `--project`. | Lets agents run a tight validate-before-push loop with zero credentials/latency. | +| D9 | `operatorCondition` arity enforcement vs schema. | Enforce as **semantic checks** layered on top of Draft7 structural validation: EQUALS/NOT_EQUALS/GREATER_THAN/LESS_THAN/INCLUDES/CONTAINS = 2 operands; AND/OR ≥ 1; COUNT/DATE (functionCondition) = 1; ALL/ANY_TASKS_IN_PHASE require `phase`. | The schema cannot express per-operator operand counts; these are the issue's explicit arity rules, refined to match the verified enums. | +| D10 | Behavior of `update_flow` validation. 
| Validation runs on the **merged** result (fetch current body when only one of phases/tasks supplied), preserving today's merge-aware behavior. | Matches issue Phase 2 and current code; avoids validating a half-config. | +| D11 | `flow detail` JSON output. | **Full-body passthrough unchanged.** Only the **human** rendering is rewritten (per-phase transitions, task-type badges, retry). | Stable machine contract; agents already consume the raw body. | +| D12 | `flow schema --full`. | Add `--full` to dump the **bundled JSON schema verbatim**; default prints the YAML template. JSON mode (`--json`) of `--full` returns the parsed schema object. | Agents need the exact contract; humans need a copy-paste template. | +| D13 | Removing `INVALID_FLOW_DAG` from `ErrorCode`. | **Remove** it and add `INVALID_FLOW_DEFINITION`. Grep confirmed references are only in this repo (errors.py, flow_service.py, changelog.py history, tests, docs) — no external wire consumers known. | Per coding-convention note "renaming/removing a code = major bump"; we accept this as part of the single 0.56.0 breaking release and changelog it loudly. | +| D14 | `component_id` on REST models. | **Drop** `component_id` from `FlowCreate`/`FlowUpdate`/`FlowSchedule` and from query params on `detail`/`delete`/`list_schedules`/`remove_schedule`. Keep URL paths stable. | Issue Phase 4; CF is the only component now. | +| D15 | Service signatures. | **Remove** `component_id` from all 8 service methods; hardcode `FLOW_COMPONENT_ID = "keboola.flow"`. Scheduler `target.componentId` is always `keboola.flow`. | Issue Phase 2. Reduces a whole class of "wrong default component" bugs. | +| D16 | `notification` / `variable` task validation depth. | Rely on Draft7 structural validation for their internal shape; semantic layer only checks the cross-cutting rules (unique ids, phase refs, enabled-task-per-phase). | The schema already encodes their structure; re-implementing it in Python would drift. | + +## 4. 
Architecture + +Follows the repo's 3-layer design (CLI → service → client) plus a new pure +validation module. + +``` +commands/flow.py (LAYER 1) thin Typer: 9 subcommands (8 existing + new `validate`) +services/flow_service.py(LAYER 2) CRUD/schedule; single component; calls validation +services/flow_validation.py (NEW) pure functions: schema load + structural + semantic +src/keboola_agent_cli/resources/conditional-flow-schema.json (NEW) bundled schema +server/routers/flows.py REST mirror (component_id dropped) +``` + +### 4.1 New module: `services/flow_validation.py` + +Pure, dependency-light (only `jsonschema` + `importlib.resources`), **no HTTP, +no ConfigStore** — trivially unit-testable. Public surface: + +- `load_conditional_flow_schema() -> dict` — loads the bundled JSON via + `importlib.resources.files("keboola_agent_cli.resources")`, cached with + `functools.lru_cache`. Header comment names the upstream repo + pinned SHA. +- `validate_conditional_flow(phases: list[dict], tasks: list[dict]) -> list[str]` + — returns a flat list of human-readable error strings (empty = valid). + - **Structural:** build the document `{"phases": phases, "tasks": tasks}` and + run `jsonschema.Draft7Validator(schema).iter_errors(doc)` — collect **all** + errors (not first-fail), each rendered with its JSON path. + - **Semantic** (only runs if structural passed, to avoid cascade noise): + - unique phase ids; unique task ids; + - every `task.phase` references an existing phase id; + - every `next[].goto` is an existing phase id **or** `null`; + - a phase whose `next[]` contains any conditional transition MUST end with a + default (condition-less) transition (the last `next` item, `goto` may be a + phase id or `null`); + - every phase has ≥1 **enabled** task (`enabled` defaults true); + - operator/function operand-arity (D9). 
+- `find_unreachable_phases(phases) -> list[str]` — forward BFS over `next[].goto` + edges from the entry phase (first phase, D6); phases never visited are + returned as **warnings** (surfaced separately, never block writes). + +`validate_conditional_flow` returns errors; reachability is computed separately +so callers can treat it as a warning. **No cycle detection** (D5). + +### 4.2 `services/flow_service.py` changes + +- `FLOW_COMPONENT_IDS: tuple` → `FLOW_COMPONENT_ID = "keboola.flow"`. +- Delete `_validate_dag`; import `validate_conditional_flow` / + `find_unreachable_phases` from `flow_validation`. +- Remove `component_id` param from `list_flows`, `get_flow_detail`, + `create_flow`, `update_flow`, `delete_flow`, `list_flow_schedules`, + `set_flow_schedule`, `remove_flow_schedule`. Scheduler `target.componentId` is + hardcoded to `keboola.flow`. +- `create_flow` / `update_flow`: run `validate_conditional_flow` on the + (merged, for update) phases+tasks; on errors raise + `KeboolaApiError(error_code=ErrorCode.INVALID_FLOW_DEFINITION, status_code=400)` + with all messages joined. Unreachable-phase warnings are attached to the + returned dict (`warnings: [...]`), not raised. +- `list_flows`: single-component listing; additionally count + `keboola.orchestrator` configs per project (still a `list_component_configs` + call, 404 → 0) and return `legacy_orchestrator_count` (per project + total) so + the CLI can warn (D2). The orchestrator configs themselves are **not** added + to the `flows` array. +- Error mapping: surface the CF-disabled project error + (`conditional_flows=false`) as a clear, actionable message (D1). + +### 4.3 `commands/flow.py` changes + +- Drop `--component-id` from all 8 subcommands; drop `_FLOW_COMPONENT_CHOICES`. 
+- Replace `_FLOW_SCHEMA` with a CF YAML template (string ids) demonstrating: 2 + phases with a conditional `next` (`ANY_TASKS_IN_PHASE` failure check + default + transition), a `job` task with `retry`, a `notification` task, a `variable` + task. +- `flow schema [--full]` (D12): default prints the YAML template; `--full` + dumps the bundled JSON schema verbatim (rich JSON syntax for humans, parsed + object for `--json`). +- **New** `flow validate (--file @flow.yaml | -)` (D7, D8): offline validation; + loads YAML/JSON, calls `validate_conditional_flow` + `find_unreachable_phases`; + exit 0 if no errors (warnings still printed), exit 2 if errors; `--json` lists + `{errors: [...], warnings: [...], valid: bool}`. +- `flow detail` human rendering rewrite (D11): per-phase transition list + (`→ goto [condition summary | default]`), task-type badges + (`job`/`notification`/`variable`), retry info. JSON unchanged. +- `flow list` human + JSON: surface `legacy_orchestrator_count` as a + `formatter.warning` line (human) and a key in the JSON payload. + +### 4.4 `permissions.py` + +Add `"flow.validate": "read"` to the operation registry (next to the other +`flow.*` entries). + +### 4.5 REST (`server/routers/flows.py`) + +Drop `component_id` from `FlowCreate`/`FlowUpdate`/`FlowSchedule` and from the +query params on `detail`/`delete`/`list_schedules`/`remove_schedule`; drop the +now-unused `DEFAULT_FLOW_COMPONENT` plumbing. Paths unchanged (D14). No `validate` +REST endpoint in v1 (offline CLI-only; can be added later if needed — YAGNI). + +### 4.6 Packaging + +The bundled JSON must actually ship in the wheel (same class of problem as +`_ui_dist`). Since `conditional-flow-schema.json` lives **inside** the package +tree (`src/keboola_agent_cli/resources/`) and is **not** gitignored, hatchling's +default wheel collection includes it — **no `force-include` needed**. 
The plan +must nonetheless **verify** this by building the wheel and asserting the JSON is +present (`unzip -l dist/*.whl | grep conditional-flow-schema.json`), and add the +`resources/` dir to the sdist `include` list if not already covered by `src/` +(it is — sdist includes `src/`). A unit test also calls +`load_conditional_flow_schema()` to catch a missing-resource regression. + +## 5. Data flow + +``` +flow new --file @flow.yaml + → commands/flow._load_flow_yaml → {phases, tasks} + → FlowService.create_flow + → flow_validation.validate_conditional_flow (schema + semantic) [errors → INVALID_FLOW_DEFINITION] + → flow_validation.find_unreachable_phases (warnings) + → client.create_config(component_id="keboola.flow", {phases, tasks}) + → result {id, name, warnings?} + +flow validate --file @flow.yaml (offline) + → _load_flow_yaml → validate_conditional_flow + find_unreachable_phases + → exit 0 (+warnings) | exit 2 (errors) +``` + +## 6. Error handling + +- Structural + semantic validation failures → + `INVALID_FLOW_DEFINITION` (replaces `INVALID_FLOW_DAG`), status 400, + non-retryable, all messages joined. +- CF-disabled project → mapped to an actionable message (D1); E2E skips when a + project reports `conditional_flows=false`. +- YAML/JSON parse errors → `VALIDATION_ERROR`, exit 2 (existing behavior). +- Reachability issues → **warnings**, never block. + +## 7. Testing + +- **`tests/test_flow_validation.py` (new):** valid CF fixture; missing default + transition; unknown `goto`; task → missing phase; phase with no enabled task; + duplicate phase/task ids; operand-arity violations (each operator class); + notification + variable task shapes; unreachable-phase warning; `goto` loop is + **legal** (no error); string-id fixtures only. +- **`tests/test_flow_service.py` (rewrite):** CF payloads, no `component_id` + args, `INVALID_FLOW_DEFINITION`, `legacy_orchestrator_count`, merge-aware + update validation. Remove all `dependsOn` fixtures. 
+- **`tests/test_flow_cli.py` (rewrite):** new `flow validate`, `flow schema + --full`, dropped `--component-id`, detail human rendering. Remove `dependsOn`. +- **`tests/test_e2e.py`:** full round-trip create → detail → update → schedule → + schedule-remove → delete + `flow validate` against a CF-enabled project; skip + with a clear reason when `conditional_flows=false`. +- A unit assertion that `load_conditional_flow_schema()` succeeds (packaging + regression guard). + +## 8. Cleanup sweep + +- Delete dead `ORCHESTRATOR_COMPONENTS` from `sync/config_format.py` (verified + unused repo-wide). +- `services/component_service.py`: `_FLOW_COMPONENT_IDS` / + `_build_flow_config_yml` still reference orchestrator for the `config new` + scaffold path — update the flow scaffold to emit a CF skeleton (string ids, + `phases`/`tasks` with a `job` task) and default component `keboola.flow`; + remove the orchestrator default. +- Repo-wide `keboola.orchestrator` grep: update comments/help in `context.py`, + `commands/flow.py`, docs. The sync engine itself is unaffected (flow payload + round-trips via `_configuration_extra`). + +## 9. Docs & plugin sync (convention #17 — all mandatory, silent-drift) + +`CLAUDE.md` `## All CLI Commands` flow block; `commands/context.py` +`AGENT_CONTEXT`; `plugins/kbagent/agents/keboola-expert.md` (version gate + tool +matrix); `SKILL.md` + `references/commands-reference.md`; full rewrite of +`references/flow-workflow.md` (CF template, conditions cookbook, validate-before-push +loop, `job run --component-id keboola.flow` to execute); `references/gotchas.md` +new entries tagged `(since v0.56.0)` (orchestrator dropped, `--component-id` +removed, old `dependsOn` template invalid, `INVALID_FLOW_DAG` → +`INVALID_FLOW_DEFINITION`, **string ids**), and mark the old default-component +gotcha resolved; `README.md` if flows mentioned. + +## 10. 
Release + +Bump `pyproject.toml` → `0.56.0`; add `changelog.py` entry with an explicit +**breaking-change** callout (orchestrator dropped, `--component-id` removed, +`INVALID_FLOW_DAG` → `INVALID_FLOW_DEFINITION`, CF schema validation, string +ids); `make version-sync`; `make check`; `make test-e2e`. + +## 11. Out of scope + +- Migration tooling `keboola.orchestrator` → `keboola.flow`. +- A `flow run` command (CF executes via `job run --component-id keboola.flow`). +- CF awareness in `lineage` / `schedule find` beyond existing passthrough. +- A `validate` REST endpoint (offline CLI only for v1). diff --git a/plugins/kbagent/agents/keboola-expert.md b/plugins/kbagent/agents/keboola-expert.md index 85d0cc90..f4e9eadb 100644 --- a/plugins/kbagent/agents/keboola-expert.md +++ b/plugins/kbagent/agents/keboola-expert.md @@ -83,7 +83,7 @@ a critical failure. | User intent | First choice | Fallback | NEVER | |---|---|---|---| -| Update flow (rename, description, phases) | `kbagent flow update` (partial, no `--file`) | `--file` after fetching current phases, merging locally, passing full YAML | `tool call update_flow` (strips `behavior.onError` pre-MCP v1.60); partial `--file` that drops fields | +| Author / edit a conditional flow (keboola.flow) | `kbagent flow validate --file @flow.yaml --project ALIAS` (fetches live schema; loop until clean) then `kbagent flow new`/`flow update --file` | fetch `flow detail`, merge phases/tasks locally, re-validate, push | `--component-id` (removed 0.56.0); integer ids (ids are STRINGS); `dependsOn` (use `next[].goto` + conditions); `keboola.orchestrator` (dropped 0.56.0); assuming `flow schema --full` works offline (now needs `--project`) | | Schedule flow | `kbagent flow schedule --cron ... 
[--timezone]` | `tool call create_flow_schedule` | raw REST to `/storage/configurations/keboola.scheduler` | | Create Snowflake transformation | `kbagent config new --component-id keboola.snowflake-transformation --name N --project P --push --no-files` (0.33.0+; one-shot, no scaffold, body defaults to `{}` and validation auto-skips for empty shell -- then `config update --set ...` to fill in script) **or** `kbagent config new --component-id keboola.snowflake-transformation --project P --output-dir D` + `config update --set ...` (scaffold-then-patch) | `tool call create_sql_transformation` (lower schema, avoids the MCP `create_config` Snowflake refusal) | `tool call create_config` (refuses keboola.snowflake-transformation) -- note: `config new --push` does NOT inherit this refusal because it wraps the raw Storage API directly | | Update SQL transformation body (script[]) | `kbagent config update --project P --component-id keboola.snowflake-transformation --config-id K --configuration @body.json` (0.28.0+ auto-normalizes string `script` to array; SQL gets statement-level split, Python/R gets `[script]` wrap; envelope's `normalizations: [...]` records every change. 0.31.0+ also re-splits multi-statement LIST elements -- closes the #274 ODBC `statement count 2 vs desired 1` crash that survives the 0.28.0 string fix) | -- | `tool call update_sql_transformation` -- still vulnerable to BOTH the #245 string-vs-array AND #274 list-element runtime crashes because it pushes raw to Storage API; raw `PUT /v2/storage/components/.../configs/...` -- same trap | @@ -161,9 +161,20 @@ read it when a trigger fires. Each `(X.Y.Z+)` tag is the version floor. **Flow / config edits** -- **Flow phase `behavior.onError`**: `flow update --file` is a **full-replace** -- - omitting `behavior` on a phase silently drops it. Fetch `flow detail` first, - merge locally, then push. Integer-vs-string phase IDs are irrelevant to this. 
+- **Conditional flows only (since 0.56.0)**: `flow` targets `keboola.flow`; + `keboola.orchestrator` is dropped and `--component-id` is removed from every + `flow` subcommand. IDs are **strings**; phases use `next[].goto` (a phase id or + `null` to end) + optional `condition`; tasks are typed (`job`/`notification`/ + `variable`). The old `dependsOn` template is invalid. `flow new`/`flow update` + validate against the **live** CF schema fetched at runtime from the stack + (AI Service `configurationSchema` for `keboola.flow`; NOT bundled) and reject + bad bodies with `INVALID_FLOW_DEFINITION`. If the schema fetch fails + (network/empty), the write is NOT blocked: structural validation is skipped, + semantic checks still run, and a `structural schema validation skipped` + warning is surfaced. `flow update --file` is still a **full-replace** of + phases+tasks -- fetch `flow detail` first, merge locally, run + `flow validate --file @merged.yaml --project ALIAS` (full schema) until clean, + then push. - **Snowflake transformation scaffolding**: MCP `create_config` REFUSES `keboola.snowflake-transformation`. Use `config new --push --no-files` (0.33.0+) or `config new --output-dir` then `config update`, or MCP @@ -306,20 +317,28 @@ Do NOT proceed to step 4 without explicit go-ahead. Do NOT bolt a `job run` onto the end -- that is a SEPARATE turn with a SEPARATE confirmation. -### 4.3 Flow structural edit +### 4.3 Conditional-flow structural edit (keboola.flow) ``` -# 1. Fetch current full YAML +# 1. Fetch current full body (phases use next[].goto + conditions; tasks are typed) kbagent --json flow detail --project P --flow-id F > /tmp/flow-current.json -# 2. Build merged YAML locally (preserve behavior.onError, description, phases you're not touching) +# 2. Build merged YAML locally (string ids; preserve description + phases/tasks +# you're not touching -- flow update --file is a FULL replace of phases+tasks) -# 3. 
Dry-run is NOT supported by flow update; instead verify your merged YAML -# echoes the expected structure, then apply: +# 3. Validate before pushing; loop until clean. Pass --project to fetch the +# LIVE schema from the stack for full structural + semantic validation +# (without --project only semantic checks run + a "skipped" note): +kbagent --json flow validate --file @/tmp/flow-merged.yaml --project P + +# 4. Apply. flow update fetches the live schema and validates on write +# (INVALID_FLOW_DEFINITION on a bad body; a schema-fetch failure degrades to +# semantic-only + a warning -- it never blocks the write): kbagent --json flow update --project P --flow-id F --file @/tmp/flow-merged.yaml -# 4. Fetch again and verify +# 5. Fetch again and verify; execute the flow with: kbagent --json flow detail --project P --flow-id F +kbagent --json job run --project P --component-id keboola.flow --config-id F --wait ``` ### 4.4 Workspace-based SQL debugging @@ -371,11 +390,25 @@ kbagent sync push --project dest → Look up the native `kbagent ` equivalent in §2. If it exists, switch to it; otherwise use the `kbagent serve` REST API. -- `update_flow` returned success but verification shows - `behavior.onError = None` on phases that had it before: - → You likely used MCP `tool call update_flow` or `--file` without - merging. Re-fetch current detail, merge behavior back in, push via - `kbagent flow update --file` (native). +- `flow new`/`flow update` failed with `INVALID_FLOW_DEFINITION`: + → The body failed schema/semantic validation. Run + `kbagent flow validate --file @flow.yaml --project ALIAS` to fetch the + live schema and see every error (string ids? `next[].goto` targets exist? + each phase has an enabled task? conditional transitions end with a + default?). Fix and re-push. 
+ +- `flow new`/`flow update`/`flow validate` warns + `structural schema validation skipped: ...`: + → The live CF schema could not be fetched from the stack (network, or the + AI Service returned no `configurationSchema`). This is NOT an error: only + structural checks were skipped; semantic checks still ran. The write + proceeded. Re-run when the stack is reachable for full structural coverage. + +- `flow update` returned success but a phase/task you didn't touch + vanished: + → `flow update --file` is a FULL replace of phases+tasks. Re-fetch + `flow detail`, merge the missing items back in locally, run + `flow validate`, then push again. - `403 Forbidden` on a write: → Check: `kbagent permissions show`. If the active policy denies diff --git a/plugins/kbagent/skills/kbagent/SKILL.md b/plugins/kbagent/skills/kbagent/SKILL.md index 7e11658c..73250927 100644 --- a/plugins/kbagent/skills/kbagent/SKILL.md +++ b/plugins/kbagent/skills/kbagent/SKILL.md @@ -229,12 +229,13 @@ When working inside a git repository or project directory, run `kbagent init` (o | Check whether the configured token can use Kai (master token + AI Agent Chat) | `kbagent kai preflight` | | Fetch the full message history of a single Kai chat | `kbagent kai chat-detail --chat-id CHAT-ID` | | List recent Kai chat sessions | `kbagent kai history` | -| List all flows (keboola.orchestrator + keboola.flow) across projects | `kbagent flow list` | -| Show detailed flow information including phases and tasks | `kbagent flow detail --project PROJECT --flow-id FLOW-ID` | -| Print the YAML format expected by 'flow new' and 'flow update' | `kbagent flow schema` | -| Create a new flow configuration | `kbagent flow new --project PROJECT --name NAME` | +| List conditional flows (keboola.flow) across projects | `kbagent flow list` | +| Show detailed conditional-flow information including phases and tasks | `kbagent flow detail --project PROJECT --flow-id FLOW-ID` | +| Print the conditional-flow YAML template, or 
--full for the live JSON Schema | `kbagent flow schema` | +| Validate a conditional-flow definition (schema + semantic checks) | `kbagent flow validate --file FILE` | +| Create a new conditional-flow (keboola.flow) configuration | `kbagent flow new --project PROJECT --name NAME` | | Update a flow's name, description, or phases/tasks | `kbagent flow update --project PROJECT --flow-id FLOW-ID` | -| Delete a flow configuration | `kbagent flow delete --project PROJECT --flow-id FLOW-ID` | +| Delete a conditional-flow (keboola.flow) configuration | `kbagent flow delete --project PROJECT --flow-id FLOW-ID` | | Bind a cron schedule to a flow (upsert: creates or updates) | `kbagent flow schedule --project PROJECT --flow-id FLOW-ID --cron CRON` | | Remove all schedules bound to a flow (deletes keboola.scheduler configs) | `kbagent flow schedule-remove --project PROJECT --flow-id FLOW-ID` | | List cron schedules (keboola.scheduler configs) across projects | `kbagent schedule list` | diff --git a/plugins/kbagent/skills/kbagent/references/commands-reference.md b/plugins/kbagent/skills/kbagent/references/commands-reference.md index b2f3dc23..99f92011 100644 --- a/plugins/kbagent/skills/kbagent/references/commands-reference.md +++ b/plugins/kbagent/skills/kbagent/references/commands-reference.md @@ -203,15 +203,17 @@ Requires the project to be added with its **master ('owner') Storage API token** - `kai chat-detail --chat-id ID [--project NAME]` -- fetch full transcript of one chat as a flat `[{role, content, created_at}]` list. Tool calls and non-text parts skipped. Use to restore / export a conversation - `kai history [--project NAME] [--limit N]` -- list recent Kai chat sessions (default limit: 10) -## Flows (Orchestrator) -- `flow list [--project NAME] [--branch ID] [--with-schedules]` -- list all flows (keboola.orchestrator + keboola.flow) across one or all projects. 
`--with-schedules` enriches each row with `schedules: [{schedule_id, cron, timezone, enabled}, ...]` via one extra keboola.scheduler list call per project (not per flow) -- `flow detail --project NAME --flow-id ID [--component-id keboola.orchestrator|keboola.flow] [--branch ID]` -- full phase/task breakdown; groups tasks by phase, lists orphan tasks -- `flow schema` -- print YAML template for flow configuration (phases + tasks); use with `--file @-` or save to a file -- `flow new --project NAME --name NAME [--component-id keboola.orchestrator|keboola.flow] [--description D] [--file @path.yaml|-|JSON] [--branch ID]` -- create a flow; DAG validated before API call; default component: keboola.flow -- `flow update --project NAME --flow-id ID [--component-id ID] [--name N] [--description D] [--file @path.yaml|-|JSON] [--branch ID]` -- update name, description, or phases/tasks; requires at least one of --name/--description/--file -- `flow delete --project NAME --flow-id ID [--component-id ID] [--branch ID] [--yes]` -- delete a flow config (confirmation guard) -- `flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--component-id ID] [--timezone TZ] [--disabled] [--branch ID]` -- attach a cron schedule (stored as keboola.scheduler config); replaces any existing schedule -- `flow schedule-remove --project NAME --flow-id ID [--component-id ID] [--branch ID] [--yes]` -- remove all cron schedules attached to a flow; idempotent +## Flows (Conditional Flows -- keboola.flow only) +> Since 0.56.0 the `flow` group targets `keboola.flow` (Conditional Flows) ONLY; `keboola.orchestrator` is dropped and `--component-id` is removed from every subcommand. IDs are **strings**; phases use `next[].goto` (a phase id or `null`) + optional `condition`; tasks are typed (`job`/`notification`/`variable`). The old `dependsOn` template is invalid. Execute a flow with `kbagent job run --component-id keboola.flow --config-id ID`. See `flow-workflow.md`. 
+- `flow list [--project NAME] [--branch ID] [--with-schedules]` -- list conditional flows (keboola.flow) across one or all projects. Legacy keboola.orchestrator configs are NOT listed; their total appears as `legacy_orchestrator_count` (+ a warning). `--with-schedules` enriches each row with `schedules: [{schedule_id, cron, timezone, enabled}, ...]` via one extra keboola.scheduler list call per project (not per flow) +- `flow detail --project NAME --flow-id ID [--branch ID]` -- full phase/task breakdown; per-phase transitions (`→ goto [condition | default]`), typed-task badges, retry info; JSON is the raw body unchanged +- `flow schema [--full --project NAME]` -- plain form prints the offline conditional-flow YAML template (string ids, `next[].goto`, typed tasks). `--full` fetches and dumps the **live** JSON Schema from the stack (AI Service `configurationSchema` for `keboola.flow`) and **requires `--project`** -- the schema is no longer bundled +- `flow validate --file @path.yaml|- [--project NAME]` -- validate a definition. With `--project`: fetch the live schema from the stack for full structural + semantic validation (a fetch failure degrades to semantic-only + a note). Without `--project`: semantic-only validation + a note that structural validation was skipped (no schema source). Exit 0 valid (warnings still printed), exit 2 on errors; `--json` lists `{valid, errors, warnings, notes}` +- `flow new --project NAME --name NAME [--description D] [--file @path.yaml|-|JSON] [--branch ID]` -- create a conditional flow; validated against the **live** CF schema fetched from the stack before the API call (`INVALID_FLOW_DEFINITION` on failure). 
A schema-fetch failure does NOT block the write: structural check skipped, semantic checks still run, a `structural schema validation skipped` warning is surfaced +- `flow update --project NAME --flow-id ID [--name N] [--description D] [--file @path.yaml|-|JSON] [--branch ID]` -- update name, description, or phases/tasks; `--file` is a full-replace of phases+tasks; merge-aware validation against the live CF schema (same graceful semantic-only degradation on fetch failure); requires at least one of --name/--description/--file +- `flow delete --project NAME --flow-id ID [--branch ID] [--yes]` -- delete a flow config (confirmation guard) +- `flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--timezone TZ] [--disabled] [--branch ID]` -- attach a cron schedule (stored as keboola.scheduler config, target.componentId=keboola.flow); replaces any existing schedule +- `flow schedule-remove --project NAME --flow-id ID [--branch ID] [--yes]` -- remove all cron schedules attached to a flow; idempotent ## Schedule Discovery & Audit (Fleet-Wide) - `schedule list [--project NAME ...] [--enabled-only] [--branch ID]` -- fleet-wide list of every `keboola.scheduler` config across one, many, or all projects (parallel fan-out, no --project = all). Each row has `project_alias`, `schedule_id`, `schedule_name`, `parent_component_id`, `parent_config_id`, `parent_name`, `cron`, `timezone`, `enabled`. Answers "which configs are running on cron triggers across N projects?" 
without enumerating flows diff --git a/plugins/kbagent/skills/kbagent/references/flow-workflow.md b/plugins/kbagent/skills/kbagent/references/flow-workflow.md index 888073d3..f20ace74 100644 --- a/plugins/kbagent/skills/kbagent/references/flow-workflow.md +++ b/plugins/kbagent/skills/kbagent/references/flow-workflow.md @@ -1,121 +1,225 @@ -# Flow Workflow +# Flow Workflow (Conditional Flows / keboola.flow) -Flows orchestrate Keboola transformations and extractors in a directed acyclic graph (DAG) of phases and tasks. kbagent supports two flow component types: `keboola.orchestrator` (classic) and `keboola.flow` (new format). +> **Since v0.56.0:** the `flow` command group targets **`keboola.flow` +> (Conditional Flows) ONLY**. `keboola.orchestrator` support was dropped and +> `--component-id` was removed from every subcommand. The old `dependsOn` +> phase-DAG template is **invalid**. IDs are **strings**. -## Core concepts +Conditional Flows model orchestration as phases connected by **transitions** +(`next[].goto`) with optional **conditions**, plus typed **tasks**. -- **Phase**: a named stage with `id` and `dependsOn` (list of upstream phase IDs). Phases with no `dependsOn` run first. -- **Task**: a unit of work referencing a component config, assigned to a phase via `phase` field. -- **Schedule**: stored as a `keboola.scheduler` config that targets the flow; not part of the flow config itself. +## Core concepts -## Quick start: create a flow +- **Phase**: `{id (string), name, next?: [...], retry?, description?}`. The + **entry phase** is the first item in `phases[]`. +- **Transition** (`next[]` item): `{id, goto, condition?, name?}`. `goto` is a + phase id **or `null`** (= end the flow). A phase whose `next[]` contains any + conditional transition **MUST end with a default** (condition-less) transition. +- **Task**: `{id (string), name, phase, enabled?, task: {...}}`. `task.type` is + one of `job` / `notification` / `variable`. 
Every phase must have **≥1 enabled + task**. +- **Condition**: a recursive grammar (`const`/`phase`/`task`/`variable`/ + `operator`/`function`/`array`). Operators: `AND`/`OR` (≥1 operand), + `EQUALS`/`NOT_EQUALS`/`GREATER_THAN`/`LESS_THAN`/`INCLUDES`/`CONTAINS` + (exactly 2 operands), `ALL_TASKS_IN_PHASE`/`ANY_TASKS_IN_PHASE` (require a + `phase` field). Functions `COUNT`/`DATE` take exactly 1 operand. +- **Schedule**: stored as a `keboola.scheduler` config whose + `target.componentId` is `keboola.flow`; not part of the flow body itself. + +## Quick start: validate-before-push loop ```bash -# 1. See the template +# 1. See the template (string ids, next[].goto, typed tasks) -- offline kbagent flow schema +# Full JSON Schema (the exact contract) -- fetched live from the stack, needs --project: +kbagent flow schema --full --project PROJECT -# 2. Create a simple flow from YAML +# 2. Author flow.yaml cat > flow.yaml <<'EOF' phases: - - id: 1 - name: Extract - dependsOn: [] - - id: 2 - name: Transform - dependsOn: [1] + - id: "extract" + name: "Extract" + next: + # If any task in 'extract' failed, branch to 'notify'. + - id: "on-failure" + goto: "notify" + condition: + type: operator + operator: ANY_TASKS_IN_PHASE + phase: "extract" + operands: [] + # Default transition (NO condition) -- MUST be last. 
+ - id: "default" + goto: "transform" + - id: "transform" + name: "Transform" + next: + - id: "done" + goto: null # end the flow + - id: "notify" + name: "Notify on failure" tasks: - - id: 1 - name: Run extractor - phase: 1 + - id: "task-extract" + name: "Run extractor" + phase: "extract" + enabled: true task: - mode: run - componentId: keboola.ex-db-snowflake + type: job + componentId: "keboola.ex-db-snowflake" configId: "123456" - - id: 2 - name: Run transformation - phase: 2 - task: mode: run - componentId: keboola.snowflake-transformation + - id: "task-transform" + name: "Run transformation" + phase: "transform" + enabled: true + task: + type: job + componentId: "keboola.snowflake-transformation" configId: "789012" + mode: run + - id: "task-notify" + name: "Email the team" + phase: "notify" + enabled: true + task: + type: notification + title: "Flow failed" + message: "The extract phase reported a failure." + recipients: + - channel: email + address: "team@example.com" EOF +# 3. Validate; loop until clean. Pass --project to fetch the LIVE schema from the +# stack for full structural + semantic validation. Without --project you get +# semantic-only checks plus a note that structural validation was skipped. +kbagent --json flow validate --file @flow.yaml --project prod + +# 4. Create. flow new fetches the live schema and validates on write; a +# schema-fetch failure degrades to semantic-only + a warning (never blocks). 
kbagent --json flow new --project prod --name "Daily ETL" --file @flow.yaml ``` +## Conditions cookbook + +```yaml +# A task in another phase succeeded: +condition: + type: task + task: "task-extract" + value: "success" + +# Logical AND of two checks: +condition: + type: operator + operator: AND + operands: + - { type: phase, phase: "extract", value: "success" } + - { type: variable, value: "run_full" } + +# Equality (exactly 2 operands): +condition: + type: operator + operator: EQUALS + operands: + - { type: variable, value: "env" } + - { type: const, value: "prod" } +``` + ## List and inspect flows ```bash -# All flows across all projects +# Conditional flows across all projects (legacy orchestrator configs are +# NOT listed -- their count surfaces as legacy_orchestrator_count + a warning). kbagent --json flow list # Flows in one project kbagent --json flow list --project prod -# Full phase/task breakdown +# Full phase/task breakdown (transitions, task-type badges, retry) kbagent --json flow detail --project prod --flow-id 111 # Flow-centric schedule view -- each row gets inline schedules: [...] -# One extra keboola.scheduler list call per project, NOT per flow. kbagent --json flow list --project prod --with-schedules ``` -For schedule-centric fleet-wide discovery ("which configs across N projects are on cron?"), see [schedule-workflow.md](schedule-workflow.md) and the `kbagent schedule list/detail/find` commands. The two views complement each other: - -- `flow list --with-schedules` answers "for each flow, which schedules target it?" -- `schedule list` answers "what cron schedules exist in the fleet, and what do they target?" +For schedule-centric fleet-wide discovery, see +[schedule-workflow.md](schedule-workflow.md). ## Update a flow +`flow update --file` is a **full replace** of phases+tasks. Fetch the current +body, merge locally, validate, then push. 
+ ```bash # Rename only kbagent --json flow update --project prod --flow-id 111 --name "New Name" -# Replace phases/tasks from file (validates DAG before write) +# Replace phases/tasks from file (re-validated against the CF schema on write) +kbagent --json flow detail --project prod --flow-id 111 > current.json +# ... merge edits into updated.yaml ... +kbagent --json flow validate --file @updated.yaml --project prod kbagent --json flow update --project prod --flow-id 111 --file @updated.yaml ``` -## Schedule a flow +## Run a flow -Schedules are stored as `keboola.scheduler` configs pointing at the flow. `flow schedule` is an upsert — if a schedule already exists for the flow it is updated in-place; otherwise a new one is created. Calling it twice with different cron expressions replaces the existing schedule. +Conditional flows execute as a job on the `keboola.flow` component: ```bash -# Daily at 06:00 UTC -kbagent --json flow schedule --project prod --flow-id 111 --cron "0 6 * * *" +kbagent --json job run --project prod --component-id keboola.flow --config-id 111 --wait +``` + +## Schedule a flow -# With timezone and disabled state -kbagent --json flow schedule \ - --project prod --flow-id 111 \ - --cron "0 8 * * 1-5" \ - --timezone "Europe/Prague" \ - --disabled +`flow schedule` is an upsert (stored as a `keboola.scheduler` config targeting +`keboola.flow`). -# Remove all schedules (idempotent) +```bash +kbagent --json flow schedule --project prod --flow-id 111 --cron "0 6 * * *" +kbagent --json flow schedule --project prod --flow-id 111 \ + --cron "0 8 * * 1-5" --timezone "Europe/Prague" --disabled kbagent --json flow schedule-remove --project prod --flow-id 111 --yes ``` -For bulk audit (e.g. "which of our 14 projects have schedules that fire between 02:00-04:00?") use the dedicated [schedule-workflow.md](schedule-workflow.md) commands instead of iterating `flow list` manually. 
- ## Delete a flow ```bash kbagent --json flow delete --project prod --flow-id 111 --yes ``` -## DAG validation - -kbagent validates the phase graph client-side before every create/update: -- Unknown `dependsOn` phase IDs → `INVALID_FLOW_DAG` -- Tasks referencing unknown phase IDs → `INVALID_FLOW_DAG` -- Cycles in the phase graph → `INVALID_FLOW_DAG` - -The error carries a list of human-readable violation messages. - -## Component IDs - -| Component | Use case | -|---|---| -| `keboola.flow` | New projects, preferred for new flows (default for `flow new`) | -| `keboola.orchestrator` | Legacy flows; most existing orchestrations use this (default for `flow detail/update/delete/schedule`) | - -Both are fully supported. Use `--component-id` to override the default. +## Validation errors + +`flow new` / `flow update` validate the body against the **live** conditional-flow +JSON Schema (fetched at runtime from the stack) plus semantic checks; failures +raise `INVALID_FLOW_DEFINITION` with all violation messages joined. Run +`flow validate --file @flow.yaml --project PROJECT` (with `--project` to fetch the +live schema) to see them. Common causes: + +- integer ids (ids must be **strings**); +- `next[].goto` targets a non-existent phase (use `null` to end); +- a phase with conditional transitions lacks a trailing default transition; +- a task references an unknown `phase`, or a phase has no enabled task; +- operator/function operand-arity violations. + +Unreachable phases are reported as **warnings** (never block a write). `goto` +loops are legal at runtime and are NOT flagged. 
+ +### Graceful degradation when the schema can't be fetched + +If the live schema fetch fails (network error, or the AI Service returns no +`configurationSchema` for `keboola.flow`), the write is **not** blocked: +structural validation is skipped, the semantic checks still run (the Storage API +does not validate flow configs server-side), and a +`structural schema validation skipped: <reason>` warning is surfaced on the +result. `flow validate` without `--project` behaves the same way and adds a note +explaining there was no schema source. + +## Schema source of truth + +The conditional-flow JSON Schema is served by the **stack's component registry** +and fetched at runtime via the AI Service `configurationSchema` for +`keboola.flow` -- it is **not bundled or vendored** in the CLI. This guarantees +the validator always matches the stack the user is actually talking to. There is +nothing to re-vendor or pin; `flow schema --full --project PROJECT` prints +whatever the live stack serves. diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index 402a220c..ed6ce330 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -2097,21 +2097,46 @@ CLI hides via its four-bucket response, but they matter when interpreting result subdirectory and there is no risk of name collisions. Override with `--output DIR` if you need a custom location. -## Flow: default `--component-id` differs between commands - -- `kbagent flow new` defaults to **`keboola.flow`** (the newer format). -- `kbagent flow detail / update / delete / schedule / schedule-remove` all - default to **`keboola.orchestrator`** (the legacy format, since most - existing flows still use it). 
-- Consequence: if you create a flow with `flow new` and then call - `flow detail` without `--component-id`, you will get a `NOT_FOUND` error - because kbagent looks up the ID under `keboola.orchestrator`. Always pass - `--component-id keboola.flow` when round-tripping a flow you just created - via `flow new` (or, equivalently, pass `--component-id keboola.orchestrator` - on `flow new` to keep things consistent). -- `flow list` returns both component IDs and surfaces `component_id` on each - row — use it to confirm which variant a flow lives under before issuing - detail/update/delete/schedule commands. +## Flow: conditional flows only; `--component-id` removed (since v0.56.0) + +- **RESOLVED (since v0.56.0):** the old foot-gun where `flow new` defaulted to + `keboola.flow` but `flow detail/update/delete/schedule/...` defaulted to + `keboola.orchestrator` is **gone**. The `flow` group now targets the single + component `keboola.flow`, and `--component-id` has been **removed** from every + `flow` subcommand. Passing it errors with "No such option". +- **`keboola.orchestrator` is dropped (since v0.56.0).** `flow list` does NOT + list orchestrator configs; it reports their total as `legacy_orchestrator_count` + (+ a warning) so you can see why a legacy flow "disappeared". There is no + migration command (cross-component migration is out of scope). +- **IDs are STRINGS (since v0.56.0).** `phase.id`, `task.id`, `next.id`, + `task.phase`, and `goto` are all JSON strings (`goto` is `string | null`). + Integer ids fail Draft7 validation and are rejected with + `INVALID_FLOW_DEFINITION`. +- **The old `dependsOn` phase-DAG template is invalid (since v0.56.0).** Phases + use `next[].goto` (a phase id or `null` to end) with an optional `condition`; + a phase with conditional transitions must end with a default (condition-less) + transition. Tasks are typed (`job`/`notification`/`variable`). 
+- **`INVALID_FLOW_DAG` was renamed to `INVALID_FLOW_DEFINITION` (since v0.56.0).** + Update any code/string matching on the old error code. +- **Validation (since v0.56.0):** `kbagent flow validate --file @flow.yaml [--project ALIAS]`. + With `--project` it fetches the **live** JSON Schema from the stack and runs + full structural + semantic checks; without `--project` it runs semantic-only + and adds a note that structural validation was skipped (no schema source). + Exit 0 valid, exit 2 on errors. Use it in a tight loop before + `flow new`/`flow update`. +- **Schema is fetched live from the stack, NOT bundled (since v0.56.0).** The + conditional-flow JSON Schema is served by the stack's component registry and + read at runtime via the AI Service `configurationSchema` for `keboola.flow` + (the same path `config new --push` uses). There is nothing vendored, pinned, + or to re-sync. `flow schema --full` therefore **requires `--project`** (plain + `flow schema` is still the offline YAML template). +- **Graceful semantic-only degradation (since v0.56.0).** If the live schema + fetch fails (network error, or the AI Service returns no `configurationSchema`), + `flow new`/`flow update`/`flow validate --project` do **not** block: structural + validation is skipped, the semantic checks still run (Storage does not validate + flow configs server-side), and a `structural schema validation skipped: <reason>` + warning/note is surfaced. A genuine `INVALID_FLOW_DEFINITION` still rejects the + write. ## `schedule find --cron-window` is an hour-field approximation @@ -2502,16 +2527,14 @@ These are Keboola-platform behaviors, not kbagent features, so they carry no `since` tag -- they hold on every kbagent version. The `keboola-expert` agent prompt keeps only a one-line trigger for each and links here for the full prose. 
-### Flow phase `behavior.onError` is dropped by a full-replace `--file` +### `flow update --file` is a full replace of phases + tasks -`kbagent flow update` preserves `behavior.onError` on partial updates (rename, -description only). BUT `--file` is a **full-replace** operation -- if your YAML -omits `behavior` on a phase, that field is silently dropped. For structural -edits, always fetch via `kbagent flow detail --json` first, merge your diff -locally, then push via `--file`. Same failure shape as the pre-v1.60 MCP -`update_flow` strip bug, reached via a different door. Integer-vs-string phase -IDs are irrelevant to this -- MCP accepts both, and changing ID types does NOT -make `update_flow` preserve `behavior.onError`. Don't waste retries on it. +`kbagent flow update` preserves the body on metadata-only updates (rename, +description). BUT `--file` is a **full-replace** of `phases` + `tasks` -- if your +YAML omits a phase, task, transition, or per-task `retry` that existed before, it +is silently dropped. For structural edits, always fetch via +`kbagent flow detail --json` first, merge your diff locally, run +`kbagent flow validate --file @merged.yaml --project ALIAS`, then push via `--file`. ### Primary keys on new output tables crash the first run diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index 9c55632a..140a5f42 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -8,6 +8,61 @@ # Ordered newest-first. Each value is a list of brief one-line descriptions. CHANGELOG: dict[str, list[str]] = { + "0.57.0": [ + "BREAKING (flow / conditional flows): the `flow` command group now targets " + "conditional flows (`keboola.flow`) ONLY; `keboola.orchestrator` support is " + "dropped. `--component-id` is removed from every `flow` subcommand and from " + "the `/flows` REST surface (FlowCreate/FlowUpdate/FlowSchedule models + query " + "params). 
`flow new`/`flow update` validate the body against the live " + "conditional-flow JSON Schema (Draft7), fetched at runtime from the stack's " + "component registry (AI Service `configurationSchema` for `keboola.flow` -- " + "never bundled/vendored), plus semantic checks -- phases use " + "`next[].goto` transitions (a phase id or `null` to end) with optional " + "`condition` objects (operator/function/phase/task/variable/const/array); " + "tasks are typed (`job`/`notification`/`variable`); **IDs are strings, not " + "integers**; a phase with conditional transitions must end with a default " + "(condition-less) transition; every phase needs >=1 enabled task; " + "operator/function operand-arity is enforced. Invalid bodies are rejected " + "with `INVALID_FLOW_DEFINITION` (replaces `INVALID_FLOW_DAG`, which is " + "removed from `ErrorCode`). When the schema fetch fails (network, " + "KeboolaApiError, or empty/missing schema) the write is NOT blocked: " + "structural validation is skipped, the semantic checks still run (the " + "Storage API does not validate flow configs server-side), and a " + "`structural schema validation skipped: ` warning is surfaced. " + "`flow validate --file @flow.yaml|- [--project ALIAS]` validates a " + "definition: with `--project` it fetches the live schema for full " + "structural + semantic validation (fetch failure degrades to semantic-only " + "+ a note); without `--project` it runs semantic-only and notes that " + "structural schema validation was skipped (no schema source). Exit 0 valid " + "/ exit 2 errors; `--json` lists `{valid, errors, warnings, notes}`. " + "`flow schema --full --project ALIAS` fetches and dumps the live JSON " + "Schema from the stack (`--full` without `--project` errors -- the schema " + "is no longer bundled); plain `flow schema` still prints the offline YAML " + "template, conditional-flow shaped. 
`flow detail` human rendering is " + "rewritten for conditional flows (per-phase transitions, task-type badges, " + "retry); JSON output is the raw body, unchanged. `flow list` no longer lists " + "legacy orchestrator configs -- it counts them and reports " + "`legacy_orchestrator_count` (+ a warning) so a 'disappeared' flow is " + "explained; the `Component` column is dropped (every row is keboola.flow). " + "Unreachable phases are reported as warnings (forward BFS from the first " + "phase), never blocking a write; `goto` loops are legal (no cycle detection). " + "New module `services/flow_validation.py` (pure: structural validation " + "takes an explicit optional `schema` parameter, semantic checks always run; " + "no network, no bundled schema). `config new` flow scaffold now emits a " + "conditional-flow skeleton (string ids, `phases`/`tasks`, a `job` task) and " + "defaults to `keboola.flow`; dead `ORCHESTRATOR_COMPONENTS` removed from " + "`sync/config_format.py`. New permission `flow.validate` (read). Docs/agent " + "surfaces synced: CLAUDE.md, AGENT_CONTEXT, keboola-expert.md, SKILL.md, " + "commands-reference.md, flow-workflow.md (full rewrite), gotchas.md " + "(string-ids, dropped orchestrator, removed --component-id, " + "INVALID_FLOW_DEFINITION rename; old default-component gotcha marked " + "resolved). Execute a conditional flow with " + "`kbagent job run --component-id keboola.flow --config-id ID`. Tests: " + "`tests/test_flow_validation.py` (new), `tests/test_flow_service.py` + " + "`tests/test_flow_cli.py` rewritten, `tests/test_e2e.py` flow round-trip " + "uses a CF payload + `flow validate` and skips cleanly on " + "conditional_flows=false.", + ], "0.56.0": [ "Maintenance re-release -- no code changes since 0.55.0. 
The `0.55.0` version number lived in " "`main` across three successive builds (#383 sync-secret audit, then #379 `semantic-layer " diff --git a/src/keboola_agent_cli/commands/context.py b/src/keboola_agent_cli/commands/context.py index 4b2b0f23..73bf7ba4 100644 --- a/src/keboola_agent_cli/commands/context.py +++ b/src/keboola_agent_cli/commands/context.py @@ -564,37 +564,52 @@ kbagent feature user-remove --project ALIAS --email EMAIL --feature NAME [--dry-run] [--yes] Per-user features (GET/POST/DELETE /manage/users/{{email}}/features). -### Flows (Orchestrator + Conditional) +### Flows (Conditional Flows -- keboola.flow only; orchestrator dropped in 0.56.0) kbagent flow list [--project NAME] [--branch ID] [--with-schedules] - List all flows (keboola.orchestrator + keboola.flow) across projects. + List conditional flows (keboola.flow) across projects. Legacy keboola.orchestrator + flows are NOT listed; their count is surfaced as legacy_orchestrator_count + a warning. --with-schedules enriches each row with {{schedule_id, cron, timezone, enabled}} entries from keboola.scheduler (one extra API call per project, NOT per flow). - kbagent flow detail --project NAME --flow-id ID [--component-id keboola.orchestrator|keboola.flow] [--branch ID] - Show phases, tasks, and full configuration. --component-id defaults to keboola.orchestrator. - - kbagent flow schema - Print the YAML format accepted by 'flow new' and 'flow update'. - - kbagent flow new --project NAME --name "Name" [--component-id keboola.orchestrator|keboola.flow] [--description D] [--file YAML|@file|-] [--branch ID] - Create a new flow. --component-id defaults to keboola.flow (newer format). - --file accepts YAML with 'phases' and 'tasks' keys. DAG is validated (acyclic, refs exist). 
- - kbagent flow update --project NAME --flow-id ID [--component-id ID] [--name N] [--description D] [--file YAML] [--branch ID] + kbagent flow detail --project NAME --flow-id ID [--branch ID] + Show phases, transitions (next[].goto + conditions), typed tasks, and full configuration. + + kbagent flow schema [--full --project NAME] + Plain: print the offline conditional-flow YAML template. --full fetches and dumps the + live JSON Schema from the stack (AI Service configurationSchema for keboola.flow) and + REQUIRES --project (the schema is no longer bundled). + + kbagent flow validate --file YAML|@file|- [--project NAME] + With --project: fetch the live schema from the stack -> full structural + semantic + validation (fetch failure degrades to semantic-only + a note). Without --project: + semantic-only validation + a note that structural validation was skipped (no schema + source). Exit 0 valid, exit 2 on errors. --json adds {{valid, errors, warnings, notes}}. + + kbagent flow new --project NAME --name "Name" [--description D] [--file YAML|@file|-] [--branch ID] + Create a new conditional flow. --file accepts YAML with 'phases' and 'tasks' keys. + Validated against the LIVE conditional-flow schema fetched from the stack + (INVALID_FLOW_DEFINITION on failure). A schema-fetch failure does NOT block the write: + structural check skipped, semantic checks still run, a warning is surfaced. + IDs are STRINGS; phases use next[].goto (a phase id or null); tasks are typed + (job/notification/variable). Execute with: job run --component-id keboola.flow --config-id ID. + + kbagent flow update --project NAME --flow-id ID [--name N] [--description D] [--file YAML] [--branch ID] Update a flow's name, description, or phases/tasks. --file replaces both phases and tasks. - Omitting --file leaves the flow body unchanged. DAG re-validated on write. + Omitting --file leaves the flow body unchanged. 
Validated against the live conditional-flow + schema on write (merge-aware; INVALID_FLOW_DEFINITION on failure; schema-fetch failure -> + semantic-only + warning). - kbagent flow delete --project NAME --flow-id ID [--component-id ID] [--branch ID] [--yes] + kbagent flow delete --project NAME --flow-id ID [--branch ID] [--yes] Delete a flow. Does NOT remove associated keboola.scheduler configs. Run 'flow schedule-remove' first if you want to clean up schedules. - kbagent flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--component-id ID] [--timezone TZ] [--enabled/--disabled] [--name NAME] [--branch ID] + kbagent flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--timezone TZ] [--enabled/--disabled] [--name NAME] [--branch ID] Upsert a cron schedule: updates the existing keboola.scheduler config if one exists, creates one otherwise. Calling twice with a new cron replaces the old schedule — no duplicates created. Schedules are stored as Storage API configs, not a separate scheduler service. - kbagent flow schedule-remove --project NAME --flow-id ID [--component-id ID] [--branch ID] [--yes] + kbagent flow schedule-remove --project NAME --flow-id ID [--branch ID] [--yes] Remove all schedules bound to this flow (deletes all matching keboola.scheduler configs). Idempotent: safe to run when no schedules exist. diff --git a/src/keboola_agent_cli/commands/flow.py b/src/keboola_agent_cli/commands/flow.py index 56b0871c..f5539920 100644 --- a/src/keboola_agent_cli/commands/flow.py +++ b/src/keboola_agent_cli/commands/flow.py @@ -27,61 +27,93 @@ logger = logging.getLogger(__name__) -flow_app = typer.Typer(help="Manage flows (keboola.orchestrator + keboola.flow)") +flow_app = typer.Typer(help="Manage conditional flows (keboola.flow)") -_FLOW_COMPONENT_CHOICES = ["keboola.orchestrator", "keboola.flow"] - -# YAML/JSON schema snippet shown by 'flow schema' +# YAML template shown by 'flow schema' (keboola.flow / Conditional Flow). 
# -# Tasks use the nested ``task: {mode, componentId, configId}`` form that matches -# the keboola-as-code convention. The API also accepts the flat form -# (``componentId``/``configId`` at task root) for backward compatibility, but -# new flows should use the nested form shown below. +# IDs are STRINGS. goto is a phase id or null (= end the flow). A phase with +# conditional transitions must end with a default (condition-less) transition. _FLOW_SCHEMA = """\ -# kbagent flow schema -- keboola.flow configuration format +# kbagent flow schema -- keboola.flow (Conditional Flow) configuration format # -# Create with: kbagent flow new --project ALIAS --name "My Flow" [--file flow.yaml] -# Update with: kbagent flow update --project ALIAS --flow-id ID --file flow.yaml - -name: "My Flow" -description: "Optional description" +# Create with: kbagent flow new --project ALIAS --name "My Flow" --file @flow.yaml +# Update with: kbagent flow update --project ALIAS --flow-id ID --file @flow.yaml +# Validate offline: kbagent flow validate --file @flow.yaml +# Full JSON schema: kbagent flow schema --full --project ALIAS +# +# IDs are STRINGS. goto is a phase id or null (= end the flow). phases: - - id: 1 - name: "Phase 1 - Extract" - dependsOn: [] # IDs of phases that must complete first - - id: 2 - name: "Phase 2 - Transform" - dependsOn: [1] + - id: "extract" + name: "Extract" + next: + # Conditional transition: if any task in 'extract' failed, go to 'notify'. + - id: "on-failure" + goto: "notify" + condition: + type: operator + operator: ANY_TASKS_IN_PHASE + phase: "extract" + operands: [] + # Default transition (NO condition) -- MUST be last. 
+ - id: "default" + goto: "transform" + - id: "transform" + name: "Transform" + retry: + strategy: linear + strategyParams: + delaySeconds: 60 + retryOn: ["error"] + next: + - id: "done" + goto: null + - id: "notify" + name: "Notify on failure" tasks: - - id: 1 - name: "Extract Data" - phase: 1 # phase.id this task belongs to + - id: "task-extract" + name: "Run HTTP extractor" + phase: "extract" enabled: true - continueOnFailure: false task: - mode: run + type: job componentId: "keboola.ex-http" configId: "123456789" - - id: 2 - name: "Run Transformation" - phase: 2 + mode: run + retry: + strategy: linear + strategyParams: + delaySeconds: 30 + retryOn: ["error"] + - id: "task-transform" + name: "Run transformation" + phase: "transform" enabled: true - continueOnFailure: false task: - mode: run + type: job componentId: "keboola.snowflake-transformation" configId: "987654321" - -# Notes: -# - dependsOn: IDs form a directed acyclic graph (kbagent validates this) -# - task.configId values must be string IDs of existing configs in the project -# - task.mode defaults to "run" (the only supported value today) -# - For keboola.orchestrator (legacy), phases are referenced by name (string), -# not ID (integer); use keboola.flow for new flows -# - The flat shape (componentId/configId at task root) is still accepted by -# the API but is deprecated in the schema; prefer the nested task: form + mode: run + - id: "task-notify" + name: "Email the team" + phase: "notify" + enabled: true + task: + type: notification + title: "Flow failed" + message: "The extract phase reported a failure." + recipients: + - channel: email + address: "team@example.com" + - id: "task-setvar" + name: "Set a flow variable" + phase: "extract" + enabled: true + task: + type: variable + name: "run_date" + value: "2026-01-01" """ @@ -113,7 +145,10 @@ def flow_list( "(one extra API call per project, NOT per flow).", ), ) -> None: - """List all flows (keboola.orchestrator + keboola.flow) across projects. 
+ """List conditional flows (keboola.flow) across projects. + + Legacy keboola.orchestrator flows are NOT listed (orchestrator support was + dropped in 0.56.0); a count of any that exist is shown as a warning. With ``--with-schedules`` each row includes a ``schedules`` list of ``{schedule_id, cron, timezone, enabled}`` entries. Flows without @@ -159,7 +194,7 @@ def _format_flows_table( if not flows: formatter.console.print("[dim]No flows found.[/dim]") else: - columns = ["Project", "Component", "Config ID", "Name", "Disabled"] + columns = ["Project", "Config ID", "Name", "Disabled"] if with_schedules: columns.append("Schedules") tbl = Table( @@ -171,7 +206,6 @@ def _format_flows_table( disabled = "[red]yes[/red]" if f.get("is_disabled") else "[dim]no[/dim]" row = [ escape(f.get("project_alias", "")), - escape(f.get("component_id", "")), escape(f.get("config_id", "")), escape(f.get("name", "")), disabled, @@ -196,6 +230,13 @@ def _format_flows_table( f"Project '{err.get('project_alias', '?')}': {err.get('message', 'error')}" ) + legacy = result.get("legacy_orchestrator_count", 0) + if legacy: + formatter.warning( + f"{legacy} legacy keboola.orchestrator flow(s) are not shown " + f"(orchestrator support was dropped in 0.56.0; migrate to keboola.flow)." + ) + # --------------------------------------------------------------------------- # flow detail @@ -207,15 +248,9 @@ def flow_detail( ctx: typer.Context, project: str = typer.Option(..., "--project", help="Project alias"), flow_id: str = typer.Option(..., "--flow-id", help="Flow configuration ID"), - component_id: str = typer.Option( - "keboola.orchestrator", - "--component-id", - help="Flow component ID (default: keboola.orchestrator). 
" - "Use --component-id keboola.flow for flows listed with component_id=keboola.flow.", - ), branch: int | None = typer.Option(None, "--branch", help="Dev branch ID"), ) -> None: - """Show detailed flow information including phases and tasks.""" + """Show detailed conditional-flow information including phases and tasks.""" formatter = get_formatter(ctx) service = get_service(ctx, "flow_service") config_store = ctx.obj["config_store"] @@ -224,7 +259,6 @@ def flow_detail( try: result = service.get_flow_detail( alias=project, - component_id=component_id, config_id=flow_id, branch_id=effective_branch, ) @@ -241,10 +275,26 @@ def flow_detail( _format_flow_detail(formatter, result) +def _summarize_condition(condition: dict[str, Any] | None) -> str: + """One-line human summary of a transition condition.""" + if not condition: + return "default" + ctype = condition.get("type") + if ctype == "operator": + op = condition.get("operator", "?") + phase = condition.get("phase") + return f"{op}({phase})" if phase else f"{op}(...)" + if ctype == "function": + return f"{condition.get('function', '?')}(...)" + if ctype in ("const", "constant"): + return f"const={condition.get('value')!r}" + return str(ctype) + + def _format_flow_detail(formatter: Any, result: dict[str, Any]) -> None: formatter.console.print( f"\n[bold]{escape(result.get('name', ''))}[/bold]" - f" [dim]({escape(result.get('component_id', ''))} / {escape(str(result.get('id', '')))})[/dim]" + f" [dim](keboola.flow / {escape(str(result.get('id', '')))})[/dim]" ) if result.get("description"): formatter.console.print(f"[dim]{escape(result['description'])}[/dim]") @@ -253,7 +303,6 @@ def _format_flow_detail(formatter: Any, result: dict[str, Any]) -> None: phases = result.get("phases", []) tasks = result.get("tasks", []) - if not phases and not tasks: formatter.console.print("\n[dim]No phases or tasks defined.[/dim]") return @@ -262,33 +311,45 @@ def _format_flow_detail(formatter: Any, result: dict[str, Any]) -> None: 
f"\n[bold]Phases[/bold] ({len(phases)}) [bold]Tasks[/bold] ({len(tasks)})" ) - # Group tasks by phase tasks_by_phase: dict[Any, list[dict[str, Any]]] = {} for task in tasks: - phase_key = task.get("phase") - tasks_by_phase.setdefault(phase_key, []).append(task) + tasks_by_phase.setdefault(str(task.get("phase")), []).append(task) + + type_colors = {"job": "green", "notification": "yellow", "variable": "magenta"} for phase in phases: - pid = phase.get("id") - deps = phase.get("dependsOn", []) - dep_str = f" ← {deps}" if deps else "" + pid = str(phase.get("id")) + retry = " [dim](retry)[/dim]" if phase.get("retry") else "" formatter.console.print( - f"\n [cyan bold]Phase {escape(str(pid))}: {escape(phase.get('name', ''))}[/cyan bold]" - f"[dim]{escape(dep_str)}[/dim]" + f"\n [cyan bold]Phase {escape(pid)}: {escape(phase.get('name', ''))}[/cyan bold]{retry}" ) + for transition in phase.get("next", []): + goto = transition.get("goto") + target = "END" if goto is None else str(goto) + summary = _summarize_condition(transition.get("condition")) + formatter.console.print(f" [dim]→ {escape(target)} \\[{escape(summary)}][/dim]") for task in tasks_by_phase.get(pid, []): t_info = task.get("task") or {} - comp = t_info.get("componentId", task.get("componentId", "")) - cfg = t_info.get("configId", task.get("configId", "")) + ttype = t_info.get("type", "?") + color = type_colors.get(ttype, "white") + badge = f"[{color}]{escape(ttype)}[/{color}]" + detail_str = "" + if ttype == "job": + detail_str = ( + f" {escape(str(t_info.get('componentId', '')))}" + f"/{escape(str(t_info.get('configId', '')))}" + ) + elif ttype == "variable": + detail_str = f" {escape(str(t_info.get('name', '')))}" + t_retry = " [dim](retry)[/dim]" if t_info.get("retry") else "" enabled = "" if task.get("enabled", True) else " [dim](disabled)[/dim]" formatter.console.print( - f" [{escape(str(task.get('id', '?')))}] {escape(task.get('name', ''))}" - f" [dim]{escape(comp)}/{escape(str(cfg))}[/dim]{enabled}" + f" 
\\[{escape(str(task.get('id', '?')))}] {badge} " + f"{escape(task.get('name', ''))}[dim]{detail_str}[/dim]{enabled}{t_retry}" ) - # Orphan tasks (phase not found in phases list) - orphan_phase_keys = set(tasks_by_phase.keys()) - {p.get("id") for p in phases} - for key in sorted(str(k) for k in orphan_phase_keys): + orphan_keys = set(tasks_by_phase.keys()) - {str(p.get("id")) for p in phases} + for key in sorted(orphan_keys): formatter.console.print(f"\n [yellow]Phase '{key}' (not in phases list)[/yellow]") for task in tasks_by_phase.get(key, []): formatter.console.print(f" {escape(task.get('name', str(task)))}") @@ -300,14 +361,71 @@ def _format_flow_detail(formatter: Any, result: dict[str, Any]) -> None: @flow_app.command("schema") -def flow_schema(ctx: typer.Context) -> None: - """Print the YAML format expected by 'flow new' and 'flow update'.""" +def flow_schema( + ctx: typer.Context, + full: bool = typer.Option( + False, + "--full", + help="Dump the live JSON Schema fetched from the stack (requires --project).", + ), + project: str | None = typer.Option( + None, + "--project", + help="Project alias -- required for --full (the schema is served by the stack).", + ), +) -> None: + """Print the conditional-flow YAML template, or --full for the live JSON Schema. + + The plain template is offline. ``--full`` fetches the real keboola.flow + JSON Schema from the stack's component registry, so it needs ``--project``. + """ formatter = get_formatter(ctx) + if full: + if not project: + formatter.error( + message=( + "--full requires --project: the conditional-flow JSON Schema is " + "served by the stack's component registry, not bundled. " + "Run e.g. 'kbagent flow schema --full --project ALIAS'." 
+ ), + error_code=ErrorCode.VALIDATION_ERROR, + ) + raise typer.Exit(code=2) + + service = get_service(ctx, "flow_service") + try: + schema, reason = service.fetch_flow_schema(project) + except ConfigError as exc: + formatter.error(message=exc.message, error_code=ErrorCode.CONFIG_ERROR) + raise typer.Exit(code=5) from None + except KeboolaApiError as exc: + formatter.error(message=exc.message, error_code=exc.error_code, retryable=exc.retryable) + raise typer.Exit(code=map_error_to_exit_code(exc)) from None + + if schema is None: + formatter.error( + message=f"Could not fetch the conditional-flow schema: {reason}", + error_code=ErrorCode.NOT_FOUND, + ) + raise typer.Exit(code=4) + + if formatter.json_mode: + formatter.output({"format": "json-schema", "schema": schema}) + else: + import json as _json + + from rich.syntax import Syntax + + formatter.console.print( + Syntax(_json.dumps(schema, indent=2), "json", theme="monokai", line_numbers=False) + ) + return + if formatter.json_mode: formatter.output( { "format": "yaml", - "description": "keboola.flow configuration schema", + "description": "keboola.flow (Conditional Flow) configuration schema", "schema": _FLOW_SCHEMA, } ) @@ -317,6 +435,92 @@ def flow_schema(ctx: typer.Context) -> None: formatter.console.print(Syntax(_FLOW_SCHEMA, "yaml", theme="monokai", line_numbers=False)) +# --------------------------------------------------------------------------- +# flow validate +# --------------------------------------------------------------------------- + + +@flow_app.command("validate") +def flow_validate( + ctx: typer.Context, + file: str = typer.Option( + ..., + "--file", + help="YAML/JSON flow definition to validate (@file, -, or inline).", + ), + project: str | None = typer.Option( + None, + "--project", + help=( + "Project alias -- fetch the live JSON Schema from the stack for full " + "structural + semantic validation. Without it, only semantic checks run." 
+ ), + ), +) -> None: + """Validate a conditional-flow definition (schema + semantic checks). + + With ``--project`` the live keboola.flow JSON Schema is fetched from the + stack and structural validation runs alongside the semantic checks; a fetch + failure degrades gracefully (semantic-only + a warning). Without + ``--project`` only the semantic checks run and a note records that + structural schema validation was skipped (no schema source). + + Exit 0 when valid (warnings still printed), exit 2 when there are errors. + """ + formatter = get_formatter(ctx) + from ..services.flow_validation import find_unreachable_phases, validate_conditional_flow + + try: + flow_def = _load_flow_yaml(file) + except (OSError, yaml.YAMLError, ValueError) as exc: + formatter.error( + message=f"Cannot load flow definition: {exc}", error_code=ErrorCode.VALIDATION_ERROR + ) + raise typer.Exit(code=2) from None + + phases = flow_def.get("phases", []) + tasks = flow_def.get("tasks", []) + + schema: dict[str, Any] | None = None + notes: list[str] = [] + if project: + service = get_service(ctx, "flow_service") + try: + schema, reason = service.fetch_flow_schema(project) + except ConfigError as exc: + formatter.error(message=exc.message, error_code=ErrorCode.CONFIG_ERROR) + raise typer.Exit(code=5) from None + if schema is None: + notes.append(f"structural schema validation skipped: {reason}") + else: + notes.append( + "structural schema validation skipped: no schema source " + "(pass --project ALIAS to fetch the live schema from the stack)" + ) + + errors = validate_conditional_flow(phases, tasks, schema) + warnings = [ + f"Phase '{pid}' is unreachable from the entry phase" + for pid in find_unreachable_phases(phases) + ] + valid = not errors + + if formatter.json_mode: + formatter.output({"valid": valid, "errors": errors, "warnings": warnings, "notes": notes}) + else: + for note in notes: + formatter.console.print(f"[dim]note: {escape(note)}[/dim]") + for w in warnings: + 
formatter.warning(w) + if valid: + formatter.success("Flow definition is valid.") + else: + for e in errors: + formatter.console.print(f"[red]✗[/red] {escape(e)}") + if not valid: + raise typer.Exit(code=2) + + # --------------------------------------------------------------------------- # flow new # --------------------------------------------------------------------------- @@ -350,11 +554,6 @@ def flow_new( ctx: typer.Context, project: str = typer.Option(..., "--project", help="Project alias"), name: str = typer.Option(..., "--name", help="Flow name"), - component_id: str = typer.Option( - "keboola.flow", - "--component-id", - help="Component ID (default: keboola.flow)", - ), description: str = typer.Option("", "--description", help="Optional description"), file: str | None = typer.Option( None, @@ -364,13 +563,10 @@ def flow_new( ), branch: int | None = typer.Option(None, "--branch", help="Dev branch ID"), ) -> None: - """Create a new flow configuration. + """Create a new conditional-flow (keboola.flow) configuration. 
\b Examples: - # Empty skeleton - kbagent flow new --project prod --name "Daily ETL" - # From a YAML file kbagent flow new --project prod --name "Daily ETL" --file @flow.yaml @@ -397,7 +593,6 @@ def flow_new( try: result = service.create_flow( alias=project, - component_id=component_id, name=name, description=description, phases=phases, @@ -417,8 +612,10 @@ def flow_new( branch_info = f" (branch {result.get('branch_id')})" if result.get("branch_id") else "" formatter.success( f"Created flow '{escape(result.get('name', name))}' " - f"[{escape(component_id)}/{escape(str(result.get('id', '')))}]{branch_info}" + f"[keboola.flow/{escape(str(result.get('id', '')))}]{branch_info}" ) + for warning in result.get("warnings", []): + formatter.warning(warning) # --------------------------------------------------------------------------- @@ -431,11 +628,6 @@ def flow_update( ctx: typer.Context, project: str = typer.Option(..., "--project", help="Project alias"), flow_id: str = typer.Option(..., "--flow-id", help="Flow configuration ID"), - component_id: str = typer.Option( - "keboola.orchestrator", - "--component-id", - help="Flow component ID (default: keboola.orchestrator)", - ), name: str | None = typer.Option(None, "--name", help="New flow name"), description: str | None = typer.Option(None, "--description", help="New description"), file: str | None = typer.Option( @@ -482,7 +674,6 @@ def flow_update( try: result = service.update_flow( alias=project, - component_id=component_id, config_id=flow_id, name=name, description=description, @@ -503,8 +694,10 @@ def flow_update( branch_info = f" (branch {result.get('branch_id')})" if result.get("branch_id") else "" formatter.success( f"Updated flow '{escape(result.get('name', flow_id))}' " - f"[{escape(component_id)}/{escape(flow_id)}]{branch_info}" + f"[keboola.flow/{escape(flow_id)}]{branch_info}" ) + for warning in result.get("warnings", []): + formatter.warning(warning) # 
--------------------------------------------------------------------------- @@ -517,11 +710,6 @@ def flow_delete( ctx: typer.Context, project: str = typer.Option(..., "--project", help="Project alias"), flow_id: str = typer.Option(..., "--flow-id", help="Flow configuration ID"), - component_id: str = typer.Option( - "keboola.orchestrator", - "--component-id", - help="Flow component ID (default: keboola.orchestrator)", - ), branch: int | None = typer.Option(None, "--branch", help="Dev branch ID"), dry_run: bool = typer.Option( False, @@ -530,7 +718,7 @@ def flow_delete( ), yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"), ) -> None: - """Delete a flow configuration. + """Delete a conditional-flow (keboola.flow) configuration. Note: associated keboola.scheduler configs are NOT automatically removed. Run 'flow schedule-remove' first if you want to clean up schedules. @@ -542,7 +730,7 @@ def flow_delete( result = { "would_delete": { "project_alias": project, - "component_id": component_id, + "component_id": "keboola.flow", "config_id": flow_id, "branch_id": branch, }, @@ -551,14 +739,13 @@ def flow_delete( formatter.output(result) else: formatter.console.print( - f"[bold blue]Would delete:[/bold blue] flow " - f"{escape(component_id)}/{escape(flow_id)}" + f"[bold blue]Would delete:[/bold blue] flow keboola.flow/{escape(flow_id)}" + (f" (branch {branch})" if branch else "") ) return if not yes and not formatter.json_mode: - confirmed = typer.confirm(f"Delete flow {component_id}/{flow_id}?") + confirmed = typer.confirm(f"Delete flow keboola.flow/{flow_id}?") if not confirmed: formatter.console.print("[yellow]Aborted.[/yellow]") raise typer.Exit(code=0) @@ -566,7 +753,6 @@ def flow_delete( try: result = service.delete_flow( alias=project, - component_id=component_id, config_id=flow_id, branch_id=branch, ) @@ -580,7 +766,7 @@ def flow_delete( if formatter.json_mode: formatter.output(result) else: - formatter.success(f"Deleted flow 
{escape(component_id)}/{escape(flow_id)}") + formatter.success(f"Deleted flow keboola.flow/{escape(flow_id)}") # --------------------------------------------------------------------------- @@ -593,11 +779,6 @@ def flow_schedule( ctx: typer.Context, project: str = typer.Option(..., "--project", help="Project alias"), flow_id: str = typer.Option(..., "--flow-id", help="Flow configuration ID"), - component_id: str = typer.Option( - "keboola.orchestrator", - "--component-id", - help="Flow component ID (default: keboola.orchestrator)", - ), cron: str = typer.Option(..., "--cron", help="Cron expression (e.g. '0 6 * * *')"), timezone: str = typer.Option("UTC", "--timezone", help="IANA timezone (default: UTC)"), enabled: bool = typer.Option(True, "--enabled/--disabled", help="Enable the schedule"), @@ -626,7 +807,6 @@ def flow_schedule( try: result = service.set_flow_schedule( alias=project, - component_id=component_id, config_id=flow_id, cron_tab=cron, timezone=timezone, @@ -672,11 +852,6 @@ def flow_schedule_remove( ctx: typer.Context, project: str = typer.Option(..., "--project", help="Project alias"), flow_id: str = typer.Option(..., "--flow-id", help="Flow configuration ID"), - component_id: str = typer.Option( - "keboola.orchestrator", - "--component-id", - help="Flow component ID (default: keboola.orchestrator)", - ), branch: int | None = typer.Option(None, "--branch", help="Dev branch ID"), dry_run: bool = typer.Option( False, @@ -696,7 +871,6 @@ def flow_schedule_remove( try: sched_result = service.list_flow_schedules( alias=project, - component_id=component_id, config_id=flow_id, branch_id=branch, ) @@ -711,7 +885,7 @@ def flow_schedule_remove( payload = { "would_delete": { "project_alias": project, - "component_id": component_id, + "component_id": "keboola.flow", "config_id": flow_id, "branch_id": branch, "schedules": schedules, @@ -726,7 +900,7 @@ def flow_schedule_remove( else: formatter.console.print( f"[bold blue]Would remove {len(schedules)} schedule(s) " - 
f"from flow[/bold blue] {escape(component_id)}/{escape(flow_id)}:" + f"from flow[/bold blue] keboola.flow/{escape(flow_id)}:" ) _print_schedule_list(formatter, schedules) return @@ -736,7 +910,6 @@ def flow_schedule_remove( try: sched_result = service.list_flow_schedules( alias=project, - component_id=component_id, config_id=flow_id, branch_id=branch, ) @@ -757,7 +930,6 @@ def flow_schedule_remove( try: result = service.remove_flow_schedule( alias=project, - component_id=component_id, config_id=flow_id, branch_id=branch, ) diff --git a/src/keboola_agent_cli/errors.py b/src/keboola_agent_cli/errors.py index ea5c9881..4ab6d770 100644 --- a/src/keboola_agent_cli/errors.py +++ b/src/keboola_agent_cli/errors.py @@ -97,8 +97,9 @@ class ErrorCode(StrEnum): JOB_TIMEOUT_TERMINATED = "JOB_TIMEOUT_TERMINATED" # Flow (new in 0.22.0) - INVALID_FLOW_DAG = "INVALID_FLOW_DAG" SCHEDULE_DELETE_FAILED = "SCHEDULE_DELETE_FAILED" + # Conditional-flow validation (replaces INVALID_FLOW_DAG; since 0.56.0) + INVALID_FLOW_DEFINITION = "INVALID_FLOW_DEFINITION" # Data apps (new in 0.27.0) DATA_APP_BUILD_FAILED = "DATA_APP_BUILD_FAILED" diff --git a/src/keboola_agent_cli/models.py b/src/keboola_agent_cli/models.py index 79679669..3edc50ec 100644 --- a/src/keboola_agent_cli/models.py +++ b/src/keboola_agent_cli/models.py @@ -273,6 +273,15 @@ class ComponentDetail(BaseModel): long_description: str = Field(default="", alias="longDescription") documentation: str = Field(default="") documentation_url: str = Field(default="", alias="documentationUrl") + + @field_validator( + "description", "long_description", "documentation", "documentation_url", mode="before" + ) + @classmethod + def _none_to_empty_string(cls, value: Any) -> Any: + """AI Service returns explicit null for missing docs (e.g. 
keboola.flow).""" + return "" if value is None else value + configuration_schema: dict[str, Any] = Field(default_factory=dict, alias="configurationSchema") configuration_row_schema: dict[str, Any] = Field( default_factory=dict, alias="configurationRowSchema" diff --git a/src/keboola_agent_cli/permissions.py b/src/keboola_agent_cli/permissions.py index fd667ab9..733de02a 100644 --- a/src/keboola_agent_cli/permissions.py +++ b/src/keboola_agent_cli/permissions.py @@ -288,6 +288,7 @@ "flow.list": "read", "flow.detail": "read", "flow.schema": "read", + "flow.validate": "read", "flow.new": "write", "flow.update": "write", "flow.delete": "destructive", diff --git a/src/keboola_agent_cli/server/routers/flows.py b/src/keboola_agent_cli/server/routers/flows.py index f8179b36..ac9fe1e2 100644 --- a/src/keboola_agent_cli/server/routers/flows.py +++ b/src/keboola_agent_cli/server/routers/flows.py @@ -1,4 +1,4 @@ -"""Flow + flow-schedule endpoints.""" +"""Flow + flow-schedule endpoints (conditional flows / keboola.flow only).""" from __future__ import annotations @@ -11,12 +11,9 @@ router = APIRouter(prefix="/flows", tags=["flows"]) -DEFAULT_FLOW_COMPONENT = "keboola.flow" - class FlowCreate(BaseModel): name: str - component_id: str = DEFAULT_FLOW_COMPONENT description: str = "" phases: list[dict[str, Any]] | None = None tasks: list[dict[str, Any]] | None = None @@ -24,7 +21,6 @@ class FlowCreate(BaseModel): class FlowUpdate(BaseModel): - component_id: str = DEFAULT_FLOW_COMPONENT name: str | None = None description: str | None = None phases: list[dict[str, Any]] | None = None @@ -33,7 +29,6 @@ class FlowUpdate(BaseModel): class FlowSchedule(BaseModel): - component_id: str = DEFAULT_FLOW_COMPONENT cron_tab: str timezone: str = "UTC" enabled: bool = True @@ -58,14 +53,11 @@ def list_flows( def detail( project: str, config_id: str, - component_id: str = DEFAULT_FLOW_COMPONENT, branch_id: int | None = None, registry: ServiceRegistry = Depends(get_registry), ) -> dict[str, Any]: 
"""Fetch a single flow configuration. Mirrors `kbagent flow detail`.""" - return registry.flow.get_flow_detail( - alias=project, component_id=component_id, config_id=config_id, branch_id=branch_id - ) + return registry.flow.get_flow_detail(alias=project, config_id=config_id, branch_id=branch_id) @router.post("/{project}", summary="Create a new flow") @@ -75,7 +67,6 @@ def create( """Create a new flow configuration. Mirrors `kbagent flow new`.""" return registry.flow.create_flow( alias=project, - component_id=body.component_id, name=body.name, description=body.description, phases=body.phases, @@ -94,7 +85,6 @@ def update( """Update name, description, or phases/tasks of a flow. Mirrors `kbagent flow update`.""" return registry.flow.update_flow( alias=project, - component_id=body.component_id, config_id=config_id, name=body.name, description=body.description, @@ -108,27 +98,23 @@ def update( def delete( project: str, config_id: str, - component_id: str = DEFAULT_FLOW_COMPONENT, branch_id: int | None = None, registry: ServiceRegistry = Depends(get_registry), ) -> dict[str, Any]: """Delete a flow configuration. Mirrors `kbagent flow delete`.""" - return registry.flow.delete_flow( - alias=project, component_id=component_id, config_id=config_id, branch_id=branch_id - ) + return registry.flow.delete_flow(alias=project, config_id=config_id, branch_id=branch_id) @router.get("/{project}/{config_id}/schedules", summary="List schedules for a flow") def list_schedules( project: str, config_id: str, - component_id: str = DEFAULT_FLOW_COMPONENT, branch_id: int | None = None, registry: ServiceRegistry = Depends(get_registry), ) -> dict[str, Any]: """List cron schedules attached to a flow.""" return registry.flow.list_flow_schedules( - alias=project, component_id=component_id, config_id=config_id, branch_id=branch_id + alias=project, config_id=config_id, branch_id=branch_id ) @@ -142,7 +128,6 @@ def set_schedule( """Attach or update a cron schedule on a flow. 
Mirrors `kbagent flow schedule`.""" return registry.flow.set_flow_schedule( alias=project, - component_id=body.component_id, config_id=config_id, cron_tab=body.cron_tab, timezone=body.timezone, @@ -156,11 +141,10 @@ def set_schedule( def remove_schedule( project: str, config_id: str, - component_id: str = DEFAULT_FLOW_COMPONENT, branch_id: int | None = None, registry: ServiceRegistry = Depends(get_registry), ) -> dict[str, Any]: """Remove the cron schedule from a flow. Mirrors `kbagent flow schedule-remove`.""" return registry.flow.remove_flow_schedule( - alias=project, component_id=component_id, config_id=config_id, branch_id=branch_id + alias=project, config_id=config_id, branch_id=branch_id ) diff --git a/src/keboola_agent_cli/services/component_service.py b/src/keboola_agent_cli/services/component_service.py index be1f5853..beea1f6d 100644 --- a/src/keboola_agent_cli/services/component_service.py +++ b/src/keboola_agent_cli/services/component_service.py @@ -40,7 +40,7 @@ def default_ai_client_factory(stack_url: str, token: str) -> AiServiceClient: _PYTHON_TRANSFORMATION_FRAGMENT = "python-transformation" _CUSTOM_PYTHON_APP_ID = "kds-team.app-custom-python" -_FLOW_COMPONENT_IDS = ("keboola.orchestrator", "keboola.flow") +_FLOW_COMPONENT_IDS = ("keboola.flow",) def _detect_component_category(component_id: str) -> str: @@ -289,36 +289,34 @@ def _build_pyproject_toml(component_id: str, name: str, packages: list[str] | No ) -def _build_flow_config_yml(name: str, component_id: str = "keboola.orchestrator") -> str: - """Generate flow/orchestrator configuration YAML.""" +def _build_flow_config_yml(name: str, component_id: str = "keboola.flow") -> str: + """Generate a conditional-flow (keboola.flow) configuration YAML skeleton. + + IDs are strings; phases carry next[].goto transitions (a phase id or null) + and tasks are typed (job/notification/variable). 
+ """ lines = [ - "version: 2", f'name: "{name}"', "description: |", " TODO: describe this flow", - "", - "schedules:", - ' - name: "Daily run"', - ' cron: "0 6 * * *"', - " timezone: Europe/Prague", - " enabled: false", - "", "phases:", - ' - name: "Phase 1"', - " tasks:", - ' - component: "keboola.ex-http"', - ' config: "extractor/keboola.ex-http/my-extractor"', - ' - name: "Phase 2"', - ' depends_on: ["Phase 1"]', - " tasks:", - ' - component: "keboola.snowflake-transformation"', - ' config: "transformation/keboola.snowflake-transformation/my-transform"', - "", - "_keboola:", - f" component_id: {component_id}", - "", + ' - id: "phase-1"', + ' name: "Phase 1"', + " next:", + ' - id: "default"', + " goto: null", + "tasks:", + ' - id: "task-1"', + ' name: "Task 1"', + ' phase: "phase-1"', + " enabled: true", + " task:", + " type: job", + ' componentId: "keboola.ex-http"', + ' configId: "TODO"', + " mode: run", ] - return "\n".join(lines) + return "\n".join(lines) + "\n" class ComponentService(BaseService): @@ -647,7 +645,7 @@ def _generate_files( { "path": "_config.yml", "content": _build_flow_config_yml(config_name, detail.component_id), - "description": "Flow/orchestrator configuration", + "description": "Conditional flow (keboola.flow) configuration", } ) return files diff --git a/src/keboola_agent_cli/services/flow_service.py b/src/keboola_agent_cli/services/flow_service.py index 4ee04280..cecb2579 100644 --- a/src/keboola_agent_cli/services/flow_service.py +++ b/src/keboola_agent_cli/services/flow_service.py @@ -1,7 +1,11 @@ -"""Flow (orchestrator + conditional flow) lifecycle service. +"""Conditional flow (keboola.flow) lifecycle service. -Provides CRUD for keboola.orchestrator and keboola.flow configurations, -plus schedule bind/unbind via keboola.scheduler component configs. +Provides CRUD for keboola.flow (Conditional Flow) configurations, plus +schedule bind/unbind via keboola.scheduler component configs. 
+ +keboola.orchestrator support was dropped in 0.56.0; this service targets the +single component keboola.flow. Legacy orchestrator configs are still counted +(not listed) so the CLI can warn users why a flow "disappeared". Flows are semantic sugar over the Storage API config layer -- no separate HTTP client is needed. Schedules are stored as keboola.scheduler configs @@ -12,17 +16,29 @@ import json import logging +from collections.abc import Callable from typing import Any +from ..ai_client import AiServiceClient +from ..config_store import ConfigStore from ..errors import ErrorCode, KeboolaApiError -from ..models import ProjectConfig -from .base import BaseService +from ..models import ComponentDetail, ProjectConfig +from .base import BaseService, ClientFactory +from .flow_validation import find_unreachable_phases, validate_conditional_flow logger = logging.getLogger(__name__) -FLOW_COMPONENT_IDS: tuple[str, ...] = ("keboola.orchestrator", "keboola.flow") +FLOW_COMPONENT_ID = "keboola.flow" +LEGACY_FLOW_COMPONENT_ID = "keboola.orchestrator" SCHEDULER_COMPONENT_ID = "keboola.scheduler" +AiClientFactory = Callable[[str, str], AiServiceClient] + + +def default_ai_client_factory(stack_url: str, token: str) -> AiServiceClient: + """Default factory: build an ``AiServiceClient`` for the given project.""" + return AiServiceClient(stack_url=stack_url, token=token) + # --------------------------------------------------------------------------- # Internal helpers @@ -44,58 +60,6 @@ def _count_phases_tasks(body: dict[str, Any]) -> tuple[int, int]: return len(body.get("phases", [])), len(body.get("tasks", [])) -def _validate_dag(phases: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> list[str]: - """Validate phase dependency DAG for cycles and bad references. - - Uses Kahn's topological sort to detect cycles. Returns a list of - human-readable error strings; empty list means valid. 
- """ - errors: list[str] = [] - phase_ids = {p.get("id") for p in phases if p.get("id") is not None} - - # Check dependsOn references - for phase in phases: - for dep_id in phase.get("dependsOn", []): - if dep_id not in phase_ids: - errors.append(f"Phase '{phase.get('id')}' depends on unknown phase '{dep_id}'") - - # Check task phase references - for task in tasks: - phase_ref = task.get("phase") - if phase_ref is not None and phase_ref not in phase_ids: - errors.append(f"Task '{task.get('id', '?')}' references unknown phase '{phase_ref}'") - - if errors: - return errors - - # Kahn's algorithm for cycle detection - in_degree: dict[Any, int] = {p.get("id"): 0 for p in phases if p.get("id") is not None} - adj: dict[Any, list[Any]] = {p.get("id"): [] for p in phases if p.get("id") is not None} - for phase in phases: - pid = phase.get("id") - if pid is None: - continue - for dep_id in phase.get("dependsOn", []): - if dep_id in adj: - adj[dep_id].append(pid) - in_degree[pid] += 1 - - queue = [pid for pid, deg in in_degree.items() if deg == 0] - visited = 0 - while queue: - node = queue.pop(0) - visited += 1 - for neighbor in adj.get(node, []): - in_degree[neighbor] -= 1 - if in_degree[neighbor] == 0: - queue.append(neighbor) - - if visited != len(phase_ids): - errors.append("Phase dependency graph contains a cycle") - - return errors - - def _collect_schedules_by_parent( client: Any, branch_id: int | None ) -> dict[tuple[str, str], list[dict[str, Any]]]: @@ -150,12 +114,69 @@ def _collect_schedules_by_parent( class FlowService(BaseService): - """Business logic for flow (keboola.orchestrator + keboola.flow) CRUD. + """Business logic for conditional flow (keboola.flow) CRUD. All schedule operations use keboola.scheduler component configs -- no separate Scheduler Service HTTP client required. 
+ + The structural conditional-flow JSON Schema is fetched at runtime from the + stack's component registry (AI Service ``configurationSchema`` for + ``keboola.flow``) via ``ai_client_factory`` -- it is never bundled. """ + def __init__( + self, + config_store: ConfigStore, + client_factory: ClientFactory | None = None, + ai_client_factory: AiClientFactory | None = None, + ) -> None: + super().__init__(config_store, client_factory) + self._ai_client_factory = ai_client_factory or default_ai_client_factory + + # ── schema fetch ───────────────────────────────────────────────── + + def _fetch_flow_schema( + self, project: ProjectConfig + ) -> tuple[dict[str, Any] | None, str | None]: + """Fetch the live keboola.flow JSON Schema from the AI Service. + + Returns ``(schema, None)`` on success, or ``(None, reason)`` when the + schema cannot be obtained (network error, KeboolaApiError, malformed or + empty schema). A ``None`` schema must NOT block a write -- the caller + degrades to semantic-only validation and surfaces ``reason`` as a + warning. + """ + ai_client = self._ai_client_factory(project.stack_url, project.token) + try: + raw = ai_client.get_component_detail(FLOW_COMPONENT_ID) + except KeboolaApiError as exc: + return None, exc.message + except Exception as exc: + return None, str(exc) + finally: + ai_client.close() + + try: + detail = ComponentDetail(**raw) + except (TypeError, ValueError) as exc: + return None, f"component detail could not be parsed ({exc})" + + schema = detail.configuration_schema + if not schema: + return None, "AI Service returned no configurationSchema for keboola.flow" + return schema, None + + def fetch_flow_schema(self, alias: str) -> tuple[dict[str, Any] | None, str | None]: + """Public schema fetch for a project alias (used by ``flow validate + --project`` and ``flow schema --full --project``). + + Returns ``(schema, None)`` on success or ``(None, reason)`` on any + failure -- the caller decides how to surface the reason. 
+ """ + projects = self.resolve_projects([alias]) + project = projects[alias] + return self._fetch_flow_schema(project) + # ── list ──────────────────────────────────────────────────────── def list_flows( @@ -164,7 +185,12 @@ def list_flows( branch_id: int | None = None, with_schedules: bool = False, ) -> dict[str, Any]: - """List all flows across projects (both component IDs). + """List conditional flows (keboola.flow) across projects. + + Only ``keboola.flow`` configs are returned. Legacy + ``keboola.orchestrator`` configs are counted (not listed) and surfaced + as ``legacy_orchestrator_count`` so the CLI can warn users why a flow + "disappeared" (orchestrator support was dropped in 0.56.0). When ``with_schedules`` is True, each flow row is enriched with a ``schedules`` list pulled from the same project's @@ -179,10 +205,12 @@ def list_flows( Returns: Dict with keys: - - "flows": list of flow dicts (project_alias, component_id, - config_id, name, description, is_disabled, and + - "flows": list of keboola.flow dicts (project_alias, + component_id, config_id, name, description, is_disabled, and ``schedules`` when ``with_schedules`` is True) - "errors": list of error dicts + - "legacy_orchestrator_count": total legacy orchestrator + configs found across the queried projects (not listed) """ projects = self.resolve_projects(aliases) @@ -191,26 +219,39 @@ def worker(alias: str, project: ProjectConfig) -> tuple[Any, ...]: effective_branch = branch_id or project.active_branch_id try: flows: list[dict[str, Any]] = [] - for comp_id in FLOW_COMPONENT_IDS: - try: - configs = client.list_component_configs(comp_id, branch_id=effective_branch) - except KeboolaApiError as exc: - # 404 = component not installed; skip gracefully - if exc.error_code == "NOT_FOUND": - continue + try: + configs = client.list_component_configs( + FLOW_COMPONENT_ID, branch_id=effective_branch + ) + except KeboolaApiError as exc: + if exc.error_code == "NOT_FOUND": + configs = [] + else: + raise + for 
cfg in configs: + flow_row: dict[str, Any] = { + "project_alias": alias, + "component_id": FLOW_COMPONENT_ID, + "config_id": str(cfg.get("id", "")), + "name": cfg.get("name", ""), + "description": cfg.get("description", ""), + "is_disabled": cfg.get("isDisabled", False), + } + if with_schedules: + flow_row["schedules"] = [] + flows.append(flow_row) + + # Count (do not list) legacy orchestrator configs so the CLI can warn. + try: + legacy = client.list_component_configs( + LEGACY_FLOW_COMPONENT_ID, branch_id=effective_branch + ) + legacy_count = len(legacy) + except KeboolaApiError as exc: + if exc.error_code == "NOT_FOUND": + legacy_count = 0 + else: raise - for cfg in configs: - flow_row: dict[str, Any] = { - "project_alias": alias, - "component_id": comp_id, - "config_id": str(cfg.get("id", "")), - "name": cfg.get("name", ""), - "description": cfg.get("description", ""), - "is_disabled": cfg.get("isDisabled", False), - } - if with_schedules: - flow_row["schedules"] = [] - flows.append(flow_row) # One extra list call per project, then a map-join in memory. 
if with_schedules and flows: @@ -219,7 +260,7 @@ def worker(alias: str, project: ProjectConfig) -> tuple[Any, ...]: key = (flow_row["component_id"], flow_row["config_id"]) flow_row["schedules"] = schedules_by_parent.get(key, []) - return (alias, flows, True) + return (alias, flows, legacy_count) except KeboolaApiError as exc: return ( alias, @@ -244,19 +285,24 @@ def worker(alias: str, project: ProjectConfig) -> tuple[Any, ...]: successes, errors = self._run_parallel(projects, worker) all_flows: list[dict[str, Any]] = [] - for _, flows, _ in successes: + legacy_total = 0 + for _, flows, legacy_count in successes: all_flows.extend(flows) - all_flows.sort(key=lambda f: (f["project_alias"], f["component_id"], f["name"].lower())) + legacy_total += legacy_count + all_flows.sort(key=lambda f: (f["project_alias"], f["name"].lower())) errors.sort(key=lambda e: e.get("project_alias", "")) - return {"flows": all_flows, "errors": errors} + return { + "flows": all_flows, + "errors": errors, + "legacy_orchestrator_count": legacy_total, + } # ── detail ────────────────────────────────────────────────────── def get_flow_detail( self, alias: str, - component_id: str, config_id: str, branch_id: int | None = None, ) -> dict[str, Any]: @@ -272,7 +318,9 @@ def get_flow_detail( client = self._client_factory(project.stack_url, project.token) try: - detail = client.get_config_detail(component_id, config_id, branch_id=effective_branch) + detail = client.get_config_detail( + FLOW_COMPONENT_ID, config_id, branch_id=effective_branch + ) finally: client.close() @@ -281,6 +329,7 @@ def get_flow_detail( tasks = body.get("tasks", []) detail["project_alias"] = alias + detail["component_id"] = FLOW_COMPONENT_ID detail["branch_id"] = effective_branch detail["phases"] = phases detail["tasks"] = tasks @@ -293,51 +342,57 @@ def get_flow_detail( def create_flow( self, alias: str, - component_id: str, name: str, description: str = "", phases: list[dict[str, Any]] | None = None, tasks: list[dict[str, 
Any]] | None = None, branch_id: int | None = None, ) -> dict[str, Any]: - """Create a new flow configuration. + """Create a new conditional-flow (keboola.flow) configuration. Args: alias: Project alias. - component_id: 'keboola.flow' (default) or 'keboola.orchestrator'. name: Flow name. description: Optional description. - phases: Phase definitions (validated for DAG correctness). - tasks: Task definitions. + phases: Phase definitions (validated against the CF schema). + tasks: Task definitions (validated against the CF schema). branch_id: Dev branch override. Raises: - KeboolaApiError: On API failure or DAG validation error - (error_code='INVALID_FLOW_DAG'). + KeboolaApiError: On API failure or definition validation error + (error_code='INVALID_FLOW_DEFINITION'). """ phases = phases or [] tasks = tasks or [] - if phases: - dag_errors = _validate_dag(phases, tasks) - if dag_errors: - raise KeboolaApiError( - message=f"Flow DAG validation failed: {'; '.join(dag_errors)}", - status_code=400, - error_code=ErrorCode.INVALID_FLOW_DAG, - retryable=False, - ) - - configuration: dict[str, Any] = {"phases": phases, "tasks": tasks} - projects = self.resolve_projects([alias]) project = projects[alias] effective_branch = branch_id or project.active_branch_id + schema, schema_reason = self._fetch_flow_schema(project) + warnings: list[str] = [] + if schema is None: + warnings.append(f"structural schema validation skipped: {schema_reason}") + + definition_errors = validate_conditional_flow(phases, tasks, schema) + if definition_errors: + raise KeboolaApiError( + message="Flow definition is invalid: " + "; ".join(definition_errors), + status_code=400, + error_code=ErrorCode.INVALID_FLOW_DEFINITION, + retryable=False, + ) + warnings.extend( + f"Phase '{pid}' is unreachable from the entry phase" + for pid in find_unreachable_phases(phases) + ) + + configuration: dict[str, Any] = {"phases": phases, "tasks": tasks} + client = self._client_factory(project.stack_url, project.token) try: 
result = client.create_config( - component_id=component_id, + component_id=FLOW_COMPONENT_ID, name=name, configuration=configuration, description=description, @@ -350,6 +405,7 @@ def create_flow( result["branch_id"] = effective_branch result["phase_count"] = len(phases) result["task_count"] = len(tasks) + result["warnings"] = warnings return result # ── update ────────────────────────────────────────────────────── @@ -357,7 +413,6 @@ def create_flow( def update_flow( self, alias: str, - component_id: str, config_id: str, name: str | None = None, description: str | None = None, @@ -365,45 +420,56 @@ def update_flow( tasks: list[dict[str, Any]] | None = None, branch_id: int | None = None, ) -> dict[str, Any]: - """Update an existing flow configuration. + """Update an existing conditional-flow (keboola.flow) configuration. - When both phases and tasks are provided, validates the DAG before writing. - When only one is provided, the other is fetched from the current config. + When phases and/or tasks are provided, validation runs on the merged + body (the unspecified side is fetched from the current config) so a + half-config is never validated. Raises: - KeboolaApiError: On API failure or DAG validation error. + KeboolaApiError: On API failure or definition validation error + (error_code='INVALID_FLOW_DEFINITION'). 
""" projects = self.resolve_projects([alias]) project = projects[alias] effective_branch = branch_id or project.active_branch_id + warnings: list[str] = [] client = self._client_factory(project.stack_url, project.token) try: configuration: dict[str, Any] | None = None if phases is not None or tasks is not None: current = client.get_config_detail( - component_id, config_id, branch_id=effective_branch + FLOW_COMPONENT_ID, config_id, branch_id=effective_branch ) current_body = _parse_configuration(current.get("configuration")) merged_phases = phases if phases is not None else current_body.get("phases", []) merged_tasks = tasks if tasks is not None else current_body.get("tasks", []) - if merged_phases: - dag_errors = _validate_dag(merged_phases, merged_tasks) - if dag_errors: - raise KeboolaApiError( - message=f"Flow DAG validation failed: {'; '.join(dag_errors)}", - status_code=400, - error_code=ErrorCode.INVALID_FLOW_DAG, - retryable=False, - ) + schema, schema_reason = self._fetch_flow_schema(project) + if schema is None: + warnings.append(f"structural schema validation skipped: {schema_reason}") + + definition_errors = validate_conditional_flow(merged_phases, merged_tasks, schema) + if definition_errors: + raise KeboolaApiError( + message="Flow definition is invalid: " + "; ".join(definition_errors), + status_code=400, + error_code=ErrorCode.INVALID_FLOW_DEFINITION, + retryable=False, + ) + + warnings.extend( + f"Phase '{pid}' is unreachable from the entry phase" + for pid in find_unreachable_phases(merged_phases) + ) configuration = dict(current_body) configuration["phases"] = merged_phases configuration["tasks"] = merged_tasks result = client.update_config( - component_id=component_id, + component_id=FLOW_COMPONENT_ID, config_id=config_id, name=name, description=description, @@ -416,6 +482,7 @@ def update_flow( result["project_alias"] = alias result["branch_id"] = effective_branch + result["warnings"] = warnings return result # ── delete 
────────────────────────────────────────────────────── @@ -423,11 +490,10 @@ def update_flow( def delete_flow( self, alias: str, - component_id: str, config_id: str, branch_id: int | None = None, ) -> dict[str, Any]: - """Delete a flow configuration. + """Delete a conditional-flow (keboola.flow) configuration. Does NOT automatically remove associated keboola.scheduler configs. Use remove_flow_schedule() first if needed. @@ -439,7 +505,7 @@ def delete_flow( client = self._client_factory(project.stack_url, project.token) try: client.delete_config( - component_id=component_id, + component_id=FLOW_COMPONENT_ID, config_id=config_id, branch_id=effective_branch, ) @@ -449,7 +515,7 @@ def delete_flow( return { "status": "deleted", "project_alias": alias, - "component_id": component_id, + "component_id": FLOW_COMPONENT_ID, "config_id": config_id, "branch_id": effective_branch, } @@ -459,14 +525,13 @@ def delete_flow( def list_flow_schedules( self, alias: str, - component_id: str, config_id: str, branch_id: int | None = None, ) -> dict[str, Any]: """List keboola.scheduler configs that target this flow. Fetches all keboola.scheduler configs and filters by - target.componentId + target.configurationId. + target.componentId == keboola.flow + target.configurationId. 
""" projects = self.resolve_projects([alias]) project = projects[alias] @@ -490,7 +555,7 @@ def list_flow_schedules( for sched in all_sched: body = _parse_configuration(sched.get("configuration")) target = body.get("target") or {} - if target.get("componentId") == component_id and str( + if target.get("componentId") == FLOW_COMPONENT_ID and str( target.get("configurationId", "") ) == str(config_id): sched_info = body.get("schedule") or {} @@ -506,7 +571,7 @@ def list_flow_schedules( return { "project_alias": alias, - "component_id": component_id, + "component_id": FLOW_COMPONENT_ID, "config_id": config_id, "schedules": schedules, } @@ -514,7 +579,6 @@ def list_flow_schedules( def set_flow_schedule( self, alias: str, - component_id: str, config_id: str, cron_tab: str, timezone: str = "UTC", @@ -529,11 +593,10 @@ def set_flow_schedule( duplicate schedules when called repeatedly. The schedule is stored as a keboola.scheduler configuration whose - ``target`` points at the flow component + config. + ``target`` points at the keboola.flow component + config. Args: alias: Project alias. - component_id: Flow component ID. config_id: Flow configuration ID. cron_tab: Cron expression (e.g. '0 6 * * *'). timezone: IANA timezone (default 'UTC'). 
@@ -550,7 +613,7 @@ def set_flow_schedule( if not schedule_name: try: detail = client.get_config_detail( - component_id, config_id, branch_id=effective_branch + FLOW_COMPONENT_ID, config_id, branch_id=effective_branch ) schedule_name = f"{detail.get('name', config_id)} (Schedule)" except KeboolaApiError: @@ -564,7 +627,7 @@ def set_flow_schedule( }, "target": { "mode": "run", - "componentId": component_id, + "componentId": FLOW_COMPONENT_ID, "configurationId": config_id, }, } @@ -584,7 +647,7 @@ def set_flow_schedule( for sched in existing: body = _parse_configuration(sched.get("configuration")) target = body.get("target") or {} - if target.get("componentId") == component_id and str( + if target.get("componentId") == FLOW_COMPONENT_ID and str( target.get("configurationId", "") ) == str(config_id): existing_id = str(sched.get("id", "")) @@ -615,7 +678,7 @@ def set_flow_schedule( "project_alias": alias, "schedule_id": str(result.get("id", existing_id or "")), "schedule_name": schedule_name, - "component_id": component_id, + "component_id": FLOW_COMPONENT_ID, "config_id": config_id, "cron_tab": cron_tab, "timezone": timezone, @@ -626,7 +689,6 @@ def set_flow_schedule( def remove_flow_schedule( self, alias: str, - component_id: str, config_id: str, branch_id: int | None = None, ) -> dict[str, Any]: @@ -655,7 +717,7 @@ def remove_flow_schedule( for sched in all_sched: body = _parse_configuration(sched.get("configuration")) target = body.get("target") or {} - if target.get("componentId") == component_id and str( + if target.get("componentId") == FLOW_COMPONENT_ID and str( target.get("configurationId", "") ) == str(config_id): sched_id = str(sched.get("id", "")) @@ -680,7 +742,7 @@ def remove_flow_schedule( return { "status": "removed", "project_alias": alias, - "component_id": component_id, + "component_id": FLOW_COMPONENT_ID, "config_id": config_id, "deleted_schedule_ids": deleted, "deleted_count": len(deleted), diff --git 
a/src/keboola_agent_cli/services/flow_validation.py b/src/keboola_agent_cli/services/flow_validation.py new file mode 100644 index 00000000..81eda845 --- /dev/null +++ b/src/keboola_agent_cli/services/flow_validation.py @@ -0,0 +1,188 @@ +"""Conditional-flow (keboola.flow) validation. + +Pure functions: no HTTP, no ConfigStore -- trivially unit-testable. + +The structural JSON Schema (Draft7) is NOT bundled. It is fetched at runtime +from the stack's component registry (AI Service ``configurationSchema`` for +``keboola.flow``) by the caller and passed into ``validate_conditional_flow`` +as an explicit parameter. When no schema is available (offline, fetch failure, +empty schema) structural validation is skipped and only the semantic checks +run -- this module never reaches the network itself. +""" + +from __future__ import annotations + +from collections import deque +from typing import Any + +import jsonschema + +# Operand arity per operator (semantic; the schema cannot express these counts). 
+_BINARY_OPERATORS = frozenset( + {"EQUALS", "NOT_EQUALS", "GREATER_THAN", "LESS_THAN", "INCLUDES", "CONTAINS"} +) +_VARIADIC_MIN1_OPERATORS = frozenset({"AND", "OR"}) +_PHASE_SCOPED_OPERATORS = frozenset({"ALL_TASKS_IN_PHASE", "ANY_TASKS_IN_PHASE"}) +_UNARY_FUNCTIONS = frozenset({"COUNT", "DATE"}) + + +def _structural_errors( + phases: list[dict[str, Any]], tasks: list[dict[str, Any]], schema: dict[str, Any] +) -> list[str]: + """Run Draft7 validation against the supplied schema, collecting ALL errors.""" + document = {"phases": phases, "tasks": tasks} + validator = jsonschema.Draft7Validator(schema) + errors: list[str] = [] + for err in sorted(validator.iter_errors(document), key=lambda e: list(e.path)): + path = "/".join(str(p) for p in err.path) or "(root)" + errors.append(f"Schema error at {path}: {err.message}") + return errors + + +def _condition_arity_errors(condition: Any) -> list[str]: + """Recursively check operator/function operand arity.""" + if not isinstance(condition, dict): + return [] + errors: list[str] = [] + ctype = condition.get("type") + operands = condition.get("operands", []) + + if ctype == "operator": + op = condition.get("operator") + if op in _BINARY_OPERATORS and len(operands) != 2: + errors.append(f"Operator '{op}' requires exactly 2 operands, got {len(operands)}") + elif op in _VARIADIC_MIN1_OPERATORS and len(operands) < 1: + errors.append(f"Operator '{op}' requires at least 1 operand, got {len(operands)}") + elif op in _PHASE_SCOPED_OPERATORS and not condition.get("phase"): + errors.append(f"Operator '{op}' requires a 'phase' field") + elif ctype == "function": + fn = condition.get("function") + if fn in _UNARY_FUNCTIONS and len(operands) != 1: + errors.append(f"Function '{fn}' requires exactly 1 operand, got {len(operands)}") + + for child in operands: + errors.extend(_condition_arity_errors(child)) + return errors + + +def _semantic_errors(phases: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> list[str]: + errors: list[str] 
= [] + + # Unique phase ids + phase_ids: list[str] = [str(p.get("id")) for p in phases] + seen: set[str] = set() + for pid in phase_ids: + if pid in seen: + errors.append(f"Duplicate phase id '{pid}'") + seen.add(pid) + valid_phase_ids = set(phase_ids) + + # Unique task ids + seen_tasks: set[str] = set() + for task in tasks: + tid = str(task.get("id")) + if tid in seen_tasks: + errors.append(f"Duplicate task id '{tid}'") + seen_tasks.add(tid) + + # task.phase references an existing phase + for task in tasks: + ref = str(task.get("phase")) + if ref not in valid_phase_ids: + errors.append(f"Task '{task.get('id', '?')}' references unknown phase '{ref}'") + + # next[].goto is an existing phase id or null + for phase in phases: + for transition in phase.get("next", []): + goto = transition.get("goto") + if goto is not None and str(goto) not in valid_phase_ids: + errors.append( + f"Phase '{phase.get('id', '?')}' transition goto '{goto}' " + f"is not an existing phase id (use null to end the flow)" + ) + + # A phase with conditional transitions must end with a default + # (condition-less) transition. + for phase in phases: + nexts = phase.get("next", []) + if not nexts: + continue + has_conditional = any("condition" in t for t in nexts) + last_is_default = "condition" not in nexts[-1] + if has_conditional and not last_is_default: + errors.append( + f"Phase '{phase.get('id', '?')}' has conditional transitions but " + f"no default (condition-less) transition as the last next[] item" + ) + + # Every phase must have at least one enabled task. + enabled_by_phase: dict[str, int] = {str(p.get("id")): 0 for p in phases} + for task in tasks: + if task.get("enabled", True): + enabled_by_phase[str(task.get("phase"))] = ( + enabled_by_phase.get(str(task.get("phase")), 0) + 1 + ) + for phase in phases: + pid = str(phase.get("id")) + if enabled_by_phase.get(pid, 0) == 0: + errors.append(f"Phase '{pid}' has no enabled task") + + # Condition operand arity (recursive). 
+ for phase in phases: + for transition in phase.get("next", []): + cond = transition.get("condition") + if cond is not None: + errors.extend(_condition_arity_errors(cond)) + + return errors + + +def validate_conditional_flow( + phases: list[dict[str, Any]], + tasks: list[dict[str, Any]], + schema: dict[str, Any] | None = None, +) -> list[str]: + """Validate a conditional-flow body. Returns a flat list of error strings + (empty == valid). Reachability is computed separately as a warning -- call + ``find_unreachable_phases`` for that. NO cycle detection: goto loops are + legal at runtime. + + Structural (Draft7) validation runs ONLY when ``schema`` is supplied (the + schema is fetched at runtime from the stack by the caller -- this module is + pure). When the structure is unsound the structural errors are returned + first and the semantic checks are skipped, to avoid cascade noise from a + malformed document. + + The semantic checks (unique ids, task.phase refs, goto refs, + default-transition rule, enabled-task-per-phase, operator/function arity) + ALWAYS run -- with or without a schema -- because the Storage API does not + validate flow configurations server-side. + """ + if schema: + structural = _structural_errors(phases, tasks, schema) + if structural: + return structural + return _semantic_errors(phases, tasks) + + +def find_unreachable_phases(phases: list[dict[str, Any]]) -> list[str]: + """Return ids of phases not reachable from the entry phase (first in the + list) by following next[].goto edges. WARNING-level only -- never blocks a + write. Returns ids in the order they appear in ``phases``. 
+ """ + if not phases: + return [] + by_id = {str(p.get("id")): p for p in phases} + entry = str(phases[0].get("id")) + reachable: set[str] = set() + queue: deque[str] = deque([entry]) + while queue: + pid = queue.popleft() + if pid in reachable or pid not in by_id: + continue + reachable.add(pid) + for transition in by_id[pid].get("next", []): + goto = transition.get("goto") + if goto is not None: + queue.append(str(goto)) + return [str(p.get("id")) for p in phases if str(p.get("id")) not in reachable] diff --git a/src/keboola_agent_cli/sync/config_format.py b/src/keboola_agent_cli/sync/config_format.py index 92e3bf38..16558c79 100644 --- a/src/keboola_agent_cli/sync/config_format.py +++ b/src/keboola_agent_cli/sync/config_format.py @@ -66,9 +66,6 @@ def _normalize_scripts(parameters: Any) -> Any: "other": "other", } -# Orchestrator-like components that have special handling -ORCHESTRATOR_COMPONENTS: set[str] = {"keboola.orchestrator", "keboola.flow"} - # Row-bearing components whose `configuration` top-level keys do NOT fit the # standard `parameters` / `storage` / `processors` shape. For these, the # non-standard keys (e.g. 
`values` for variables, `code_content` for shared-code) diff --git a/tests/test_agent_prompt.py b/tests/test_agent_prompt.py index ec6d1742..d31486b4 100644 --- a/tests/test_agent_prompt.py +++ b/tests/test_agent_prompt.py @@ -110,8 +110,8 @@ def test_rule_present(self, agent_body: str, needle: str, why: str) -> None: INLINE_GOTCHAS = [ - # Flow phase behavior stripping (MCP strip + --file full-replace) - ("behavior.onError", "flow phase behavior preservation"), + # Conditional flows: validate-before-push + INVALID_FLOW_DEFINITION (since 0.56.0) + ("INVALID_FLOW_DEFINITION", "conditional-flow validation error code"), # Snowflake transformation scaffolding refusal by MCP create_config ("keboola.snowflake-transformation", "MCP create_config refuses this component"), # Primary keys on new output tables crash first run (nullable default) @@ -143,8 +143,8 @@ def test_gotcha_present(self, agent_body: str, needle: str, why: str) -> None: TOOL_MATRIX_ROWS = [ - "kbagent flow update", - "tool call update_flow", + "kbagent flow validate", + "conditional flow", "kbagent config new --component-id keboola.snowflake-transformation", "kbagent job run", "kbagent config list", diff --git a/tests/test_component_service.py b/tests/test_component_service.py index 1c1443d0..cdc6ce0d 100644 --- a/tests/test_component_service.py +++ b/tests/test_component_service.py @@ -90,8 +90,8 @@ } FLOW_RESPONSE: dict[str, Any] = { - "componentId": "keboola.orchestrator", - "componentName": "Orchestrator", + "componentId": "keboola.flow", + "componentName": "Conditional Flow", "componentType": "other", "componentCategories": [], "componentFlags": [], @@ -463,11 +463,11 @@ def test_scaffold_custom_python_app(self, tmp_config_dir: Path) -> None: assert 'requires-python = ">=3.11"' in toml_file["content"] def test_scaffold_flow(self, tmp_config_dir: Path) -> None: - """Flow/orchestrator generates flow _config.yml with phases.""" + """keboola.flow generates a conditional-flow _config.yml with phases + 
tasks.""" mock_ai = _make_ai_client(detail_response=FLOW_RESPONSE) service = _make_service(tmp_config_dir, ai_client=mock_ai) - result = service.generate_scaffold(alias="prod", component_id="keboola.orchestrator") + result = service.generate_scaffold(alias="prod", component_id="keboola.flow") files = result["files"] assert len(files) == 1, "Flow should produce exactly 1 file" @@ -475,13 +475,15 @@ def test_scaffold_flow(self, tmp_config_dir: Path) -> None: content = files[0]["content"] assert "phases:" in content, "Flow config must contain phases section" - assert "schedules:" in content, "Flow config must contain schedules section" - assert "depends_on:" in content, "Flow config must contain dependencies" + assert "tasks:" in content, "Flow config must contain tasks section" + assert "goto:" in content, "Flow config must use goto transitions" + assert "dependsOn" not in content, "Conditional flows do not use dependsOn" - # Verify it's valid YAML + # Verify it's valid YAML with string ids parsed = yaml.safe_load(content) - assert parsed["version"] == 2 - assert len(parsed["phases"]) == 2 + assert parsed["phases"][0]["id"] == "phase-1" + assert parsed["tasks"][0]["phase"] == "phase-1" + assert parsed["tasks"][0]["task"]["type"] == "job" def test_scaffold_with_secrets(self, tmp_config_dir: Path) -> None: """Parameters with #password are masked to SECRET_PLACEHOLDER.""" @@ -607,7 +609,7 @@ class TestDetectComponentCategory: ("keboola.bigquery-transformation", "sql_transformation"), ("keboola.python-transformation-v2", "python_transformation"), ("kds-team.app-custom-python", "custom_python"), - ("keboola.orchestrator", "flow"), + ("keboola.orchestrator", "generic"), ("keboola.flow", "flow"), ("keboola.ex-http", "generic"), ("keboola.ex-db-snowflake", "generic"), diff --git a/tests/test_e2e.py b/tests/test_e2e.py index dca322e8..4902a480 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -5167,28 +5167,82 @@ def _run(self, *args: str) -> Any: def _run_ok(self, 
*args: str) -> dict[str, Any]: return _json_ok(self._run(*args)) + @staticmethod + def _write_cf(tmp_path: Path, name: str = "cf.yaml") -> Path: + """Write a minimal valid conditional-flow (string ids, one job task).""" + body = ( + "phases:\n" + ' - id: "p1"\n' + ' name: "P1"\n' + " next:\n" + ' - id: "n"\n' + " goto: null\n" + "tasks:\n" + ' - id: "t1"\n' + ' name: "T1"\n' + ' phase: "p1"\n' + " enabled: true\n" + " task:\n" + " type: job\n" + ' componentId: "keboola.ex-http"\n' + ' configId: "1"\n' + " mode: run\n" + ) + path = tmp_path / name + path.write_text(body, encoding="utf-8") + return path + def test_flow_crud_and_schedule(self, tmp_path: Path) -> None: - """Full lifecycle: schema → new → list → detail → update → schedule → schedule-remove → delete.""" + """Full lifecycle: schema → validate → new → list → detail → update → + schedule → schedule-remove → delete (conditional flow).""" - _step(1, "flow schema returns YAML template with phases key") + _step(1, "flow schema returns the conditional-flow YAML template") result = self._run("flow", "schema") assert result.exit_code == 0 data = json.loads(result.output) assert "phases" in data["data"]["schema"] + assert "goto" in data["data"]["schema"] + + cf_file = self._write_cf(tmp_path) + + _step(2, "flow validate (semantic-only, no --project) -- structural skipped note") + result = self._run("flow", "validate", "--file", f"@{cf_file}") + assert result.exit_code == 0, result.output + payload = json.loads(result.output)["data"] + assert payload["valid"] is True + assert any("structural schema validation skipped" in n for n in payload.get("notes", [])) + + _step(2.1, "flow validate --project -- fetch live schema, full validation") + result = self._run("flow", "validate", "--file", f"@{cf_file}", "--project", self.alias) + # Skip cleanly if the project has conditional flows disabled. 
+ if result.exit_code != 0 and "conditional" in result.output.lower(): + pytest.skip("Project reports conditional_flows=false; skipping CF E2E") + assert result.exit_code == 0, result.output + assert json.loads(result.output)["data"]["valid"] is True + + _step(2.2, "flow schema --full --project -- live JSON Schema from the stack") + result = self._run("flow", "schema", "--full", "--project", self.alias) + assert result.exit_code == 0, result.output + full = json.loads(result.output)["data"] + assert full["format"] == "json-schema" + assert isinstance(full["schema"], dict) and full["schema"] - _step(2, "flow new -- create a keboola.flow config") + _step(3, "flow new -- create a keboola.flow config") result = self._run( "flow", "new", "--project", self.alias, - "--component-id", - "keboola.flow", "--name", f"{RUN_ID}-flow", "--description", "E2E flow test", + "--file", + f"@{cf_file}", ) + # Skip cleanly if the project has conditional flows disabled. + if result.exit_code != 0 and "conditional" in result.output.lower(): + pytest.skip("Project reports conditional_flows=false; skipping CF E2E") assert result.exit_code == 0, result.output created = json.loads(result.output)["data"] flow_id = created["id"] @@ -5196,37 +5250,35 @@ def test_flow_crud_and_schedule(self, tmp_path: Path) -> None: assert created["project_alias"] == self.alias self._created_flows.append(("keboola.flow", flow_id)) - _step(3, "flow list -- flow appears in listing") + _step(4, "flow list -- flow appears in listing") result = self._run("flow", "list", "--project", self.alias) assert result.exit_code == 0 listing = json.loads(result.output)["data"] ids = {f["config_id"] for f in listing["flows"]} assert flow_id in ids + assert "legacy_orchestrator_count" in listing - _step(4, "flow detail -- returns phase/task counts") + _step(5, "flow detail -- returns phase/task counts") result = self._run( "flow", "detail", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", flow_id, ) 
assert result.exit_code == 0, result.output detail = json.loads(result.output)["data"] assert detail["id"] == flow_id + assert detail["component_id"] == "keboola.flow" assert "phase_count" in detail - _step(5, "flow update -- rename the flow") + _step(6, "flow update -- rename the flow") result = self._run( "flow", "update", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", flow_id, "--name", @@ -5236,14 +5288,12 @@ def test_flow_crud_and_schedule(self, tmp_path: Path) -> None: updated = json.loads(result.output)["data"] assert updated["id"] == flow_id - _step(6, "flow schedule -- attach a cron schedule") + _step(7, "flow schedule -- attach a cron schedule") result = self._run( "flow", "schedule", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", flow_id, "--cron", @@ -5255,14 +5305,12 @@ def test_flow_crud_and_schedule(self, tmp_path: Path) -> None: assert sched["config_id"] == flow_id assert sched["cron_tab"] == "0 6 * * *" - _step(7, "flow schedule-remove -- remove schedule, idempotent") + _step(8, "flow schedule-remove -- remove schedule, idempotent") result = self._run( "flow", "schedule-remove", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", flow_id, "--yes", @@ -5277,8 +5325,6 @@ def test_flow_crud_and_schedule(self, tmp_path: Path) -> None: "schedule-remove", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", flow_id, "--yes", @@ -5286,14 +5332,12 @@ def test_flow_crud_and_schedule(self, tmp_path: Path) -> None: assert result2.exit_code == 0 assert json.loads(result2.output)["data"]["deleted_count"] == 0 - _step(8, "flow delete -- delete the flow") + _step(9, "flow delete -- delete the flow") result = self._run( "flow", "delete", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", flow_id, "--yes", @@ -5305,284 +5349,44 @@ def test_flow_crud_and_schedule(self, tmp_path: Path) -> None: # Remove from cleanup list since we deleted 
it self._created_flows.remove(("keboola.flow", flow_id)) - def test_flow_update_preserves_behavior_onerror(self, tmp_path: Path) -> None: - """Verify that ``kbagent flow update`` preserves ``behavior.onError``. - - If any assertion fails, the pilot agent prompt must route flow writes - through the ``kbagent serve`` REST API + direct API instead of - ``kbagent flow update`` as the first choice. - - Covered scenarios: - A. Rename-only update (no ``--file``) must leave behavior intact. - B. ``--file`` update with explicit behavior must propagate the - supplied value (documented pass-through). - C. ``--file`` update where phases omit behavior documents the - actual server response (strip vs default-applied). Printed - diagnostically; not a hard assertion since the strip itself - is expected replace-semantics, not a bug. - """ - import yaml as _yaml - - initial_def = { - "phases": [ - { - "id": 1, - "name": "Phase One", - "dependsOn": [], - "behavior": {"onError": "warning"}, - }, - { - "id": 2, - "name": "Phase Two", - "dependsOn": [1], - "behavior": {"onError": "stop"}, - }, - ], - "tasks": [ - { - "id": 1, - "name": "Phase 1 task", - "phase": 1, - "enabled": True, - "continueOnFailure": False, - "task": { - "mode": "run", - "componentId": "keboola.ex-db-snowflake", - "configId": "nonexistent-placeholder-1", - }, - }, - { - "id": 2, - "name": "Phase 2 task", - "phase": 2, - "enabled": True, - "continueOnFailure": False, - "task": { - "mode": "run", - "componentId": "keboola.ex-db-snowflake", - "configId": "nonexistent-placeholder-2", - }, - }, - ], - } - - initial_yaml = tmp_path / "flow_initial.yaml" - initial_yaml.write_text(_yaml.safe_dump(initial_def)) + def test_flow_validation_rejects_invalid_definition(self, tmp_path: Path) -> None: + """flow new with a task referencing a missing phase must fail with + INVALID_FLOW_DEFINITION (semantic validation, which always runs -- + independent of whether the live schema fetch succeeds).""" + bad = ( + "phases:\n" + ' - id: 
"p1"\n' + ' name: "P1"\n' + " next:\n" + ' - id: "n"\n' + " goto: null\n" + "tasks:\n" + ' - id: "t1"\n' + ' name: "T1"\n' + ' phase: "ghost"\n' + " enabled: true\n" + " task:\n" + " type: job\n" + ' componentId: "keboola.ex-http"\n' + ' configId: "1"\n' + " mode: run\n" + ) + bad_file = tmp_path / "bad.yaml" + bad_file.write_text(bad, encoding="utf-8") - _step(1, "flow new -- create flow with behavior.onError on both phases") result = self._run( "flow", "new", "--project", self.alias, - "--component-id", - "keboola.flow", "--name", - f"{RUN_ID}-behavior-flow", + f"{RUN_ID}-invalid", "--file", - f"@{initial_yaml}", - ) - assert result.exit_code == 0, result.output - created = json.loads(result.output)["data"] - flow_id = created["id"] - self._created_flows.append(("keboola.flow", flow_id)) - - _step(2, "verify behavior stored correctly on creation") - detail = self._run_ok( - "flow", - "detail", - "--project", - self.alias, - "--component-id", - "keboola.flow", - "--flow-id", - flow_id, + f"@{bad_file}", ) - phases = detail["data"]["phases"] - assert len(phases) == 2, f"Expected 2 phases, got {len(phases)}" - assert phases[0].get("behavior", {}).get("onError") == "warning", ( - f"Create did not store phases[0].behavior.onError correctly. " - f"Got: {phases[0].get('behavior')!r}" - ) - assert phases[1].get("behavior", {}).get("onError") == "stop", ( - f"Create did not store phases[1].behavior.onError correctly. 
" - f"Got: {phases[1].get('behavior')!r}" - ) - - # --- Scenario A: rename-only update, no --file ----------------- - _step(3, "Scenario A -- rename only (no --file); behavior must survive") - result = self._run( - "flow", - "update", - "--project", - self.alias, - "--component-id", - "keboola.flow", - "--flow-id", - flow_id, - "--name", - f"{RUN_ID}-behavior-flow-renamed", - ) - assert result.exit_code == 0, result.output - after_rename = self._run_ok( - "flow", - "detail", - "--project", - self.alias, - "--component-id", - "keboola.flow", - "--flow-id", - flow_id, - ) - rphases = after_rename["data"]["phases"] - assert rphases[0].get("behavior", {}).get("onError") == "warning", ( - "BLOCKER: rename-only flow update stripped " - f"phases[0].behavior.onError. Expected 'warning', got " - f"{rphases[0].get('behavior')!r}. Plan §6.6 tool matrix must be " - "revised -- 'kbagent flow update' is NOT safe for partial updates." - ) - assert rphases[1].get("behavior", {}).get("onError") == "stop", ( - "BLOCKER: rename-only flow update stripped " - f"phases[1].behavior.onError. Expected 'stop', got " - f"{rphases[1].get('behavior')!r}." 
- ) - - # --- Scenario B: --file with explicit (changed) behavior ------- - _step(4, "Scenario B -- --file with explicit behavior; pass-through") - v2_def = { - "phases": [ - { - "id": 1, - "name": "Phase One", - "dependsOn": [], - "behavior": {"onError": "stop"}, # flipped - }, - { - "id": 2, - "name": "Phase Two", - "dependsOn": [1], - "behavior": {"onError": "warning"}, # flipped - }, - ], - "tasks": initial_def["tasks"], - } - v2_yaml = tmp_path / "flow_v2.yaml" - v2_yaml.write_text(_yaml.safe_dump(v2_def)) - - result = self._run( - "flow", - "update", - "--project", - self.alias, - "--component-id", - "keboola.flow", - "--flow-id", - flow_id, - "--file", - f"@{v2_yaml}", - ) - assert result.exit_code == 0, result.output - after_v2 = self._run_ok( - "flow", - "detail", - "--project", - self.alias, - "--component-id", - "keboola.flow", - "--flow-id", - flow_id, - ) - v2phases = after_v2["data"]["phases"] - assert v2phases[0].get("behavior", {}).get("onError") == "stop", ( - "--file with explicit behavior did not propagate: " - f"expected 'stop', got {v2phases[0].get('behavior')!r}" - ) - assert v2phases[1].get("behavior", {}).get("onError") == "warning", ( - "--file with explicit behavior did not propagate: " - f"expected 'warning', got {v2phases[1].get('behavior')!r}" - ) - - # --- Scenario C: --file WITHOUT behavior (document actual) ----- - _step(5, "Scenario C -- --file without behavior; document server response") - v3_def = { - "phases": [ - {"id": 1, "name": "Phase One", "dependsOn": []}, - {"id": 2, "name": "Phase Two", "dependsOn": [1]}, - ], - "tasks": initial_def["tasks"], - } - v3_yaml = tmp_path / "flow_v3.yaml" - v3_yaml.write_text(_yaml.safe_dump(v3_def)) - - result = self._run( - "flow", - "update", - "--project", - self.alias, - "--component-id", - "keboola.flow", - "--flow-id", - flow_id, - "--file", - f"@{v3_yaml}", - ) - assert result.exit_code == 0, result.output - after_v3 = self._run_ok( - "flow", - "detail", - "--project", - self.alias, - 
"--component-id", - "keboola.flow", - "--flow-id", - flow_id, - ) - v3phases = after_v3["data"]["phases"] - assert len(v3phases) == 2 - # Diagnostic: capture what Keboola did with a behavior-less phase - # (either echoes empty dict, fills default, or omits the field entirely) - print( - f"\n [DIAGNOSTIC] --file without behavior -> " - f"phases[0].behavior = {v3phases[0].get('behavior')!r}, " - f"phases[1].behavior = {v3phases[1].get('behavior')!r}" - ) - - def test_flow_dag_validation_rejects_cycle(self) -> None: - """flow new with a cyclic phase dependency must fail with INVALID_FLOW_DAG.""" - cyclic_yaml = ( - "phases:\n" - " - id: 1\n name: A\n dependsOn: [2]\n" - " - id: 2\n name: B\n dependsOn: [1]\n" - "tasks: []\n" - ) - import tempfile - - with tempfile.NamedTemporaryFile( - mode="w", suffix=".yaml", delete=False, encoding="utf-8" - ) as f: - f.write(cyclic_yaml) - yaml_path = f.name - - try: - result = self._run( - "flow", - "new", - "--project", - self.alias, - "--component-id", - "keboola.flow", - "--name", - f"{RUN_ID}-cyclic", - "--file", - f"@{yaml_path}", - ) - assert result.exit_code != 0 - out = json.loads(result.output) - assert out["error"]["code"] == "INVALID_FLOW_DAG" - finally: - import os as _os - - _os.unlink(yaml_path) + assert result.exit_code != 0 + out = json.loads(result.output) + assert out["error"]["code"] == "INVALID_FLOW_DEFINITION" def test_flow_list_no_project_returns_all(self) -> None: """flow list without --project returns flows from all registered projects.""" @@ -5591,26 +5395,29 @@ def test_flow_list_no_project_returns_all(self) -> None: data = json.loads(result.output)["data"] assert "flows" in data assert "errors" in data + assert "legacy_orchestrator_count" in data - def test_flow_list_with_schedules(self) -> None: + def test_flow_list_with_schedules(self, tmp_path: Path) -> None: """flow list --with-schedules enriches rows with schedule metadata. 
Creates a flow + schedule, verifies the enrichment appears on the - correct flow row (and is empty on other flows), then cleans up. + correct flow row, then cleans up. """ - # Create a flow + cf_file = self._write_cf(tmp_path, name="cf-ws.yaml") result = self._run( "flow", "new", "--project", self.alias, - "--component-id", - "keboola.flow", "--name", f"{RUN_ID}-flow-ws", "--description", "E2E with-schedules test", + "--file", + f"@{cf_file}", ) + if result.exit_code != 0 and "conditional" in result.output.lower(): + pytest.skip("Project reports conditional_flows=false; skipping CF E2E") assert result.exit_code == 0, result.output flow_id = json.loads(result.output)["data"]["id"] self._created_flows.append(("keboola.flow", flow_id)) @@ -5621,8 +5428,6 @@ def test_flow_list_with_schedules(self) -> None: "schedule", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", flow_id, "--cron", @@ -5653,8 +5458,6 @@ def test_flow_list_with_schedules(self) -> None: "schedule-remove", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", flow_id, "--yes", @@ -5703,6 +5506,26 @@ def setup(self, tmp_path: Path) -> Generator[None, None, None]: assert result.exit_code == 0, f"project add failed: {result.output}" # Create a flow + schedule up-front so every test has data to work with. 
+ cf_file = tmp_path / "sched-cf.yaml" + cf_file.write_text( + "phases:\n" + ' - id: "p1"\n' + ' name: "P1"\n' + " next:\n" + ' - id: "n"\n' + " goto: null\n" + "tasks:\n" + ' - id: "t1"\n' + ' name: "T1"\n' + ' phase: "p1"\n' + " enabled: true\n" + " task:\n" + " type: job\n" + ' componentId: "keboola.ex-http"\n' + ' configId: "1"\n' + " mode: run\n", + encoding="utf-8", + ) flow_result = _invoke( self.config_dir, [ @@ -5711,14 +5534,16 @@ def setup(self, tmp_path: Path) -> Generator[None, None, None]: "new", "--project", self.alias, - "--component-id", - "keboola.flow", "--name", f"{RUN_ID}-sched-flow", "--description", "E2E schedule discovery fixture", + "--file", + f"@{cf_file}", ], ) + if flow_result.exit_code != 0 and "conditional" in flow_result.output.lower(): + pytest.skip("Project reports conditional_flows=false; skipping schedule E2E") assert flow_result.exit_code == 0, flow_result.output self.flow_id = json.loads(flow_result.output)["data"]["id"] self._created_flows.append(("keboola.flow", self.flow_id)) @@ -5731,8 +5556,6 @@ def setup(self, tmp_path: Path) -> Generator[None, None, None]: "schedule", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", self.flow_id, "--cron", @@ -5758,8 +5581,6 @@ def setup(self, tmp_path: Path) -> Generator[None, None, None]: "schedule-remove", "--project", self.alias, - "--component-id", - "keboola.flow", "--flow-id", self.flow_id, "--yes", diff --git a/tests/test_flow_cli.py b/tests/test_flow_cli.py index ce449dbe..1cc1025e 100644 --- a/tests/test_flow_cli.py +++ b/tests/test_flow_cli.py @@ -1,8 +1,8 @@ -"""Tests for flow CLI commands via CliRunner. +"""Tests for flow CLI commands via CliRunner (conditional flows). -Tests all flow subcommands: list, detail, schema, new, update, delete, -schedule, schedule-remove. Follows the existing CLI test pattern with -patched services in ctx.obj. +Tests all flow subcommands: list, detail, schema, validate, new, update, +delete, schedule, schedule-remove. 
Mock-service tests patch FlowService in +ctx.obj; the offline `validate` / `schema` paths exercise the real app. """ from __future__ import annotations @@ -39,17 +39,14 @@ def _setup_config(config_dir: Path, projects: dict[str, dict] | None = None) -> return store -def _run(args: list[str], store: ConfigStore) -> Any: - """Run CLI with the given args and a fresh mock flow_service.""" - mock_flow = MagicMock() +def _invoke(store: ConfigStore, mock_flow: MagicMock, args: list[str]) -> Any: with ( patch("keboola_agent_cli.cli.ConfigStore") as MockStore, patch("keboola_agent_cli.cli.FlowService") as MockFlowService, ): MockStore.return_value = store MockFlowService.return_value = mock_flow - result = runner.invoke(app, args) - return result, mock_flow + return runner.invoke(app, args) # --------------------------------------------------------------------------- @@ -65,7 +62,7 @@ def test_list_json(self, tmp_path: Path) -> None: "flows": [ { "project_alias": "prod", - "component_id": "keboola.orchestrator", + "component_id": "keboola.flow", "config_id": "111", "name": "Daily ETL", "description": "", @@ -73,15 +70,9 @@ def test_list_json(self, tmp_path: Path) -> None: } ], "errors": [], + "legacy_orchestrator_count": 0, } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke(app, ["--json", "flow", "list", "--project", "prod"]) - + result = _invoke(store, mock_flow, ["--json", "flow", "list", "--project", "prod"]) assert result.exit_code == 0, result.output data = json.loads(result.output) assert data["data"]["flows"][0]["config_id"] == "111" @@ -89,32 +80,34 @@ def test_list_json(self, tmp_path: Path) -> None: aliases=["prod"], branch_id=None, with_schedules=False ) - def test_list_empty(self, tmp_path: Path) -> None: + def test_list_legacy_count_warns(self, tmp_path: Path) -> None: store 
= _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() - mock_flow.list_flows.return_value = {"flows": [], "errors": []} - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke(app, ["flow", "list"]) + mock_flow.list_flows.return_value = { + "flows": [], + "errors": [], + "legacy_orchestrator_count": 3, + } + result = _invoke(store, mock_flow, ["flow", "list", "--project", "prod"]) + assert result.exit_code == 0, result.output + assert "3 legacy" in result.output and "orchestrator" in result.output + def test_list_empty(self, tmp_path: Path) -> None: + store = _setup_config(tmp_path / "cfg", {"prod": {}}) + mock_flow = MagicMock() + mock_flow.list_flows.return_value = { + "flows": [], + "errors": [], + "legacy_orchestrator_count": 0, + } + result = _invoke(store, mock_flow, ["flow", "list"]) assert result.exit_code == 0 def test_list_config_error(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() mock_flow.list_flows.side_effect = ConfigError("No projects") - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke(app, ["--json", "flow", "list"]) - + result = _invoke(store, mock_flow, ["--json", "flow", "list"]) assert result.exit_code == 5 def test_list_all_projects_no_project_flag(self, tmp_path: Path) -> None: @@ -124,7 +117,7 @@ def test_list_all_projects_no_project_flag(self, tmp_path: Path) -> None: "flows": [ { "project_alias": "prod", - "component_id": "keboola.orchestrator", + "component_id": "keboola.flow", "config_id": "111", "name": "Flow A", "description": "", @@ -140,19 +133,12 @@ def 
test_list_all_projects_no_project_flag(self, tmp_path: Path) -> None: }, ], "errors": [], + "legacy_orchestrator_count": 0, } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke(app, ["--json", "flow", "list"]) - + result = _invoke(store, mock_flow, ["--json", "flow", "list"]) assert result.exit_code == 0, result.output data = json.loads(result.output) assert len(data["data"]["flows"]) == 2 - # aliases=None means all projects mock_flow.list_flows.assert_called_once_with( aliases=None, branch_id=None, with_schedules=False ) @@ -160,14 +146,7 @@ def test_list_all_projects_no_project_flag(self, tmp_path: Path) -> None: def test_branch_without_project_fails(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke(app, ["--json", "flow", "list", "--branch", "42"]) - + result = _invoke(store, mock_flow, ["--json", "flow", "list", "--branch", "42"]) assert result.exit_code == 2 @@ -184,9 +163,17 @@ def _mock_detail(self) -> dict: "description": "", "configuration": {}, "project_alias": "prod", + "component_id": "keboola.flow", "branch_id": None, - "phases": [{"id": 1, "name": "P1", "dependsOn": []}], - "tasks": [{"id": 1, "name": "T1", "phase": 1, "task": {}}], + "phases": [{"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}], + "tasks": [ + { + "id": "t1", + "name": "T1", + "phase": "p1", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}, + } + ], "phase_count": 1, "task_count": 1, } @@ -195,48 +182,25 @@ def test_detail_json(self, tmp_path: Path) -> None: store 
= _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() mock_flow.get_flow_detail.return_value = self._mock_detail() - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, ["--json", "flow", "detail", "--project", "prod", "--flow-id", "flow-1"] - ) - + result = _invoke( + store, + mock_flow, + ["--json", "flow", "detail", "--project", "prod", "--flow-id", "flow-1"], + ) assert result.exit_code == 0, result.output data = json.loads(result.output) assert data["data"]["phase_count"] == 1 - def test_detail_explicit_component_id(self, tmp_path: Path) -> None: + def test_detail_human(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() mock_flow.get_flow_detail.return_value = self._mock_detail() - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "detail", - "--project", - "prod", - "--flow-id", - "flow-1", - "--component-id", - "keboola.flow", - ], - ) - + result = _invoke( + store, mock_flow, ["flow", "detail", "--project", "prod", "--flow-id", "flow-1"] + ) assert result.exit_code == 0, result.output mock_flow.get_flow_detail.assert_called_once_with( - alias="prod", component_id="keboola.flow", config_id="flow-1", branch_id=None + alias="prod", config_id="flow-1", branch_id=None ) def test_detail_not_found(self, tmp_path: Path) -> None: @@ -245,133 +209,209 @@ def test_detail_not_found(self, tmp_path: Path) -> None: mock_flow.get_flow_detail.side_effect = KeboolaApiError( message="Not found", status_code=404, error_code="NOT_FOUND", retryable=False ) - with ( - 
patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, ["--json", "flow", "detail", "--project", "prod", "--flow-id", "bad"] - ) - + result = _invoke( + store, mock_flow, ["--json", "flow", "detail", "--project", "prod", "--flow-id", "bad"] + ) assert result.exit_code == 1 +def test_component_id_flag_removed(tmp_path: Path) -> None: + # --component-id is no longer a recognized option on flow detail + store = _setup_config(tmp_path / "cfg", {"prod": {}}) + mock_flow = MagicMock() + result = _invoke( + store, + mock_flow, + ["flow", "detail", "--project", "prod", "--flow-id", "1", "--component-id", "keboola.flow"], + ) + assert result.exit_code == 2 + assert "No such option" in result.output or "no such option" in result.output.lower() + + # --------------------------------------------------------------------------- # flow schema # --------------------------------------------------------------------------- -class TestFlowSchema: - def test_schema_human(self, tmp_path: Path) -> None: - store = _setup_config(tmp_path / "cfg") - mock_flow = MagicMock() - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke(app, ["flow", "schema"]) +_LIVE_SCHEMA = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["phases", "tasks"], + "properties": { + "phases": {"type": "array"}, + "tasks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "task": { + "type": "object", + "properties": { + "type": {"type": "string", "enum": ["job", "notification", "variable"]} + }, + } + }, + }, + }, + }, +} - assert result.exit_code == 0 - assert "phases" in result.output - def 
test_schema_json(self, tmp_path: Path) -> None: - store = _setup_config(tmp_path / "cfg") - mock_flow = MagicMock() - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke(app, ["--json", "flow", "schema"]) +class TestFlowSchema: + def test_schema_default_is_conditional_template(self) -> None: + result = runner.invoke(app, ["flow", "schema"]) + assert result.exit_code == 0 + assert "next:" in result.output + assert "goto" in result.output + assert "dependsOn" not in result.output + def test_schema_json(self) -> None: + result = runner.invoke(app, ["--json", "flow", "schema"]) assert result.exit_code == 0 data = json.loads(result.output) assert "phases" in data["data"]["schema"] - def test_schema_uses_nested_task_form(self, tmp_path: Path) -> None: - """Schema output must use the nested ``task: {mode, componentId, configId}`` - form that matches the keboola-as-code convention (see flow-workflow.md).""" - store = _setup_config(tmp_path / "cfg") + def test_schema_full_without_project_errors(self, tmp_path: Path) -> None: + store = _setup_config(tmp_path) + result = _invoke(store, MagicMock(), ["flow", "schema", "--full"]) + assert result.exit_code == 2 + assert "--project" in result.output + + def test_schema_full_with_project_dumps_live_schema(self, tmp_path: Path) -> None: + store = _setup_config(tmp_path, {"prod": {}}) mock_flow = MagicMock() - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke(app, ["--json", "flow", "schema"]) + mock_flow.fetch_flow_schema.return_value = (_LIVE_SCHEMA, None) + result = _invoke(store, mock_flow, ["flow", "schema", "--full", "--project", "prod"]) + assert 
result.exit_code == 0 + assert "$schema" in result.output or "draft-07" in result.output + mock_flow.fetch_flow_schema.assert_called_once_with("prod") + def test_schema_full_with_project_json_mode(self, tmp_path: Path) -> None: + store = _setup_config(tmp_path, {"prod": {}}) + mock_flow = MagicMock() + mock_flow.fetch_flow_schema.return_value = (_LIVE_SCHEMA, None) + result = _invoke( + store, mock_flow, ["--json", "flow", "schema", "--full", "--project", "prod"] + ) assert result.exit_code == 0 - data = json.loads(result.output) - schema = data["data"]["schema"] - # Nested form: "task:" introduces a mapping with mode/componentId/configId - assert "task:" in schema - assert "mode: run" in schema - # No flat componentId/configId at task-root level (indented directly under "- id:") - # We check that "componentId:" never appears at the top indent level under tasks - - # in the nested form it's always indented further under "task:". - for line in schema.splitlines(): - stripped = line.lstrip() - if stripped.startswith("componentId:") or stripped.startswith("configId:"): - # Count indent: nested form has 6+ spaces (2 for list, 4 for task dict) - indent = len(line) - len(stripped) - assert indent >= 6, f"Found flat componentId/configId at top level: {line!r}" + payload = json.loads(result.output) + assert payload["data"]["schema"]["required"] == ["phases", "tasks"] + + def test_schema_full_fetch_failure_errors(self, tmp_path: Path) -> None: + store = _setup_config(tmp_path, {"prod": {}}) + mock_flow = MagicMock() + mock_flow.fetch_flow_schema.return_value = (None, "network down") + result = _invoke(store, mock_flow, ["flow", "schema", "--full", "--project", "prod"]) + assert result.exit_code == 4 + assert "network down" in result.output # --------------------------------------------------------------------------- -# flow new +# flow validate # --------------------------------------------------------------------------- -class TestFlowNew: - def test_new_success_json(self, 
tmp_path: Path) -> None: - store = _setup_config(tmp_path / "cfg", {"prod": {}}) - mock_flow = MagicMock() - mock_flow.create_flow.return_value = { - "id": "new-123", - "name": "My Flow", - "project_alias": "prod", - "branch_id": None, - "phase_count": 0, - "task_count": 0, - } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, ["--json", "flow", "new", "--project", "prod", "--name", "My Flow"] - ) +_VALID_FLOW_YAML = """ +phases: + - id: "p1" + name: "P1" + next: + - id: "n" + goto: null +tasks: + - id: "t1" + name: "T1" + phase: "p1" + enabled: true + task: + type: job + componentId: "keboola.ex-http" + configId: "1" + mode: run +""" - assert result.exit_code == 0, result.output - data = json.loads(result.output) - assert data["data"]["id"] == "new-123" - def test_new_api_error(self, tmp_path: Path) -> None: - store = _setup_config(tmp_path / "cfg", {"prod": {}}) +class TestFlowValidate: + def test_validate_valid_semantic_only(self, tmp_path: Path) -> None: + f = tmp_path / "flow.yaml" + f.write_text(_VALID_FLOW_YAML) + result = runner.invoke(app, ["flow", "validate", "--file", f"@{f}"]) + assert result.exit_code == 0 + + def test_validate_no_project_notes_structural_skipped(self, tmp_path: Path) -> None: + f = tmp_path / "flow.yaml" + f.write_text(_VALID_FLOW_YAML) + result = runner.invoke(app, ["--json", "flow", "validate", "--file", f"@{f}"]) + assert result.exit_code == 0 + payload = json.loads(result.output) + assert payload["data"]["valid"] is True + assert any("structural schema validation skipped" in n for n in payload["data"]["notes"]) + + def test_validate_no_project_note_human(self, tmp_path: Path) -> None: + f = tmp_path / "flow.yaml" + f.write_text(_VALID_FLOW_YAML) + result = runner.invoke(app, ["flow", "validate", "--file", f"@{f}"]) + assert 
result.exit_code == 0 + assert "structural schema validation skipped" in result.output + + def test_validate_invalid_exit_2(self, tmp_path: Path) -> None: + bad = _VALID_FLOW_YAML.replace('phase: "p1"', 'phase: "ghost"') + f = tmp_path / "bad.yaml" + f.write_text(bad) + result = runner.invoke(app, ["--json", "flow", "validate", "--file", f"@{f}"]) + assert result.exit_code == 2 + payload = json.loads(result.output) + assert payload["data"]["valid"] is False + assert payload["data"]["errors"] + + def test_validate_json_valid_lists_warnings(self, tmp_path: Path) -> None: + f = tmp_path / "flow.yaml" + f.write_text(_VALID_FLOW_YAML) + result = runner.invoke(app, ["--json", "flow", "validate", "--file", f"@{f}"]) + assert result.exit_code == 0 + payload = json.loads(result.output) + assert payload["data"]["valid"] is True + assert payload["data"]["errors"] == [] + assert "warnings" in payload["data"] + + def test_validate_with_project_full_validation(self, tmp_path: Path) -> None: + # Live schema fetched -> bad task type caught structurally (exit 2). 
+ store = _setup_config(tmp_path, {"prod": {}}) mock_flow = MagicMock() - mock_flow.create_flow.side_effect = KeboolaApiError( - message="Server error", status_code=500, error_code="API_ERROR", retryable=True + mock_flow.fetch_flow_schema.return_value = (_LIVE_SCHEMA, None) + bad = _VALID_FLOW_YAML.replace("type: job", "type: nonsense") + f = tmp_path / "bad.yaml" + f.write_text(bad) + result = _invoke( + store, mock_flow, ["--json", "flow", "validate", "--file", f"@{f}", "--project", "prod"] ) - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, ["--json", "flow", "new", "--project", "prod", "--name", "Bad"] - ) + assert result.exit_code == 2 + payload = json.loads(result.output) + assert payload["data"]["valid"] is False + mock_flow.fetch_flow_schema.assert_called_once_with("prod") + + def test_validate_with_project_fetch_failure_degrades(self, tmp_path: Path) -> None: + # Schema fetch fails -> semantic-only, valid flow still passes + a note. 
+ store = _setup_config(tmp_path, {"prod": {}}) + mock_flow = MagicMock() + mock_flow.fetch_flow_schema.return_value = (None, "network down") + f = tmp_path / "flow.yaml" + f.write_text(_VALID_FLOW_YAML) + result = _invoke( + store, mock_flow, ["--json", "flow", "validate", "--file", f"@{f}", "--project", "prod"] + ) + assert result.exit_code == 0 + payload = json.loads(result.output) + assert payload["data"]["valid"] is True + assert any("network down" in n for n in payload["data"]["notes"]) + + +# --------------------------------------------------------------------------- +# flow new +# --------------------------------------------------------------------------- - assert result.exit_code == 1 +class TestFlowNew: def test_new_from_yaml_file(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() @@ -382,63 +422,61 @@ def test_new_from_yaml_file(self, tmp_path: Path) -> None: "branch_id": None, "phase_count": 1, "task_count": 1, + "warnings": [], } flow_yaml = tmp_path / "flow.yaml" - flow_yaml.write_text( - "phases:\n - id: 1\n dependsOn: []\ntasks:\n - id: 1\n phase: 1\n", - encoding="utf-8", + flow_yaml.write_text(_VALID_FLOW_YAML, encoding="utf-8") + result = _invoke( + store, + mock_flow, + [ + "--json", + "flow", + "new", + "--project", + "prod", + "--name", + "YAML Flow", + "--file", + f"@{flow_yaml}", + ], ) - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "new", - "--project", - "prod", - "--name", - "YAML Flow", - "--file", - f"@{flow_yaml}", - ], - ) - assert result.exit_code == 0, result.output call_kwargs = mock_flow.create_flow.call_args.kwargs assert len(call_kwargs["phases"]) == 1 + assert "component_id" not in call_kwargs + + def test_new_api_error(self, tmp_path: Path) -> 
None: + store = _setup_config(tmp_path / "cfg", {"prod": {}}) + mock_flow = MagicMock() + mock_flow.create_flow.side_effect = KeboolaApiError( + message="Server error", status_code=500, error_code="API_ERROR", retryable=True + ) + result = _invoke( + store, mock_flow, ["--json", "flow", "new", "--project", "prod", "--name", "Bad"] + ) + assert result.exit_code == 1 def test_new_invalid_yaml_type_exits_2(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() bad_yaml = tmp_path / "bad.yaml" bad_yaml.write_text("- just a list\n- not a mapping\n", encoding="utf-8") - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "new", - "--project", - "prod", - "--name", - "Bad", - "--file", - f"@{bad_yaml}", - ], - ) - + result = _invoke( + store, + mock_flow, + [ + "--json", + "flow", + "new", + "--project", + "prod", + "--name", + "Bad", + "--file", + f"@{bad_yaml}", + ], + ) assert result.exit_code == 2 mock_flow.create_flow.assert_not_called() @@ -458,42 +496,30 @@ def test_update_name(self, tmp_path: Path) -> None: "project_alias": "prod", "branch_id": None, } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "update", - "--project", - "prod", - "--flow-id", - "1", - "--name", - "New Name", - ], - ) - + result = _invoke( + store, + mock_flow, + [ + "--json", + "flow", + "update", + "--project", + "prod", + "--flow-id", + "1", + "--name", + "New Name", + ], + ) assert result.exit_code == 0, result.output + assert "component_id" not in mock_flow.update_flow.call_args.kwargs def 
test_update_without_anything_fails(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, ["--json", "flow", "update", "--project", "prod", "--flow-id", "1"] - ) - + result = _invoke( + store, mock_flow, ["--json", "flow", "update", "--project", "prod", "--flow-id", "1"] + ) assert result.exit_code == 2 @@ -509,61 +535,32 @@ def test_delete_with_yes(self, tmp_path: Path) -> None: mock_flow.delete_flow.return_value = { "status": "deleted", "project_alias": "prod", - "component_id": "keboola.orchestrator", + "component_id": "keboola.flow", "config_id": "1", "branch_id": None, } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "delete", - "--project", - "prod", - "--flow-id", - "1", - "--yes", - ], - ) - + result = _invoke( + store, + mock_flow, + ["--json", "flow", "delete", "--project", "prod", "--flow-id", "1", "--yes"], + ) assert result.exit_code == 0, result.output data = json.loads(result.output) assert data["data"]["status"] == "deleted" + assert "component_id" not in mock_flow.delete_flow.call_args.kwargs def test_delete_dry_run_does_not_call_service(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "delete", - 
"--project", - "prod", - "--flow-id", - "1", - "--dry-run", - ], - ) - + result = _invoke( + store, + mock_flow, + ["--json", "flow", "delete", "--project", "prod", "--flow-id", "1", "--dry-run"], + ) assert result.exit_code == 0, result.output data = json.loads(result.output) assert data["data"]["would_delete"]["config_id"] == "1" - assert data["data"]["would_delete"]["component_id"] == "keboola.orchestrator" + assert data["data"]["would_delete"]["component_id"] == "keboola.flow" mock_flow.delete_flow.assert_not_called() @@ -581,38 +578,32 @@ def test_schedule_success_json(self, tmp_path: Path) -> None: "project_alias": "prod", "schedule_id": "sched-99", "schedule_name": "Daily Run (Schedule)", - "component_id": "keboola.orchestrator", + "component_id": "keboola.flow", "config_id": "flow-1", "cron_tab": "0 6 * * *", "timezone": "UTC", "state": "enabled", "branch_id": None, } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "schedule", - "--project", - "prod", - "--flow-id", - "flow-1", - "--cron", - "0 6 * * *", - ], - ) - + result = _invoke( + store, + mock_flow, + [ + "--json", + "flow", + "schedule", + "--project", + "prod", + "--flow-id", + "flow-1", + "--cron", + "0 6 * * *", + ], + ) assert result.exit_code == 0, result.output data = json.loads(result.output) assert data["data"]["schedule_id"] == "sched-99" - mock_flow.set_flow_schedule.assert_called_once() + assert "component_id" not in mock_flow.set_flow_schedule.call_args.kwargs def test_schedule_with_timezone_and_disabled(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) @@ -622,37 +613,31 @@ def test_schedule_with_timezone_and_disabled(self, tmp_path: Path) -> None: "project_alias": "prod", "schedule_id": "sched-tz", "schedule_name": "Flow 
(Schedule)", - "component_id": "keboola.orchestrator", + "component_id": "keboola.flow", "config_id": "flow-1", "cron_tab": "0 8 * * 1-5", "timezone": "Europe/Prague", "state": "disabled", "branch_id": None, } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "schedule", - "--project", - "prod", - "--flow-id", - "flow-1", - "--cron", - "0 8 * * 1-5", - "--timezone", - "Europe/Prague", - "--disabled", - ], - ) - + result = _invoke( + store, + mock_flow, + [ + "--json", + "flow", + "schedule", + "--project", + "prod", + "--flow-id", + "flow-1", + "--cron", + "0 8 * * 1-5", + "--timezone", + "Europe/Prague", + "--disabled", + ], + ) assert result.exit_code == 0, result.output call_kwargs = mock_flow.set_flow_schedule.call_args.kwargs assert call_kwargs["timezone"] == "Europe/Prague" @@ -671,42 +656,37 @@ def test_remove_with_yes(self, tmp_path: Path) -> None: mock_flow.remove_flow_schedule.return_value = { "status": "removed", "project_alias": "prod", - "component_id": "keboola.orchestrator", + "component_id": "keboola.flow", "config_id": "flow-1", "deleted_schedule_ids": ["sched-1"], "deleted_count": 1, "branch_id": None, } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "schedule-remove", - "--project", - "prod", - "--flow-id", - "flow-1", - "--yes", - ], - ) - + result = _invoke( + store, + mock_flow, + [ + "--json", + "flow", + "schedule-remove", + "--project", + "prod", + "--flow-id", + "flow-1", + "--yes", + ], + ) assert result.exit_code == 0, result.output data = json.loads(result.output) assert 
data["data"]["deleted_count"] == 1 + assert "component_id" not in mock_flow.remove_flow_schedule.call_args.kwargs def test_schedule_remove_dry_run_lists_schedules(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) mock_flow = MagicMock() mock_flow.list_flow_schedules.return_value = { "project_alias": "prod", - "component_id": "keboola.orchestrator", + "component_id": "keboola.flow", "config_id": "flow-1", "schedules": [ { @@ -718,26 +698,20 @@ def test_schedule_remove_dry_run_lists_schedules(self, tmp_path: Path) -> None: } ], } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "schedule-remove", - "--project", - "prod", - "--flow-id", - "flow-1", - "--dry-run", - ], - ) - + result = _invoke( + store, + mock_flow, + [ + "--json", + "flow", + "schedule-remove", + "--project", + "prod", + "--flow-id", + "flow-1", + "--dry-run", + ], + ) assert result.exit_code == 0, result.output data = json.loads(result.output) assert data["data"]["would_delete"]["count"] == 1 @@ -749,31 +723,86 @@ def test_schedule_remove_dry_run_no_schedules(self, tmp_path: Path) -> None: mock_flow = MagicMock() mock_flow.list_flow_schedules.return_value = { "project_alias": "prod", - "component_id": "keboola.orchestrator", + "component_id": "keboola.flow", "config_id": "flow-1", "schedules": [], } - with ( - patch("keboola_agent_cli.cli.ConfigStore") as MockStore, - patch("keboola_agent_cli.cli.FlowService") as MockFlowService, - ): - MockStore.return_value = store - MockFlowService.return_value = mock_flow - result = runner.invoke( - app, - [ - "--json", - "flow", - "schedule-remove", - "--project", - "prod", - "--flow-id", - "flow-1", - "--dry-run", - ], - ) - + result = _invoke( + store, + mock_flow, + [ + "--json", + "flow", + 
"schedule-remove", + "--project", + "prod", + "--flow-id", + "flow-1", + "--dry-run", + ], + ) assert result.exit_code == 0, result.output data = json.loads(result.output) assert data["data"]["would_delete"]["count"] == 0 - mock_flow.remove_flow_schedule.assert_not_called() + + +# --------------------------------------------------------------------------- +# detail rendering (pure formatter unit) +# --------------------------------------------------------------------------- + + +def test_format_flow_detail_renders_transitions_and_badges(capsys) -> None: + from keboola_agent_cli.commands.flow import _format_flow_detail + from keboola_agent_cli.output import OutputFormatter + + formatter = OutputFormatter(json_mode=False) + detail = { + "name": "My CF", + "id": "100", + "phases": [ + { + "id": "p1", + "name": "Extract", + "next": [ + { + "id": "c", + "goto": "p2", + "condition": { + "type": "operator", + "operator": "ANY_TASKS_IN_PHASE", + "phase": "p1", + "operands": [], + }, + }, + {"id": "d", "goto": None}, + ], + }, + {"id": "p2", "name": "Transform"}, + ], + "tasks": [ + { + "id": "t1", + "name": "Run", + "phase": "p1", + "enabled": True, + "task": { + "type": "job", + "componentId": "keboola.ex-http", + "configId": "9", + "mode": "run", + }, + }, + { + "id": "t2", + "name": "Notify", + "phase": "p2", + "task": {"type": "notification", "title": "x", "recipients": []}, + }, + ], + } + _format_flow_detail(formatter, detail) + out = capsys.readouterr().out + assert "Extract" in out and "Transform" in out + assert "→" in out # transition arrow + assert "default" in out.lower() # condition-less transition labeled + assert "job" in out and "notification" in out # task type badges diff --git a/tests/test_flow_service.py b/tests/test_flow_service.py index a108afd7..d5909372 100644 --- a/tests/test_flow_service.py +++ b/tests/test_flow_service.py @@ -1,7 +1,4 @@ -"""Unit tests for FlowService. - -Tests business logic in isolation using mocked KeboolaClient. 
-""" +"""Unit tests for FlowService (conditional flows only).""" from __future__ import annotations @@ -9,12 +6,12 @@ import pytest -from keboola_agent_cli.errors import KeboolaApiError +from keboola_agent_cli.errors import ErrorCode, KeboolaApiError from keboola_agent_cli.services.flow_service import ( + FLOW_COMPONENT_ID, FlowService, _count_phases_tasks, _parse_configuration, - _validate_dag, ) # --------------------------------------------------------------------------- @@ -35,11 +32,76 @@ def _mock_config_store(projects: dict) -> MagicMock: return cs -def _make_flow_service(mock_client: MagicMock, projects: dict | None = None) -> FlowService: +# A minimal keboola.flow configurationSchema for tests -- enough to exercise the +# structural-validation path without touching the network. +_FLOW_SCHEMA: dict = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["phases", "tasks"], + "properties": { + "phases": {"type": "array"}, + "tasks": { + "type": "array", + "items": { + "type": "object", + "properties": { + "task": { + "type": "object", + "properties": { + "type": {"type": "string", "enum": ["job", "notification", "variable"]} + }, + } + }, + }, + }, + }, +} + + +def _make_ai_client(schema: dict | None = _FLOW_SCHEMA, raise_exc: Exception | None = None): + """Build a mock AiServiceClient returning a keboola.flow component detail.""" + ai = MagicMock() + if raise_exc is not None: + ai.get_component_detail.side_effect = raise_exc + else: + ai.get_component_detail.return_value = { + "componentId": FLOW_COMPONENT_ID, + "componentName": "Conditional Flow", + "componentType": "other", + "configurationSchema": schema or {}, + } + return ai + + +def _make_flow_service( + mock_client: MagicMock, + projects: dict | None = None, + ai_client: MagicMock | None = None, +) -> FlowService: if projects is None: projects = {"prod": {"url": "https://connection.keboola.com", "token": "tok"}} cs = _mock_config_store(projects) - return 
FlowService(config_store=cs, client_factory=lambda url, tok: mock_client) + ai = ai_client if ai_client is not None else _make_ai_client() + return FlowService( + config_store=cs, + client_factory=lambda url, token: mock_client, + ai_client_factory=lambda url, token: ai, + ) + + +def _valid_body(): + phases = [ + {"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}, + ] + tasks = [ + { + "id": "t1", + "name": "T1", + "phase": "p1", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}, + }, + ] + return phases, tasks # --------------------------------------------------------------------------- @@ -64,454 +126,306 @@ def test_none_returns_empty(self): class TestCountPhasesTasks: def test_counts(self): - body = {"phases": [{"id": 1}, {"id": 2}], "tasks": [{"id": 1}]} + body = {"phases": [{"id": "a"}, {"id": "b"}], "tasks": [{"id": "1"}]} assert _count_phases_tasks(body) == (2, 1) def test_empty(self): assert _count_phases_tasks({}) == (0, 0) -class TestValidateDag: - def test_valid_linear(self): - phases = [ - {"id": 1, "dependsOn": []}, - {"id": 2, "dependsOn": [1]}, - ] - tasks = [{"id": 1, "phase": 1}, {"id": 2, "phase": 2}] - assert _validate_dag(phases, tasks) == [] - - def test_empty_phases(self): - assert _validate_dag([], []) == [] - - def test_unknown_phase_dependency(self): - phases = [{"id": 1, "dependsOn": [99]}] - errors = _validate_dag(phases, []) - assert any("unknown phase" in e for e in errors) - - def test_task_references_unknown_phase(self): - phases = [{"id": 1, "dependsOn": []}] - tasks = [{"id": 1, "phase": 99}] - errors = _validate_dag(phases, tasks) - assert any("unknown phase" in e for e in errors) - - def test_cycle_detected(self): - phases = [ - {"id": 1, "dependsOn": [2]}, - {"id": 2, "dependsOn": [1]}, - ] - errors = _validate_dag(phases, []) - assert any("cycle" in e for e in errors) - - def test_diamond_dag_valid(self): - phases = [ - {"id": 1, "dependsOn": []}, - {"id": 2, "dependsOn": [1]}, - {"id": 
3, "dependsOn": [1]}, - {"id": 4, "dependsOn": [2, 3]}, - ] - assert _validate_dag(phases, []) == [] - - # --------------------------------------------------------------------------- -# FlowService.list_flows +# Component constant # --------------------------------------------------------------------------- -class TestListFlows: - def test_aggregates_both_component_ids(self): - client = MagicMock() - client.list_component_configs.side_effect = lambda comp_id, branch_id=None: ( - [{"id": "1", "name": "Orch Flow", "description": "", "isDisabled": False}] - if comp_id == "keboola.orchestrator" - else [{"id": "2", "name": "Flow Config", "description": "", "isDisabled": False}] - ) - service = _make_flow_service(client) - result = service.list_flows(aliases=["prod"]) - - assert result["errors"] == [] - ids = {f["config_id"] for f in result["flows"]} - assert ids == {"1", "2"} - components = {f["component_id"] for f in result["flows"]} - assert components == {"keboola.orchestrator", "keboola.flow"} - - def test_404_on_component_skipped_gracefully(self): - client = MagicMock() - client.list_component_configs.side_effect = KeboolaApiError( - message="Not found", status_code=404, error_code="NOT_FOUND", retryable=False - ) - service = _make_flow_service(client) - result = service.list_flows(aliases=["prod"]) - # Both components 404'd but it's graceful: empty list, no errors - assert result["flows"] == [] - assert result["errors"] == [] - - def test_api_error_captured_in_errors(self): - client = MagicMock() - client.list_component_configs.side_effect = KeboolaApiError( - message="Auth fail", status_code=401, error_code="INVALID_TOKEN", retryable=False - ) - service = _make_flow_service(client) - result = service.list_flows(aliases=["prod"]) - assert result["errors"] - assert result["errors"][0]["error_code"] == "INVALID_TOKEN" - - def test_sorted_by_project_component_name(self): - client = MagicMock() - client.list_component_configs.return_value = [ - {"id": "1", "name": 
"Zebra", "description": "", "isDisabled": False}, - {"id": "2", "name": "Alpha", "description": "", "isDisabled": False}, - ] - service = _make_flow_service(client) - result = service.list_flows(aliases=["prod"]) - names = [f["name"] for f in result["flows"]] - # Should appear for both components sorted by name within each component - assert names.index("Alpha") < names.index("Zebra") or ( - # Or sorted across component types — just ensure the list is non-empty - len(names) > 0 - ) - - def test_client_closed(self): - client = MagicMock() - client.list_component_configs.return_value = [] - service = _make_flow_service(client) - service.list_flows(aliases=["prod"]) - client.close.assert_called() +def test_component_id_constant(): + assert FLOW_COMPONENT_ID == "keboola.flow" # --------------------------------------------------------------------------- -# FlowService.get_flow_detail +# create_flow # --------------------------------------------------------------------------- -class TestGetFlowDetail: - def test_returns_phases_and_tasks(self): - client = MagicMock() - client.get_config_detail.return_value = { - "id": "123", - "name": "My Flow", - "description": "", - "configuration": { - "phases": [{"id": 1, "name": "Phase 1", "dependsOn": []}], - "tasks": [{"id": 1, "name": "Task 1", "phase": 1}], - }, +def test_create_flow_rejects_invalid_definition(): + client = MagicMock() + svc = _make_flow_service(client) + # task references a phase that does not exist -> semantic error + phases = [{"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}] + tasks = [ + { + "id": "t1", + "name": "T1", + "phase": "ghost", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}, } - service = _make_flow_service(client) - result = service.get_flow_detail("prod", "keboola.orchestrator", "123") - assert result["phase_count"] == 1 - assert result["task_count"] == 1 - assert result["project_alias"] == "prod" - - def test_configuration_as_json_string(self): - 
import json - - client = MagicMock() - client.get_config_detail.return_value = { - "id": "123", - "name": "My Flow", - "configuration": json.dumps({"phases": [{"id": 1}], "tasks": []}), + ] + with pytest.raises(KeboolaApiError) as exc: + svc.create_flow(alias="prod", name="F", phases=phases, tasks=tasks) + assert exc.value.error_code == ErrorCode.INVALID_FLOW_DEFINITION + + +def test_create_flow_uses_keboola_flow_component(): + client = MagicMock() + client.create_config.return_value = {"id": "999", "name": "F"} + svc = _make_flow_service(client) + phases, tasks = _valid_body() + result = svc.create_flow(alias="prod", name="F", phases=phases, tasks=tasks) + assert client.create_config.call_args.kwargs["component_id"] == "keboola.flow" + assert result["id"] == "999" + + +def test_create_flow_attaches_unreachable_warnings(): + client = MagicMock() + client.create_config.return_value = {"id": "999", "name": "F"} + svc = _make_flow_service(client) + phases = [ + {"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}, + {"id": "island", "name": "Island"}, # unreachable + ] + tasks = [ + { + "id": "t1", + "name": "T1", + "phase": "p1", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}, + }, + { + "id": "t2", + "name": "T2", + "phase": "island", + "task": {"type": "job", "componentId": "c", "configId": "2", "mode": "run"}, + }, + ] + result = svc.create_flow(alias="prod", name="F", phases=phases, tasks=tasks) + assert any("island" in w for w in result["warnings"]) + + +def test_create_flow_full_validation_rejects_bad_structure(): + # Live schema present -> structural validation catches the bad task type. 
+ client = MagicMock() + client.create_config.return_value = {"id": "999", "name": "F"} + svc = _make_flow_service(client) + phases = [{"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}] + tasks = [ + { + "id": "t1", + "name": "T1", + "phase": "p1", + "task": {"type": "nonsense", "componentId": "c", "configId": "1", "mode": "run"}, } - service = _make_flow_service(client) - result = service.get_flow_detail("prod", "keboola.orchestrator", "123") - assert result["phase_count"] == 1 + ] + with pytest.raises(KeboolaApiError) as exc: + svc.create_flow(alias="prod", name="F", phases=phases, tasks=tasks) + assert exc.value.error_code == ErrorCode.INVALID_FLOW_DEFINITION + client.create_config.assert_not_called() + + +def test_create_flow_schema_fetch_failure_degrades_to_semantic_only(): + # AI Service raises -> structural validation skipped, semantic checks run, + # write proceeds with a warning. The bad task type slips through (no schema). + client = MagicMock() + client.create_config.return_value = {"id": "999", "name": "F"} + ai = _make_ai_client( + raise_exc=KeboolaApiError("boom", status_code=500, error_code="NETWORK_ERROR") + ) + svc = _make_flow_service(client, ai_client=ai) + phases = [{"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}] + tasks = [ + { + "id": "t1", + "name": "T1", + "phase": "p1", + "task": {"type": "nonsense", "componentId": "c", "configId": "1", "mode": "run"}, + } + ] + result = svc.create_flow(alias="prod", name="F", phases=phases, tasks=tasks) + assert result["id"] == "999" + assert any("structural schema validation skipped" in w for w in result["warnings"]) + + +def test_create_flow_empty_schema_degrades_to_semantic_only(): + client = MagicMock() + client.create_config.return_value = {"id": "999", "name": "F"} + ai = _make_ai_client(schema={}) # empty configurationSchema + svc = _make_flow_service(client, ai_client=ai) + phases, tasks = _valid_body() + result = svc.create_flow(alias="prod", name="F", phases=phases, 
tasks=tasks) + assert result["id"] == "999" + assert any("structural schema validation skipped" in w for w in result["warnings"]) + + +def test_create_flow_fetch_failure_still_rejects_semantic_errors(): + # Even without a schema, a semantic error must still reject the write. + client = MagicMock() + ai = _make_ai_client(raise_exc=RuntimeError("network down")) + svc = _make_flow_service(client, ai_client=ai) + phases = [{"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}] + tasks = [ + { + "id": "t1", + "name": "T1", + "phase": "ghost", # semantic error + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}, + } + ] + with pytest.raises(KeboolaApiError) as exc: + svc.create_flow(alias="prod", name="F", phases=phases, tasks=tasks) + assert exc.value.error_code == ErrorCode.INVALID_FLOW_DEFINITION - def test_empty_configuration(self): - client = MagicMock() - client.get_config_detail.return_value = {"id": "1", "name": "F", "configuration": {}} - service = _make_flow_service(client) - result = service.get_flow_detail("prod", "keboola.orchestrator", "1") - assert result["phase_count"] == 0 - assert result["task_count"] == 0 +def test_fetch_flow_schema_success(): + svc = _make_flow_service(MagicMock()) + schema, reason = svc.fetch_flow_schema("prod") + assert reason is None + assert schema and schema["required"] == ["phases", "tasks"] -# --------------------------------------------------------------------------- -# FlowService.create_flow -# --------------------------------------------------------------------------- + +def test_fetch_flow_schema_empty_returns_reason(): + svc = _make_flow_service(MagicMock(), ai_client=_make_ai_client(schema={})) + schema, reason = svc.fetch_flow_schema("prod") + assert schema is None + assert reason and "configurationSchema" in reason -class TestCreateFlow: - def test_create_success(self): - client = MagicMock() - client.create_config.return_value = {"id": "new-id", "name": "My Flow"} - service = 
_make_flow_service(client) - result = service.create_flow("prod", "keboola.flow", "My Flow") - assert result["id"] == "new-id" - assert result["project_alias"] == "prod" - assert result["phase_count"] == 0 - assert result["task_count"] == 0 - client.create_config.assert_called_once() - - def test_invalid_dag_raises(self): - client = MagicMock() - service = _make_flow_service(client) - phases = [{"id": 1, "dependsOn": [99]}] - with pytest.raises(KeboolaApiError) as exc_info: - service.create_flow("prod", "keboola.flow", "Bad", phases=phases, tasks=[]) - assert exc_info.value.error_code == "INVALID_FLOW_DAG" - client.create_config.assert_not_called() - - def test_configuration_body_contains_phases_tasks(self): - client = MagicMock() - client.create_config.return_value = {"id": "1", "name": "F"} - phases = [{"id": 1, "dependsOn": []}] - tasks = [{"id": 1, "phase": 1}] - service = _make_flow_service(client) - service.create_flow("prod", "keboola.flow", "F", phases=phases, tasks=tasks) - call_kwargs = client.create_config.call_args - assert call_kwargs.kwargs["configuration"]["phases"] == phases - assert call_kwargs.kwargs["configuration"]["tasks"] == tasks +def test_fetch_flow_schema_error_returns_reason(): + ai = _make_ai_client(raise_exc=KeboolaApiError("nope", status_code=404, error_code="NOT_FOUND")) + svc = _make_flow_service(MagicMock(), ai_client=ai) + schema, reason = svc.fetch_flow_schema("prod") + assert schema is None + assert reason == "nope" # --------------------------------------------------------------------------- -# FlowService.update_flow +# update_flow (merge-aware validation) # --------------------------------------------------------------------------- -class TestUpdateFlow: - def test_update_name_only(self): - client = MagicMock() - client.update_config.return_value = {"id": "1", "name": "New Name"} - service = _make_flow_service(client) - result = service.update_flow("prod", "keboola.orchestrator", "1", name="New Name") - assert result["id"] == 
"1" - client.get_config_detail.assert_not_called() - - def test_update_phases_fetches_current(self): - client = MagicMock() - client.get_config_detail.return_value = { - "configuration": {"phases": [], "tasks": []}, +def test_update_flow_validates_merged_body(): + client = MagicMock() + # Current remote body has valid phases; update supplies only tasks that break it. + client.get_config_detail.return_value = { + "configuration": { + "phases": [{"id": "p1", "name": "P1", "next": [{"id": "n", "goto": None}]}], + "tasks": [], } - client.update_config.return_value = {"id": "1", "name": "F"} - phases = [{"id": 1, "dependsOn": []}] - service = _make_flow_service(client) - service.update_flow("prod", "keboola.orchestrator", "1", phases=phases) - client.get_config_detail.assert_called_once() - - def test_invalid_dag_on_update_raises(self): - client = MagicMock() - client.get_config_detail.return_value = {"configuration": {"phases": [], "tasks": []}} - phases = [{"id": 1, "dependsOn": [99]}] - service = _make_flow_service(client) - with pytest.raises(KeboolaApiError) as exc_info: - service.update_flow("prod", "keboola.orchestrator", "1", phases=phases, tasks=[]) - assert exc_info.value.error_code == "INVALID_FLOW_DAG" + } + svc = _make_flow_service(client) + bad_tasks = [ + { + "id": "t1", + "name": "T1", + "phase": "ghost", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}, + } + ] + with pytest.raises(KeboolaApiError) as exc: + svc.update_flow(alias="prod", config_id="5", tasks=bad_tasks) + assert exc.value.error_code == ErrorCode.INVALID_FLOW_DEFINITION + + +def test_update_flow_uses_keboola_flow_component(): + client = MagicMock() + client.update_config.return_value = {"id": "5", "name": "renamed"} + svc = _make_flow_service(client) + result = svc.update_flow(alias="prod", config_id="5", name="renamed") + assert client.update_config.call_args.kwargs["component_id"] == "keboola.flow" + assert result["id"] == "5" # 
--------------------------------------------------------------------------- -# FlowService.delete_flow +# list_flows # --------------------------------------------------------------------------- -class TestDeleteFlow: - def test_delete_success(self): - client = MagicMock() - service = _make_flow_service(client) - result = service.delete_flow("prod", "keboola.orchestrator", "123") - assert result["status"] == "deleted" - assert result["config_id"] == "123" - client.delete_config.assert_called_once_with( - component_id="keboola.orchestrator", - config_id="123", - branch_id=None, - ) +def test_list_flows_reports_legacy_orchestrator_count(): + client = MagicMock() + def list_configs(component_id, branch_id=None): + if component_id == "keboola.flow": + return [{"id": "1", "name": "CF"}] + if component_id == "keboola.orchestrator": + return [{"id": "9", "name": "Old"}, {"id": "10", "name": "Old2"}] + return [] -# --------------------------------------------------------------------------- -# FlowService.list_flow_schedules -# --------------------------------------------------------------------------- + client.list_component_configs.side_effect = list_configs + svc = _make_flow_service(client) + result = svc.list_flows(aliases=["prod"]) + assert result["legacy_orchestrator_count"] == 2 + assert all(f["component_id"] == "keboola.flow" for f in result["flows"]) -class TestListFlowSchedules: - def test_filters_by_target(self): +def test_list_flows_legacy_count_zero_when_orchestrator_404(): + client = MagicMock() - matching = { - "id": "sched-1", - "name": "Daily", - "configuration": { - "schedule": {"cronTab": "0 6 * * *", "timezone": "UTC", "state": "enabled"}, - "target": {"componentId": "keboola.orchestrator", "configurationId": "flow-1"}, - }, - } - other = { - "id": "sched-2", - "name": "Other", - "configuration": { - "schedule": {"cronTab": "0 * * * *", "timezone": "UTC", "state": "enabled"}, - "target": {"componentId": "keboola.orchestrator", "configurationId": 
"other-flow"}, - }, - } - client = MagicMock() - client.list_component_configs.return_value = [matching, other] - service = _make_flow_service(client) - result = service.list_flow_schedules("prod", "keboola.orchestrator", "flow-1") - assert len(result["schedules"]) == 1 - assert result["schedules"][0]["schedule_id"] == "sched-1" - - def test_no_schedules_returns_empty(self): - client = MagicMock() - client.list_component_configs.return_value = [] - service = _make_flow_service(client) - result = service.list_flow_schedules("prod", "keboola.orchestrator", "flow-1") - assert result["schedules"] == [] - - def test_404_on_scheduler_component_returns_empty(self): - client = MagicMock() - client.list_component_configs.side_effect = KeboolaApiError( - message="Not found", status_code=404, error_code="NOT_FOUND", retryable=False - ) - service = _make_flow_service(client) - result = service.list_flow_schedules("prod", "keboola.orchestrator", "flow-1") - assert result["schedules"] == [] + def list_configs(component_id, branch_id=None): + if component_id == "keboola.flow": + return [{"id": "1", "name": "CF"}] + raise KeboolaApiError(message="nope", status_code=404, error_code="NOT_FOUND") + + client.list_component_configs.side_effect = list_configs + svc = _make_flow_service(client) + result = svc.list_flows(aliases=["prod"]) + assert result["legacy_orchestrator_count"] == 0 + assert len(result["flows"]) == 1 # --------------------------------------------------------------------------- -# FlowService.set_flow_schedule +# delete_flow / detail # --------------------------------------------------------------------------- -class TestSetFlowSchedule: - def test_creates_scheduler_config_when_none_exists(self): - client = MagicMock() - client.get_config_detail.return_value = {"name": "My Flow"} - client.list_component_configs.return_value = [] # no existing schedules - client.create_config.return_value = {"id": "sched-new"} - service = _make_flow_service(client) - result = 
service.set_flow_schedule( - "prod", "keboola.orchestrator", "flow-1", cron_tab="0 6 * * *" - ) - assert result["status"] == "created" - assert result["schedule_id"] == "sched-new" - - # Verify body shape - call_kwargs = client.create_config.call_args.kwargs - assert call_kwargs["component_id"] == "keboola.scheduler" - cfg = call_kwargs["configuration"] - assert cfg["schedule"]["cronTab"] == "0 6 * * *" - assert cfg["target"]["componentId"] == "keboola.orchestrator" - assert cfg["target"]["configurationId"] == "flow-1" - - def test_updates_existing_schedule_upsert(self): - existing_sched = { - "id": "sched-old", - "configuration": { - "schedule": {"cronTab": "0 1 * * *", "timezone": "UTC", "state": "enabled"}, - "target": {"componentId": "keboola.orchestrator", "configurationId": "flow-1"}, - }, - } - client = MagicMock() - client.get_config_detail.return_value = {"name": "My Flow"} - client.list_component_configs.return_value = [existing_sched] - client.update_config.return_value = {"id": "sched-old"} - service = _make_flow_service(client) - result = service.set_flow_schedule( - "prod", "keboola.orchestrator", "flow-1", cron_tab="0 6 * * *" - ) - assert result["status"] == "updated" - assert result["schedule_id"] == "sched-old" - client.create_config.assert_not_called() - call_kwargs = client.update_config.call_args.kwargs - assert call_kwargs["config_id"] == "sched-old" - assert call_kwargs["configuration"]["schedule"]["cronTab"] == "0 6 * * *" - - def test_enabled_state_in_body(self): - client = MagicMock() - client.get_config_detail.return_value = {"name": "F"} - client.list_component_configs.return_value = [] - client.create_config.return_value = {"id": "s1"} - service = _make_flow_service(client) - service.set_flow_schedule("prod", "keboola.orchestrator", "1", "0 * * * *", enabled=False) - cfg = client.create_config.call_args.kwargs["configuration"] - assert cfg["schedule"]["state"] == "disabled" - - def test_non_404_error_on_list_schedules_propagates(self): 
- client = MagicMock() - client.get_config_detail.return_value = {"name": "F"} - client.list_component_configs.side_effect = KeboolaApiError( - message="Forbidden", status_code=403, error_code="INVALID_TOKEN", retryable=False - ) - service = _make_flow_service(client) - with pytest.raises(KeboolaApiError) as exc_info: - service.set_flow_schedule("prod", "keboola.orchestrator", "1", "0 * * * *") - assert exc_info.value.error_code == "INVALID_TOKEN" - client.create_config.assert_not_called() +def test_delete_flow_uses_keboola_flow_component(): + client = MagicMock() + svc = _make_flow_service(client) + result = svc.delete_flow(alias="prod", config_id="5") + assert client.delete_config.call_args.kwargs["component_id"] == "keboola.flow" + assert result["component_id"] == "keboola.flow" + + +def test_get_flow_detail_uses_keboola_flow_component(): + client = MagicMock() + client.get_config_detail.return_value = { + "id": "5", + "name": "CF", + "configuration": {"phases": [{"id": "p1"}], "tasks": []}, + } + svc = _make_flow_service(client) + result = svc.get_flow_detail(alias="prod", config_id="5") + assert client.get_config_detail.call_args[0][0] == "keboola.flow" + assert result["component_id"] == "keboola.flow" + assert result["phase_count"] == 1 # --------------------------------------------------------------------------- -# FlowService.remove_flow_schedule +# schedules # --------------------------------------------------------------------------- -class TestRemoveFlowSchedule: - def test_removes_matching_schedules(self): - matching = { - "id": "sched-1", - "configuration": { - "target": {"componentId": "keboola.orchestrator", "configurationId": "flow-1"} - }, - } - other = { - "id": "sched-2", - "configuration": { - "target": {"componentId": "keboola.orchestrator", "configurationId": "other"} - }, - } - client = MagicMock() - client.list_component_configs.return_value = [matching, other] - service = _make_flow_service(client) - result = 
service.remove_flow_schedule("prod", "keboola.orchestrator", "flow-1") - assert result["deleted_count"] == 1 - assert "sched-1" in result["deleted_schedule_ids"] - client.delete_config.assert_called_once_with("keboola.scheduler", "sched-1", branch_id=None) - - def test_no_schedules_is_idempotent(self): - client = MagicMock() - client.list_component_configs.return_value = [] - service = _make_flow_service(client) - result = service.remove_flow_schedule("prod", "keboola.orchestrator", "flow-1") - assert result["deleted_count"] == 0 - assert result["deleted_schedule_ids"] == [] - client.delete_config.assert_not_called() - - def test_partial_delete_failure_returns_successes(self): - sched1 = { - "id": "sched-a", - "configuration": { - "target": {"componentId": "keboola.orchestrator", "configurationId": "flow-1"} - }, - } - sched2 = { - "id": "sched-b", - "configuration": { - "target": {"componentId": "keboola.orchestrator", "configurationId": "flow-1"} - }, - } - client = MagicMock() - client.list_component_configs.return_value = [sched1, sched2] - # first delete succeeds, second raises - client.delete_config.side_effect = [ - None, - KeboolaApiError( - message="Server error", status_code=500, error_code="INTERNAL", retryable=True - ), - ] - service = _make_flow_service(client) - result = service.remove_flow_schedule("prod", "keboola.orchestrator", "flow-1") - # Partial success: first was deleted, second failed but is not re-raised when some succeeded - assert result["deleted_count"] == 1 - assert "sched-a" in result["deleted_schedule_ids"] - - def test_all_deletes_fail_raises(self): - sched1 = { - "id": "sched-x", +def test_set_flow_schedule_targets_keboola_flow(): + client = MagicMock() + client.get_config_detail.return_value = {"name": "CF"} + client.list_component_configs.return_value = [] + client.create_config.return_value = {"id": "77"} + svc = _make_flow_service(client) + result = svc.set_flow_schedule(alias="prod", config_id="5", cron_tab="0 6 * * *") + # 
scheduler config created with target.componentId == keboola.flow + cfg = client.create_config.call_args.kwargs["configuration"] + assert cfg["target"]["componentId"] == "keboola.flow" + assert result["component_id"] == "keboola.flow" + + +def test_remove_flow_schedule_filters_keboola_flow(): + client = MagicMock() + client.list_component_configs.return_value = [ + { + "id": "77", "configuration": { - "target": {"componentId": "keboola.orchestrator", "configurationId": "flow-1"} + "target": {"componentId": "keboola.flow", "configurationId": "5"}, }, } - client = MagicMock() - client.list_component_configs.return_value = [sched1] - client.delete_config.side_effect = KeboolaApiError( - message="Forbidden", status_code=403, error_code="INVALID_TOKEN", retryable=False - ) - service = _make_flow_service(client) - with pytest.raises(KeboolaApiError) as exc_info: - service.remove_flow_schedule("prod", "keboola.orchestrator", "flow-1") - assert exc_info.value.error_code == "SCHEDULE_DELETE_FAILED" + ] + svc = _make_flow_service(client) + result = svc.remove_flow_schedule(alias="prod", config_id="5") + assert result["deleted_count"] == 1 + assert result["component_id"] == "keboola.flow" diff --git a/tests/test_flow_validation.py b/tests/test_flow_validation.py new file mode 100644 index 00000000..a1de1163 --- /dev/null +++ b/tests/test_flow_validation.py @@ -0,0 +1,295 @@ +"""Unit tests for conditional-flow validation (services/flow_validation.py). + +Pure functions, no HTTP, no ConfigStore. The structural JSON Schema is no +longer bundled -- it is passed in explicitly (the service fetches it live from +the stack). These tests supply a compact representative Draft7 schema. +""" + +from __future__ import annotations + +from keboola_agent_cli.services.flow_validation import ( + find_unreachable_phases, + validate_conditional_flow, +) + +# A compact representative conditional-flow JSON Schema. 
It exercises the +# structural constraints the tests care about (ids are strings, task.type is an +# enum) without reproducing the full upstream schema -- which now lives on the +# stack and is fetched at runtime, never bundled. +_SCHEMA: dict = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["phases", "tasks"], + "properties": { + "phases": { + "type": "array", + "items": { + "type": "object", + "required": ["id"], + "properties": { + "id": {"type": "string"}, + "name": {"type": "string"}, + "next": {"type": "array"}, + }, + }, + }, + "tasks": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "phase", "task"], + "properties": { + "id": {"type": "string"}, + "phase": {"type": "string"}, + "enabled": {"type": "boolean"}, + "task": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["job", "notification", "variable"], + } + }, + }, + }, + }, + }, + }, +} + + +def _valid_phases(): + return [ + { + "id": "extract", + "name": "Extract", + "next": [ + { + "id": "t1", + "goto": "transform", + "condition": { + "type": "operator", + "operator": "ANY_TASKS_IN_PHASE", + "phase": "extract", + "operands": [], + }, + }, + {"id": "t2", "goto": None}, # default transition (no condition) + ], + }, + {"id": "transform", "name": "Transform"}, + ] + + +def _valid_tasks(): + return [ + { + "id": "task-1", + "name": "Run extractor", + "phase": "extract", + "enabled": True, + "task": { + "type": "job", + "componentId": "keboola.ex-http", + "configId": "123", + "mode": "run", + }, + }, + { + "id": "task-2", + "name": "Run transform", + "phase": "transform", + "task": { + "type": "job", + "componentId": "keboola.snowflake-transformation", + "configId": "456", + "mode": "run", + }, + }, + ] + + +# ── schema parameter behaviour ──────────────────────────────────────────── + + +def test_valid_flow_has_no_errors_with_schema(): + assert 
validate_conditional_flow(_valid_phases(), _valid_tasks(), _SCHEMA) == [] + + +def test_valid_flow_has_no_errors_without_schema(): + # Semantic-only path (no structural validation) -- still valid. + assert validate_conditional_flow(_valid_phases(), _valid_tasks()) == [] + assert validate_conditional_flow(_valid_phases(), _valid_tasks(), None) == [] + + +def test_structural_error_bad_task_type_with_schema(): + tasks = _valid_tasks() + tasks[0]["task"]["type"] = "nonsense" + errors = validate_conditional_flow(_valid_phases(), tasks, _SCHEMA) + assert errors # at least one structural error reported + assert any("task" in e.lower() for e in errors) + + +def test_structural_error_not_reported_without_schema(): + # Same bad task type, but no schema => structural check is skipped. + tasks = _valid_tasks() + tasks[0]["task"]["type"] = "nonsense" + errors = validate_conditional_flow(_valid_phases(), tasks, None) + # No structural complaint about the bad type; semantic checks still pass. + assert errors == [] + + +# ── semantic checks (always run, schema or not) ─────────────────────────── + + +def test_duplicate_phase_ids(): + phases = _valid_phases() + phases[1]["id"] = "extract" # collide with phase[0] + errors = validate_conditional_flow(phases, _valid_tasks()) + assert any("duplicate phase id" in e.lower() for e in errors) + + +def test_duplicate_task_ids(): + tasks = _valid_tasks() + tasks[1]["id"] = "task-1" + errors = validate_conditional_flow(_valid_phases(), tasks) + assert any("duplicate task id" in e.lower() for e in errors) + + +def test_task_references_missing_phase(): + tasks = _valid_tasks() + tasks[0]["phase"] = "ghost" + errors = validate_conditional_flow(_valid_phases(), tasks) + assert any("ghost" in e and "phase" in e.lower() for e in errors) + + +def test_goto_references_missing_phase(): + phases = _valid_phases() + phases[0]["next"][0]["goto"] = "ghost" + errors = validate_conditional_flow(phases, _valid_tasks()) + assert any("ghost" in e and "goto" in 
e.lower() for e in errors) + + +def test_goto_null_is_allowed(): + phases = _valid_phases() + phases[0]["next"] = [{"id": "x", "goto": None}] + assert validate_conditional_flow(phases, _valid_tasks()) == [] + + +def test_conditional_transitions_need_default_last(): + phases = _valid_phases() + # remove the default (last, condition-less) transition, leaving only conditional + phases[0]["next"] = [phases[0]["next"][0]] + errors = validate_conditional_flow(phases, _valid_tasks()) + assert any("default" in e.lower() and "transition" in e.lower() for e in errors) + + +def test_phase_without_enabled_task(): + tasks = _valid_tasks() + tasks[1]["enabled"] = False # transform phase now has zero enabled tasks + errors = validate_conditional_flow(_valid_phases(), tasks) + assert any("transform" in e and "enabled task" in e.lower() for e in errors) + + +def _phase_with_condition(condition): + return [ + { + "id": "p1", + "name": "P1", + "next": [ + {"id": "c", "goto": "p2", "condition": condition}, + {"id": "d", "goto": None}, + ], + }, + {"id": "p2", "name": "P2"}, + ] + + +def _tasks_two_phases(): + return [ + { + "id": "a", + "name": "A", + "phase": "p1", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}, + }, + { + "id": "b", + "name": "B", + "phase": "p2", + "task": {"type": "job", "componentId": "c", "configId": "2", "mode": "run"}, + }, + ] + + +def _const(v): + return {"type": "const", "value": v} + + +def test_equals_requires_two_operands(): + cond = {"type": "operator", "operator": "EQUALS", "operands": [_const("x")]} + errors = validate_conditional_flow(_phase_with_condition(cond), _tasks_two_phases()) + assert any("EQUALS" in e and "2 operand" in e for e in errors) + + +def test_and_requires_at_least_one_operand(): + cond = {"type": "operator", "operator": "AND", "operands": []} + errors = validate_conditional_flow(_phase_with_condition(cond), _tasks_two_phases()) + assert any("AND" in e and "at least 1" in e for e in errors) + + +def 
test_function_count_requires_one_operand(): + inner = {"type": "function", "function": "COUNT", "operands": [_const("a"), _const("b")]} + cond = {"type": "operator", "operator": "AND", "operands": [inner]} + errors = validate_conditional_flow(_phase_with_condition(cond), _tasks_two_phases()) + assert any("COUNT" in e and "1 operand" in e for e in errors) + + +def test_valid_equals_two_operands_ok(): + cond = {"type": "operator", "operator": "EQUALS", "operands": [_const("x"), _const("y")]} + assert validate_conditional_flow(_phase_with_condition(cond), _tasks_two_phases()) == [] + + +# ── reachability (warning-level helper) ──────────────────────────────────── + + +def test_all_phases_reachable(): + assert find_unreachable_phases(_valid_phases()) == [] + + +def test_unreachable_phase_reported(): + phases = [ + {"id": "start", "name": "Start", "next": [{"id": "x", "goto": None}]}, + {"id": "island", "name": "Island"}, # never targeted + ] + assert find_unreachable_phases(phases) == ["island"] + + +def test_goto_loop_is_not_an_error(): + # start -> loop -> start ... 
legal at runtime, must NOT be flagged + phases = [ + {"id": "start", "name": "Start", "next": [{"id": "a", "goto": "loop"}]}, + {"id": "loop", "name": "Loop", "next": [{"id": "b", "goto": "start"}]}, + ] + assert find_unreachable_phases(phases) == [] + assert ( + validate_conditional_flow( + phases, + [ + { + "id": "t", + "name": "T", + "phase": "start", + "task": {"type": "job", "componentId": "c", "configId": "1", "mode": "run"}, + }, + { + "id": "u", + "name": "U", + "phase": "loop", + "task": {"type": "job", "componentId": "c", "configId": "2", "mode": "run"}, + }, + ], + ) + == [] + ) diff --git a/tests/test_schedule_service.py b/tests/test_schedule_service.py index f27ffa4b..0aae5927 100644 --- a/tests/test_schedule_service.py +++ b/tests/test_schedule_service.py @@ -695,24 +695,24 @@ def test_schedules_joined_on_parent_id(self) -> None: client = MagicMock() def side_effect(component_id: str, branch_id=None): - if component_id == "keboola.orchestrator": + if component_id == "keboola.flow": return [ { - "id": "orchestrator-1", + "id": "flow-1", "name": "Daily ETL", "description": "", "isDisabled": False, } ] - if component_id == "keboola.flow": + if component_id == "keboola.orchestrator": return [] if component_id == SCHEDULER_COMPONENT_ID: return [ _scheduler_cfg( config_id="sc1", name="Schedule", - target_component="keboola.orchestrator", - target_config_id="orchestrator-1", + target_component="keboola.flow", + target_config_id="flow-1", state="enabled", ) ] @@ -736,8 +736,8 @@ def test_without_flag_schedules_key_absent(self) -> None: client = MagicMock() def side_effect(component_id: str, branch_id=None): - if component_id == "keboola.orchestrator": - return [{"id": "o1", "name": "Flow", "description": "", "isDisabled": False}] + if component_id == "keboola.flow": + return [{"id": "f1", "name": "Flow", "description": "", "isDisabled": False}] return [] client.list_component_configs.side_effect = side_effect From 0a9d800f7240fa2b47f163f6e5e23117f1f4762b Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Jodas?= <12143866+ondrajodas@users.noreply.github.com> Date: Thu, 4 Jun 2026 17:41:12 +0200 Subject: [PATCH 2/8] fix(flow): address PR review findings - add POST /flows/validate and GET /flows/{project}/schema REST routes to keep the 1:1 CLI-to-REST parity for the new flow commands; routes are declared before the path-param routes so literal segments win - add router call tests for both endpoints (incl. route-ordering guards and the fetch-failure degrade/502 paths) - document why the broad except in _fetch_flow_schema is intentional (httpx errors do not subclass OSError; any failure must degrade) - use ErrorCode.NOT_FOUND enum instead of raw string in flow_service - hoist deferred json/rich.syntax imports to module top in commands/flow.py - schedule-workflow.md: switch example target to keboola.flow and note legacy orchestrator schedules still appear in schedule list --- .../kbagent/references/schedule-workflow.md | 6 +- src/keboola_agent_cli/commands/flow.py | 10 +- src/keboola_agent_cli/server/routers/flows.py | 55 ++++++- .../services/flow_service.py | 16 +- tests/test_server_router_calls.py | 138 ++++++++++++++++++ 5 files changed, 209 insertions(+), 16 deletions(-) diff --git a/plugins/kbagent/skills/kbagent/references/schedule-workflow.md b/plugins/kbagent/skills/kbagent/references/schedule-workflow.md index f94e2a66..0be08cdd 100644 --- a/plugins/kbagent/skills/kbagent/references/schedule-workflow.md +++ b/plugins/kbagent/skills/kbagent/references/schedule-workflow.md @@ -13,7 +13,7 @@ Schedules are stored as ordinary Storage API configurations of the `keboola.sche "configuration": { "target": { "mode": "run", - "componentId": "keboola.orchestrator", + "componentId": "keboola.flow", "configurationId": "01kmjawd6w80vn2rgh6yeaa12r" }, "schedule": { @@ -47,7 +47,7 @@ Each row has every field you need for a spreadsheet or dashboard: "project_alias": "prod", "schedule_id": "01kpx6zv0krbp05gh7eb0dzd5y", 
"schedule_name": "1stFlow (Schedule)", - "parent_component_id": "keboola.orchestrator", + "parent_component_id": "keboola.flow", "parent_config_id": "01kmjawd6w80vn2rgh6yeaa12r", "parent_name": "1stFlow", "cron": "0 6 * * *", @@ -56,6 +56,8 @@ Each row has every field you need for a spreadsheet or dashboard: } ``` +`schedule list` returns every `keboola.scheduler` config regardless of its target component, so legacy schedules may still show `"parent_component_id": "keboola.orchestrator"` -- those targets are no longer manageable via `kbagent flow` (orchestrator support was dropped in v0.56.0), but the schedules themselves list and audit normally. + ## Inspect a single schedule ```bash diff --git a/src/keboola_agent_cli/commands/flow.py b/src/keboola_agent_cli/commands/flow.py index f5539920..eed9bf47 100644 --- a/src/keboola_agent_cli/commands/flow.py +++ b/src/keboola_agent_cli/commands/flow.py @@ -6,6 +6,7 @@ from __future__ import annotations +import json import logging import sys from pathlib import Path @@ -14,6 +15,7 @@ import typer import yaml from rich.markup import escape +from rich.syntax import Syntax from rich.table import Table from ..errors import ConfigError, ErrorCode, KeboolaApiError @@ -412,12 +414,8 @@ def flow_schema( if formatter.json_mode: formatter.output({"format": "json-schema", "schema": schema}) else: - import json as _json - - from rich.syntax import Syntax - formatter.console.print( - Syntax(_json.dumps(schema, indent=2), "json", theme="monokai", line_numbers=False) + Syntax(json.dumps(schema, indent=2), "json", theme="monokai", line_numbers=False) ) return @@ -430,8 +428,6 @@ def flow_schema( } ) else: - from rich.syntax import Syntax - formatter.console.print(Syntax(_FLOW_SCHEMA, "yaml", theme="monokai", line_numbers=False)) diff --git a/src/keboola_agent_cli/server/routers/flows.py b/src/keboola_agent_cli/server/routers/flows.py index ac9fe1e2..a9725eff 100644 --- a/src/keboola_agent_cli/server/routers/flows.py +++ 
b/src/keboola_agent_cli/server/routers/flows.py @@ -4,9 +4,10 @@ from typing import Any -from fastapi import APIRouter, Depends, Query +from fastapi import APIRouter, Depends, HTTPException, Query from pydantic import BaseModel +from ...services.flow_validation import find_unreachable_phases, validate_conditional_flow from ..dependencies import ServiceRegistry, get_registry router = APIRouter(prefix="/flows", tags=["flows"]) @@ -36,6 +37,58 @@ class FlowSchedule(BaseModel): branch_id: int | None = None +class FlowValidate(BaseModel): + phases: list[dict[str, Any]] = [] + tasks: list[dict[str, Any]] = [] + project: str | None = None + + +# NOTE: /validate and /{project}/schema are declared BEFORE the /{project} +# and /{project}/{config_id} routes -- FastAPI matches in declaration order, +# so the literal segments must win over the path parameters. + + +@router.post("/validate", summary="Validate a conditional-flow definition") +def validate( + body: FlowValidate, registry: ServiceRegistry = Depends(get_registry) +) -> dict[str, Any]: + """Validate phases/tasks (schema + semantic checks). Mirrors `kbagent flow validate`. + + With ``project`` the live keboola.flow JSON Schema is fetched from the stack + for structural validation; a fetch failure degrades to semantic-only and is + recorded in ``notes``. Without ``project`` only semantic checks run. 
+ """ + schema: dict[str, Any] | None = None + notes: list[str] = [] + if body.project: + schema, reason = registry.flow.fetch_flow_schema(body.project) + if schema is None: + notes.append(f"structural schema validation skipped: {reason}") + else: + notes.append( + "structural schema validation skipped: no schema source " + "(pass 'project' to fetch the live schema from the stack)" + ) + errors = validate_conditional_flow(body.phases, body.tasks, schema) + warnings = [ + f"Phase '{pid}' is unreachable from the entry phase" + for pid in find_unreachable_phases(body.phases) + ] + return {"valid": not errors, "errors": errors, "warnings": warnings, "notes": notes} + + +@router.get("/{project}/schema", summary="Fetch the live conditional-flow JSON Schema") +def get_schema(project: str, registry: ServiceRegistry = Depends(get_registry)) -> dict[str, Any]: + """Dump the keboola.flow JSON Schema served by the stack. Mirrors `kbagent flow schema --full`.""" + schema, reason = registry.flow.fetch_flow_schema(project) + if schema is None: + raise HTTPException( + status_code=502, + detail=f"Could not fetch the conditional-flow schema: {reason}", + ) + return {"format": "json-schema", "schema": schema} + + @router.get("", summary="List flows across projects") def list_flows( project: list[str] | None = Query(None), diff --git a/src/keboola_agent_cli/services/flow_service.py b/src/keboola_agent_cli/services/flow_service.py index cecb2579..6fbec9ca 100644 --- a/src/keboola_agent_cli/services/flow_service.py +++ b/src/keboola_agent_cli/services/flow_service.py @@ -74,7 +74,7 @@ def _collect_schedules_by_parent( try: all_sched = client.list_component_configs(SCHEDULER_COMPONENT_ID, branch_id=branch_id) except KeboolaApiError as exc: - if exc.error_code == "NOT_FOUND": + if exc.error_code == ErrorCode.NOT_FOUND: return {} raise @@ -152,6 +152,10 @@ def _fetch_flow_schema( except KeboolaApiError as exc: return None, exc.message except Exception as exc: + # Intentionally broad: ANY 
schema-fetch failure must degrade to + # semantic-only validation, never block the write. Narrowing to + # OSError-style transport errors would miss httpx exceptions + # (httpx.HTTPError does not subclass OSError) and re-raise them. return None, str(exc) finally: ai_client.close() @@ -224,7 +228,7 @@ def worker(alias: str, project: ProjectConfig) -> tuple[Any, ...]: FLOW_COMPONENT_ID, branch_id=effective_branch ) except KeboolaApiError as exc: - if exc.error_code == "NOT_FOUND": + if exc.error_code == ErrorCode.NOT_FOUND: configs = [] else: raise @@ -248,7 +252,7 @@ def worker(alias: str, project: ProjectConfig) -> tuple[Any, ...]: ) legacy_count = len(legacy) except KeboolaApiError as exc: - if exc.error_code == "NOT_FOUND": + if exc.error_code == ErrorCode.NOT_FOUND: legacy_count = 0 else: raise @@ -544,7 +548,7 @@ def list_flow_schedules( SCHEDULER_COMPONENT_ID, branch_id=effective_branch ) except KeboolaApiError as exc: - if exc.error_code == "NOT_FOUND": + if exc.error_code == ErrorCode.NOT_FOUND: all_sched = [] else: raise @@ -638,7 +642,7 @@ def set_flow_schedule( SCHEDULER_COMPONENT_ID, branch_id=effective_branch ) except KeboolaApiError as exc: - if exc.error_code == "NOT_FOUND": + if exc.error_code == ErrorCode.NOT_FOUND: existing = [] else: raise @@ -707,7 +711,7 @@ def remove_flow_schedule( SCHEDULER_COMPONENT_ID, branch_id=effective_branch ) except KeboolaApiError as exc: - if exc.error_code == "NOT_FOUND": + if exc.error_code == ErrorCode.NOT_FOUND: all_sched = [] else: raise diff --git a/tests/test_server_router_calls.py b/tests/test_server_router_calls.py index c78ddcbe..feb604d8 100644 --- a/tests/test_server_router_calls.py +++ b/tests/test_server_router_calls.py @@ -621,3 +621,141 @@ def test_reference_data_delete_route(tmp_path: Path) -> None: ) assert resp.status_code == 200, resp.text sl.delete_reference_data.assert_called_once_with(alias=PROJECT, record_id="r1") + + +# 
--------------------------------------------------------------------------- +# flows.py POST /flows/validate + GET /flows/{project}/schema +# New in 0.56.0 -- mirror `flow validate` / `flow schema --full`. +# --------------------------------------------------------------------------- + +_CF_PHASES = [{"id": "p1", "name": "Extract", "next": [{"id": "n1", "goto": None}]}] +_CF_TASKS = [ + { + "id": "t1", + "name": "Run extractor", + "phase": "p1", + "enabled": True, + "task": {"type": "job", "componentId": "keboola.ex-http", "configId": "1", "mode": "run"}, + } +] + + +def test_flows_validate_without_project_is_semantic_only(tmp_path: Path) -> None: + """No `project` in body -> no schema fetch, semantic-only note, valid payload passes. + + Also guards FastAPI route ordering: /flows/validate must NOT be captured by + POST /flows/{project} (which would call create_flow). + """ + flow_svc = MagicMock() + registry = _mock_registry(flow=flow_svc) + app = _make_app_with_registry(tmp_path, registry) + + with TestClient(app) as client: + res = client.post( + "/flows/validate", + headers=AUTH, + json={"phases": _CF_PHASES, "tasks": _CF_TASKS}, + ) + + assert res.status_code == 200, res.text + body = res.json() + assert body["valid"] is True, body + assert any("no schema source" in n for n in body["notes"]), body + flow_svc.fetch_flow_schema.assert_not_called() + flow_svc.create_flow.assert_not_called() + + +def test_flows_validate_with_project_fetches_live_schema(tmp_path: Path) -> None: + """`project` in body -> fetch_flow_schema(alias) is called and schema is applied.""" + flow_svc = MagicMock() + flow_svc.fetch_flow_schema.return_value = ( + {"type": "object", "required": ["phases", "tasks"]}, + None, + ) + registry = _mock_registry(flow=flow_svc) + app = _make_app_with_registry(tmp_path, registry) + + with TestClient(app) as client: + res = client.post( + "/flows/validate", + headers=AUTH, + json={"phases": _CF_PHASES, "tasks": _CF_TASKS, "project": PROJECT}, + ) + + 
assert res.status_code == 200, res.text + body = res.json() + assert body["valid"] is True, body + assert body["notes"] == [], body + flow_svc.fetch_flow_schema.assert_called_once_with(PROJECT) + + +def test_flows_validate_schema_fetch_failure_degrades(tmp_path: Path) -> None: + """Fetch failure -> semantic-only validation + skip reason in notes, never 5xx.""" + flow_svc = MagicMock() + flow_svc.fetch_flow_schema.return_value = (None, "AI Service unreachable") + registry = _mock_registry(flow=flow_svc) + app = _make_app_with_registry(tmp_path, registry) + + with TestClient(app) as client: + res = client.post( + "/flows/validate", + headers=AUTH, + json={"phases": _CF_PHASES, "tasks": _CF_TASKS, "project": PROJECT}, + ) + + assert res.status_code == 200, res.text + body = res.json() + assert any("AI Service unreachable" in n for n in body["notes"]), body + + +def test_flows_validate_reports_semantic_errors(tmp_path: Path) -> None: + """Semantic violation (task references missing phase) -> valid=false with errors.""" + registry = _mock_registry(flow=MagicMock()) + app = _make_app_with_registry(tmp_path, registry) + + bad_tasks = [dict(_CF_TASKS[0], phase="missing-phase")] + with TestClient(app) as client: + res = client.post( + "/flows/validate", + headers=AUTH, + json={"phases": _CF_PHASES, "tasks": bad_tasks}, + ) + + assert res.status_code == 200, res.text + body = res.json() + assert body["valid"] is False, body + assert body["errors"], body + + +def test_flows_get_schema_returns_live_schema(tmp_path: Path) -> None: + """GET /flows/{project}/schema -> fetch_flow_schema(alias), json-schema envelope. + + Also guards route ordering: /{project}/schema must NOT be captured by + GET /flows/{project}/{config_id} (which would call get_flow_detail). 
+ """ + flow_svc = MagicMock() + flow_svc.fetch_flow_schema.return_value = ({"type": "object"}, None) + registry = _mock_registry(flow=flow_svc) + app = _make_app_with_registry(tmp_path, registry) + + with TestClient(app) as client: + res = client.get(f"/flows/{PROJECT}/schema", headers=AUTH) + + assert res.status_code == 200, res.text + assert res.json() == {"format": "json-schema", "schema": {"type": "object"}} + flow_svc.fetch_flow_schema.assert_called_once_with(PROJECT) + flow_svc.get_flow_detail.assert_not_called() + + +def test_flows_get_schema_fetch_failure_is_502(tmp_path: Path) -> None: + """Schema unavailable -> 502 with the reason (REST has no degrade path to offer).""" + flow_svc = MagicMock() + flow_svc.fetch_flow_schema.return_value = (None, "no configurationSchema") + registry = _mock_registry(flow=flow_svc) + app = _make_app_with_registry(tmp_path, registry) + + with TestClient(app) as client: + res = client.get(f"/flows/{PROJECT}/schema", headers=AUTH) + + assert res.status_code == 502, res.text + assert "no configurationSchema" in res.text From 90360fb9317606767ae496901ad72b464517ccb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Jodas?= <12143866+ondrajodas@users.noreply.github.com> Date: Thu, 4 Jun 2026 18:22:10 +0200 Subject: [PATCH 3/8] refactor(flow): hoist flow_validation import to module top Follow-up to the review-fix commit: the deferred import inside flow_validate() was the last remaining one; flow_validation is pure (stdlib + jsonschema), so module-level import is circular-import safe. 
--- src/keboola_agent_cli/commands/flow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/keboola_agent_cli/commands/flow.py b/src/keboola_agent_cli/commands/flow.py index eed9bf47..77fb3424 100644 --- a/src/keboola_agent_cli/commands/flow.py +++ b/src/keboola_agent_cli/commands/flow.py @@ -19,6 +19,7 @@ from rich.table import Table from ..errors import ConfigError, ErrorCode, KeboolaApiError +from ..services.flow_validation import find_unreachable_phases, validate_conditional_flow from ._helpers import ( check_cli_permission, get_formatter, @@ -464,7 +465,6 @@ def flow_validate( Exit 0 when valid (warnings still printed), exit 2 when there are errors. """ formatter = get_formatter(ctx) - from ..services.flow_validation import find_unreachable_phases, validate_conditional_flow try: flow_def = _load_flow_yaml(file) From c6607ef1b246c78ea9e2b1517fc78649e6f38bab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Jodas?= <12143866+ondrajodas@users.noreply.github.com> Date: Thu, 4 Jun 2026 19:09:24 +0200 Subject: [PATCH 4/8] fix(tests): make parallel list_jobs asserts thread-safe TestPullJobsFallback asserted MagicMock.call_count on a path where _fetch_jobs_per_config calls the mock from a ThreadPoolExecutor. MagicMock.call_count is incremented without a lock (read-modify-write), so concurrent calls lose updates -- reproducible locally with a lowered sys.setswitchinterval (30/30 runs lose increments) and surfaced in CI where coverage tracing widens the race window (observed 174 != 200). call_args_list.append IS atomic under the GIL, so assert on len(call_args_list) instead; the 200-config test additionally checks the distinct (component_id, config_id) pairs to guarantee exactly-once fetching. Production code is unaffected (results were already collected under an explicit lock). 
--- tests/test_sync_storage_jobs.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/test_sync_storage_jobs.py b/tests/test_sync_storage_jobs.py index 3cdbb4e8..dec86f72 100644 --- a/tests/test_sync_storage_jobs.py +++ b/tests/test_sync_storage_jobs.py @@ -1315,9 +1315,16 @@ def test_falls_back_to_per_config_when_limit_insufficient( result = svc.pull(alias="prod", project_root=project_root, job_limit=10) - # Should NOT use grouped-jobs, SHOULD use list_jobs per config + # Should NOT use grouped-jobs, SHOULD use list_jobs per config. + # NOTE: _fetch_jobs_per_config calls the mock from a ThreadPoolExecutor + # and MagicMock.call_count increments are NOT thread-safe (lost updates + # under concurrency -- flaked in CI under coverage tracing). call_args_list + # uses list.append, which IS atomic under the GIL, so assert on that. pull_client.list_jobs_grouped.assert_not_called() - assert pull_client.list_jobs.call_count == 200 + calls = pull_client.list_jobs.call_args_list + assert len(calls) == 200 + distinct_pairs = {(c.kwargs["component_id"], c.kwargs["config_id"]) for c in calls} + assert len(distinct_pairs) == 200, "each config must be fetched exactly once" assert result["jobs_written"] == 200 def test_boundary_exact_limit(self, tmp_config_dir: Path, tmp_path: Path) -> None: @@ -1366,4 +1373,6 @@ def test_boundary_one_over_limit(self, tmp_config_dir: Path, tmp_path: Path) -> svc.pull(alias="prod", project_root=project_root, job_limit=5) pull_client.list_jobs_grouped.assert_not_called() - assert pull_client.list_jobs.call_count == 101 + # call_args_list, not call_count: see the thread-safety note in + # test_falls_back_to_per_config_when_limit_insufficient. 
+ assert len(pull_client.list_jobs.call_args_list) == 101 From d1e6cf7b2da29de4f39278418ea4608e278940f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Jodas?= <12143866+ondrajodas@users.noreply.github.com> Date: Thu, 4 Jun 2026 21:39:28 +0200 Subject: [PATCH 5/8] fix(flow): use product names in legacy-flows warning '3 legacy keboola.orchestrator flow(s) ... migrate to keboola.flow' read like internal component IDs; reword to product terminology: 'Legacy Flows were dropped in 0.56.0; migrate them to Conditional Flows'. --- src/keboola_agent_cli/commands/flow.py | 4 ++-- tests/test_flow_cli.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/keboola_agent_cli/commands/flow.py b/src/keboola_agent_cli/commands/flow.py index 77fb3424..3523079f 100644 --- a/src/keboola_agent_cli/commands/flow.py +++ b/src/keboola_agent_cli/commands/flow.py @@ -236,8 +236,8 @@ def _format_flows_table( legacy = result.get("legacy_orchestrator_count", 0) if legacy: formatter.warning( - f"{legacy} legacy keboola.orchestrator flow(s) are not shown " - f"(orchestrator support was dropped in 0.56.0; migrate to keboola.flow)." + f"{legacy} legacy flow(s) are not shown " + f"(Legacy Flows were dropped in 0.56.0; migrate them to Conditional Flows)." 
) diff --git a/tests/test_flow_cli.py b/tests/test_flow_cli.py index 1cc1025e..9f14b8d3 100644 --- a/tests/test_flow_cli.py +++ b/tests/test_flow_cli.py @@ -90,7 +90,7 @@ def test_list_legacy_count_warns(self, tmp_path: Path) -> None: } result = _invoke(store, mock_flow, ["flow", "list", "--project", "prod"]) assert result.exit_code == 0, result.output - assert "3 legacy" in result.output and "orchestrator" in result.output + assert "3 legacy" in result.output and "Conditional Flows" in result.output def test_list_empty(self, tmp_path: Path) -> None: store = _setup_config(tmp_path / "cfg", {"prod": {}}) From 0a950eaf3118db33467be02b3bd6a2c0e20183cd Mon Sep 17 00:00:00 2001 From: Petr Date: Thu, 4 Jun 2026 23:06:12 +0200 Subject: [PATCH 6/8] chore(release): retarget conditional flows to 0.57.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit main shipped v0.56.0 as a maintenance re-release (#399) — claiming the 0.56.0 version, git tag, and GitHub Latest release — to push the 0.55.0 reference-data commands through the auto-update version gate. The conditional-flows work therefore moves to 0.57.0. Bumps pyproject + plugin.json + marketplace.json + uv.lock, and rewrites every (since v0.56.0) doc tag to v0.57.0 across CLAUDE.md, README, keboola-expert.md, gotchas.md, commands-reference.md, flow/schedule workflows, AGENT_CONTEXT, and code comments. The changelog 0.57.0 key carries the conditional-flows entry; the 0.56.0 maintenance entry is preserved. 
--- .claude-plugin/marketplace.json | 2 +- CLAUDE.md | 2 +- README.md | 2 +- .../2026-06-04-conditional-flow-support.md | 24 +++++++++---------- ...6-06-04-conditional-flow-support-design.md | 8 +++---- plugins/kbagent/.claude-plugin/plugin.json | 2 +- plugins/kbagent/agents/keboola-expert.md | 4 ++-- .../kbagent/references/commands-reference.md | 2 +- .../kbagent/references/flow-workflow.md | 2 +- .../skills/kbagent/references/gotchas.md | 18 +++++++------- .../kbagent/references/schedule-workflow.md | 2 +- pyproject.toml | 2 +- src/keboola_agent_cli/commands/context.py | 2 +- src/keboola_agent_cli/commands/flow.py | 4 ++-- src/keboola_agent_cli/errors.py | 2 +- .../services/flow_service.py | 4 ++-- tests/test_agent_prompt.py | 2 +- tests/test_server_router_calls.py | 2 +- uv.lock | 2 +- 19 files changed, 44 insertions(+), 44 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index da284850..81c16bcb 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -10,7 +10,7 @@ "plugins": [ { "name": "kbagent", - "version": "0.56.0", + "version": "0.57.0", "source": "./plugins/kbagent", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "category": "development" diff --git a/CLAUDE.md b/CLAUDE.md index a23b4480..6ff32b1d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -542,7 +542,7 @@ kbagent flow update --project NAME --flow-id ID [--name N] [--description D] [-- kbagent flow delete --project NAME --flow-id ID [--branch ID] [--yes] kbagent flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--timezone TZ] [--disabled] [--branch ID] kbagent flow schedule-remove --project NAME --flow-id ID [--branch ID] [--yes] -# Flows are conditional flows (keboola.flow). keboola.orchestrator is NOT supported (dropped 0.56.0). +# Flows are conditional flows (keboola.flow). 
keboola.orchestrator is NOT supported (dropped 0.57.0). # IDs are strings; phases use next[].goto + conditions; tasks are typed (job/notification/variable). # flow new/update validate against the live CF schema fetched from the stack (AI Service # configurationSchema for keboola.flow; NOT bundled) -> INVALID_FLOW_DEFINITION on failure. diff --git a/README.md b/README.md index e8f3b33d..c2867f60 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ kbagent workspace query --project prod --workspace-id WS_ID \ | **Search** | `kbagent search "QUERY"` -- find tables, configs, flows, data apps across every connected project in one call (since 0.30.0). Backed by Storage `global-search`; falls back to per-project body scan with `--search-type config-based`. | | **Configurations** | List, search, inspect, scaffold, update, delete configs. Full-text search across all config bodies (incl. rows). Row CRUD (`row-create / row-update / row-delete`) with `--merge`, `--set`, `--dry-run`, `--is-disabled / --is-enabled` (since 0.30.0). OAuth wizard URL minting with short-lived child tokens (`config oauth-url`, since 0.30.0). Variables management (`variables-set / -get / -clear`). Metadata CRUD + folder grouping. Output-bucket override (`set-default-bucket`). String-script auto-normalize for SQL transformations (closes the silent runtime crash from #245, since 0.28.0). | | **Jobs** | List, inspect, run with `--wait` polling (exponential curve), `--timeout` auto-kill, log tail on failure. Row-level execution for multi-row configs. Bulk terminate by ID list or filter (`job terminate --status processing` -- since 0.20.2). | -| **Flows** | Create, update, delete **conditional flows** (`keboola.flow`) with schema-backed validation (`next[].goto` transitions + conditions; typed `job`/`notification`/`variable` tasks; string ids). Offline `flow validate` and `flow schema --full`. Attach cron schedules (timezone + enabled/disabled state). 
`keboola.orchestrator` is not supported (dropped in 0.56.0). | +| **Flows** | Create, update, delete **conditional flows** (`keboola.flow`) with schema-backed validation (`next[].goto` transitions + conditions; typed `job`/`notification`/`variable` tasks; string ids). Offline `flow validate` and `flow schema --full`. Attach cron schedules (timezone + enabled/disabled state). `keboola.orchestrator` is not supported (dropped in 0.57.0). | | **Storage** | Buckets, tables, files -- full CRUD. Upload CSV (auto-creates bucket+table). Download by file ID or by tag. Descriptions on buckets/tables/columns (batch-applicable from YAML). Native column types (`VARCHAR(40)`, `NUMBER(18,2)`, `TIMESTAMP_TZ`, `VARIANT`, ...) with per-column `--not-null` and `--default` flags; dev branches auto-materialize target buckets on first write. **`storage swap-tables`** -- atomically swap a typed rebuild back into the original table name in a dev branch without touching downstream config references (since 0.28.0; closes the typify migration footgun). Streamed downloads cap memory at ~1 MiB regardless of table size. Parquet export via `unload-table --file-type parquet`. BigQuery dialect-aware paths in `bucket-detail`. | | **Dev branches** | Create a branch, activate it, and every command auto-targets it. Storage writes, MCP, sync -- everything follows. Storage reads default to production (safer). | | **Sync & GitOps** | Pull configs as YAML, edit in IDE, push back. SQL/Python extracted as real files. Diff and status tracking. Adopt existing kbc Go CLI checkouts (`sync init --adopt-existing`). 
| diff --git a/docs/superpowers/plans/2026-06-04-conditional-flow-support.md b/docs/superpowers/plans/2026-06-04-conditional-flow-support.md index 55844c09..2f7dbcef 100644 --- a/docs/superpowers/plans/2026-06-04-conditional-flow-support.md +++ b/docs/superpowers/plans/2026-06-04-conditional-flow-support.md @@ -2,7 +2,7 @@ > **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. -**Goal:** Rewrite the `kbagent flow` surface to support `keboola.flow` (Conditional Flows) with correct schema-backed validation, and drop `keboola.orchestrator` support entirely, shipping as a single breaking release 0.56.0. +**Goal:** Rewrite the `kbagent flow` surface to support `keboola.flow` (Conditional Flows) with correct schema-backed validation, and drop `keboola.orchestrator` support entirely, shipping as a single breaking release 0.57.0. **Architecture:** Follows the repo's 3-layer design (commands → services → client). A new pure-function module `services/flow_validation.py` loads a bundled copy of the upstream CF JSON Schema (`resources/conditional-flow-schema.json`) and performs structural (jsonschema Draft7) + semantic validation. `FlowService` hardcodes the single component `keboola.flow`, drops every `component_id` parameter, and calls the validator on create/update. The REST router mirror drops `component_id`. 
@@ -261,7 +261,7 @@ with: ```python # Flow (new in 0.22.0) SCHEDULE_DELETE_FAILED = "SCHEDULE_DELETE_FAILED" - # Conditional-flow validation (replaces INVALID_FLOW_DAG; since 0.56.0) + # Conditional-flow validation (replaces INVALID_FLOW_DAG; since 0.57.0) INVALID_FLOW_DEFINITION = "INVALID_FLOW_DEFINITION" ``` @@ -1492,7 +1492,7 @@ In `_format_flows_table`, after the errors loop, add: if legacy: formatter.warning( f"{legacy} legacy keboola.orchestrator flow(s) are not shown " - f"(orchestrator support was dropped in 0.56.0; migrate to keboola.flow)." + f"(orchestrator support was dropped in 0.57.0; migrate to keboola.flow)." ) ``` @@ -1932,7 +1932,7 @@ kbagent flow update --project NAME --flow-id ID [--name N] [--description D] [-- kbagent flow delete --project NAME --flow-id ID [--branch ID] [--yes] kbagent flow schedule --project NAME --flow-id ID --cron "0 6 * * *" [--timezone TZ] [--disabled] [--branch ID] kbagent flow schedule-remove --project NAME --flow-id ID [--branch ID] [--yes] -# Flows are conditional flows (keboola.flow). keboola.orchestrator is NOT supported (dropped 0.56.0). +# Flows are conditional flows (keboola.flow). keboola.orchestrator is NOT supported (dropped 0.57.0). # Execute a flow with: kbagent job run --project NAME --component-id keboola.flow --config-id ID ``` @@ -1971,7 +1971,7 @@ git commit -m "docs(flow): refresh CLAUDE.md + AGENT_CONTEXT for conditional flo Update the tool-selection matrix / version gate: flows are conditional flows; `--component-id` removed; add `flow validate`; reference the validate-before-push -loop; note orchestrator dropped in 0.56.0. +loop; note orchestrator dropped in 0.57.0. - [ ] **Step 2: SKILL.md + commands-reference.md** @@ -1989,7 +1989,7 @@ Remove all `dependsOn` content. - [ ] **Step 4: gotchas.md — new entries** -Add, each tagged `(since v0.56.0)`: +Add, each tagged `(since v0.57.0)`: - orchestrator support dropped; `flow list` hides legacy flows (shows a count). 
- `--component-id` removed from all flow subcommands. - old `dependsOn` template is invalid; use `phases[].next[].goto` + conditions. @@ -2003,7 +2003,7 @@ Mark the old "flow default-component differs between subcommands" gotcha as ```bash git add plugins/kbagent/ -git commit -m "docs(plugin): sync flow surface to conditional flows (0.56.0)" +git commit -m "docs(plugin): sync flow surface to conditional flows (0.57.0)" ``` --- @@ -2022,15 +2022,15 @@ Update any flow mention to conditional flows; drop `--component-id`. - [ ] **Step 2: Bump version** -In `pyproject.toml`, change `version = "0.55.0"` to `version = "0.56.0"`. +In `pyproject.toml`, change `version = "0.55.0"` to `version = "0.57.0"`. - [ ] **Step 3: Add changelog entry** -In `src/keboola_agent_cli/changelog.py`, add a `"0.56.0"` key at the TOP of +In `src/keboola_agent_cli/changelog.py`, add a `"0.57.0"` key at the TOP of `CHANGELOG` (newest-first) with a breaking-change callout, e.g.: ```python - "0.56.0": [ + "0.57.0": [ "BREAKING: `flow` command group now targets conditional flows " "(`keboola.flow`) only; `keboola.orchestrator` support is dropped. " "`--component-id` removed from every `flow` subcommand and from the " @@ -2048,7 +2048,7 @@ In `src/keboola_agent_cli/changelog.py`, add a `"0.56.0"` key at the TOP of - [ ] **Step 4: Sync plugin version** Run: `make version-sync` -Expected: `plugin.json` / `marketplace.json` updated to 0.56.0. +Expected: `plugin.json` / `marketplace.json` updated to 0.57.0. - [ ] **Step 5: Full check suite** @@ -2065,7 +2065,7 @@ Expected: PASS or clean CF-disabled skip. 
```bash git add README.md pyproject.toml src/keboola_agent_cli/changelog.py plugins/kbagent/.claude-plugin/ .claude-plugin/ -git commit -m "release: 0.56.0 -- conditional flow support, drop orchestrator" +git commit -m "release: 0.57.0 -- conditional flow support, drop orchestrator" ``` --- diff --git a/docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md b/docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md index 77e3bae1..63ae5b75 100644 --- a/docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md +++ b/docs/superpowers/specs/2026-06-04-conditional-flow-support-design.md @@ -1,7 +1,7 @@ # Design: Conditional Flow (`keboola.flow`) support in kbagent — drop `keboola.orchestrator` **Linear issue:** AJDA-2813 "CF: add support in new CLI" -**Target release:** 0.56.0 (one breaking release) +**Target release:** 0.57.0 (one breaking release) **Status:** design approved (subagent-driven; decisions recorded below) **Date:** 2026-06-04 @@ -131,7 +131,7 @@ should review them. | D10 | Behavior of `update_flow` validation. | Validation runs on the **merged** result (fetch current body when only one of phases/tasks supplied), preserving today's merge-aware behavior. | Matches issue Phase 2 and current code; avoids validating a half-config. | | D11 | `flow detail` JSON output. | **Full-body passthrough unchanged.** Only the **human** rendering is rewritten (per-phase transitions, task-type badges, retry). | Stable machine contract; agents already consume the raw body. | | D12 | `flow schema --full`. | Add `--full` to dump the **bundled JSON schema verbatim**; default prints the YAML template. JSON mode (`--json`) of `--full` returns the parsed schema object. | Agents need the exact contract; humans need a copy-paste template. | -| D13 | Removing `INVALID_FLOW_DAG` from `ErrorCode`. | **Remove** it and add `INVALID_FLOW_DEFINITION`. 
Grep confirmed references are only in this repo (errors.py, flow_service.py, changelog.py history, tests, docs) — no external wire consumers known. | Per coding-convention note "renaming/removing a code = major bump"; we accept this as part of the single 0.56.0 breaking release and changelog it loudly. | +| D13 | Removing `INVALID_FLOW_DAG` from `ErrorCode`. | **Remove** it and add `INVALID_FLOW_DEFINITION`. Grep confirmed references are only in this repo (errors.py, flow_service.py, changelog.py history, tests, docs) — no external wire consumers known. | Per coding-convention note "renaming/removing a code = major bump"; we accept this as part of the single 0.57.0 breaking release and changelog it loudly. | | D14 | `component_id` on REST models. | **Drop** `component_id` from `FlowCreate`/`FlowUpdate`/`FlowSchedule` and from query params on `detail`/`delete`/`list_schedules`/`remove_schedule`. Keep URL paths stable. | Issue Phase 4; CF is the only component now. | | D15 | Service signatures. | **Remove** `component_id` from all 8 service methods; hardcode `FLOW_COMPONENT_ID = "keboola.flow"`. Scheduler `target.componentId` is always `keboola.flow`. | Issue Phase 2. Reduces a whole class of "wrong default component" bugs. | | D16 | `notification` / `variable` task validation depth. | Rely on Draft7 structural validation for their internal shape; semantic layer only checks the cross-cutting rules (unique ids, phase refs, enabled-task-per-phase). | The schema already encodes their structure; re-implementing it in Python would drift. 
| @@ -308,14 +308,14 @@ flow validate --file @flow.yaml (offline) matrix); `SKILL.md` + `references/commands-reference.md`; full rewrite of `references/flow-workflow.md` (CF template, conditions cookbook, validate-before-push loop, `job run --component-id keboola.flow` to execute); `references/gotchas.md` -new entries tagged `(since v0.56.0)` (orchestrator dropped, `--component-id` +new entries tagged `(since v0.57.0)` (orchestrator dropped, `--component-id` removed, old `dependsOn` template invalid, `INVALID_FLOW_DAG` → `INVALID_FLOW_DEFINITION`, **string ids**), and mark the old default-component gotcha resolved; `README.md` if flows mentioned. ## 10. Release -Bump `pyproject.toml` → `0.56.0`; add `changelog.py` entry with an explicit +Bump `pyproject.toml` → `0.57.0`; add `changelog.py` entry with an explicit **breaking-change** callout (orchestrator dropped, `--component-id` removed, `INVALID_FLOW_DAG` → `INVALID_FLOW_DEFINITION`, CF schema validation, string ids); `make version-sync`; `make check`; `make test-e2e`. diff --git a/plugins/kbagent/.claude-plugin/plugin.json b/plugins/kbagent/.claude-plugin/plugin.json index d84bb456..f7884767 100644 --- a/plugins/kbagent/.claude-plugin/plugin.json +++ b/plugins/kbagent/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "kbagent", - "version": "0.56.0", + "version": "0.57.0", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "author": { "name": "Keboola", diff --git a/plugins/kbagent/agents/keboola-expert.md b/plugins/kbagent/agents/keboola-expert.md index f4e9eadb..6d408a8f 100644 --- a/plugins/kbagent/agents/keboola-expert.md +++ b/plugins/kbagent/agents/keboola-expert.md @@ -83,7 +83,7 @@ a critical failure. 
| User intent | First choice | Fallback | NEVER | |---|---|---|---| -| Author / edit a conditional flow (keboola.flow) | `kbagent flow validate --file @flow.yaml --project ALIAS` (fetches live schema; loop until clean) then `kbagent flow new`/`flow update --file` | fetch `flow detail`, merge phases/tasks locally, re-validate, push | `--component-id` (removed 0.56.0); integer ids (ids are STRINGS); `dependsOn` (use `next[].goto` + conditions); `keboola.orchestrator` (dropped 0.56.0); assuming `flow schema --full` works offline (now needs `--project`) | +| Author / edit a conditional flow (keboola.flow) | `kbagent flow validate --file @flow.yaml --project ALIAS` (fetches live schema; loop until clean) then `kbagent flow new`/`flow update --file` | fetch `flow detail`, merge phases/tasks locally, re-validate, push | `--component-id` (removed 0.57.0); integer ids (ids are STRINGS); `dependsOn` (use `next[].goto` + conditions); `keboola.orchestrator` (dropped 0.57.0); assuming `flow schema --full` works offline (now needs `--project`) | | Schedule flow | `kbagent flow schedule --cron ... 
[--timezone]` | `tool call create_flow_schedule` | raw REST to `/storage/configurations/keboola.scheduler` | | Create Snowflake transformation | `kbagent config new --component-id keboola.snowflake-transformation --name N --project P --push --no-files` (0.33.0+; one-shot, no scaffold, body defaults to `{}` and validation auto-skips for empty shell -- then `config update --set ...` to fill in script) **or** `kbagent config new --component-id keboola.snowflake-transformation --project P --output-dir D` + `config update --set ...` (scaffold-then-patch) | `tool call create_sql_transformation` (lower schema, avoids the MCP `create_config` Snowflake refusal) | `tool call create_config` (refuses keboola.snowflake-transformation) -- note: `config new --push` does NOT inherit this refusal because it wraps the raw Storage API directly | | Update SQL transformation body (script[]) | `kbagent config update --project P --component-id keboola.snowflake-transformation --config-id K --configuration @body.json` (0.28.0+ auto-normalizes string `script` to array; SQL gets statement-level split, Python/R gets `[script]` wrap; envelope's `normalizations: [...]` records every change. 0.31.0+ also re-splits multi-statement LIST elements -- closes the #274 ODBC `statement count 2 vs desired 1` crash that survives the 0.28.0 string fix) | -- | `tool call update_sql_transformation` -- still vulnerable to BOTH the #245 string-vs-array AND #274 list-element runtime crashes because it pushes raw to Storage API; raw `PUT /v2/storage/components/.../configs/...` -- same trap | @@ -161,7 +161,7 @@ read it when a trigger fires. Each `(X.Y.Z+)` tag is the version floor. **Flow / config edits** -- **Conditional flows only (since 0.56.0)**: `flow` targets `keboola.flow`; +- **Conditional flows only (since 0.57.0)**: `flow` targets `keboola.flow`; `keboola.orchestrator` is dropped and `--component-id` is removed from every `flow` subcommand. 
IDs are **strings**; phases use `next[].goto` (a phase id or `null` to end) + optional `condition`; tasks are typed (`job`/`notification`/ diff --git a/plugins/kbagent/skills/kbagent/references/commands-reference.md b/plugins/kbagent/skills/kbagent/references/commands-reference.md index 99f92011..e37e6efe 100644 --- a/plugins/kbagent/skills/kbagent/references/commands-reference.md +++ b/plugins/kbagent/skills/kbagent/references/commands-reference.md @@ -204,7 +204,7 @@ Requires the project to be added with its **master ('owner') Storage API token** - `kai history [--project NAME] [--limit N]` -- list recent Kai chat sessions (default limit: 10) ## Flows (Conditional Flows -- keboola.flow only) -> Since 0.56.0 the `flow` group targets `keboola.flow` (Conditional Flows) ONLY; `keboola.orchestrator` is dropped and `--component-id` is removed from every subcommand. IDs are **strings**; phases use `next[].goto` (a phase id or `null`) + optional `condition`; tasks are typed (`job`/`notification`/`variable`). The old `dependsOn` template is invalid. Execute a flow with `kbagent job run --component-id keboola.flow --config-id ID`. See `flow-workflow.md`. +> Since 0.57.0 the `flow` group targets `keboola.flow` (Conditional Flows) ONLY; `keboola.orchestrator` is dropped and `--component-id` is removed from every subcommand. IDs are **strings**; phases use `next[].goto` (a phase id or `null`) + optional `condition`; tasks are typed (`job`/`notification`/`variable`). The old `dependsOn` template is invalid. Execute a flow with `kbagent job run --component-id keboola.flow --config-id ID`. See `flow-workflow.md`. - `flow list [--project NAME] [--branch ID] [--with-schedules]` -- list conditional flows (keboola.flow) across one or all projects. Legacy keboola.orchestrator configs are NOT listed; their total appears as `legacy_orchestrator_count` (+ a warning). 
`--with-schedules` enriches each row with `schedules: [{schedule_id, cron, timezone, enabled}, ...]` via one extra keboola.scheduler list call per project (not per flow) - `flow detail --project NAME --flow-id ID [--branch ID]` -- full phase/task breakdown; per-phase transitions (`→ goto [condition | default]`), typed-task badges, retry info; JSON is the raw body unchanged - `flow schema [--full --project NAME]` -- plain form prints the offline conditional-flow YAML template (string ids, `next[].goto`, typed tasks). `--full` fetches and dumps the **live** JSON Schema from the stack (AI Service `configurationSchema` for `keboola.flow`) and **requires `--project`** -- the schema is no longer bundled diff --git a/plugins/kbagent/skills/kbagent/references/flow-workflow.md b/plugins/kbagent/skills/kbagent/references/flow-workflow.md index f20ace74..a6a2052c 100644 --- a/plugins/kbagent/skills/kbagent/references/flow-workflow.md +++ b/plugins/kbagent/skills/kbagent/references/flow-workflow.md @@ -1,6 +1,6 @@ # Flow Workflow (Conditional Flows / keboola.flow) -> **Since v0.56.0:** the `flow` command group targets **`keboola.flow` +> **Since v0.57.0:** the `flow` command group targets **`keboola.flow` > (Conditional Flows) ONLY**. `keboola.orchestrator` support was dropped and > `--component-id` was removed from every subcommand. The old `dependsOn` > phase-DAG template is **invalid**. IDs are **strings**. diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index ed6ce330..fa04d5d5 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -2097,40 +2097,40 @@ CLI hides via its four-bucket response, but they matter when interpreting result subdirectory and there is no risk of name collisions. Override with `--output DIR` if you need a custom location. 
-## Flow: conditional flows only; `--component-id` removed (since v0.56.0) +## Flow: conditional flows only; `--component-id` removed (since v0.57.0) -- **RESOLVED (since v0.56.0):** the old foot-gun where `flow new` defaulted to +- **RESOLVED (since v0.57.0):** the old foot-gun where `flow new` defaulted to `keboola.flow` but `flow detail/update/delete/schedule/...` defaulted to `keboola.orchestrator` is **gone**. The `flow` group now targets the single component `keboola.flow`, and `--component-id` has been **removed** from every `flow` subcommand. Passing it errors with "No such option". -- **`keboola.orchestrator` is dropped (since v0.56.0).** `flow list` does NOT +- **`keboola.orchestrator` is dropped (since v0.57.0).** `flow list` does NOT list orchestrator configs; it reports their total as `legacy_orchestrator_count` (+ a warning) so you can see why a legacy flow "disappeared". There is no migration command (cross-component migration is out of scope). -- **IDs are STRINGS (since v0.56.0).** `phase.id`, `task.id`, `next.id`, +- **IDs are STRINGS (since v0.57.0).** `phase.id`, `task.id`, `next.id`, `task.phase`, and `goto` are all JSON strings (`goto` is `string | null`). Integer ids fail Draft7 validation and are rejected with `INVALID_FLOW_DEFINITION`. -- **The old `dependsOn` phase-DAG template is invalid (since v0.56.0).** Phases +- **The old `dependsOn` phase-DAG template is invalid (since v0.57.0).** Phases use `next[].goto` (a phase id or `null` to end) with an optional `condition`; a phase with conditional transitions must end with a default (condition-less) transition. Tasks are typed (`job`/`notification`/`variable`). -- **`INVALID_FLOW_DAG` was renamed to `INVALID_FLOW_DEFINITION` (since v0.56.0).** +- **`INVALID_FLOW_DAG` was renamed to `INVALID_FLOW_DEFINITION` (since v0.57.0).** Update any code/string matching on the old error code. -- **Validation (since v0.56.0):** `kbagent flow validate --file @flow.yaml [--project ALIAS]`. 
+- **Validation (since v0.57.0):** `kbagent flow validate --file @flow.yaml [--project ALIAS]`. With `--project` it fetches the **live** JSON Schema from the stack and runs full structural + semantic checks; without `--project` it runs semantic-only and adds a note that structural validation was skipped (no schema source). Exit 0 valid, exit 2 on errors. Use it in a tight loop before `flow new`/`flow update`. -- **Schema is fetched live from the stack, NOT bundled (since v0.56.0).** The +- **Schema is fetched live from the stack, NOT bundled (since v0.57.0).** The conditional-flow JSON Schema is served by the stack's component registry and read at runtime via the AI Service `configurationSchema` for `keboola.flow` (the same path `config new --push` uses). There is nothing vendored, pinned, or to re-sync. `flow schema --full` therefore **requires `--project`** (plain `flow schema` is still the offline YAML template). -- **Graceful semantic-only degradation (since v0.56.0).** If the live schema +- **Graceful semantic-only degradation (since v0.57.0).** If the live schema fetch fails (network error, or the AI Service returns no `configurationSchema`), `flow new`/`flow update`/`flow validate --project` do **not** block: structural validation is skipped, the semantic checks still run (Storage does not validate diff --git a/plugins/kbagent/skills/kbagent/references/schedule-workflow.md b/plugins/kbagent/skills/kbagent/references/schedule-workflow.md index 0be08cdd..f6170f59 100644 --- a/plugins/kbagent/skills/kbagent/references/schedule-workflow.md +++ b/plugins/kbagent/skills/kbagent/references/schedule-workflow.md @@ -56,7 +56,7 @@ Each row has every field you need for a spreadsheet or dashboard: } ``` -`schedule list` returns every `keboola.scheduler` config regardless of its target component, so legacy schedules may still show `"parent_component_id": "keboola.orchestrator"` -- those targets are no longer manageable via `kbagent flow` (orchestrator support was dropped 
in v0.56.0), but the schedules themselves list and audit normally. +`schedule list` returns every `keboola.scheduler` config regardless of its target component, so legacy schedules may still show `"parent_component_id": "keboola.orchestrator"` -- those targets are no longer manageable via `kbagent flow` (orchestrator support was dropped in v0.57.0), but the schedules themselves list and audit normally. ## Inspect a single schedule diff --git a/pyproject.toml b/pyproject.toml index 52e81752..805b95a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "keboola-agent-cli" -version = "0.56.0" +version = "0.57.0" description = "AI-friendly CLI for managing Keboola projects" readme = "README.md" requires-python = ">=3.12" diff --git a/src/keboola_agent_cli/commands/context.py b/src/keboola_agent_cli/commands/context.py index 73bf7ba4..1c79983e 100644 --- a/src/keboola_agent_cli/commands/context.py +++ b/src/keboola_agent_cli/commands/context.py @@ -564,7 +564,7 @@ kbagent feature user-remove --project ALIAS --email EMAIL --feature NAME [--dry-run] [--yes] Per-user features (GET/POST/DELETE /manage/users/{{email}}/features). -### Flows (Conditional Flows -- keboola.flow only; orchestrator dropped in 0.56.0) +### Flows (Conditional Flows -- keboola.flow only; orchestrator dropped in 0.57.0) kbagent flow list [--project NAME] [--branch ID] [--with-schedules] List conditional flows (keboola.flow) across projects. Legacy keboola.orchestrator diff --git a/src/keboola_agent_cli/commands/flow.py b/src/keboola_agent_cli/commands/flow.py index 3523079f..5e42bf65 100644 --- a/src/keboola_agent_cli/commands/flow.py +++ b/src/keboola_agent_cli/commands/flow.py @@ -151,7 +151,7 @@ def flow_list( """List conditional flows (keboola.flow) across projects. Legacy keboola.orchestrator flows are NOT listed (orchestrator support was - dropped in 0.56.0); a count of any that exist is shown as a warning. 
+ dropped in 0.57.0); a count of any that exist is shown as a warning. With ``--with-schedules`` each row includes a ``schedules`` list of ``{schedule_id, cron, timezone, enabled}`` entries. Flows without @@ -237,7 +237,7 @@ def _format_flows_table( if legacy: formatter.warning( f"{legacy} legacy flow(s) are not shown " - f"(Legacy Flows were dropped in 0.56.0; migrate them to Conditional Flows)." + f"(Legacy Flows were dropped in 0.57.0; migrate them to Conditional Flows)." ) diff --git a/src/keboola_agent_cli/errors.py b/src/keboola_agent_cli/errors.py index 4ab6d770..6e02005b 100644 --- a/src/keboola_agent_cli/errors.py +++ b/src/keboola_agent_cli/errors.py @@ -98,7 +98,7 @@ class ErrorCode(StrEnum): # Flow (new in 0.22.0) SCHEDULE_DELETE_FAILED = "SCHEDULE_DELETE_FAILED" - # Conditional-flow validation (replaces INVALID_FLOW_DAG; since 0.56.0) + # Conditional-flow validation (replaces INVALID_FLOW_DAG; since 0.57.0) INVALID_FLOW_DEFINITION = "INVALID_FLOW_DEFINITION" # Data apps (new in 0.27.0) diff --git a/src/keboola_agent_cli/services/flow_service.py b/src/keboola_agent_cli/services/flow_service.py index 6fbec9ca..4e101b72 100644 --- a/src/keboola_agent_cli/services/flow_service.py +++ b/src/keboola_agent_cli/services/flow_service.py @@ -3,7 +3,7 @@ Provides CRUD for keboola.flow (Conditional Flow) configurations, plus schedule bind/unbind via keboola.scheduler component configs. -keboola.orchestrator support was dropped in 0.56.0; this service targets the +keboola.orchestrator support was dropped in 0.57.0; this service targets the single component keboola.flow. Legacy orchestrator configs are still counted (not listed) so the CLI can warn users why a flow "disappeared". @@ -194,7 +194,7 @@ def list_flows( Only ``keboola.flow`` configs are returned. Legacy ``keboola.orchestrator`` configs are counted (not listed) and surfaced as ``legacy_orchestrator_count`` so the CLI can warn users why a flow - "disappeared" (orchestrator support was dropped in 0.56.0). 
+ "disappeared" (orchestrator support was dropped in 0.57.0). When ``with_schedules`` is True, each flow row is enriched with a ``schedules`` list pulled from the same project's diff --git a/tests/test_agent_prompt.py b/tests/test_agent_prompt.py index d31486b4..76c8e9c1 100644 --- a/tests/test_agent_prompt.py +++ b/tests/test_agent_prompt.py @@ -110,7 +110,7 @@ def test_rule_present(self, agent_body: str, needle: str, why: str) -> None: INLINE_GOTCHAS = [ - # Conditional flows: validate-before-push + INVALID_FLOW_DEFINITION (since 0.56.0) + # Conditional flows: validate-before-push + INVALID_FLOW_DEFINITION (since 0.57.0) ("INVALID_FLOW_DEFINITION", "conditional-flow validation error code"), # Snowflake transformation scaffolding refusal by MCP create_config ("keboola.snowflake-transformation", "MCP create_config refuses this component"), diff --git a/tests/test_server_router_calls.py b/tests/test_server_router_calls.py index feb604d8..8ce43e66 100644 --- a/tests/test_server_router_calls.py +++ b/tests/test_server_router_calls.py @@ -625,7 +625,7 @@ def test_reference_data_delete_route(tmp_path: Path) -> None: # --------------------------------------------------------------------------- # flows.py POST /flows/validate + GET /flows/{project}/schema -# New in 0.56.0 -- mirror `flow validate` / `flow schema --full`. +# New in 0.57.0 -- mirror `flow validate` / `flow schema --full`. # --------------------------------------------------------------------------- _CF_PHASES = [{"id": "p1", "name": "Extract", "next": [{"id": "n1", "goto": None}]}] diff --git a/uv.lock b/uv.lock index e92c5b91..6f6596f2 100644 --- a/uv.lock +++ b/uv.lock @@ -580,7 +580,7 @@ wheels = [ [[package]] name = "keboola-agent-cli" -version = "0.56.0" +version = "0.57.0" source = { editable = "." 
} dependencies = [ { name = "croniter" }, From d0f251cf91e9d1dadfb6d99e731c249721b83c3d Mon Sep 17 00:00:00 2001 From: Petr Date: Thu, 4 Jun 2026 23:36:19 +0200 Subject: [PATCH 7/8] refactor(flow): FlowSchemaFetch dataclass + close two test gaps Addresses self-review findings on the conditional-flows work: - Replace the (schema, reason) tuple from FlowService._fetch_flow_schema / fetch_flow_schema with a frozen FlowSchemaFetch dataclass (CONTRIBUTING.md forbids new semantically-distinct tuple returns). Updates all six src call sites (flow_service, flows router, flow command) and the tuple mocks in test_flow_service / test_flow_cli / test_server_router_calls. - Add a structural integer-id rejection test (test_flow_validation): an integer id is rejected only when the live schema is supplied; without a schema the semantic checks coerce it and stay silent. - Add router-level tests that POST/PATCH /flows/{project} call create_flow / update_flow WITHOUT component_id, proving the orchestrator-drop is enforced at the REST layer (FlowCreate/FlowUpdate silently drop a legacy component_id). 
--- src/keboola_agent_cli/commands/flow.py | 8 ++- src/keboola_agent_cli/server/routers/flows.py | 13 ++-- .../services/flow_service.py | 68 ++++++++++++------- tests/test_flow_cli.py | 15 ++-- tests/test_flow_service.py | 18 ++--- tests/test_flow_validation.py | 19 ++++++ tests/test_server_router_calls.py | 66 ++++++++++++++++-- 7 files changed, 154 insertions(+), 53 deletions(-) diff --git a/src/keboola_agent_cli/commands/flow.py b/src/keboola_agent_cli/commands/flow.py index 5e42bf65..052d5a34 100644 --- a/src/keboola_agent_cli/commands/flow.py +++ b/src/keboola_agent_cli/commands/flow.py @@ -397,7 +397,7 @@ def flow_schema( service = get_service(ctx, "flow_service") try: - schema, reason = service.fetch_flow_schema(project) + fetch = service.fetch_flow_schema(project) except ConfigError as exc: formatter.error(message=exc.message, error_code=ErrorCode.CONFIG_ERROR) raise typer.Exit(code=5) from None @@ -405,6 +405,7 @@ def flow_schema( formatter.error(message=exc.message, error_code=exc.error_code, retryable=exc.retryable) raise typer.Exit(code=map_error_to_exit_code(exc)) from None + schema, reason = fetch.schema, fetch.reason if schema is None: formatter.error( message=f"Could not fetch the conditional-flow schema: {reason}", @@ -482,12 +483,13 @@ def flow_validate( if project: service = get_service(ctx, "flow_service") try: - schema, reason = service.fetch_flow_schema(project) + fetch = service.fetch_flow_schema(project) except ConfigError as exc: formatter.error(message=exc.message, error_code=ErrorCode.CONFIG_ERROR) raise typer.Exit(code=5) from None + schema = fetch.schema if schema is None: - notes.append(f"structural schema validation skipped: {reason}") + notes.append(f"structural schema validation skipped: {fetch.reason}") else: notes.append( "structural schema validation skipped: no schema source " diff --git a/src/keboola_agent_cli/server/routers/flows.py b/src/keboola_agent_cli/server/routers/flows.py index a9725eff..a9129cb5 100644 --- 
a/src/keboola_agent_cli/server/routers/flows.py +++ b/src/keboola_agent_cli/server/routers/flows.py @@ -61,9 +61,10 @@ def validate( schema: dict[str, Any] | None = None notes: list[str] = [] if body.project: - schema, reason = registry.flow.fetch_flow_schema(body.project) + fetch = registry.flow.fetch_flow_schema(body.project) + schema = fetch.schema if schema is None: - notes.append(f"structural schema validation skipped: {reason}") + notes.append(f"structural schema validation skipped: {fetch.reason}") else: notes.append( "structural schema validation skipped: no schema source " @@ -80,13 +81,13 @@ def validate( @router.get("/{project}/schema", summary="Fetch the live conditional-flow JSON Schema") def get_schema(project: str, registry: ServiceRegistry = Depends(get_registry)) -> dict[str, Any]: """Dump the keboola.flow JSON Schema served by the stack. Mirrors `kbagent flow schema --full`.""" - schema, reason = registry.flow.fetch_flow_schema(project) - if schema is None: + fetch = registry.flow.fetch_flow_schema(project) + if fetch.schema is None: raise HTTPException( status_code=502, - detail=f"Could not fetch the conditional-flow schema: {reason}", + detail=f"Could not fetch the conditional-flow schema: {fetch.reason}", ) - return {"format": "json-schema", "schema": schema} + return {"format": "json-schema", "schema": fetch.schema} @router.get("", summary="List flows across projects") diff --git a/src/keboola_agent_cli/services/flow_service.py b/src/keboola_agent_cli/services/flow_service.py index 4e101b72..7b7fd059 100644 --- a/src/keboola_agent_cli/services/flow_service.py +++ b/src/keboola_agent_cli/services/flow_service.py @@ -17,6 +17,7 @@ import json import logging from collections.abc import Callable +from dataclasses import dataclass from typing import Any from ..ai_client import AiServiceClient @@ -35,6 +36,20 @@ AiClientFactory = Callable[[str, str], AiServiceClient] +@dataclass(frozen=True) +class FlowSchemaFetch: + """Outcome of fetching the live 
keboola.flow JSON Schema from the stack. + + ``schema`` holds the JSON Schema dict on success and is ``None`` when it + could not be obtained; ``reason`` explains the failure (``None`` on + success). A ``None`` schema must NOT block a write -- callers degrade to + semantic-only validation and surface ``reason`` as a warning. + """ + + schema: dict[str, Any] | None + reason: str | None + + def default_ai_client_factory(stack_url: str, token: str) -> AiServiceClient: """Default factory: build an ``AiServiceClient`` for the given project.""" return AiServiceClient(stack_url=stack_url, token=token) @@ -135,47 +150,50 @@ def __init__( # ── schema fetch ───────────────────────────────────────────────── - def _fetch_flow_schema( - self, project: ProjectConfig - ) -> tuple[dict[str, Any] | None, str | None]: + def _fetch_flow_schema(self, project: ProjectConfig) -> FlowSchemaFetch: """Fetch the live keboola.flow JSON Schema from the AI Service. - Returns ``(schema, None)`` on success, or ``(None, reason)`` when the - schema cannot be obtained (network error, KeboolaApiError, malformed or - empty schema). A ``None`` schema must NOT block a write -- the caller - degrades to semantic-only validation and surfaces ``reason`` as a - warning. + Returns a ``FlowSchemaFetch`` with ``schema`` set on success, or + ``schema=None`` + a ``reason`` when the schema cannot be obtained + (network error, KeboolaApiError, malformed or empty schema). A ``None`` + schema must NOT block a write -- the caller degrades to semantic-only + validation and surfaces ``reason`` as a warning. """ ai_client = self._ai_client_factory(project.stack_url, project.token) try: raw = ai_client.get_component_detail(FLOW_COMPONENT_ID) except KeboolaApiError as exc: - return None, exc.message + return FlowSchemaFetch(schema=None, reason=exc.message) except Exception as exc: # Intentionally broad: ANY schema-fetch failure must degrade to # semantic-only validation, never block the write. 
Narrowing to # OSError-style transport errors would miss httpx exceptions # (httpx.HTTPError does not subclass OSError) and re-raise them. - return None, str(exc) + return FlowSchemaFetch(schema=None, reason=str(exc)) finally: ai_client.close() try: detail = ComponentDetail(**raw) except (TypeError, ValueError) as exc: - return None, f"component detail could not be parsed ({exc})" + return FlowSchemaFetch( + schema=None, reason=f"component detail could not be parsed ({exc})" + ) schema = detail.configuration_schema if not schema: - return None, "AI Service returned no configurationSchema for keboola.flow" - return schema, None + return FlowSchemaFetch( + schema=None, reason="AI Service returned no configurationSchema for keboola.flow" + ) + return FlowSchemaFetch(schema=schema, reason=None) - def fetch_flow_schema(self, alias: str) -> tuple[dict[str, Any] | None, str | None]: + def fetch_flow_schema(self, alias: str) -> FlowSchemaFetch: """Public schema fetch for a project alias (used by ``flow validate --project`` and ``flow schema --full --project``). - Returns ``(schema, None)`` on success or ``(None, reason)`` on any - failure -- the caller decides how to surface the reason. + Returns a ``FlowSchemaFetch`` (``schema`` set on success, or + ``schema=None`` + ``reason`` on any failure) -- the caller decides how + to surface the reason. 
""" projects = self.resolve_projects([alias]) project = projects[alias] @@ -373,12 +391,12 @@ def create_flow( project = projects[alias] effective_branch = branch_id or project.active_branch_id - schema, schema_reason = self._fetch_flow_schema(project) + fetch = self._fetch_flow_schema(project) warnings: list[str] = [] - if schema is None: - warnings.append(f"structural schema validation skipped: {schema_reason}") + if fetch.schema is None: + warnings.append(f"structural schema validation skipped: {fetch.reason}") - definition_errors = validate_conditional_flow(phases, tasks, schema) + definition_errors = validate_conditional_flow(phases, tasks, fetch.schema) if definition_errors: raise KeboolaApiError( message="Flow definition is invalid: " + "; ".join(definition_errors), @@ -450,11 +468,13 @@ def update_flow( merged_phases = phases if phases is not None else current_body.get("phases", []) merged_tasks = tasks if tasks is not None else current_body.get("tasks", []) - schema, schema_reason = self._fetch_flow_schema(project) - if schema is None: - warnings.append(f"structural schema validation skipped: {schema_reason}") + fetch = self._fetch_flow_schema(project) + if fetch.schema is None: + warnings.append(f"structural schema validation skipped: {fetch.reason}") - definition_errors = validate_conditional_flow(merged_phases, merged_tasks, schema) + definition_errors = validate_conditional_flow( + merged_phases, merged_tasks, fetch.schema + ) if definition_errors: raise KeboolaApiError( message="Flow definition is invalid: " + "; ".join(definition_errors), diff --git a/tests/test_flow_cli.py b/tests/test_flow_cli.py index 9f14b8d3..e84e693e 100644 --- a/tests/test_flow_cli.py +++ b/tests/test_flow_cli.py @@ -18,6 +18,7 @@ from keboola_agent_cli.config_store import ConfigStore from keboola_agent_cli.errors import ConfigError, KeboolaApiError from keboola_agent_cli.models import ProjectConfig +from keboola_agent_cli.services.flow_service import FlowSchemaFetch runner = 
CliRunner() TEST_TOKEN = "999-token-abc" @@ -280,7 +281,7 @@ def test_schema_full_without_project_errors(self, tmp_path: Path) -> None: def test_schema_full_with_project_dumps_live_schema(self, tmp_path: Path) -> None: store = _setup_config(tmp_path, {"prod": {}}) mock_flow = MagicMock() - mock_flow.fetch_flow_schema.return_value = (_LIVE_SCHEMA, None) + mock_flow.fetch_flow_schema.return_value = FlowSchemaFetch(schema=_LIVE_SCHEMA, reason=None) result = _invoke(store, mock_flow, ["flow", "schema", "--full", "--project", "prod"]) assert result.exit_code == 0 assert "$schema" in result.output or "draft-07" in result.output @@ -289,7 +290,7 @@ def test_schema_full_with_project_dumps_live_schema(self, tmp_path: Path) -> Non def test_schema_full_with_project_json_mode(self, tmp_path: Path) -> None: store = _setup_config(tmp_path, {"prod": {}}) mock_flow = MagicMock() - mock_flow.fetch_flow_schema.return_value = (_LIVE_SCHEMA, None) + mock_flow.fetch_flow_schema.return_value = FlowSchemaFetch(schema=_LIVE_SCHEMA, reason=None) result = _invoke( store, mock_flow, ["--json", "flow", "schema", "--full", "--project", "prod"] ) @@ -300,7 +301,9 @@ def test_schema_full_with_project_json_mode(self, tmp_path: Path) -> None: def test_schema_full_fetch_failure_errors(self, tmp_path: Path) -> None: store = _setup_config(tmp_path, {"prod": {}}) mock_flow = MagicMock() - mock_flow.fetch_flow_schema.return_value = (None, "network down") + mock_flow.fetch_flow_schema.return_value = FlowSchemaFetch( + schema=None, reason="network down" + ) result = _invoke(store, mock_flow, ["flow", "schema", "--full", "--project", "prod"]) assert result.exit_code == 4 assert "network down" in result.output @@ -378,7 +381,7 @@ def test_validate_with_project_full_validation(self, tmp_path: Path) -> None: # Live schema fetched -> bad task type caught structurally (exit 2). 
store = _setup_config(tmp_path, {"prod": {}}) mock_flow = MagicMock() - mock_flow.fetch_flow_schema.return_value = (_LIVE_SCHEMA, None) + mock_flow.fetch_flow_schema.return_value = FlowSchemaFetch(schema=_LIVE_SCHEMA, reason=None) bad = _VALID_FLOW_YAML.replace("type: job", "type: nonsense") f = tmp_path / "bad.yaml" f.write_text(bad) @@ -394,7 +397,9 @@ def test_validate_with_project_fetch_failure_degrades(self, tmp_path: Path) -> N # Schema fetch fails -> semantic-only, valid flow still passes + a note. store = _setup_config(tmp_path, {"prod": {}}) mock_flow = MagicMock() - mock_flow.fetch_flow_schema.return_value = (None, "network down") + mock_flow.fetch_flow_schema.return_value = FlowSchemaFetch( + schema=None, reason="network down" + ) f = tmp_path / "flow.yaml" f.write_text(_VALID_FLOW_YAML) result = _invoke( diff --git a/tests/test_flow_service.py b/tests/test_flow_service.py index d5909372..b6918f85 100644 --- a/tests/test_flow_service.py +++ b/tests/test_flow_service.py @@ -276,24 +276,24 @@ def test_create_flow_fetch_failure_still_rejects_semantic_errors(): def test_fetch_flow_schema_success(): svc = _make_flow_service(MagicMock()) - schema, reason = svc.fetch_flow_schema("prod") - assert reason is None - assert schema and schema["required"] == ["phases", "tasks"] + fetch = svc.fetch_flow_schema("prod") + assert fetch.reason is None + assert fetch.schema and fetch.schema["required"] == ["phases", "tasks"] def test_fetch_flow_schema_empty_returns_reason(): svc = _make_flow_service(MagicMock(), ai_client=_make_ai_client(schema={})) - schema, reason = svc.fetch_flow_schema("prod") - assert schema is None - assert reason and "configurationSchema" in reason + fetch = svc.fetch_flow_schema("prod") + assert fetch.schema is None + assert fetch.reason and "configurationSchema" in fetch.reason def test_fetch_flow_schema_error_returns_reason(): ai = _make_ai_client(raise_exc=KeboolaApiError("nope", status_code=404, error_code="NOT_FOUND")) svc = 
_make_flow_service(MagicMock(), ai_client=ai) - schema, reason = svc.fetch_flow_schema("prod") - assert schema is None - assert reason == "nope" + fetch = svc.fetch_flow_schema("prod") + assert fetch.schema is None + assert fetch.reason == "nope" # --------------------------------------------------------------------------- diff --git a/tests/test_flow_validation.py b/tests/test_flow_validation.py index a1de1163..5def7676 100644 --- a/tests/test_flow_validation.py +++ b/tests/test_flow_validation.py @@ -140,6 +140,25 @@ def test_structural_error_not_reported_without_schema(): assert errors == [] +def test_structural_error_integer_id_with_schema(): + # IDs must be strings (the schema declares "id": {"type": "string"}). An + # integer id is a structural violation -- caught only when the live schema + # is supplied. Uses a task id (never referenced elsewhere) so the failure + # is purely structural, not a knock-on semantic missing-phase error. + tasks = _valid_tasks() + tasks[0]["id"] = 1 # integer instead of "task-1" + errors = validate_conditional_flow(_valid_phases(), tasks, _SCHEMA) + assert errors # rejected: non-string id flagged structurally + + +def test_integer_id_not_flagged_structurally_without_schema(): + # Same integer id, but no schema => structural check is skipped. Semantic + # checks coerce ids to str, so a non-referenced integer id raises nothing. 
+ tasks = _valid_tasks() + tasks[0]["id"] = 1 + assert validate_conditional_flow(_valid_phases(), tasks, None) == [] + + # ── semantic checks (always run, schema or not) ─────────────────────────── diff --git a/tests/test_server_router_calls.py b/tests/test_server_router_calls.py index 8ce43e66..fb1983b3 100644 --- a/tests/test_server_router_calls.py +++ b/tests/test_server_router_calls.py @@ -35,6 +35,7 @@ from keboola_agent_cli.server import create_app from keboola_agent_cli.server.dependencies import ServiceRegistry, get_manage_token, get_registry +from keboola_agent_cli.services.flow_service import FlowSchemaFetch AUTH = {"Authorization": "Bearer test-token"} PROJECT = "my-proj" @@ -668,9 +669,8 @@ def test_flows_validate_without_project_is_semantic_only(tmp_path: Path) -> None def test_flows_validate_with_project_fetches_live_schema(tmp_path: Path) -> None: """`project` in body -> fetch_flow_schema(alias) is called and schema is applied.""" flow_svc = MagicMock() - flow_svc.fetch_flow_schema.return_value = ( - {"type": "object", "required": ["phases", "tasks"]}, - None, + flow_svc.fetch_flow_schema.return_value = FlowSchemaFetch( + schema={"type": "object", "required": ["phases", "tasks"]}, reason=None ) registry = _mock_registry(flow=flow_svc) app = _make_app_with_registry(tmp_path, registry) @@ -692,7 +692,9 @@ def test_flows_validate_with_project_fetches_live_schema(tmp_path: Path) -> None def test_flows_validate_schema_fetch_failure_degrades(tmp_path: Path) -> None: """Fetch failure -> semantic-only validation + skip reason in notes, never 5xx.""" flow_svc = MagicMock() - flow_svc.fetch_flow_schema.return_value = (None, "AI Service unreachable") + flow_svc.fetch_flow_schema.return_value = FlowSchemaFetch( + schema=None, reason="AI Service unreachable" + ) registry = _mock_registry(flow=flow_svc) app = _make_app_with_registry(tmp_path, registry) @@ -734,7 +736,9 @@ def test_flows_get_schema_returns_live_schema(tmp_path: Path) -> None: GET 
/flows/{project}/{config_id} (which would call get_flow_detail). """ flow_svc = MagicMock() - flow_svc.fetch_flow_schema.return_value = ({"type": "object"}, None) + flow_svc.fetch_flow_schema.return_value = FlowSchemaFetch( + schema={"type": "object"}, reason=None + ) registry = _mock_registry(flow=flow_svc) app = _make_app_with_registry(tmp_path, registry) @@ -750,7 +754,9 @@ def test_flows_get_schema_returns_live_schema(tmp_path: Path) -> None: def test_flows_get_schema_fetch_failure_is_502(tmp_path: Path) -> None: """Schema unavailable -> 502 with the reason (REST has no degrade path to offer).""" flow_svc = MagicMock() - flow_svc.fetch_flow_schema.return_value = (None, "no configurationSchema") + flow_svc.fetch_flow_schema.return_value = FlowSchemaFetch( + schema=None, reason="no configurationSchema" + ) registry = _mock_registry(flow=flow_svc) app = _make_app_with_registry(tmp_path, registry) @@ -759,3 +765,51 @@ def test_flows_get_schema_fetch_failure_is_502(tmp_path: Path) -> None: assert res.status_code == 502, res.text assert "no configurationSchema" in res.text + + +def test_flows_create_drops_component_id(tmp_path: Path) -> None: + """POST /flows/{project} calls create_flow WITHOUT component_id. + + Orchestrator support is dropped: FlowCreate has no component_id field, so + even a client that still sends one has it silently dropped (Pydantic + extra='ignore') and the service is never asked to target a component. 
+ """ + flow_svc = MagicMock() + flow_svc.create_flow.return_value = {"id": "999", "name": "My Flow"} + registry = _mock_registry(flow=flow_svc) + app = _make_app_with_registry(tmp_path, registry) + + with TestClient(app) as client: + res = client.post( + f"/flows/{PROJECT}", + headers=AUTH, + json={ + "name": "My Flow", + "phases": _CF_PHASES, + "tasks": _CF_TASKS, + "component_id": "keboola.orchestrator", # legacy field -> dropped + }, + ) + + assert res.status_code == 200, res.text + flow_svc.create_flow.assert_called_once() + assert "component_id" not in flow_svc.create_flow.call_args.kwargs + + +def test_flows_update_drops_component_id(tmp_path: Path) -> None: + """PATCH /flows/{project}/{config_id} calls update_flow WITHOUT component_id.""" + flow_svc = MagicMock() + flow_svc.update_flow.return_value = {"id": "999", "name": "Renamed"} + registry = _mock_registry(flow=flow_svc) + app = _make_app_with_registry(tmp_path, registry) + + with TestClient(app) as client: + res = client.patch( + f"/flows/{PROJECT}/999", + headers=AUTH, + json={"name": "Renamed", "component_id": "keboola.orchestrator"}, + ) + + assert res.status_code == 200, res.text + flow_svc.update_flow.assert_called_once() + assert "component_id" not in flow_svc.update_flow.call_args.kwargs From 76a34ab9fe8ae20673b047ba152b6722cf5b7caa Mon Sep 17 00:00:00 2001 From: Petr Date: Fri, 5 Jun 2026 00:18:42 +0200 Subject: [PATCH 8/8] fix(flow): address fresh review findings (stale doc code, dead helper) - docs/build-your-own-client.md: INVALID_FLOW_DAG -> INVALID_FLOW_DEFINITION. The error code was renamed in this PR; the client-docs example still referenced the removed name (docs/ was outside the original rename sweep, which only grepped src/ + tests/). - Remove the dead _count_phases_tasks helper from flow_service.py: the conditional-flows rewrite orphaned it (zero src callers, exercised only by an isolated test) and it carried a banned tuple[int, int] return. Drops its two tests too. 
--- docs/build-your-own-client.md | 2 +- src/keboola_agent_cli/services/flow_service.py | 5 ----- tests/test_flow_service.py | 10 ---------- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/docs/build-your-own-client.md b/docs/build-your-own-client.md index 5a3e75ab..42ac41b3 100644 --- a/docs/build-your-own-client.md +++ b/docs/build-your-own-client.md @@ -266,7 +266,7 @@ major bump). | 500 | `INTERNAL_ERROR`, `UNKNOWN_ERROR` | Bug in the server. File an issue. | Domain-specific codes (`DATA_APP_*`, `KAI_*`, `JOB_TIMEOUT_TERMINATED`, -`INVALID_FLOW_DAG`, …) appear alongside the generic ones when the failure +`INVALID_FLOW_DEFINITION`, …) appear alongside the generic ones when the failure is specific to a feature area. Treat them as informational refinement — the HTTP status is the contract. diff --git a/src/keboola_agent_cli/services/flow_service.py b/src/keboola_agent_cli/services/flow_service.py index 7b7fd059..d6354551 100644 --- a/src/keboola_agent_cli/services/flow_service.py +++ b/src/keboola_agent_cli/services/flow_service.py @@ -70,11 +70,6 @@ def _parse_configuration(raw: Any) -> dict[str, Any]: return raw or {} -def _count_phases_tasks(body: dict[str, Any]) -> tuple[int, int]: - """Return (phase_count, task_count) from a flow configuration body.""" - return len(body.get("phases", [])), len(body.get("tasks", [])) - - def _collect_schedules_by_parent( client: Any, branch_id: int | None ) -> dict[tuple[str, str], list[dict[str, Any]]]: diff --git a/tests/test_flow_service.py b/tests/test_flow_service.py index b6918f85..4a54a79c 100644 --- a/tests/test_flow_service.py +++ b/tests/test_flow_service.py @@ -10,7 +10,6 @@ from keboola_agent_cli.services.flow_service import ( FLOW_COMPONENT_ID, FlowService, - _count_phases_tasks, _parse_configuration, ) @@ -124,15 +123,6 @@ def test_none_returns_empty(self): assert _parse_configuration(None) == {} -class TestCountPhasesTasks: - def test_counts(self): - body = {"phases": [{"id": "a"}, {"id": "b"}], 
"tasks": [{"id": "1"}]} - assert _count_phases_tasks(body) == (2, 1) - - def test_empty(self): - assert _count_phases_tasks({}) == (0, 0) - - # --------------------------------------------------------------------------- # Component constant # ---------------------------------------------------------------------------