diff --git a/CLAUDE.md b/CLAUDE.md index 6b5c228e..b4323140 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -512,6 +512,10 @@ kbagent semantic-layer import --project P --file PATH [--model M] [--types T,T,. kbagent semantic-layer promote --from-project A --to-project B [--from-model M] [--to-model M] [--types T,T,...] [--dry-run] [--yes] kbagent semantic-layer build --project P [--model M] --tables T,T,... [--name N] [--dry-run] [--keep-on-failure] [--output PATH] kbagent semantic-layer token --encrypt --project P --component-id C +kbagent semantic-layer reference-data list --project P [--model M] +kbagent semantic-layer reference-data get --project P (--id ID | --dimension D) +kbagent semantic-layer reference-data set --project P [--model M] --dimension D --members-file PATH [--dataset-id T] [--description X] +kbagent semantic-layer reference-data delete --project P --id ID [--yes] # Alias: `kbagent sl ...` (hidden) is equivalent to `kbagent semantic-layer ...`. kbagent http get PATH [--timeout SECONDS] diff --git a/plugins/kbagent/skills/kbagent/SKILL.md b/plugins/kbagent/skills/kbagent/SKILL.md index de76f586..df05e50d 100644 --- a/plugins/kbagent/skills/kbagent/SKILL.md +++ b/plugins/kbagent/skills/kbagent/SKILL.md @@ -60,6 +60,8 @@ description: > DIM_METRIC_THRESHOLD, dangling metric FK, orphaned constraint, phantom field, AGG on STRING, SUM on PCT, deep validate, sl, kbagent sl, semantic layer wizard, sl-build, sl-add, sl-edit, + reference data, semantic-layer reference-data, chart of accounts, COA, + dimension members, account list, dimension data, member list, developer portal, dev-portal, apps-api, register component, vendor app, portal property, ui-options, encryption portal, defaultBucket portal, app icon, configurationSchema portal, publish component, deprecate component, @@ -297,6 +299,10 @@ When working inside a git repository or project directory, run `kbagent init` (o | Remove a constraint | `kbagent semantic-layer remove constraint --project PROJECT 
--name NAME` | | Remove a relationship. | `kbagent semantic-layer remove relationship --project PROJECT --name NAME` | | Remove a glossary term. | `kbagent semantic-layer remove glossary --project PROJECT --term TERM` | +| List reference-data records (dimension summaries; use ``get`` for members) | `kbagent semantic-layer reference-data list --project PROJECT` | +| Fetch one record (all members) by ``--id`` or by ``--dimension`` | `kbagent semantic-layer reference-data get --project PROJECT --id ID` | +| Create or replace a reference-data record (keyed by dimension) | `kbagent semantic-layer reference-data set --project PROJECT --dimension DIMENSION --members-file MEMBERS-FILE` | +| Delete a reference-data record by UUID (server-side soft-delete) | `kbagent semantic-layer reference-data delete --project PROJECT --id ID` | | Encrypt the project's storage token for transformation `user_properties` | `kbagent sl token --project PROJECT --component-id COMPONENT-ID` | | Build a semantic-layer model from a list of storage tables (non-interactive) | `kbagent sl build --project PROJECT` | | Promote a model from one project to another (NEW + overwrite CHANGED; never deletes) | `kbagent sl promote --from-project FROM-PROJECT --to-project TO-PROJECT` | @@ -325,6 +331,10 @@ When working inside a git repository or project directory, run `kbagent init` (o | Remove a constraint | `kbagent sl remove constraint --project PROJECT --name NAME` | | Remove a relationship. | `kbagent sl remove relationship --project PROJECT --name NAME` | | Remove a glossary term. 
| `kbagent sl remove glossary --project PROJECT --term TERM` | +| List reference-data records (dimension summaries; use ``get`` for members) | `kbagent sl reference-data list --project PROJECT` | +| Fetch one record (all members) by ``--id`` or by ``--dimension`` | `kbagent sl reference-data get --project PROJECT --id ID` | +| Create or replace a reference-data record (keyed by dimension) | `kbagent sl reference-data set --project PROJECT --dimension DIMENSION --members-file MEMBERS-FILE` | +| Delete a reference-data record by UUID (server-side soft-delete) | `kbagent sl reference-data delete --project PROJECT --id ID` | | GET an endpoint on the running kbagent serve | `kbagent http get ` | | POST to an endpoint on the running kbagent serve | `kbagent http post ` | | PATCH an endpoint on the running kbagent serve | `kbagent http patch ` | diff --git a/plugins/kbagent/skills/kbagent/references/commands-reference.md b/plugins/kbagent/skills/kbagent/references/commands-reference.md index 2072c7f3..048294ab 100644 --- a/plugins/kbagent/skills/kbagent/references/commands-reference.md +++ b/plugins/kbagent/skills/kbagent/references/commands-reference.md @@ -279,6 +279,15 @@ Manage Keboola metastore models -- datasets, metrics, relationships, constraints - `semantic-layer build --project P [--model M] --tables T,T,... [--name N] [--dry-run] [--keep-on-failure] [--output PATH]` -- non-interactive heuristic builder. **AI caveat**: the existing `ai_client` has no arbitrary-JSON endpoint, so `build` falls back to a deterministic heuristic synthesising one dataset + one COUNT(*) metric + one glossary entry per table (FQN derived; fields[] role-classified). Response carries `fallback_used: "heuristic"`. The push loop walks all 5 child types in dependency order -- this **fixes** the `sl-build` skill bug where `semantic-constraint` was silently dropped. `--model` omitted creates a new model (default name `kbagent_build_model` or `--name N`). 
**Rollback on push failure (since v0.41.10)**: every successfully-POSTed child is DELETEd in reverse PUSH_ORDER, and the model itself is DELETEd if we created it during this call. The wrapped `KeboolaApiError` carries `details.rollback={attempted, posted_children, deleted, failed_deletes, model_created_here, model_deleted, model_uuid}` so operators get full diagnostics. Pass `--keep-on-failure` to preserve the partial state for forensic inspection (mirrors `data-app create --keep-on-failure`); the wrapped error then carries `details.rollback.attempted=False, reason='keep_on_failure'`. - `semantic-layer token --encrypt --project P --component-id C` -- encrypt the project's storage token for a transformation's `user_properties`. Builds `{"#metastore_token": }` from the project's already-stored Storage API token and delegates to the existing EncryptService. `--encrypt` is currently required; other modes are refused with `USAGE_ERROR` (exit 2). Output (human): the raw envelope ready to paste. JSON: full `{encrypted, component_id, project}`. +### Reference data (dimension members, e.g. a Chart of Accounts) (since 0.55.0) + +`semantic-reference-data` is a per-dimension member store: ONE record per dimension holding the full member list in a `members[]` array. The driving use case is a Chart of Accounts (the account list + all attributes) held in the metastore instead of a hardcoded Storage table. It is deliberately kept **outside** `build` / `export` / `diff` / cascade / `PUSH_ORDER` — its members come from `DIM_COA`, not from AI generation — so it has its own self-contained CRUD surface. Member field names mirror the `DIM_COA` columns 1:1 (snake_case: `account_code`, `account_name`, `parent_code`, `is_leaf`, `level_1_code`, `cf_category`, …). + +- `semantic-layer reference-data list --project P [--model M]` -- list dimension records (summaries: `id`, `dimension_name`, `model_uuid`, `dataset_id`, `member_count`). `--model` filters to one model. 
Members are omitted from the summary; use `get` for them. +- `semantic-layer reference-data get --project P (--id ID | --dimension D)` -- fetch one record with all members. Resolve by record UUID (`--id`) or by `--dimension`. The dimension is **unique per project**, so the lookup is project-wide and needs no model. Passing both `--id` and `--dimension` (or neither) is a usage error (exit 2). Returns `{id, dimension_name, model_uuid, dataset_id, member_count, revision, members[]}`. +- `semantic-layer reference-data set --project P [--model M] --dimension D --members-file PATH [--dataset-id T] [--description X]` -- create-or-replace. `--members-file` is a JSON array of member objects (`-` reads stdin). **Idempotent** on `dimension`: the lookup is project-wide (the envelope `name` = dimension is unique per project per type), so an existing record is replaced in place via `PUT` (the metastore increments `meta.revision`, preserving history) — distinct from the DELETE+POST used by `edit` — regardless of which `--model` is passed (the resolved model is stored on the record); otherwise a new record is `POST`-ed. Response: `{id, dimension_name, member_count, action: "created"|"updated"}`. +- `semantic-layer reference-data delete --project P --id ID [--yes]` -- delete by UUID (server-side soft-delete; the record stays in revision history). Non-TTY without `--yes` refuses with exit 2. 
+ ## Self-Call HTTP (inside `kbagent serve` subprocesses; since v0.40.0) - `http get PATH [--timeout SECONDS]` -- GET an endpoint on the running `kbagent serve` - `http post PATH [--body JSON|@file|-] [--timeout SECONDS]` -- POST with optional JSON body diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index 46ecc01f..85869ad8 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -144,6 +144,43 @@ project's semantic model is populated before kicking off a downstream pipeline; the previous workaround (a `keboola-mcp-server` MCP server entry in `.mcp.json` solely for these two tools) can be dropped. +## `semantic-layer reference-data` holds a whole dimension as ONE record; `set` is PUT-replace, not append (since v0.55.0) + +`semantic-reference-data` stores one record **per dimension** (e.g. a Chart +of Accounts), with the full member list in a `members[]` array — NOT one +record per member. Consequences an agent must internalize: + +- **`set` replaces the entire members array.** `kbagent sl reference-data + set --dimension chart_of_accounts --members-file coa.json` is + create-or-replace, idempotent on the `dimension`. To add/remove a + single account you must `get` the record, mutate the array client-side, + and `set` the whole thing back. There is no per-member endpoint. +- **It uses the metastore's real `PUT`** (revisioned update, `meta.revision` + increments, history preserved) when a record for that dimension already + exists — distinct from the DELETE+POST that `edit metric|…` uses. + A brand-new dimension is `POST`-ed. 
+- **The envelope `name` is the dimension, unique per project per type, so + the `set`/`get` lookup is project-wide.** Because the dimension name is the + project-unique key, `set` finds and PUT-replaces an existing record + regardless of which `--model` you pass (the resolved model is just stored + on the record) — it does NOT POST and collide with `ALREADY_EXISTS`. For + the same reason `get --dimension` needs no `--model` (one dimension name + per project for this type). `get` rejects passing both `--id` and + `--dimension` (exit 2). +- **Member field names mirror the `DIM_COA` columns 1:1 (snake_case):** + `account_code` (required key), `account_name`, `parent_code`, `is_leaf` + (integer 0/1, not bool), `level_1_code`, `cf_category`, … The metastore + schema sets `additionalProperties: true` on members, so unknown columns + are stored but not validated. +- **Deliberately invisible to `build` / `export` / `diff` / cascade / + `PUSH_ORDER`.** Deleting a model does NOT cascade-delete its + reference-data records (they are not model children in the snapshot + sense); `export`/`diff` will not include them. Manage them only through + the `reference-data` sub-app. +- **JSON Schema cannot enforce cross-member referential integrity** (every + `parent_code` resolving to some member `account_code`). That stays an + app/sync-layer concern. + ## `workspace list` / `workspace detail` now expose loginType + RO + qs_compatible (since v0.42.0, closes #304) Before v0.42.0 the Storage workspace endpoint already returned diff --git a/src/keboola_agent_cli/commands/_semantic_layer_reference_data.py b/src/keboola_agent_cli/commands/_semantic_layer_reference_data.py new file mode 100644 index 00000000..e2fafbe1 --- /dev/null +++ b/src/keboola_agent_cli/commands/_semantic_layer_reference_data.py @@ -0,0 +1,247 @@ +"""Typer sub-app for ``kbagent semantic-layer reference-data``. 
reference_data_app = typer.Typer(
    name="reference-data",
    help=(
        "Manage reference / dimension-member records (e.g. a Chart of "
        "Accounts) held in the metastore: list / get / set / delete."
    ),
    no_args_is_help=True,
)


@reference_data_app.callback(invoke_without_command=True)
def _reference_data_permission_check(ctx: typer.Context) -> None:
    """Per-leaf permission check for the ``reference-data`` sub-app.

    ``check_cli_permission`` composes ``semantic-layer.reference-data.{leaf}``
    so ``list`` / ``get`` stay ``read`` while ``set`` is ``write`` and
    ``delete`` is ``destructive`` (see permissions.OPERATION_REGISTRY).
    """
    check_cli_permission(ctx, "semantic-layer.reference-data")


def _print_reference_data_table(console: Console, data: dict) -> None:
    """Render the ``list`` result as a table, or a dim notice when it is empty.

    Args:
        console: Rich console to print to.
        data: ``list`` payload; reads ``project`` and ``reference_data``.
    """
    project = data.get("project", "")
    records = data.get("reference_data", [])
    if not records:
        console.print(f"[dim]No reference-data records in project '{project}'.[/dim]")
        return
    table = Table(title=f"Reference data in '{project}'")
    table.add_column("Dimension", style="bold cyan")
    table.add_column("UUID", style="dim")
    table.add_column("Members", justify="right")
    table.add_column("Dataset", max_width=40)
    for record in records:
        table.add_row(
            record.get("dimension_name", ""),
            record.get("id", ""),
            str(record.get("member_count", 0)),
            # dataset_id may be None in the summary; the table cell needs a string.
            record.get("dataset_id") or "",
        )
    console.print(table)


def _print_reference_data_detail(console: Console, data: dict) -> None:
    """Render one record: a header line, optional dataset, and a member preview.

    Only the first 10 members are printed; the remainder is summarised as a
    count so a large dimension does not flood the terminal.

    Args:
        console: Rich console to print to.
        data: ``get`` payload; reads ``dimension_name``, ``id``,
            ``member_count``, ``revision``, ``dataset_id`` and ``members``.
    """
    console.print(
        f"[bold]{data.get('dimension_name', '')}[/bold] "
        f"([dim]{data.get('id', '')}[/dim]) — "
        f"{data.get('member_count', 0)} members, rev {data.get('revision')}"
    )
    if data.get("dataset_id"):
        console.print(f" dataset: {data['dataset_id']}")
    members = data.get("members") or []
    preview = members[:10]
    for member in preview:
        # Prefer the DIM_COA-style keys, then fall back to generic ones.
        key = member.get("account_code") or member.get("code") or "?"
        name = member.get("account_name") or member.get("name") or ""
        console.print(f" · [cyan]{key}[/cyan] {name}")
    hidden = len(members) - len(preview)
    if hidden > 0:
        console.print(f" [dim]… and {hidden} more[/dim]")


def _print_reference_data_set_result(console: Console, data: dict) -> None:
    """Render the ``set`` result; the leading verb is the capitalised ``action``."""
    action = str(data.get("action", "set")).capitalize()
    console.print(
        f"[bold green]{action}[/bold green] reference data "
        f"[cyan]{data.get('dimension_name', '')}[/cyan] "
        f"({data.get('member_count', 0)} members, [dim]{data.get('id', '')}[/dim])"
    )


def _print_reference_data_delete_result(console: Console, data: dict) -> None:
    """Render the ``delete`` result from the payload's ``removed`` entry."""
    removed = data.get("removed", {})
    console.print(
        f"[bold green]Removed reference data[/bold green] "
        f"[cyan]{removed.get('dimension_name', '')}[/cyan] ([dim]{removed.get('id', '')}[/dim])"
    )


def _members_file_error(formatter: Any, message: str) -> typer.Exit:
    """Report a members-file validation error and return the exit to raise."""
    formatter.error(message=message, error_code=ErrorCode.VALIDATION_ERROR)
    return typer.Exit(code=2)


def _load_members(formatter: Any, members_file: str) -> list[dict]:
    """Read a JSON array of member objects from a file or ``-`` (stdin).

    Args:
        formatter: Output formatter; its ``error`` method reports failures.
        members_file: Path to the JSON file, or ``-`` to read stdin.

    Returns:
        The parsed list of member objects.

    Raises:
        typer.Exit: With code 2 when the input cannot be read or decoded, is
            not valid JSON, is not an array, or contains a non-object element.
    """
    try:
        if members_file == "-":
            raw = sys.stdin.read()
        else:
            # JSON interchange is UTF-8; do not depend on the locale default.
            raw = Path(members_file).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it is listed
        # explicitly to get a clean usage error instead of a traceback.
        raise _members_file_error(
            formatter, f"Could not read members file {members_file!r}: {exc}"
        ) from exc
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise _members_file_error(formatter, f"Members file is not valid JSON: {exc}") from exc
    if not isinstance(parsed, list):
        raise _members_file_error(
            formatter, "Members file must contain a JSON array of member objects."
        )
    for index, member in enumerate(parsed):
        if not isinstance(member, dict):
            raise _members_file_error(
                formatter,
                f"Members file entry at index {index} is not a JSON object; "
                "every member must be an object.",
            )
    return parsed


@reference_data_app.command("list")
def reference_data_list(
    ctx: typer.Context,
    project: str = typer.Option(..., "--project", help="Project alias"),
    model: str | None = typer.Option(None, "--model", help="Filter to one model (name or UUID)"),
) -> None:
    """List reference-data records (dimension summaries; use ``get`` for members)."""
    formatter = get_formatter(ctx)
    service = get_service(ctx, "semantic_layer_service")
    result = _handle_service_call(
        ctx, service.list_reference_data, alias=project, model_name_or_uuid=model
    )
    formatter.output(result, _print_reference_data_table)


@reference_data_app.command("get")
def reference_data_get(
    ctx: typer.Context,
    project: str = typer.Option(..., "--project", help="Project alias"),
    id_: str | None = typer.Option(None, "--id", help="Record UUID"),
    dimension: str | None = typer.Option(
        None, "--dimension", help="Dimension name (project-unique; instead of --id)"
    ),
) -> None:
    """Fetch one record (all members) by ``--id`` or by ``--dimension``."""
    formatter = get_formatter(ctx)
    # Exactly one selector is required; both or neither is a usage error.
    if id_ is not None and dimension is not None:
        formatter.error(
            message="Pass --id or --dimension, not both.",
            error_code=ErrorCode.VALIDATION_ERROR,
        )
        raise typer.Exit(code=2)
    if id_ is None and dimension is None:
        formatter.error(
            message="Provide --id or --dimension.",
            error_code=ErrorCode.VALIDATION_ERROR,
        )
        raise typer.Exit(code=2)
    service = get_service(ctx, "semantic_layer_service")
    result = _handle_service_call(
        ctx,
        service.get_reference_data,
        alias=project,
        record_id=id_,
        dimension=dimension,
    )
    formatter.output(result, _print_reference_data_detail)


@reference_data_app.command("set")
def reference_data_set(
    ctx: typer.Context,
    project: str = typer.Option(..., "--project", help="Project alias"),
    model: str | None = typer.Option(None, "--model", help="Model name or UUID"),
    dimension: str = typer.Option(
        ..., "--dimension", help="Dimension name, e.g. 'chart_of_accounts'"
    ),
    members_file: str = typer.Option(
        ...,
        "--members-file",
        help="Path to a JSON array of member objects ('-' reads stdin).",
    ),
    dataset_id: str | None = typer.Option(
        None, "--dataset-id", help="Optional tableId of the descriptive dataset (e.g. DIM_COA)"
    ),
    description: str | None = typer.Option(None, "--description", help="Optional description"),
) -> None:
    """Create or replace a reference-data record (keyed by dimension).

    Idempotent: an existing record for the same dimension (project-unique) is
    replaced in place (revision increments); otherwise a new record is created.
    """
    formatter = get_formatter(ctx)
    service = get_service(ctx, "semantic_layer_service")
    # Parse and validate the members input before the service call is made.
    members = _load_members(formatter, members_file)
    result = _handle_service_call(
        ctx,
        service.set_reference_data,
        alias=project,
        model_name_or_uuid=model,
        dimension=dimension,
        members=members,
        dataset_id=dataset_id,
        description=description,
    )
    formatter.output(result, _print_reference_data_set_result)


@reference_data_app.command("delete")
def reference_data_delete(
    ctx: typer.Context,
    project: str = typer.Option(..., "--project", help="Project alias"),
    id_: str = typer.Option(..., "--id", help="Record UUID"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip the confirm prompt"),
) -> None:
    """Delete a reference-data record by UUID (server-side soft-delete)."""
    formatter = get_formatter(ctx)
    service = get_service(ctx, "semantic_layer_service")
    if not yes:
        if not _is_stdin_tty():
            formatter.error(
                message=f"Refusing to delete reference-data {id_!r} non-interactively without --yes.",
                error_code=ErrorCode.VALIDATION_ERROR,
            )
            raise typer.Exit(code=2)
        # NOTE(review): in JSON mode on a TTY no prompt is shown and the
        # delete proceeds without --yes -- confirm this is intended.
        if not formatter.json_mode and not typer.confirm(f"Delete reference-data record '{id_}'?"):
            formatter.console.print("Aborted.")
            raise typer.Exit(code=0)
    result = _handle_service_call(ctx, service.delete_reference_data, alias=project, record_id=id_)
    formatter.output(result, _print_reference_data_delete_result)
b/src/keboola_agent_cli/commands/context.py @@ -919,6 +919,20 @@ Three-way diff: project<->project, project<->file, file<->file. Output groups changes per entity type: added, removed, changed (with diff_keys). + kbagent semantic-layer reference-data list|get|set|delete ... (since 0.55.0) + Dimension-member records (semantic-reference-data): one record per + dimension holding the full member list in a members[] array (e.g. a + Chart of Accounts). Deliberately OUTSIDE build/export/diff/cascade. + list --project P [--model M] -> dimension summaries (id, dimension, + member_count). get --project P (--id ID | --dimension D) -> + one record + all members (dimension is project-unique, so no model + needed). set --project P [--model M] --dimension D + --members-file PATH ('-' = stdin) [--dataset-id T] [--description X] -> + create-or-replace, idempotent on dimension (project-wide lookup): an + existing record is replaced in place via PUT (revision++), else POST. + delete --project P --id ID [--yes]. Member keys mirror the DIM_COA + columns (account_code, account_name, parent_code, is_leaf, ...). + kbagent semantic-layer add metric|dataset|relationship|constraint|glossary ... Add one entity. 
def put_item(
    self,
    item_type: SemanticType,
    item_id: str,
    name: str,
    data: dict[str, Any],
) -> dict[str, Any]:
    """Update an existing item in place with a ``PUT`` request.

    The higher-level ``edit`` operations use DELETE+POST; this method
    instead issues a ``PUT``, which updates the record in place and lets
    the server increment ``meta.revision``, so the metastore's revision
    history is preserved. ``data`` is the inner ``attributes`` payload;
    the outer envelope is built here with the same shape as
    :meth:`post_item`.

    Returns the ``data`` member of the JSON response when the response is an
    object (falling back to the whole object if ``data`` is absent), or the
    decoded body unchanged when it is not an object.

    Raises :class:`KeboolaApiError` with ``error_code=NOT_FOUND`` on 404.
    """
    target = f"/api/v1/repository/{item_type}/{item_id}"
    payload = {
        "name": name,
        "data": data,
        "branch": _ENVELOPE_BRANCH,
        "schemaVersion": _ENVELOPE_SCHEMA_VERSION,
        "scope": _ENVELOPE_SCOPE,
    }
    decoded = self._do_request("PUT", target, json=payload).json()
    if not isinstance(decoded, dict):
        return decoded
    return decoded.get("data", decoded)
class RefDataSet(BaseModel):
    """Create-or-replace body for ``PUT /reference-data`` (idempotent on dimension).

    Every field is forwarded unchanged to
    ``SemanticLayerService.set_reference_data`` by the route handler.
    """

    # Project alias; passed to the service as ``alias``.
    project: str
    # Optional model name or UUID; passed as ``model_name_or_uuid``.
    model: str | None = None
    # Dimension name the record is keyed by.
    dimension: str
    # Full member list; each member is a JSON object.
    members: list[dict[str, Any]]
    # Optional dataset identifier stored on the record.
    dataset_id: str | None = None
    # Optional free-text description stored on the record.
    description: str | None = None
@router.get("/reference-data", summary="List reference-data records")
def list_reference_data(
    project: str,
    model: str | None = None,
    registry: ServiceRegistry = Depends(get_registry),
) -> dict[str, Any]:
    """List dimension-member records (summaries; use the by-id route for members).

    ``model`` optionally restricts the listing to one model (name or UUID).
    """
    return registry.semantic_layer.list_reference_data(alias=project, model_name_or_uuid=model)


@router.get("/reference-data/{record_id}", summary="Get one reference-data record")
def get_reference_data(
    record_id: str,
    project: str,
    registry: ServiceRegistry = Depends(get_registry),
) -> dict[str, Any]:
    """Fetch one record (all members) by UUID."""
    return registry.semantic_layer.get_reference_data(alias=project, record_id=record_id)


@router.put("/reference-data", summary="Create or replace a reference-data record")
def set_reference_data(
    body: RefDataSet, registry: ServiceRegistry = Depends(get_registry)
) -> dict[str, Any]:
    """Create or replace a record, keyed by dimension. Idempotent; PUT semantics.

    The dimension is looked up project-wide, so an existing record for the
    same dimension is replaced regardless of ``model``; ``model`` only selects
    the model stored on the record.
    """
    return registry.semantic_layer.set_reference_data(
        alias=body.project,
        model_name_or_uuid=body.model,
        dimension=body.dimension,
        members=body.members,
        dataset_id=body.dataset_id,
        description=body.description,
    )


@router.delete("/reference-data/{record_id}", summary="Delete a reference-data record")
def delete_reference_data(
    record_id: str,
    project: str,
    registry: ServiceRegistry = Depends(get_registry),
) -> dict[str, Any]:
    """Delete a record by UUID (``--yes`` implicit on REST; server-side soft-delete)."""
    return registry.semantic_layer.delete_reference_data(alias=project, record_id=record_id)
REFERENCE_DATA_TYPE = "semantic-reference-data"

# Factory returning a fresh client; each ``run_*`` helper closes what it opens.
OpenClient = Callable[[], "MetastoreClient"]


def unpack_record(
    alias: str,
    item: dict[str, Any],
    *,
    include_members: bool,
    include_project: bool = True,
) -> dict[str, Any]:
    """Project a raw metastore item into the CLI reference-data shape.

    Args:
        alias: Project alias, emitted as ``project`` when requested.
        item: Raw metastore item; reads ``id``, ``attributes`` and ``meta``.
        include_members: Add the full ``members`` list to the result.
        include_project: Add the top-level ``project`` key. The list summary
            omits it (the alias is already on the envelope); the detail
            shapes keep it.

    Returns:
        A dict with ``id``, ``dimension_name``, ``model_uuid``, ``dataset_id``,
        ``description``, ``member_count`` and ``revision``, plus the optional
        ``project`` and ``members`` keys.
    """
    attrs = item.get("attributes") or {}
    members = attrs.get("members") or []
    out: dict[str, Any] = {"project": alias} if include_project else {}
    out.update(
        {
            "id": item.get("id", ""),
            "dimension_name": attrs.get("dimensionName", ""),
            "model_uuid": attrs.get("modelUUID", ""),
            "dataset_id": attrs.get("datasetId"),
            "description": attrs.get("description"),
            "member_count": len(members),
            "revision": (item.get("meta") or {}).get("revision"),
        }
    )
    if include_members:
        out["members"] = members
    return out


def find_by_dimension(
    client: MetastoreClient,
    dimension: str,
) -> dict[str, Any] | None:
    """Return the existing record for ``dimension`` (project-wide) or None.

    The ``semantic-reference-data`` envelope ``name`` (= the dimension) is
    unique **per project per type**, so the lookup is project-wide and
    independent of any model — this keeps ``set`` idempotent regardless of the
    ``--model`` passed (a record created under model A is still found, and
    PUT-replaced, when ``set --model B --dimension <name>`` runs, instead of
    taking the POST path and colliding with ``ALREADY_EXISTS``).

    The match is made client-side against ``attributes.dimensionName`` over
    the full listing; the first matching item wins.
    """
    return next(
        (
            item
            for item in client.list_items(REFERENCE_DATA_TYPE)
            if (item.get("attributes") or {}).get("dimensionName") == dimension
        ),
        None,
    )


def _require_by_dimension(client: MetastoreClient, alias: str, dimension: str) -> dict[str, Any]:
    """Return the record for ``dimension`` or raise ``NOT_FOUND``."""
    found = find_by_dimension(client, dimension)
    if found is None:
        raise KeboolaApiError(
            message=(
                f"No reference-data record for dimension {dimension!r} "
                f"in project {alias!r}."
            ),
            error_code=ErrorCode.NOT_FOUND,
        )
    return found


def run_list(
    open_client: OpenClient,
    alias: str,
    model_name_or_uuid: str | None,
) -> dict[str, Any]:
    """List reference-data records (optionally scoped to one model).

    Returns ``{project, reference_data: [...]}`` where each entry is the
    :func:`unpack_record` summary without ``project`` and without members —
    use :func:`run_get` for the full members.
    """
    client = open_client()
    try:
        model_uuid: str | None = None
        if model_name_or_uuid is not None:
            model_uuid, _ = resolve_model_uuid(client, model_name_or_uuid)
        raw = client.list_items(REFERENCE_DATA_TYPE, model_uuid)
    finally:
        client.close()
    records = [
        unpack_record(alias, item, include_members=False, include_project=False) for item in raw
    ]
    return {"project": alias, "reference_data": records}


def run_get(
    open_client: OpenClient,
    alias: str,
    *,
    record_id: str | None,
    dimension: str | None,
) -> dict[str, Any]:
    """Fetch one record by ``record_id``, or by ``dimension``.

    ``dimension`` is the project-unique key (the metastore envelope ``name``),
    so resolving by dimension is a project-wide lookup — no model needed.
    When both selectors are supplied, ``record_id`` takes precedence.

    Raises:
        KeboolaApiError: ``VALIDATION_ERROR`` when neither selector is given
            (raised before a client is opened); ``NOT_FOUND`` when no record
            exists for ``dimension``.
    """
    if record_id is None and dimension is None:
        raise KeboolaApiError(
            message="Provide --id or --dimension.",
            error_code=ErrorCode.VALIDATION_ERROR,
        )
    client = open_client()
    try:
        if record_id is not None:
            item = client.get_item(REFERENCE_DATA_TYPE, record_id)
        elif dimension is not None:
            item = _require_by_dimension(client, alias, dimension)
        else:  # pragma: no cover - the guard above guarantees one is set
            raise KeboolaApiError(
                message="Provide --id or --dimension.",
                error_code=ErrorCode.VALIDATION_ERROR,
            )
    finally:
        client.close()
    return unpack_record(alias, item, include_members=True)


def run_set(
    open_client: OpenClient,
    alias: str,
    model_name_or_uuid: str | None,
    *,
    dimension: str,
    members: list[dict[str, Any]],
    dataset_id: str | None = None,
    description: str | None = None,
) -> dict[str, Any]:
    """Create or replace a reference-data record, keyed by ``dimension``.

    Idempotent on ``dimensionName`` (the project-unique envelope ``name``): an
    existing record is replaced in place via ``PUT`` (revision increments,
    history preserved), with ``modelUUID`` updated to the resolved model;
    otherwise a new record is ``POST``-ed. The lookup is project-wide, so
    ``set`` stays idempotent regardless of which ``--model`` is passed.

    ``datasetId`` / ``description`` are only included in the payload when a
    non-empty value is passed.

    Returns:
        The :func:`unpack_record` summary (no members) plus
        ``action: "created" | "updated"``.

    Raises:
        KeboolaApiError: ``VALIDATION_ERROR`` when ``members`` is not a list
            or contains an element that is not an object; raised before a
            client is opened.
    """
    if not isinstance(members, list):
        raise KeboolaApiError(
            message="members must be a JSON array of member objects.",
            error_code=ErrorCode.VALIDATION_ERROR,
        )
    # Reject scalar or array members up front so malformed input fails with a
    # clear validation error instead of reaching the metastore.
    bad_index = next(
        (index for index, member in enumerate(members) if not isinstance(member, dict)),
        None,
    )
    if bad_index is not None:
        raise KeboolaApiError(
            message=f"members[{bad_index}] must be a JSON object.",
            error_code=ErrorCode.VALIDATION_ERROR,
        )
    client = open_client()
    try:
        model_uuid, _ = resolve_model_uuid(client, model_name_or_uuid)
        data: dict[str, Any] = {
            "modelUUID": model_uuid,
            "dimensionName": dimension,
            "members": members,
        }
        if dataset_id:
            data["datasetId"] = dataset_id
        if description:
            data["description"] = description

        existing = find_by_dimension(client, dimension)
        if existing is not None:
            item = client.put_item(
                REFERENCE_DATA_TYPE,
                existing.get("id", ""),
                name=dimension,
                data=data,
            )
            action = "updated"
        else:
            item = client.post_item(REFERENCE_DATA_TYPE, name=dimension, data=data)
            action = "created"
    finally:
        client.close()
    result = unpack_record(alias, item, include_members=False)
    result["action"] = action
    return result


def run_delete(open_client: OpenClient, alias: str, record_id: str) -> dict[str, Any]:
    """Delete a reference-data record by UUID (server-side soft-delete).

    The record is fetched first so the response can echo its dimension name.

    Returns:
        ``{project, removed: {id, dimension_name}}``.
    """
    client = open_client()
    try:
        item = client.get_item(REFERENCE_DATA_TYPE, record_id)
        attrs = item.get("attributes") or {}
        client.delete_item(REFERENCE_DATA_TYPE, record_id)
    finally:
        client.close()
    return {
        "project": alias,
        "removed": {"id": record_id, "dimension_name": attrs.get("dimensionName", "")},
    }
a/src/keboola_agent_cli/services/semantic_layer_service.py +++ b/src/keboola_agent_cli/services/semantic_layer_service.py @@ -25,6 +25,7 @@ from ..errors import ConfigError, ErrorCode, KeboolaApiError from ..metastore_client import MetastoreClient, SemanticType from ..models import ProjectConfig +from . import _semantic_layer_reference_data as _refdata from ._semantic_layer_cascade import cascade_delete_model as _cascade_delete_model_impl from ._semantic_layer_crud import REMOVE_KINDS as _REMOVE_KINDS_HELPER from ._semantic_layer_crud import code_metric as _code_metric_helper @@ -829,6 +830,70 @@ def add_glossary( data["definition"] = definition return client.post_item("semantic-glossary", name=term, data=data) + # ------------------------------------------------------------------ + # Reference data — dimension-member records (e.g. a Chart of Accounts). + # Thin delegators; logic lives in :mod:`._semantic_layer_reference_data`. + # ------------------------------------------------------------------ + + def list_reference_data( + self, + alias: str, + model_name_or_uuid: str | None = None, + ) -> dict[str, Any]: + """List reference-data records (optionally scoped to one model).""" + return _refdata.run_list( + lambda: self._new_metastore_client(self._resolve_one_project(alias)), + alias, + model_name_or_uuid, + ) + + def get_reference_data( + self, + alias: str, + *, + record_id: str | None = None, + dimension: str | None = None, + ) -> dict[str, Any]: + """Fetch one reference-data record by ``record_id`` or by ``dimension``. + + ``dimension`` is the project-unique key, so no model is needed. 
+ """ + return _refdata.run_get( + lambda: self._new_metastore_client(self._resolve_one_project(alias)), + alias, + record_id=record_id, + dimension=dimension, + ) + + def set_reference_data( + self, + alias: str, + model_name_or_uuid: str | None, + *, + dimension: str, + members: list[dict[str, Any]], + dataset_id: str | None = None, + description: str | None = None, + ) -> dict[str, Any]: + """Create or replace a reference-data record, keyed by ``dimension``.""" + return _refdata.run_set( + lambda: self._new_metastore_client(self._resolve_one_project(alias)), + alias, + model_name_or_uuid, + dimension=dimension, + members=members, + dataset_id=dataset_id, + description=description, + ) + + def delete_reference_data(self, alias: str, record_id: str) -> dict[str, Any]: + """Delete a reference-data record by UUID (server-side soft-delete).""" + return _refdata.run_delete( + lambda: self._new_metastore_client(self._resolve_one_project(alias)), + alias, + record_id, + ) + # ------------------------------------------------------------------ # Phase 4 — edit (DELETE-then-POST with rollback + rename cascade) # ------------------------------------------------------------------ diff --git a/tests/test_e2e.py b/tests/test_e2e.py index fca70ba1..4c921dd3 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -9765,6 +9765,128 @@ def _direct_delete(item_type: str, item_id: str) -> None: except _ApiError as exc: print(f" WARN: residue scan failed: {exc}") + def test_semantic_layer_reference_data_roundtrip(self) -> None: + """Exercise `reference-data` set (create) → list → get → set (replace) → delete.""" + from keboola_agent_cli.metastore_client import MetastoreClient + + tag = f"kbagent_e2e_{int(time.time())}" + model_name = tag + dimension = f"{tag}_coa" + model_id: str | None = None + record_id: str | None = None + + def _direct_delete(item_type: str, item_id: str) -> None: + with MetastoreClient(stack_url=self.url, token=self.token) as mc: + mc.delete_item(item_type, 
item_id) # type: ignore[arg-type] # ty: ignore[invalid-argument-type] + + try: + _step(1, "model create") + model_id = self._run_ok( + "semantic-layer", "model", "create", "--project", self.alias, "--name", model_name + )["data"]["model"]["id"] + assert model_id + + members_file = self.work_dir / "coa.json" + members_file.write_text( + json.dumps( + [ + {"account_code": "4011", "account_name": "Revenue", "is_leaf": 1}, + { + "account_code": "ISR99999", + "account_name": "Revenue Rollup", + "is_leaf": 0, + }, + ] + ) + ) + + _step(2, "reference-data set (create)") + created = self._run_ok( + "semantic-layer", + "reference-data", + "set", + "--project", + self.alias, + "--model", + model_name, + "--dimension", + dimension, + "--members-file", + str(members_file), + "--dataset-id", + "out.c-syn.DIM_COA", + )["data"] + assert created["action"] == "created" + assert created["member_count"] == 2 + record_id = created["id"] + assert record_id + + _step(3, "reference-data list (dimension present)") + listed = self._run_ok( + "semantic-layer", "reference-data", "list", "--project", self.alias + )["data"] + assert dimension in {r["dimension_name"] for r in listed["reference_data"]} + + _step(4, "reference-data get --id (members intact)") + got = self._run_ok( + "semantic-layer", + "reference-data", + "get", + "--project", + self.alias, + "--id", + record_id, + )["data"] + assert {m["account_code"] for m in got["members"]} == {"4011", "ISR99999"} + + _step(5, "reference-data set (replace -> revision++)") + members_file.write_text( + json.dumps([{"account_code": "4011", "account_name": "Revenue (EU)", "is_leaf": 1}]) + ) + replaced = self._run_ok( + "semantic-layer", + "reference-data", + "set", + "--project", + self.alias, + "--model", + model_name, + "--dimension", + dimension, + "--members-file", + str(members_file), + )["data"] + assert replaced["action"] == "updated" + assert replaced["id"] == record_id + assert replaced["member_count"] == 1 + + _step(6, "reference-data 
delete") + removed = self._run_ok( + "semantic-layer", + "reference-data", + "delete", + "--project", + self.alias, + "--id", + record_id, + "--yes", + )["data"] + assert removed["removed"]["id"] == record_id + record_id = None + + finally: + print("\n--- REFERENCE-DATA CLEANUP ---") + if record_id is not None: + try: + _direct_delete("semantic-reference-data", record_id) + except Exception as exc: + print(f" WARN: failed to delete reference-data: {exc}") + if model_id is not None: + try: + _direct_delete("semantic-model", model_id) + except Exception as exc: + print(f" WARN: failed to delete semantic-model {model_id}: {exc}") + def test_semantic_layer_delete_cascade(self) -> None: """Regression test for #306 — cascade-delete frees up per-project dataset names. diff --git a/tests/test_metastore_client.py b/tests/test_metastore_client.py index c29875f4..efa881cb 100644 --- a/tests/test_metastore_client.py +++ b/tests/test_metastore_client.py @@ -246,8 +246,66 @@ def test_delete_404(self, httpx_mock) -> None: assert excinfo.value.error_code == ErrorCode.NOT_FOUND +class TestPutItem: + """put_item wraps the same envelope as post_item but targets PUT /{type}/{id}.""" + + def test_put_envelope_and_url(self, httpx_mock) -> None: + httpx_mock.add_response( + method="PUT", + url=f"{METASTORE_URL_US}/api/v1/repository/semantic-reference-data/rec-1", + json={ + "data": { + "type": "semantic-reference-data", + "id": "rec-1", + "attributes": {"dimensionName": "chart_of_accounts"}, + "meta": {"revision": 2}, + } + }, + status_code=200, + ) + client = MetastoreClient(stack_url=STACK_URL_US, token=TOKEN) + try: + stored = client.put_item( + "semantic-reference-data", + "rec-1", + name="chart_of_accounts", + data={"modelUUID": "u", "dimensionName": "chart_of_accounts", "members": []}, + ) + finally: + client.close() + assert stored["id"] == "rec-1" + assert stored["meta"]["revision"] == 2 + + request = httpx_mock.get_requests()[0] + assert request.method == "PUT" + body = 
json.loads(request.content) + assert body["name"] == "chart_of_accounts" + assert body["branch"] == "main" + assert body["schemaVersion"] == "1.0.0" + assert body["scope"] == "project" + assert body["data"]["dimensionName"] == "chart_of_accounts" + assert body["data"]["members"] == [] + + def test_put_404(self, httpx_mock) -> None: + httpx_mock.add_response( + method="PUT", + url=f"{METASTORE_URL_US}/api/v1/repository/semantic-reference-data/missing", + status_code=404, + json={"error": "not found"}, + ) + client = MetastoreClient(stack_url=STACK_URL_US, token=TOKEN) + try: + with pytest.raises(KeboolaApiError) as excinfo: + client.put_item( + "semantic-reference-data", "missing", name="d", data={"members": []} + ) + finally: + client.close() + assert excinfo.value.error_code == ErrorCode.NOT_FOUND + + class TestSemanticTypes: - """Sanity-check the SEMANTIC_TYPES tuple has the six expected slugs.""" + """Sanity-check the SEMANTIC_TYPES tuple has the expected slugs.""" def test_semantic_types_complete(self) -> None: assert set(SEMANTIC_TYPES) == { @@ -257,4 +315,5 @@ def test_semantic_types_complete(self) -> None: "semantic-relationship", "semantic-constraint", "semantic-glossary", + "semantic-reference-data", } diff --git a/tests/test_semantic_layer_cli.py b/tests/test_semantic_layer_cli.py index 046278c6..0fc55037 100644 --- a/tests/test_semantic_layer_cli.py +++ b/tests/test_semantic_layer_cli.py @@ -1806,3 +1806,204 @@ def test_get_context_human_renders_attributes(self, store: ConfigStore) -> None: assert result.exit_code == 0 assert "users" in result.output assert "dataset" in result.output + + +# --------------------------------------------------------------------------- +# semantic-layer reference-data (list / get / set / delete) +# --------------------------------------------------------------------------- + + +class TestReferenceDataList: + def test_json_success(self, store: ConfigStore) -> None: + mock = MagicMock() + mock.list_reference_data.return_value 
= { + "project": "prod", + "reference_data": [ + { + "id": "r1", + "dimension_name": "chart_of_accounts", + "model_uuid": "U", + "dataset_id": "in.c-f.DIM_COA", + "member_count": 3, + } + ], + } + result = _invoke( + ["--json", "semantic-layer", "reference-data", "list", "--project", "prod"], + store=store, + sl_mock=mock, + ) + assert result.exit_code == 0, result.output + body = json.loads(result.output) + assert body["data"]["reference_data"][0]["dimension_name"] == "chart_of_accounts" + mock.list_reference_data.assert_called_once() + + +class TestReferenceDataGet: + def test_by_id(self, store: ConfigStore) -> None: + mock = MagicMock() + mock.get_reference_data.return_value = { + "project": "prod", + "id": "r1", + "dimension_name": "chart_of_accounts", + "model_uuid": "U", + "member_count": 1, + "revision": 2, + "members": [{"account_code": "4011", "account_name": "Revenue"}], + } + result = _invoke( + [ + "--json", + "semantic-layer", + "reference-data", + "get", + "--project", + "prod", + "--id", + "r1", + ], + store=store, + sl_mock=mock, + ) + assert result.exit_code == 0, result.output + body = json.loads(result.output) + assert body["data"]["members"][0]["account_code"] == "4011" + _, kwargs = mock.get_reference_data.call_args + assert kwargs["record_id"] == "r1" + + def test_id_and_dimension_together_exits_2(self, store: ConfigStore) -> None: + mock = MagicMock() + result = _invoke( + [ + "--json", + "semantic-layer", + "reference-data", + "get", + "--project", + "prod", + "--id", + "r1", + "--dimension", + "chart_of_accounts", + ], + store=store, + sl_mock=mock, + ) + assert result.exit_code == 2, result.output + mock.get_reference_data.assert_not_called() + + def test_neither_id_nor_dimension_exits_2(self, store: ConfigStore) -> None: + mock = MagicMock() + result = _invoke( + ["--json", "semantic-layer", "reference-data", "get", "--project", "prod"], + store=store, + sl_mock=mock, + ) + assert result.exit_code == 2, result.output + 
mock.get_reference_data.assert_not_called() + + +class TestReferenceDataSet: + def test_set_from_file(self, store: ConfigStore, tmp_path: Path) -> None: + members_file = tmp_path / "coa.json" + members_file.write_text(json.dumps([{"account_code": "4011", "account_name": "Revenue"}])) + mock = MagicMock() + mock.set_reference_data.return_value = { + "project": "prod", + "id": "r1", + "dimension_name": "chart_of_accounts", + "member_count": 1, + "action": "created", + } + result = _invoke( + [ + "--json", + "semantic-layer", + "reference-data", + "set", + "--project", + "prod", + "--dimension", + "chart_of_accounts", + "--members-file", + str(members_file), + ], + store=store, + sl_mock=mock, + ) + assert result.exit_code == 0, result.output + body = json.loads(result.output) + assert body["data"]["action"] == "created" + _, kwargs = mock.set_reference_data.call_args + assert kwargs["dimension"] == "chart_of_accounts" + assert kwargs["members"] == [{"account_code": "4011", "account_name": "Revenue"}] + + def test_set_bad_json_exits_2(self, store: ConfigStore, tmp_path: Path) -> None: + members_file = tmp_path / "coa.json" + members_file.write_text("{not valid json") + mock = MagicMock() + result = _invoke( + [ + "--json", + "semantic-layer", + "reference-data", + "set", + "--project", + "prod", + "--dimension", + "chart_of_accounts", + "--members-file", + str(members_file), + ], + store=store, + sl_mock=mock, + ) + assert result.exit_code == 2, result.output + mock.set_reference_data.assert_not_called() + + +class TestReferenceDataDelete: + def test_delete_requires_yes_non_tty(self, store: ConfigStore) -> None: + mock = MagicMock() + result = _invoke( + [ + "--json", + "semantic-layer", + "reference-data", + "delete", + "--project", + "prod", + "--id", + "r1", + ], + store=store, + sl_mock=mock, + ) + assert result.exit_code == 2, result.output + mock.delete_reference_data.assert_not_called() + + def test_delete_with_yes(self, store: ConfigStore) -> None: + mock = 
MagicMock() + mock.delete_reference_data.return_value = { + "project": "prod", + "removed": {"id": "r1", "dimension_name": "chart_of_accounts"}, + } + result = _invoke( + [ + "--json", + "semantic-layer", + "reference-data", + "delete", + "--project", + "prod", + "--id", + "r1", + "--yes", + ], + store=store, + sl_mock=mock, + ) + assert result.exit_code == 0, result.output + body = json.loads(result.output) + assert body["data"]["removed"]["id"] == "r1" + mock.delete_reference_data.assert_called_once() diff --git a/tests/test_semantic_layer_service.py b/tests/test_semantic_layer_service.py index 1f213aef..e463ff0f 100644 --- a/tests/test_semantic_layer_service.py +++ b/tests/test_semantic_layer_service.py @@ -2680,3 +2680,193 @@ def test_client_closed_even_on_api_error(self, tmp_path: Path) -> None: service.get_context("prod", "x") mock.__exit__.assert_called_once() + + +# --------------------------------------------------------------------------- +# reference-data (dimension-member records, e.g. Chart of Accounts) +# --------------------------------------------------------------------------- + + +def _refdata_item( + item_id: str, + dimension: str, + model_uuid: str = "U", + members: list[dict[str, Any]] | None = None, + revision: int = 1, +) -> dict[str, Any]: + return { + "type": "semantic-reference-data", + "id": item_id, + "attributes": { + "modelUUID": model_uuid, + "dimensionName": dimension, + "members": members if members is not None else [], + }, + "meta": {"revision": revision}, + } + + +class TestReferenceData: + @staticmethod + def _model_only_list(extra: dict[str, list[dict[str, Any]]] | None = None): + """Build a list_items side_effect: one model + per-type extras. 
+ + Mirrors the real ``MetastoreClient.list_items`` client-side + ``modelUUID`` filter so that model-scoped vs project-wide lookups are + actually distinguishable (a model-scoped lookup of a record living + under a *different* model must return ``[]`` here, exactly as it would + against the live metastore). + """ + extra = extra or {} + + def _list(item_type: str, model_uuid: str | None = None) -> list[dict[str, Any]]: + if item_type == "semantic-model": + return [_model_item("U", "m")] + items = extra.get(item_type, []) + if model_uuid is not None: + items = [ + i for i in items if (i.get("attributes") or {}).get("modelUUID") == model_uuid + ] + return items + + return _list + + def test_list_summarizes_records(self, tmp_path: Path) -> None: + store = _make_store(tmp_path) + service, mock = _make_service(store) + mock.list_items.side_effect = self._model_only_list( + { + "semantic-reference-data": [ + _refdata_item("r1", "chart_of_accounts", members=[{"account_code": "4011"}]), + ] + } + ) + out = service.list_reference_data("prod") + assert out["project"] == "prod" + assert len(out["reference_data"]) == 1 + rec = out["reference_data"][0] + assert rec["dimension_name"] == "chart_of_accounts" + assert rec["member_count"] == 1 + assert "members" not in rec + + def test_get_by_id_returns_members(self, tmp_path: Path) -> None: + store = _make_store(tmp_path) + service, mock = _make_service(store) + members = [{"account_code": "4011", "account_name": "Revenue"}] + mock.get_item.return_value = _refdata_item("r1", "chart_of_accounts", members=members) + out = service.get_reference_data("prod", record_id="r1") + mock.get_item.assert_called_once_with("semantic-reference-data", "r1") + assert out["members"] == members + assert out["member_count"] == 1 + + def test_get_by_dimension(self, tmp_path: Path) -> None: + service, mock = _make_service(_make_store(tmp_path)) + mock.list_items.side_effect = self._model_only_list( + {"semantic-reference-data": [_refdata_item("r1", 
"chart_of_accounts")]} + ) + out = service.get_reference_data("prod", dimension="chart_of_accounts") + assert out["id"] == "r1" + mock.get_item.assert_not_called() + + def test_get_requires_id_or_dimension(self, tmp_path: Path) -> None: + store = _make_store(tmp_path) + service, _ = _make_service(store) + with pytest.raises(KeboolaApiError) as exc: + service.get_reference_data("prod") + assert exc.value.error_code == ErrorCode.VALIDATION_ERROR + + def test_get_by_dimension_not_found(self, tmp_path: Path) -> None: + store = _make_store(tmp_path) + service, mock = _make_service(store) + mock.list_items.side_effect = self._model_only_list({"semantic-reference-data": []}) + with pytest.raises(KeboolaApiError) as exc: + service.get_reference_data("prod", dimension="missing") + assert exc.value.error_code == ErrorCode.NOT_FOUND + + def test_set_creates_when_absent(self, tmp_path: Path) -> None: + store = _make_store(tmp_path) + service, mock = _make_service(store) + mock.list_items.side_effect = self._model_only_list({"semantic-reference-data": []}) + mock.post_item.return_value = _refdata_item("r1", "chart_of_accounts") + members = [{"account_code": "4011", "account_name": "Revenue"}] + out = service.set_reference_data( + "prod", + None, + dimension="chart_of_accounts", + members=members, + dataset_id="in.c-f.DIM_COA", + ) + assert out["action"] == "created" + mock.post_item.assert_called_once() + mock.put_item.assert_not_called() + _, kwargs = mock.post_item.call_args + assert kwargs["name"] == "chart_of_accounts" + assert kwargs["data"]["modelUUID"] == "U" + assert kwargs["data"]["dimensionName"] == "chart_of_accounts" + assert kwargs["data"]["members"] == members + assert kwargs["data"]["datasetId"] == "in.c-f.DIM_COA" + + def test_set_replaces_when_present(self, tmp_path: Path) -> None: + store = _make_store(tmp_path) + service, mock = _make_service(store) + existing = _refdata_item("r1", "chart_of_accounts", revision=1) + mock.list_items.side_effect = 
self._model_only_list({"semantic-reference-data": [existing]}) + mock.put_item.return_value = _refdata_item("r1", "chart_of_accounts", revision=2) + out = service.set_reference_data( + "prod", None, dimension="chart_of_accounts", members=[{"account_code": "4011"}] + ) + assert out["action"] == "updated" + mock.put_item.assert_called_once() + mock.post_item.assert_not_called() + args, _ = mock.put_item.call_args + assert args[0] == "semantic-reference-data" + assert args[1] == "r1" + + def test_set_is_idempotent_across_models(self, tmp_path: Path) -> None: + """Regression: dimension name is unique per project, so an existing + record under a *different* model must still PUT-replace (not POST and + collide with ALREADY_EXISTS) -- lookup is project-wide by dimension.""" + store = _make_store(tmp_path) + service, mock = _make_service(store) + existing = _refdata_item("r1", "chart_of_accounts", model_uuid="OTHER_MODEL", revision=1) + mock.list_items.side_effect = self._model_only_list({"semantic-reference-data": [existing]}) + mock.put_item.return_value = _refdata_item("r1", "chart_of_accounts", revision=2) + out = service.set_reference_data( + "prod", None, dimension="chart_of_accounts", members=[{"account_code": "4011"}] + ) + assert out["action"] == "updated" + mock.put_item.assert_called_once() + mock.post_item.assert_not_called() + + def test_set_rejects_non_list_members(self, tmp_path: Path) -> None: + store = _make_store(tmp_path) + service, _ = _make_service(store) + # Intentionally wrong type (Any-typed) to exercise the runtime guard. 
+ bad_members: Any = {"not": "a list"} + with pytest.raises(KeboolaApiError) as exc: + service.set_reference_data( + "prod", + None, + dimension="chart_of_accounts", + members=bad_members, + ) + assert exc.value.error_code == ErrorCode.VALIDATION_ERROR + + def test_delete_echoes_dimension(self, tmp_path: Path) -> None: + store = _make_store(tmp_path) + service, mock = _make_service(store) + mock.get_item.return_value = _refdata_item("r1", "chart_of_accounts") + out = service.delete_reference_data("prod", "r1") + mock.delete_item.assert_called_once_with("semantic-reference-data", "r1") + assert out["removed"]["id"] == "r1" + assert out["removed"]["dimension_name"] == "chart_of_accounts" + + +class TestReferenceDataPermissions: + def test_registry_entries(self) -> None: + from keboola_agent_cli.permissions import OPERATION_REGISTRY + + assert OPERATION_REGISTRY["semantic-layer.reference-data.list"] == "read" + assert OPERATION_REGISTRY["semantic-layer.reference-data.get"] == "read" + assert OPERATION_REGISTRY["semantic-layer.reference-data.set"] == "write" + assert OPERATION_REGISTRY["semantic-layer.reference-data.delete"] == "destructive" diff --git a/tests/test_server_router_calls.py b/tests/test_server_router_calls.py index f05ef02e..c78ddcbe 100644 --- a/tests/test_server_router_calls.py +++ b/tests/test_server_router_calls.py @@ -552,3 +552,72 @@ def test_dev_portal_list_no_identity_no_default_is_400(tmp_path: Path) -> None: assert res.status_code == 400, res.text dp_svc.list_apps.assert_not_called() + + +# --------------------------------------------------------------------------- +# semantic_layer.py reference-data routes -> SemanticLayerService parity +# --------------------------------------------------------------------------- + + +def test_reference_data_list_route(tmp_path: Path) -> None: + """GET /semantic-layer/reference-data -> list_reference_data(alias=, model_name_or_uuid=).""" + sl = MagicMock() + sl.list_reference_data.return_value = {"project": 
PROJECT, "reference_data": []} + app = _make_app_with_registry(tmp_path, _mock_registry(semantic_layer=sl)) + resp = TestClient(app).get( + "/semantic-layer/reference-data", + params={"project": PROJECT, "model": "m"}, + headers=AUTH, + ) + assert resp.status_code == 200, resp.text + sl.list_reference_data.assert_called_once_with(alias=PROJECT, model_name_or_uuid="m") + + +def test_reference_data_get_route(tmp_path: Path) -> None: + """GET /semantic-layer/reference-data/{id} -> get_reference_data(alias=, record_id=).""" + sl = MagicMock() + sl.get_reference_data.return_value = {"id": "r1", "members": []} + app = _make_app_with_registry(tmp_path, _mock_registry(semantic_layer=sl)) + resp = TestClient(app).get( + "/semantic-layer/reference-data/r1", params={"project": PROJECT}, headers=AUTH + ) + assert resp.status_code == 200, resp.text + sl.get_reference_data.assert_called_once_with(alias=PROJECT, record_id="r1") + + +def test_reference_data_set_route(tmp_path: Path) -> None: + """PUT /semantic-layer/reference-data -> set_reference_data(...) 
with all kwargs.""" + sl = MagicMock() + sl.set_reference_data.return_value = {"id": "r1", "action": "created"} + app = _make_app_with_registry(tmp_path, _mock_registry(semantic_layer=sl)) + resp = TestClient(app).put( + "/semantic-layer/reference-data", + json={ + "project": PROJECT, + "dimension": "chart_of_accounts", + "members": [{"account_code": "4011"}], + "dataset_id": "in.c-f.DIM_COA", + }, + headers=AUTH, + ) + assert resp.status_code == 200, resp.text + sl.set_reference_data.assert_called_once_with( + alias=PROJECT, + model_name_or_uuid=None, + dimension="chart_of_accounts", + members=[{"account_code": "4011"}], + dataset_id="in.c-f.DIM_COA", + description=None, + ) + + +def test_reference_data_delete_route(tmp_path: Path) -> None: + """DELETE /semantic-layer/reference-data/{id} -> delete_reference_data(alias=, record_id=).""" + sl = MagicMock() + sl.delete_reference_data.return_value = {"removed": {"id": "r1"}} + app = _make_app_with_registry(tmp_path, _mock_registry(semantic_layer=sl)) + resp = TestClient(app).delete( + "/semantic-layer/reference-data/r1", params={"project": PROJECT}, headers=AUTH + ) + assert resp.status_code == 200, resp.text + sl.delete_reference_data.assert_called_once_with(alias=PROJECT, record_id="r1") diff --git a/tests/test_server_semantic_layer_routes_e2e.py b/tests/test_server_semantic_layer_routes_e2e.py index 42c89e82..5f25bb9e 100644 --- a/tests/test_server_semantic_layer_routes_e2e.py +++ b/tests/test_server_semantic_layer_routes_e2e.py @@ -1,7 +1,7 @@ """HTTP integration tests for ``/semantic-layer/*`` routes (real metastore). Bootstraps a throwaway ``kbagent_e2e_`` model on ``e2e-1143`` -(``E2E_URL`` / ``E2E_API_TOKEN``), exercises every one of the 14 routes +(``E2E_URL`` / ``E2E_API_TOKEN``), exercises every one of the 18 routes declared in :mod:`keboola_agent_cli.server.routers.semantic_layer` against the real ``SemanticLayerService`` (NOT mocked), and tears down in a ``finally`` block. 
Residue assertion at session end verifies no @@ -506,6 +506,91 @@ def test_post_token_encrypt(http_session: dict[str, Any]) -> None: ) +# ── reference-data routes (PUT create-or-replace / GET list / GET id / DELETE) ── + + +def test_put_reference_data_create(http_session: dict[str, Any]) -> None: + """PUT /semantic-layer/reference-data — create a Chart-of-Accounts record.""" + dimension = f"{http_session['tag']}_coa" + res = http_session["client"].put( + "/semantic-layer/reference-data", + headers=_auth(), + json={ + "project": _PROJECT_ALIAS, + "model": http_session["model_name"], + "dimension": dimension, + "members": [ + {"account_code": "4011", "account_name": "Revenue", "is_leaf": 1}, + {"account_code": "ISR99999", "account_name": "Revenue Rollup", "is_leaf": 0}, + ], + "dataset_id": "out.c-syn.DIM_COA", + }, + ) + assert res.status_code == 200, res.text + body = res.json() + assert body["action"] == "created" + assert body["dimension_name"] == dimension + assert body["member_count"] == 2 + http_session["refdata_dimension"] = dimension + http_session["refdata_id"] = body["id"] + http_session["created_items"].append(("semantic-reference-data", body["id"])) + + +def test_get_reference_data_list(http_session: dict[str, Any]) -> None: + """GET /semantic-layer/reference-data — the created dimension is listed.""" + res = http_session["client"].get( + f"/semantic-layer/reference-data?project={_PROJECT_ALIAS}", headers=_auth() + ) + assert res.status_code == 200, res.text + dims = {r["dimension_name"] for r in res.json()["reference_data"]} + assert http_session["refdata_dimension"] in dims + + +def test_get_reference_data_by_id(http_session: dict[str, Any]) -> None: + """GET /semantic-layer/reference-data/{id} — returns all members.""" + res = http_session["client"].get( + f"/semantic-layer/reference-data/{http_session['refdata_id']}?project={_PROJECT_ALIAS}", + headers=_auth(), + ) + assert res.status_code == 200, res.text + body = res.json() + assert 
body["member_count"] == 2 + codes = {m["account_code"] for m in body["members"]} + assert codes == {"4011", "ISR99999"} + + +def test_put_reference_data_replace(http_session: dict[str, Any]) -> None: + """PUT again — same (model, dimension) replaces in place (revision++).""" + res = http_session["client"].put( + "/semantic-layer/reference-data", + headers=_auth(), + json={ + "project": _PROJECT_ALIAS, + "model": http_session["model_name"], + "dimension": http_session["refdata_dimension"], + "members": [{"account_code": "4011", "account_name": "Revenue (EU)", "is_leaf": 1}], + }, + ) + assert res.status_code == 200, res.text + body = res.json() + assert body["action"] == "updated" + assert body["member_count"] == 1 + assert body["id"] == http_session["refdata_id"] + + +def test_delete_reference_data(http_session: dict[str, Any]) -> None: + """DELETE /semantic-layer/reference-data/{id} — removes the record.""" + res = http_session["client"].delete( + f"/semantic-layer/reference-data/{http_session['refdata_id']}?project={_PROJECT_ALIAS}", + headers=_auth(), + ) + assert res.status_code == 200, res.text + assert res.json()["removed"]["id"] == http_session["refdata_id"] + http_session["created_items"] = [ + (t, i) for (t, i) in http_session["created_items"] if t != "semantic-reference-data" + ] + + def test_delete_items_remove_glossary(http_session: dict[str, Any]) -> None: """DELETE /semantic-layer/items/glossary/{term} — removes the glossary entry.""" term = f"{http_session['tag']}_term"