From 17d4bd03a20206d54a18132a509101621f05e381 Mon Sep 17 00:00:00 2001 From: Oleg Shulyakov Date: Thu, 21 May 2026 11:34:33 +0300 Subject: [PATCH] feat(skills): expand writer-sql to codegen-database - Replace writer-sql with codegen-database covering schemas, OLTP SQL, migrations, and analytics SQL - Add references: schema-design, migration, analytics, bigquery, snowflake, clickhouse, cockroachdb - Add MySQL reference with dialect-native patterns and gotchas - Rewrite common.md with OLTP/analytics routing, portable SQL guidance - Restructure existing dialect references with action-oriented section headers - Update skills README table and packaging examples --- .agents/skills/README.md | 6 +- .agents/skills/codegen-database/SKILL.md | 88 + .../skills/codegen-database/evals/evals.json | 1565 +++++++++++++++++ .../codegen-database/references/analytics.md | 44 + .../codegen-database/references/bigquery.md | 28 + .../codegen-database/references/clickhouse.md | 30 + .../references/cockroachdb.md | 28 + .../references/common.md | 46 +- .../codegen-database/references/migration.md | 62 + .../references/mssql.md | 38 +- .../codegen-database/references/mysql.md | 131 ++ .../references/oracle.md | 48 +- .../references/postgres.md | 34 +- .../references/schema-design.md} | 64 +- .../codegen-database/references/snowflake.md | 27 + .../references/sqlite.md | 18 + .agents/skills/writer-sql/SKILL.md | 59 - .agents/skills/writer-sql/evals/evals.json | 117 -- .agents/skills/writer-sql/references/mysql.md | 111 -- 19 files changed, 2179 insertions(+), 365 deletions(-) create mode 100644 .agents/skills/codegen-database/SKILL.md create mode 100644 .agents/skills/codegen-database/evals/evals.json create mode 100644 .agents/skills/codegen-database/references/analytics.md create mode 100644 .agents/skills/codegen-database/references/bigquery.md create mode 100644 .agents/skills/codegen-database/references/clickhouse.md create mode 100644 
.agents/skills/codegen-database/references/cockroachdb.md rename .agents/skills/{writer-sql => codegen-database}/references/common.md (60%) create mode 100644 .agents/skills/codegen-database/references/migration.md rename .agents/skills/{writer-sql => codegen-database}/references/mssql.md (65%) create mode 100644 .agents/skills/codegen-database/references/mysql.md rename .agents/skills/{writer-sql => codegen-database}/references/oracle.md (61%) rename .agents/skills/{writer-sql => codegen-database}/references/postgres.md (64%) rename .agents/skills/{writer-sql/references/design.md => codegen-database/references/schema-design.md} (68%) create mode 100644 .agents/skills/codegen-database/references/snowflake.md rename .agents/skills/{writer-sql => codegen-database}/references/sqlite.md (83%) delete mode 100644 .agents/skills/writer-sql/SKILL.md delete mode 100644 .agents/skills/writer-sql/evals/evals.json delete mode 100644 .agents/skills/writer-sql/references/mysql.md diff --git a/.agents/skills/README.md b/.agents/skills/README.md index ae35bfb..d5ea75c 100644 --- a/.agents/skills/README.md +++ b/.agents/skills/README.md @@ -10,6 +10,7 @@ A complete skill is a directory with a required `SKILL.md` file and optional bun | --- | --- | --- | | [`audit-skill-security`](audit-skill-security/SKILL.md) | Auditing third-party or local skills before installing, updating, or trusting them. | [`references/audit-protocol.md`](audit-skill-security/references/audit-protocol.md) | | [`codegen-backend`](codegen-backend/SKILL.md) | Production backend code: APIs, services, middleware, workers, persistence, validation, auth, and backend tests. | [`references/`](codegen-backend/references/), [`evals/`](codegen-backend/evals/) | +| [`codegen-database`](codegen-database/SKILL.md) | Database code: schemas, DDL, OLTP SQL, analytics SQL, migrations, indexes, stored procedures, and dialect-specific scripts. 
| [`references/`](codegen-database/references/), [`evals/`](codegen-database/evals/) | | [`codegen-frontend`](codegen-frontend/SKILL.md) | Production frontend code: components, routes, client state, forms, styling, accessibility, performance, PWA behavior, and visualization. | [`references/`](codegen-frontend/references/), [`evals/`](codegen-frontend/evals/) | | [`codegen-test`](codegen-test/SKILL.md) | Automated tests and evals, including E2E, API, integration, performance, AI output, tool-use, RAG, and prompt regression suites. | [`references/`](codegen-test/references/), [`scripts/`](codegen-test/scripts/), [`evals/`](codegen-test/evals/) | | [`creator-rule`](creator-rule/SKILL.md) | Writing or improving agent rules, instruction files, `AGENTS.md`, `CLAUDE.md`, Cursor rules, Copilot instructions, and `.agents/rules/*.md`. | [`scripts/`](creator-rule/scripts/), [`evals/`](creator-rule/evals/) | @@ -21,7 +22,6 @@ A complete skill is a directory with a required `SKILL.md` file and optional bun | [`review-code`](review-code/SKILL.md) | Reviewing code changes, diffs, pull requests, branches, or patches for correctness, regressions, security, performance, and test gaps. | [`references/`](review-code/references/), [`evals/`](review-code/evals/) | | [`writer-prd`](writer-prd/SKILL.md) | Product requirements, product briefs, feature requirements, product scope, and launch requirements. | [`references/`](writer-prd/references/), [`evals/`](writer-prd/evals/) | | [`writer-spec`](writer-spec/SKILL.md) | Technical specs, design docs, functional and non-functional requirements, data contracts, UI specs, release specs, and handoff docs. | [`references/`](writer-spec/references/), [`evals/`](writer-spec/evals/) | -| [`writer-sql`](writer-sql/SKILL.md) | Database schemas, SQL queries, dialect guidance, normalization, indexing, optimization, and troubleshooting. 
| [`references/`](writer-sql/references/), [`evals/`](writer-sql/evals/) | | [`writer-tech-docs`](writer-tech-docs/SKILL.md) | READMEs, API docs, endpoint references, routine and on-call runbooks, changelogs, and release notes. | [`references/`](writer-tech-docs/references/), [`evals/`](writer-tech-docs/evals/) | | [`writer-user-story`](writer-user-story/SKILL.md) | User stories, acceptance criteria, developer tasks, tickets, story points, and sprint planning breakdowns. | [`references/`](writer-user-story/references/), [`evals/`](writer-user-story/evals/) | @@ -41,13 +41,13 @@ Use [`creator-skill`](creator-skill/SKILL.md) to create, revise, package, or eva ```bash cd .agents/skills/creator-skill -python3 -m scripts.package_skill ../writer-sql /tmp/skills-dist +python3 -m scripts.package_skill ../codegen-database /tmp/skills-dist ``` Use this validation command when changing an existing skill: ```bash -python3 .agents/skills/creator-skill/scripts/quick_validate.py .agents/skills/writer-sql +python3 .agents/skills/creator-skill/scripts/quick_validate.py .agents/skills/codegen-database ``` The key rule is simple: keep `SKILL.md` and any files it references together. If a skill says to read `references/postgres.md`, that file must remain available relative to the skill folder. Tiny rule, large consequences. Filesystems enjoy pettiness. diff --git a/.agents/skills/codegen-database/SKILL.md b/.agents/skills/codegen-database/SKILL.md new file mode 100644 index 0000000..df303bc --- /dev/null +++ b/.agents/skills/codegen-database/SKILL.md @@ -0,0 +1,88 @@ +--- +name: codegen-database +description: > + Generate or modify database code: schemas, DDL, SQL queries, migrations, analytics SQL, + indexes, stored procedures, and dialect-specific database scripts. 
+author: Oleg Shulyakov +license: MIT +version: 1.0.0 +--- + +# codegen-database + +Generate production-ready database code for schemas, DDL, OLTP queries, analytics SQL, migrations, indexes, stored procedures, and dialect-specific scripts. Use this as a router: classify the database artifact first, detect the dialect from context or repository evidence, then read only the relevant references. + +## Variant Detection + +**Route from artifact type before choosing syntax details.** + +- **Schema design:** Requests for entities, tables, relationships, normalization, constraints, ERD-to-DDL, or "what tables do I need" route to `references/schema-design.md`. +- **Migrations:** Requests for up/down migrations, Flyway, Liquibase, Rails/ActiveRecord migrations, Alembic, Prisma migrations, rollback, data backfills, or deployment-safe DDL route to `references/migration.md`. +- **OLTP SQL:** Requests for queries, DML, views, indexes, transactions, upserts, stored procedures, or query optimization route to `references/common.md`, then the dialect reference. +- **Analytics SQL:** Requests for warehouses, metrics, cohorts, funnels, retention, partitioned fact tables, dbt-like transformations, BigQuery, Snowflake, or ClickHouse route to `references/analytics.md`, then any matching warehouse reference. +- **Adjacent skills:** Use `report-db-health` for database health findings from existing telemetry. Use `strategy-backup` for backup and recovery policy. Use `writer-spec` for data contracts when the output is prose rather than executable database code. +- **Ambiguity:** If the artifact type or database remains genuinely ambiguous after inspecting context, ask one short question naming the likely choices. 
+ +## Dialect Routing + +**Choose the database from explicit signals, then repository evidence, then ask only if the choice changes the code.** + +| Signal | Reference | +| --- | --- | +| PostgreSQL, Postgres, `JSONB`, `TIMESTAMPTZ`, `ON CONFLICT`, `pg`, `psql` | `references/postgres.md` | +| MySQL, MariaDB, `AUTO_INCREMENT`, `ON DUPLICATE KEY`, `mysql2` | `references/mysql.md` | +| SQL Server, MSSQL, T-SQL, `pyodbc`, `OFFSET-FETCH`, `MERGE` | `references/mssql.md` | +| SQLite, embedded/mobile/local database, FTS5, WAL mode | `references/sqlite.md` | +| Oracle, PL/SQL, sequences, `DUAL`, Oracle hints | `references/oracle.md` | +| BigQuery, GoogleSQL, ARRAY/STRUCT, partitioned warehouse tables | `references/bigquery.md` | +| Snowflake, VARIANT, streams/tasks, clustering, time travel | `references/snowflake.md` | +| ClickHouse, MergeTree, materialized views, sparse indexes | `references/clickhouse.md` | +| CockroachDB, distributed SQL, follower reads, regional tables | `references/cockroachdb.md` | + +If the user asks for portable SQL, use `references/common.md` and avoid dialect-specific syntax unless you clearly mark alternatives. + +## Working Rules + +**Database code must be executable, reversible when applicable, and honest about assumptions.** + +- **Inspect first:** Read existing migrations, schema files, ORM models, query builders, naming conventions, fixtures, and migration tooling before editing repository files. +- **Prefer structural guarantees:** Encode business rules with constraints, foreign keys, uniqueness, checks, generated columns, and transaction boundaries before relying on application-only enforcement. +- **Keep migrations safe:** Make destructive DDL explicit, separate schema changes from risky data rewrites when needed, and include rollback or forward-fix guidance when true rollback is unsafe. +- **Use parameters:** Never generate SQL that interpolates user input into executable statements. 
Use the placeholder style for the target dialect or framework. +- **Index deliberately:** Tie each recommended index to a query, constraint, or access pattern. Avoid adding write-costly indexes without a reason. +- **Handle concurrency:** Use transactions, locks, isolation levels, uniqueness, idempotency keys, or retry notes when the database operation can race. +- **Respect dialect limits:** Do not mix syntax across engines. If the dialect is unknown and syntax materially differs, ask once instead of producing decorative nonsense in SQL clothing. +- **Verify locally:** Run the narrowest relevant migration check, SQL parser, formatter, test, or application test available. If no database is available, state what was reviewed statically. + +## Implementation Flow + +**Move from existing schema to minimal database change to verification.** + +1. Identify artifact type and dialect, then read the selected artifact reference and dialect reference. +2. Inspect the closest existing schema, migration, query, model, and tests. +3. Plan the minimal database surface: tables, columns, constraints, indexes, migrations, queries, and tests or fixtures. +4. Edit using project naming and migration conventions. +5. Add or update focused tests, fixtures, or migration assertions when the repository supports them. +6. Run focused verification and fix regressions within scope. + +## Output Format + +**Return runnable database code plus the operational context needed to use it.** + +When editing a repository, finish with changed files, commands run, and verification status. + +When drafting code only, include: + +```text +Assumptions: +- ... + +SQL / Migration: +- ... + +Performance: +- ... + +Rollback / Safety: +- ... 
+``` diff --git a/.agents/skills/codegen-database/evals/evals.json b/.agents/skills/codegen-database/evals/evals.json new file mode 100644 index 0000000..85ab6d1 --- /dev/null +++ b/.agents/skills/codegen-database/evals/evals.json @@ -0,0 +1,1565 @@ +{ + "skill_name": "codegen-database", + "evals": [ + { + "id": 1, + "reference": "references/schema-design.md", + "prompt": "Design a PostgreSQL schema for a subscription billing system with customers, plans, subscriptions, invoices, invoice line items, and payments. Support soft deletes for customers.", + "expected_output": "A response that routes to references/schema-design.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to schema-design guidance", + "Includes entities and relationship overview", + "Uses PostgreSQL-compatible DDL", + "Defines foreign keys with ON DELETE behavior", + "Uses NUMERIC for money", + "Indexes foreign keys and common lookup columns" + ] + }, + { + "id": 2, + "reference": "references/schema-design.md", + "prompt": "Design a normalized MySQL schema for a library with books, authors, book copies, patrons, loans, reservations, and late fees.", + "expected_output": "A response that routes to references/schema-design.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to schema-design guidance", + "Uses MySQL-compatible DDL", + "Normalizes many-to-many book/authors", + "Models loans and reservations with foreign keys", + "Uses DECIMAL for fees", + "Indexes foreign keys" + ] + }, + { + "id": 3, + "reference": "references/schema-design.md", + "prompt": "Create a SQLite schema for an offline mobile notes app with notebooks, notes, tags, note_tags, and sync metadata.", + "expected_output": "A response that routes to references/schema-design.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + 
"expectations": [ + "Routes to schema-design guidance", + "Uses SQLite-compatible types", + "Models many-to-many tags", + "Accounts for SQLite foreign key behavior", + "Includes sync metadata fields", + "Avoids unsupported SQLite DDL features" + ] + }, + { + "id": 4, + "reference": "references/schema-design.md", + "prompt": "Design a PostgreSQL multi-tenant schema for projects, tasks, comments, attachments, and members. Every tenant-scoped table needs tenant_id.", + "expected_output": "A response that routes to references/schema-design.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to schema-design guidance", + "Includes tenant_id on scoped tables", + "Uses composite indexes starting with tenant_id", + "Defines membership relationships", + "States soft delete or audit assumptions", + "Uses explicit constraints" + ] + }, + { + "id": 5, + "reference": "references/schema-design.md", + "prompt": "Turn this ERD into DDL: departments have employees, employees have managers, employees can be assigned to many projects, and assignments have allocation percent. 
Use SQL Server.", + "expected_output": "A response that routes to references/schema-design.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to schema-design guidance", + "Uses SQL Server-compatible DDL", + "Models self-referential manager relationship", + "Models many-to-many assignments", + "Constrains allocation percent", + "Indexes relationship columns" + ] + }, + { + "id": 6, + "reference": "references/schema-design.md", + "prompt": "Design an Oracle schema for purchase orders, vendors, products, warehouses, inventory balances, receipts, and receipt lines.", + "expected_output": "A response that routes to references/schema-design.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to schema-design guidance", + "Uses Oracle-compatible types", + "Models inventory relationships clearly", + "Uses NUMBER for quantities and money", + "Defines primary keys and sequences or identities", + "Includes index rationale" + ] + }, + { + "id": 7, + "reference": "references/schema-design.md", + "prompt": "Design a ClickHouse reporting table schema for daily product metrics with product_id, date, impressions, clicks, add_to_cart, purchases, and revenue.", + "expected_output": "A response that routes to references/schema-design.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to schema-design guidance and ClickHouse dialect, with analytics-aware table design", + "Uses ClickHouse-compatible table design", + "Defines row grain", + "Chooses MergeTree family engine", + "Specifies partition/order keys", + "Avoids OLTP-only normalization assumptions" + ] + }, + { + "id": 8, + "reference": "references/schema-design.md", + "prompt": "Design a CockroachDB schema for globally distributed accounts, users, sessions, and audit events with regional data requirements.", + "expected_output": "A
response that routes to references/schema-design.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to schema-design guidance and CockroachDB dialect", + "Avoids hot key patterns", + "Uses distributed-SQL-compatible DDL", + "Mentions regional/locality assumptions", + "Defines constraints and indexes", + "Calls out transaction retry considerations" + ] + }, + { + "id": 9, + "reference": "references/common.md", + "prompt": "Write portable SQL to find customers who bought product A but never bought product B.", + "expected_output": "A response that routes to references/common.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to common OLTP SQL", + "Uses NOT EXISTS or anti-join logic", + "Avoids dialect-specific syntax", + "Uses clear table aliases", + "States table assumptions", + "No SELECT *" + ] + }, + { + "id": 10, + "reference": "references/common.md", + "prompt": "Optimize a slow query joining orders, customers, and payments filtered by paid_at and sorted by created_at. 
Keep it database-neutral.", + "expected_output": "A response that routes to references/common.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to common optimization guidance", + "Mentions execution-plan validation", + "Recommends join/filter/sort indexes", + "Avoids dialect-specific syntax unless noted", + "Explains tradeoffs", + "Avoids SELECT * in rewrite" + ] + }, + { + "id": 11, + "reference": "references/common.md", + "prompt": "Write a transaction that transfers balance between two accounts and prevents negative balances using portable SQL where possible.", + "expected_output": "A response that routes to references/common.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to common OLTP SQL", + "Uses explicit transaction boundaries", + "Uses parameters", + "Checks/prevents negative balances", + "Mentions isolation or concurrency", + "Explains portability limits" + ] + }, + { + "id": 12, + "reference": "references/common.md", + "prompt": "Write a query for the latest order per customer without using vendor-specific DISTINCT ON.", + "expected_output": "A response that routes to references/common.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to common OLTP SQL", + "Uses window function or portable correlated approach", + "Returns one row per customer", + "Avoids vendor-only syntax", + "Uses clear aliases", + "Mentions useful indexes" + ] + }, + { + "id": 13, + "reference": "references/common.md", + "prompt": "Write a query that finds duplicate normalized email addresses in users and shows the count for each duplicate.", + "expected_output": "A response that routes to references/common.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to common 
OLTP SQL", + "Groups by normalized email expression", + "Uses HAVING COUNT greater than one", + "Avoids SELECT *", + "Mentions expression/function index if useful", + "Handles NULL or blank assumptions" + ] + }, + { + "id": 14, + "reference": "references/common.md", + "prompt": "Write a keyset pagination query for orders sorted by created_at and id, database-neutral if possible.", + "expected_output": "A response that routes to references/common.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to common OLTP SQL", + "Uses keyset predicate on created_at and id", + "Avoids OFFSET for large pages", + "Uses deterministic ordering", + "Uses parameters", + "Mentions composite index" + ] + }, + { + "id": 15, + "reference": "references/common.md", + "prompt": "Write a query to update order status from pending to expired when expires_at is in the past, and return affected rows if supported.", + "expected_output": "A response that routes to references/common.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to common OLTP SQL", + "Uses parameterized or current timestamp condition", + "Explains RETURNING portability", + "Avoids unsafe mass update assumptions", + "Mentions transaction/safety", + "Uses clear WHERE clause" + ] + }, + { + "id": 16, + "reference": "references/common.md", + "prompt": "Explain and rewrite a query that uses LIKE '%term%' on a large products table.", + "expected_output": "A response that routes to references/common.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to common optimization guidance", + "Explains leading wildcard index issue", + "Suggests full-text search or trigram-like alternatives with dialect caveat", + "Provides safer rewritten option or notes", + "Mentions execution plan", + "Avoids overclaiming
portability" + ] + }, + { + "id": 17, + "reference": "references/migration.md", + "prompt": "Create a PostgreSQL migration to add users.phone_number, backfill from user_profiles.phone, then enforce uniqueness for non-null values.", + "expected_output": "A response that routes to references/migration.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to migration guidance", + "Adds nullable column first", + "Includes explicit backfill", + "Uses partial unique index", + "Mentions lock/transaction behavior", + "Includes rollback or forward-fix notes" + ] + }, + { + "id": 18, + "reference": "references/migration.md", + "prompt": "Write a Flyway migration to create an orders table and an order_items table with foreign keys and indexes.", + "expected_output": "A response that routes to references/migration.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to migration guidance", + "Recognizes Flyway SQL style", + "Uses versioned SQL migration shape", + "Creates tables in dependency order", + "Adds indexes for foreign keys", + "Includes rollback limitation note if Flyway lacks down migrations" + ] + }, + { + "id": 19, + "reference": "references/migration.md", + "prompt": "Write an Alembic migration adding a not-null account_id to invoices for an existing populated table.", + "expected_output": "A response that routes to references/migration.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to migration guidance", + "Uses staged nullable-add/backfill/not-null pattern", + "Uses Alembic upgrade/downgrade style", + "Avoids unsafe immediate NOT NULL on populated table", + "Mentions batching or validation", + "Includes downgrade behavior" + ] + }, + { + "id": 20, + "reference": "references/migration.md", + "prompt": "Create a Rails migration to 
rename customers.full_name to display_name without breaking production reads.", + "expected_output": "A response that routes to references/migration.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to migration guidance", + "Uses expand/contract or compatibility strategy", + "Uses Rails migration style", + "Avoids a one-step breaking rename where risky", + "Mentions app deploy sequencing", + "Includes rollback considerations" + ] + }, + { + "id": 21, + "reference": "references/migration.md", + "prompt": "Write a Liquibase changeset to add a foreign key from payments.invoice_id to invoices.id and create the supporting index.", + "expected_output": "A response that routes to references/migration.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to migration guidance", + "Uses Liquibase-style changeset expectations", + "Adds index before/with FK", + "Defines FK behavior", + "Includes rollback guidance", + "Mentions validation on existing data" + ] + }, + { + "id": 22, + "reference": "references/migration.md", + "prompt": "Create a Prisma migration plan for splitting users.name into first_name and last_name.", + "expected_output": "A response that routes to references/migration.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to migration guidance", + "Recognizes Prisma migration context", + "Separates schema change and data backfill", + "Notes ambiguous name parsing risk", + "Includes rollback/forward-fix guidance", + "Mentions generated migration review" + ] + }, + { + "id": 23, + "reference": "references/migration.md", + "prompt": "Write a PostgreSQL migration to create an index concurrently on events(tenant_id, occurred_at) for a large table.", + "expected_output": "A response that routes to references/migration.md and produces 
correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to migration guidance", + "Uses CREATE INDEX CONCURRENTLY", + "Does not wrap concurrent index creation in a transaction", + "Names the index clearly", + "Includes down migration with DROP INDEX CONCURRENTLY", + "Mentions lock behavior" + ] + }, + { + "id": 24, + "reference": "references/migration.md", + "prompt": "Create a SQL Server migration to add a computed persisted column for normalized email and a unique filtered index for active users.", + "expected_output": "A response that routes to references/migration.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to migration guidance", + "Uses SQL Server-compatible syntax", + "Adds computed persisted column", + "Uses filtered unique index", + "Accounts for existing duplicate data", + "Includes rollback steps" + ] + }, + { + "id": 25, + "reference": "references/analytics.md", + "prompt": "Write analytics SQL for weekly signup-to-active retention cohorts from an events table.", + "expected_output": "A response that routes to references/analytics.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to analytics guidance", + "Defines cohort grain", + "Separates signup and active events", + "Calculates retention by week", + "Handles time window assumptions", + "Includes validation notes" + ] + }, + { + "id": 26, + "reference": "references/analytics.md", + "prompt": "Create a monthly revenue cohort query from subscriptions and invoices with one row per cohort month and activity month.", + "expected_output": "A response that routes to references/analytics.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to analytics guidance", + "Defines row grain", + "Avoids fanout between 
subscriptions and invoices", + "Uses date truncation", + "Calculates revenue by cohort/activity period", + "Mentions timezone or period boundaries" + ] + }, + { + "id": 27, + "reference": "references/analytics.md", + "prompt": "Write a funnel query for viewed_product, add_to_cart, checkout_started, and purchase events.", + "expected_output": "A response that routes to references/analytics.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to analytics guidance", + "Defines funnel steps", + "Preserves user/session grain", + "Avoids double counting", + "Calculates step counts and conversion rates", + "Includes validation checks" + ] + }, + { + "id": 28, + "reference": "references/analytics.md", + "prompt": "Build a daily active users query from events that excludes bot users and late-arriving events older than 7 days.", + "expected_output": "A response that routes to references/analytics.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to analytics guidance", + "Defines DAU grain", + "Handles bot exclusion", + "States late-arriving data rule", + "Uses stable date boundaries", + "Includes partition/filter performance notes" + ] + }, + { + "id": 29, + "reference": "references/analytics.md", + "prompt": "Write SQL for a fact table model fact_order_items from orders, order_items, products, and customers.", + "expected_output": "A response that routes to references/analytics.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to analytics guidance", + "Defines fact table grain", + "Separates facts and dimensions", + "Avoids fanout", + "Includes surrogate/business keys as appropriate", + "Includes validation checks" + ] + }, + { + "id": 30, + "reference": "references/analytics.md", + "prompt": "Create a query for 30-day rolling average 
purchase revenue by product category.", + "expected_output": "A response that routes to references/analytics.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to analytics guidance", + "Uses window function or equivalent rolling frame", + "Defines date/category grain", + "Handles missing dates or notes assumption", + "Avoids metric fanout", + "Mentions partitioning/indexing where relevant" + ] + }, + { + "id": 31, + "reference": "references/analytics.md", + "prompt": "Write a churn analysis query for users with no activity in the last 60 days after previously being active.", + "expected_output": "A response that routes to references/analytics.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to analytics guidance", + "Defines churn condition clearly", + "Uses stable date boundaries", + "Separates eligibility from churn calculation", + "Includes validation of user counts", + "Mentions assumptions" + ] + }, + { + "id": 32, + "reference": "references/analytics.md", + "prompt": "Create a deduped events staging query that keeps the latest event by event_id using ingested_at.", + "expected_output": "A response that routes to references/analytics.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to analytics guidance", + "Defines staging row grain", + "Uses ROW_NUMBER or equivalent dedupe", + "Keeps latest by ingested_at", + "Mentions duplicate validation", + "Includes late-arrival assumptions" + ] + }, + { + "id": 33, + "reference": "references/postgres.md", + "prompt": "Write a PostgreSQL JSONB query to find orders whose metadata has fraud_score greater than 80 and add an index recommendation.", + "expected_output": "A response that routes to references/postgres.md and produces correct, executable database guidance for the requested 
scenario.", + "files": [], + "expectations": [ + "Routes to PostgreSQL dialect", + "Uses JSONB operators or casts correctly", + "Uses parameterization where applicable", + "Recommends appropriate JSONB/expression index", + "Explains cast/index tradeoff", + "Uses PostgreSQL syntax" + ] + }, + { + "id": 34, + "reference": "references/postgres.md", + "prompt": "Write a Postgres upsert for users by email that updates name and updated_at on conflict.", + "expected_output": "A response that routes to references/postgres.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to PostgreSQL dialect", + "Uses ON CONFLICT", + "States required unique constraint", + "Uses EXCLUDED values", + "Updates updated_at", + "Uses parameters" + ] + }, + { + "id": 35, + "reference": "references/postgres.md", + "prompt": "Create a PostgreSQL full-text search query for articles with title and body weighted differently.", + "expected_output": "A response that routes to references/postgres.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to PostgreSQL dialect", + "Uses tsvector/tsquery functions", + "Applies weights", + "Ranks results", + "Recommends GIN index", + "Uses safe query construction" + ] + }, + { + "id": 36, + "reference": "references/postgres.md", + "prompt": "Use a writable CTE in PostgreSQL to insert an order and related order_items in one transaction.", + "expected_output": "A response that routes to references/postgres.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to PostgreSQL dialect", + "Uses CTE with INSERT RETURNING", + "Maintains transactional relationship", + "Uses parameters", + "Avoids unsafe interpolation", + "Explains constraints/indexes needed" + ] + }, + { + "id": 37, + "reference": "references/postgres.md", + "prompt": "Write a 
Postgres keyset pagination query over events ordered by occurred_at and id.", + "expected_output": "A response that routes to references/postgres.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to PostgreSQL dialect", + "Uses tuple comparison or equivalent", + "Uses deterministic ORDER BY", + "Uses LIMIT", + "Recommends composite index", + "Avoids OFFSET" + ] + }, + { + "id": 38, + "reference": "references/postgres.md", + "prompt": "Show EXPLAIN ANALYZE usage for a slow Postgres query and how to interpret sequential scan vs index scan.", + "expected_output": "A response that routes to references/postgres.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to PostgreSQL dialect", + "Uses EXPLAIN ANALYZE syntax", + "Mentions buffers or timing if useful", + "Explains seq scan vs index scan caveat", + "Avoids assuming seq scan is always bad", + "Suggests next validation step" + ] + }, + { + "id": 39, + "reference": "references/postgres.md", + "prompt": "Write a PostgreSQL query using array operations to find products that have all requested tags.", + "expected_output": "A response that routes to references/postgres.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to PostgreSQL dialect", + "Uses array containment or relational alternative", + "Uses parameterized array", + "Mentions GIN index if arrays are used", + "Explains modeling tradeoff", + "Uses valid Postgres syntax" + ] + }, + { + "id": 40, + "reference": "references/postgres.md", + "prompt": "Create PostgreSQL DDL with named check and unique constraints for a coupon_codes table.", + "expected_output": "A response that routes to references/postgres.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to 
PostgreSQL dialect", + "Uses explicit constraint names", + "Uses CHECK for status/date rules", + "Uses UNIQUE constraint or index", + "Uses TIMESTAMPTZ", + "Follows naming convention" + ] + }, + { + "id": 41, + "reference": "references/mysql.md", + "prompt": "Write a MySQL ON DUPLICATE KEY UPDATE query to increment daily login count per user and date.", + "expected_output": "A response that routes to references/mysql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MySQL dialect", + "Uses ON DUPLICATE KEY UPDATE", + "States required unique key", + "Uses parameters", + "Handles increment correctly", + "Uses MySQL-compatible syntax" + ] + }, + { + "id": 42, + "reference": "references/mysql.md", + "prompt": "Create MySQL DDL for users with utf8mb4 charset and a generated lower_email column for uniqueness.", + "expected_output": "A response that routes to references/mysql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MySQL dialect", + "Uses utf8mb4 charset/collation", + "Uses generated column or functional index where appropriate", + "Defines unique constraint", + "Uses InnoDB", + "Mentions version caveats if needed" + ] + }, + { + "id": 43, + "reference": "references/mysql.md", + "prompt": "Write a MySQL JSON query to find products where attributes.color is red and recommend an index strategy.", + "expected_output": "A response that routes to references/mysql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MySQL dialect", + "Uses JSON_EXTRACT or arrow syntax", + "Uses valid comparison syntax", + "Mentions generated/virtual column for indexing", + "Uses parameters where applicable", + "Explains JSON index caveat" + ] + }, + { + "id": 44, + "reference": "references/mysql.md", + "prompt": "Write a MySQL full-text search 
query over articles title and body.", + "expected_output": "A response that routes to references/mysql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MySQL dialect", + "Uses MATCH AGAINST", + "States FULLTEXT index requirement", + "Uses InnoDB-compatible expectations", + "Avoids PostgreSQL syntax", + "Mentions boolean/natural language mode if relevant" + ] + }, + { + "id": 45, + "reference": "references/mysql.md", + "prompt": "Optimize MySQL pagination for a large orders table sorted by created_at and id.", + "expected_output": "A response that routes to references/mysql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MySQL dialect", + "Uses keyset pagination", + "Avoids large OFFSET", + "Uses composite index recommendation", + "Uses MySQL parameter style", + "Explains tradeoff" + ] + }, + { + "id": 46, + "reference": "references/mysql.md", + "prompt": "Create MySQL DDL for an orders table using DECIMAL money columns and foreign keys.", + "expected_output": "A response that routes to references/mysql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MySQL dialect", + "Uses DECIMAL not FLOAT", + "Uses InnoDB", + "Defines foreign keys", + "Indexes foreign keys", + "Uses compatible timestamp/datetime types" + ] + }, + { + "id": 47, + "reference": "references/mysql.md", + "prompt": "Explain a MySQL ONLY_FULL_GROUP_BY error and rewrite the query correctly.", + "expected_output": "A response that routes to references/mysql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MySQL dialect", + "Explains ONLY_FULL_GROUP_BY", + "Adds grouped columns or aggregate functions", + "Avoids arbitrary non-grouped selects", + "Mentions ANY_VALUE only if 
justified", + "Uses valid MySQL syntax" + ] + }, + { + "id": 48, + "reference": "references/mysql.md", + "prompt": "Write a MySQL EXPLAIN workflow for a query filtering by tenant_id and status.", + "expected_output": "A response that routes to references/mysql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MySQL dialect", + "Uses EXPLAIN syntax", + "Mentions composite index order", + "Explains type/key/rows signals", + "Recommends validation with actual query", + "Avoids overclaiming" + ] + }, + { + "id": 49, + "reference": "references/mssql.md", + "prompt": "Write a SQL Server stored procedure to create an order and order items inside a transaction with TRY/CATCH.", + "expected_output": "A response that routes to references/mssql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MSSQL dialect", + "Uses T-SQL procedure syntax", + "Uses explicit transaction", + "Uses TRY/CATCH with rollback", + "Uses table-valued parameter or clear input approach", + "Avoids non-SQL Server syntax" + ] + }, + { + "id": 50, + "reference": "references/mssql.md", + "prompt": "Write SQL Server pagination using OFFSET FETCH for customers sorted by created_at and id.", + "expected_output": "A response that routes to references/mssql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MSSQL dialect", + "Uses OFFSET FETCH", + "Uses deterministic ORDER BY", + "Uses parameters", + "Recommends supporting index", + "Mentions keyset alternative for large offsets" + ] + }, + { + "id": 51, + "reference": "references/mssql.md", + "prompt": "Create a SQL Server MERGE upsert for inventory counts by sku and warehouse_id.", + "expected_output": "A response that routes to references/mssql.md and produces correct, executable database guidance for the requested 
scenario.", + "files": [], + "expectations": [ + "Routes to MSSQL dialect", + "Uses MERGE syntax", + "States matching key/unique constraint", + "Handles update and insert branches", + "Mentions MERGE caveats or locking if relevant", + "Uses valid T-SQL" + ] + }, + { + "id": 52, + "reference": "references/mssql.md", + "prompt": "Use a SQL Server temp table to prefilter active customers before joining orders.", + "expected_output": "A response that routes to references/mssql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MSSQL dialect", + "Uses #temp table syntax", + "Creates useful index if needed", + "Shows join to temp table", + "Mentions TempDB tradeoff", + "Avoids other dialect temp syntax" + ] + }, + { + "id": 53, + "reference": "references/mssql.md", + "prompt": "Write SQL Server JSON query extracting customer_id from a payload column.", + "expected_output": "A response that routes to references/mssql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MSSQL dialect", + "Uses JSON_VALUE or OPENJSON", + "Casts extracted value if needed", + "Mentions computed column/index option", + "Uses valid T-SQL", + "Handles missing JSON assumption" + ] + }, + { + "id": 54, + "reference": "references/mssql.md", + "prompt": "Create SQL Server DDL for users with UNIQUEIDENTIFIER primary key and default NEWSEQUENTIALID.", + "expected_output": "A response that routes to references/mssql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MSSQL dialect", + "Uses UNIQUEIDENTIFIER", + "Uses NEWSEQUENTIALID or explains NEWID tradeoff", + "Defines named constraints", + "Schema-qualifies objects", + "Uses SQL Server types" + ] + }, + { + "id": 55, + "reference": "references/mssql.md", + "prompt": "Optimize a SQL Server query and mention how to 
inspect the execution plan.", + "expected_output": "A response that routes to references/mssql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MSSQL dialect", + "Mentions actual execution plan or SET STATISTICS IO/TIME", + "Recommends indexes tied to joins/filters", + "Avoids SELECT *", + "Mentions parameter sniffing if relevant", + "Uses T-SQL syntax" + ] + }, + { + "id": 56, + "reference": "references/mssql.md", + "prompt": "Write SQL Server string cleanup SQL to trim and normalize emails before duplicate detection.", + "expected_output": "A response that routes to references/mssql.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to MSSQL dialect", + "Uses SQL Server string functions", + "Normalizes case and whitespace", + "Groups duplicate emails", + "Handles NULL/empty assumptions", + "Avoids non-T-SQL functions" + ] + }, + { + "id": 57, + "reference": "references/sqlite.md", + "prompt": "Write a SQLite query for a 7-day rolling average of daily active users from events.", + "expected_output": "A response that routes to references/sqlite.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to SQLite dialect", + "Uses SQLite date functions", + "Groups by day", + "Uses window function or valid alternative", + "Avoids unsupported syntax", + "Uses clear aliases" + ] + }, + { + "id": 58, + "reference": "references/sqlite.md", + "prompt": "Create SQLite DDL for notes, tags, and note_tags with foreign keys enabled.", + "expected_output": "A response that routes to references/sqlite.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to SQLite dialect", + "Uses SQLite-compatible types", + "Models many-to-many relationship", + "Mentions PRAGMA foreign_keys", 
+ "Avoids unsupported column constraints", + "Defines indexes" + ] + }, + { + "id": 59, + "reference": "references/sqlite.md", + "prompt": "Write SQLite FTS5 setup and query for searching documents by title and body.", + "expected_output": "A response that routes to references/sqlite.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to SQLite dialect", + "Uses FTS5 virtual table", + "Shows MATCH query", + "Explains content table or sync assumption", + "Avoids PostgreSQL full-text syntax", + "Mentions indexing/search behavior" + ] + }, + { + "id": 60, + "reference": "references/sqlite.md", + "prompt": "Enable WAL mode in SQLite and explain when it is appropriate.", + "expected_output": "A response that routes to references/sqlite.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to SQLite dialect", + "Uses PRAGMA journal_mode=WAL", + "Explains concurrency tradeoff", + "Mentions checkpoint/durability assumptions", + "Avoids treating WAL as universal", + "Uses SQLite syntax" + ] + }, + { + "id": 61, + "reference": "references/sqlite.md", + "prompt": "Write a SQLite upsert for settings keyed by user_id and setting_name.", + "expected_output": "A response that routes to references/sqlite.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to SQLite dialect", + "Uses ON CONFLICT DO UPDATE", + "States required unique constraint", + "Uses placeholders", + "Uses valid SQLite syntax", + "Mentions version support if relevant" + ] + }, + { + "id": 62, + "reference": "references/sqlite.md", + "prompt": "Design a SQLite STRICT table for app_users with UUID stored as text.", + "expected_output": "A response that routes to references/sqlite.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + 
"expectations": [ + "Routes to SQLite dialect", + "Uses STRICT table syntax if appropriate", + "Stores UUID as TEXT", + "Uses CHECK or format caveat if useful", + "Uses compatible timestamp storage", + "Mentions version caveat" + ] + }, + { + "id": 63, + "reference": "references/sqlite.md", + "prompt": "Explain how to add a NOT NULL column to an existing SQLite table safely.", + "expected_output": "A response that routes to references/sqlite.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to SQLite dialect", + "Mentions SQLite ALTER TABLE limitations", + "Uses staged table rebuild or default strategy", + "Preserves data", + "Mentions transaction/foreign keys", + "Avoids unsupported ADD COLUMN pattern" + ] + }, + { + "id": 64, + "reference": "references/sqlite.md", + "prompt": "Write SQLite JSON query to filter events where payload.action equals purchase.", + "expected_output": "A response that routes to references/sqlite.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to SQLite dialect", + "Uses json_extract or compatible JSON function", + "Mentions SQLite JSON version/support", + "Uses parameters", + "Avoids JSONB/Postgres syntax", + "Handles missing key assumption" + ] + }, + { + "id": 65, + "reference": "references/oracle.md", + "prompt": "Write an Oracle MERGE upsert for customer email preferences by customer_id.", + "expected_output": "A response that routes to references/oracle.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Oracle dialect", + "Uses MERGE INTO syntax", + "Defines ON match condition", + "Handles update and insert branches", + "Uses bind variables", + "Avoids MySQL/Postgres upsert syntax" + ] + }, + { + "id": 66, + "reference": "references/oracle.md", + "prompt": "Create an Oracle sequence and trigger 
or identity column for orders.id.", + "expected_output": "A response that routes to references/oracle.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Oracle dialect", + "Uses Oracle sequence/identity pattern", + "Explains version choice if relevant", + "Uses NUMBER for id", + "Avoids PostgreSQL serial syntax", + "Includes DDL in correct order" + ] + }, + { + "id": 67, + "reference": "references/oracle.md", + "prompt": "Write a PL/SQL procedure to mark overdue loans and insert audit records.", + "expected_output": "A response that routes to references/oracle.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Oracle dialect", + "Uses PL/SQL block/procedure syntax", + "Uses transaction/error considerations", + "Uses bind/procedure parameters", + "Inserts audit records", + "Avoids non-Oracle procedural syntax" + ] + }, + { + "id": 68, + "reference": "references/oracle.md", + "prompt": "Write an Oracle query using DUAL to return current timestamp and a generated UUID-like value if available.", + "expected_output": "A response that routes to references/oracle.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Oracle dialect", + "Uses DUAL appropriately", + "Uses Oracle date/time functions", + "Uses SYS_GUID or explains option", + "Avoids other dialect functions", + "Keeps query minimal" + ] + }, + { + "id": 69, + "reference": "references/oracle.md", + "prompt": "Write Oracle 12c pagination for invoices sorted by created_at and id.", + "expected_output": "A response that routes to references/oracle.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Oracle dialect", + "Uses OFFSET FETCH syntax", + "Uses deterministic ORDER BY", + "Uses bind variables", 
+ "Mentions index support", + "Avoids LIMIT syntax" + ] + }, + { + "id": 70, + "reference": "references/oracle.md", + "prompt": "Show Oracle EXPLAIN PLAN usage for a slow orders query.", + "expected_output": "A response that routes to references/oracle.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Oracle dialect", + "Uses EXPLAIN PLAN FOR", + "Uses DBMS_XPLAN.DISPLAY or equivalent", + "Mentions stats/index validation", + "Avoids overclaiming", + "Uses Oracle syntax" + ] + }, + { + "id": 71, + "reference": "references/oracle.md", + "prompt": "Create an Oracle partitioned table for events by month on occurred_at.", + "expected_output": "A response that routes to references/oracle.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Oracle dialect", + "Uses Oracle partitioning syntax", + "Partitions by date/month", + "Defines key columns/types", + "Mentions maintenance/pruning rationale", + "Avoids PostgreSQL partition syntax" + ] + }, + { + "id": 72, + "reference": "references/oracle.md", + "prompt": "Explain Oracle empty string equals NULL and rewrite a filter that checks blank middle_name.", + "expected_output": "A response that routes to references/oracle.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Oracle dialect", + "Mentions empty string is NULL", + "Uses IS NULL appropriately", + "Explains semantic difference", + "Avoids = empty string assumption", + "Uses valid Oracle SQL" + ] + }, + { + "id": 73, + "reference": "references/bigquery.md", + "prompt": "Write a BigQuery weekly retention cohort query from project.dataset.events.", + "expected_output": "A response that routes to references/bigquery.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + 
"Routes to BigQuery dialect", + "Uses GoogleSQL syntax", + "Uses DATE_TRUNC or compatible date functions", + "Defines cohort and retention weeks", + "Uses project-qualified table", + "Includes cost/partition filter notes" + ] + }, + { + "id": 74, + "reference": "references/bigquery.md", + "prompt": "Query BigQuery nested event_params ARRAY<STRUCT<key STRING, value STRING>> to extract page_location.", + "expected_output": "A response that routes to references/bigquery.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to BigQuery dialect", + "Uses UNNEST correctly", + "Avoids unintended row multiplication or explains it", + "Uses STRUCT field access", + "Uses GoogleSQL syntax", + "Mentions performance/cost" + ] + }, + { + "id": 75, + "reference": "references/bigquery.md", + "prompt": "Create BigQuery DDL for a partitioned and clustered events table.", + "expected_output": "A response that routes to references/bigquery.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to BigQuery dialect", + "Uses PARTITION BY", + "Uses CLUSTER BY", + "Chooses partition field deliberately", + "Includes require_partition_filter if appropriate", + "Uses BigQuery types" + ] + }, + { + "id": 76, + "reference": "references/bigquery.md", + "prompt": "Write a BigQuery parameterized query filtering events between @start_date and @end_date.", + "expected_output": "A response that routes to references/bigquery.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to BigQuery dialect", + "Uses named parameters", + "Filters partition/date range early", + "Uses BigQuery date syntax", + "Selects needed columns only", + "Mentions cost control" + ] + }, + { + "id": 77, + "reference": "references/bigquery.md", + "prompt": "Write BigQuery SQL to deduplicate rows by event_id keeping latest ingested_at.", +
"expected_output": "A response that routes to references/bigquery.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to BigQuery dialect", + "Uses QUALIFY or ROW_NUMBER", + "Partitions by event_id", + "Orders by ingested_at DESC", + "Uses GoogleSQL syntax", + "Mentions validation of duplicate counts" + ] + }, + { + "id": 78, + "reference": "references/bigquery.md", + "prompt": "Aggregate BigQuery ecommerce revenue by country from nested line_items.", + "expected_output": "A response that routes to references/bigquery.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to BigQuery dialect", + "Uses UNNEST for line_items", + "Aggregates revenue correctly", + "Avoids fanout mistakes", + "Uses NUMERIC-safe math", + "Mentions row grain" + ] + }, + { + "id": 79, + "reference": "references/bigquery.md", + "prompt": "Use BigQuery INFORMATION_SCHEMA to list partitioned tables in a dataset.", + "expected_output": "A response that routes to references/bigquery.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to BigQuery dialect", + "Uses INFORMATION_SCHEMA", + "Uses project/dataset-qualified metadata path", + "Selects relevant partition metadata", + "Avoids non-BigQuery catalog syntax", + "Mentions permissions/region caveat if useful" + ] + }, + { + "id": 80, + "reference": "references/bigquery.md", + "prompt": "Write a BigQuery query using ARRAY_AGG to return top 3 products per category.", + "expected_output": "A response that routes to references/bigquery.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to BigQuery dialect", + "Uses ARRAY_AGG with ORDER BY LIMIT", + "Groups by category", + "Uses STRUCT if returning multiple fields", + "Uses GoogleSQL syntax", + 
"Avoids unsupported syntax" + ] + }, + { + "id": 81, + "reference": "references/snowflake.md", + "prompt": "Write Snowflake SQL to flatten events from a VARIANT payload and count by event_name.", + "expected_output": "A response that routes to references/snowflake.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Snowflake dialect", + "Uses LATERAL FLATTEN", + "Uses VARIANT path access", + "Casts values explicitly", + "Aggregates by event_name", + "Uses Snowflake syntax" + ] + }, + { + "id": 82, + "reference": "references/snowflake.md", + "prompt": "Create a Snowflake task and stream pattern to incrementally process raw_events into fact_events.", + "expected_output": "A response that routes to references/snowflake.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Snowflake dialect", + "Mentions streams and tasks", + "Defines incremental processing", + "Uses Snowflake-compatible DDL", + "Mentions scheduling/warehouse assumptions", + "Includes validation or idempotency notes" + ] + }, + { + "id": 83, + "reference": "references/snowflake.md", + "prompt": "Write Snowflake SQL using time travel to recover rows from customers as of 2 hours ago.", + "expected_output": "A response that routes to references/snowflake.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Snowflake dialect", + "Uses AT or BEFORE time travel syntax", + "Explains retention assumption", + "Shows recovery/select pattern", + "Avoids non-Snowflake syntax", + "Mentions safety before overwrite" + ] + }, + { + "id": 84, + "reference": "references/snowflake.md", + "prompt": "Create Snowflake DDL for a table with VARIANT metadata and clustering by tenant_id, event_date.", + "expected_output": "A response that routes to references/snowflake.md and produces correct, 
executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Snowflake dialect", + "Uses VARIANT type", + "Uses CLUSTER BY where justified", + "Uses Snowflake timestamp/date types", + "Avoids overpromising clustering benefit", + "Defines columns clearly" + ] + }, + { + "id": 85, + "reference": "references/snowflake.md", + "prompt": "Write Snowflake SQL to parse payload:user.id and payload:amount into typed columns.", + "expected_output": "A response that routes to references/snowflake.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Snowflake dialect", + "Uses colon path access", + "Casts values explicitly", + "Handles missing values if relevant", + "Uses valid Snowflake syntax", + "Explains type choices" + ] + }, + { + "id": 86, + "reference": "references/snowflake.md", + "prompt": "Optimize a Snowflake query scanning raw_events by event_date and tenant_id.", + "expected_output": "A response that routes to references/snowflake.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Snowflake dialect", + "Mentions pruning/clustering", + "Filters early", + "Selects needed columns", + "Avoids generic OLTP indexes", + "Uses query profile/warehouse cost notes" + ] + }, + { + "id": 87, + "reference": "references/snowflake.md", + "prompt": "Write Snowflake SQL to create a transient staging table from raw JSON events.", + "expected_output": "A response that routes to references/snowflake.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Snowflake dialect", + "Uses TRANSIENT table syntax", + "Uses VARIANT or typed columns appropriately", + "Explains retention/fail-safe tradeoff", + "Uses Snowflake syntax", + "Includes validation" + ] + }, + { + "id": 88, + "reference": 
"references/snowflake.md", + "prompt": "Write Snowflake SQL with QUALIFY to keep the latest subscription status per account.", + "expected_output": "A response that routes to references/snowflake.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to Snowflake dialect", + "Uses QUALIFY with ROW_NUMBER", + "Partitions by account", + "Orders by status timestamp", + "Uses valid syntax", + "States row grain" + ] + }, + { + "id": 89, + "reference": "references/clickhouse.md", + "prompt": "Create a ClickHouse MergeTree table for events with occurred_at, tenant_id, user_id, and event_name.", + "expected_output": "A response that routes to references/clickhouse.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to ClickHouse dialect", + "Uses MergeTree engine", + "Defines PARTITION BY", + "Defines ORDER BY key", + "Uses LowCardinality where useful", + "Explains sparse primary key behavior" + ] + }, + { + "id": 90, + "reference": "references/clickhouse.md", + "prompt": "Write a ClickHouse materialized view for daily event counts by tenant and event_name.", + "expected_output": "A response that routes to references/clickhouse.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to ClickHouse dialect", + "Uses materialized view syntax", + "Uses aggregate target table or SummingMergeTree pattern", + "Defines daily grain", + "Uses ClickHouse date functions", + "Mentions backfill caveat" + ] + }, + { + "id": 91, + "reference": "references/clickhouse.md", + "prompt": "Write a ClickHouse query for top 10 pages by unique users in the last 7 days.", + "expected_output": "A response that routes to references/clickhouse.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to 
ClickHouse dialect", + "Uses ClickHouse aggregate functions", + "Filters time range", + "Uses LIMIT", + "Uses appropriate date/time syntax", + "Mentions partition/order pruning" + ] + }, + { + "id": 92, + "reference": "references/clickhouse.md", + "prompt": "Explain why ClickHouse primary key is not a uniqueness constraint and design around it.", + "expected_output": "A response that routes to references/clickhouse.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to ClickHouse dialect", + "States primary key is sparse index", + "Does not claim uniqueness enforcement", + "Suggests dedupe/versioning pattern if needed", + "Mentions ORDER BY role", + "Uses ClickHouse terminology" + ] + }, + { + "id": 93, + "reference": "references/clickhouse.md", + "prompt": "Create a ClickHouse ReplacingMergeTree table for user profile snapshots.", + "expected_output": "A response that routes to references/clickhouse.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to ClickHouse dialect", + "Uses ReplacingMergeTree", + "Defines version column", + "Defines ORDER BY key", + "Explains eventual dedupe behavior", + "Mentions FINAL tradeoff if relevant" + ] + }, + { + "id": 94, + "reference": "references/clickhouse.md", + "prompt": "Write a ClickHouse query using LowCardinality event_name and group by event_name.", + "expected_output": "A response that routes to references/clickhouse.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to ClickHouse dialect", + "Uses LowCardinality context correctly", + "Uses valid GROUP BY", + "Mentions dictionary/cardinality benefit", + "Avoids OLTP index language", + "Uses ClickHouse syntax" + ] + }, + { + "id": 95, + "reference": "references/clickhouse.md", + "prompt": "Design ClickHouse monthly partitions for a 
high-volume metrics table and explain partition cardinality.", + "expected_output": "A response that routes to references/clickhouse.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to ClickHouse dialect", + "Uses coarse partitioning", + "Avoids high-cardinality partitions", + "Defines ORDER BY", + "Explains pruning/maintenance", + "Uses MergeTree assumptions" + ] + }, + { + "id": 96, + "reference": "references/clickhouse.md", + "prompt": "Write a ClickHouse query to calculate p95 latency by service over the last hour.", + "expected_output": "A response that routes to references/clickhouse.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to ClickHouse dialect", + "Uses quantile or quantileExact appropriately", + "Filters last hour", + "Groups by service", + "Uses ClickHouse interval syntax", + "Mentions approximate/exact tradeoff" + ] + }, + { + "id": 97, + "reference": "references/cockroachdb.md", + "prompt": "Create CockroachDB DDL for accounts with UUID primary keys and unique tenant/email.", + "expected_output": "A response that routes to references/cockroachdb.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to CockroachDB dialect", + "Uses Cockroach-compatible types", + "Uses gen_random_uuid or compatible default", + "Defines unique tenant/email constraint", + "Avoids serial hot spot", + "Mentions distributed SQL behavior" + ] + }, + { + "id": 98, + "reference": "references/cockroachdb.md", + "prompt": "Write a CockroachDB transaction retry pattern for transferring funds between accounts.", + "expected_output": "A response that routes to references/cockroachdb.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to CockroachDB dialect", + "Mentions 
serializable isolation", + "Includes retry guidance", + "Uses explicit transaction", + "Uses parameters", + "Avoids assuming single-node locking behavior" + ] + }, + { + "id": 99, + "reference": "references/cockroachdb.md", + "prompt": "Design CockroachDB regional tables for users and audit_events across US and EU.", + "expected_output": "A response that routes to references/cockroachdb.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to CockroachDB dialect", + "Mentions regional/locality concepts", + "Defines tables/regions at appropriate level", + "Explains data residency assumption", + "Avoids generic PostgreSQL-only answer", + "Mentions follower reads if relevant" + ] + }, + { + "id": 100, + "reference": "references/cockroachdb.md", + "prompt": "Write a CockroachDB follower read query for historical reporting where stale reads are acceptable.", + "expected_output": "A response that routes to references/cockroachdb.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to CockroachDB dialect", + "Uses follower read/staleness syntax concept", + "States staleness tradeoff", + "Applies to read-only reporting", + "Avoids using for fresh writes", + "Uses Cockroach terminology" + ] + }, + { + "id": 101, + "reference": "references/cockroachdb.md", + "prompt": "Optimize CockroachDB schema that uses monotonically increasing integer primary keys on a write-heavy events table.", + "expected_output": "A response that routes to references/cockroachdb.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to CockroachDB dialect", + "Identifies hot range risk", + "Recommends UUID/hash/sharded key alternative", + "Explains distributed write tradeoff", + "Mentions secondary index costs", + "Avoids generic Postgres advice only" + ] + }, + { + "id": 102, + 
"reference": "references/cockroachdb.md", + "prompt": "Write CockroachDB-compatible UPSERT for inventory by sku and warehouse_id.", + "expected_output": "A response that routes to references/cockroachdb.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to CockroachDB dialect", + "Uses CockroachDB UPSERT or INSERT ON CONFLICT if appropriate", + "States uniqueness requirement", + "Uses parameters", + "Mentions transaction retry if relevant", + "Avoids MySQL syntax" + ] + }, + { + "id": 103, + "reference": "references/cockroachdb.md", + "prompt": "Create CockroachDB indexes for tenant-scoped orders filtered by tenant_id, status, created_at.", + "expected_output": "A response that routes to references/cockroachdb.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to CockroachDB dialect", + "Recommends composite index order", + "Considers tenant scope", + "Mentions distributed secondary index cost", + "Uses Cockroach-compatible DDL", + "Ties index to query pattern" + ] + }, + { + "id": 104, + "reference": "references/cockroachdb.md", + "prompt": "Explain PostgreSQL extension compatibility risk when porting uuid-ossp schema to CockroachDB.", + "expected_output": "A response that routes to references/cockroachdb.md and produces correct, executable database guidance for the requested scenario.", + "files": [], + "expectations": [ + "Routes to CockroachDB dialect", + "States CockroachDB is PostgreSQL-like not identical", + "Mentions extension/function compatibility", + "Suggests gen_random_uuid or compatible alternative", + "Identifies migration changes", + "Avoids assuming all Postgres extensions work" + ] + } + ] +} diff --git a/.agents/skills/codegen-database/references/analytics.md b/.agents/skills/codegen-database/references/analytics.md new file mode 100644 index 0000000..5192a61 --- /dev/null +++ 
b/.agents/skills/codegen-database/references/analytics.md @@ -0,0 +1,44 @@ +# Analytics SQL Reference + +Write warehouse-oriented SQL for metrics, cohorts, funnels, retention, denormalized reporting tables, and transformation models. + +## Analytics Rules + +**Make metric SQL auditable by naming grain, time rules, and validation checks.** + +- **Define the grain:** State the row grain for every result or model, such as one row per user per day or one row per order item. +- **Separate facts and dimensions:** Keep event/fact measures separate from descriptive dimensions unless a denormalized output is explicitly requested. +- **Use stable time windows:** Make timezone, date truncation, inclusive/exclusive boundaries, and late-arriving data assumptions explicit. +- **Avoid silent fanout:** Pre-aggregate before joining one-to-many relationships when metrics can multiply. +- **Prefer named CTEs:** Use CTEs to make metric definitions auditable, then collapse only if the target engine needs it for performance. +- **Partition and cluster:** Tie partitioning, clustering, or sort keys to common filters and joins. +- **Validate metrics:** Include sanity checks such as row counts, distinct keys, duplicate detection, and null-rate checks for important dimensions. + +## Output Shape + +**Return code and the minimum context needed to trust the metric.** + +Return the query or model definition, followed by short notes for grain, assumptions, performance, and validation. + +```sql +WITH base_events AS ( + SELECT + user_id, + event_name, + occurred_at + FROM events + WHERE occurred_at >= :start_at + AND occurred_at < :end_at +) +SELECT + DATE_TRUNC('day', occurred_at) AS event_day, + COUNT(DISTINCT user_id) AS active_users +FROM base_events +GROUP BY 1; +``` + +## Warehouse Routing + +**Load engine-specific syntax only when the target warehouse is known.** + +Use dialect references for engine-specific details when present. 
If no warehouse-specific reference exists, keep SQL ANSI-oriented and flag any syntax assumptions. diff --git a/.agents/skills/codegen-database/references/bigquery.md b/.agents/skills/codegen-database/references/bigquery.md new file mode 100644 index 0000000..05e61bf --- /dev/null +++ b/.agents/skills/codegen-database/references/bigquery.md @@ -0,0 +1,28 @@ +# BigQuery Reference + +Use GoogleSQL syntax for BigQuery warehouse SQL. + +## Key Patterns + +**Write BigQuery SQL with cost, partition pruning, and nested data semantics in mind.** + +- **Types:** Use `ARRAY`, `STRUCT`, `JSON`, `NUMERIC`, `BIGNUMERIC`, `TIMESTAMP`, and `DATETIME` intentionally. Prefer `TIMESTAMP` for absolute instants. +- **Nested data:** Use `UNNEST` with aliases and guard against row multiplication. +- **Partitioning:** Partition large tables by ingestion time or business date. Use `require_partition_filter = TRUE` where appropriate. +- **Clustering:** Cluster on high-cardinality filter or join keys used after partition pruning. +- **Cost control:** Select only needed columns, filter partitions early, and avoid repeated scans of large CTEs when materialization would be cheaper. +- **Parameters:** Use named parameters like `@start_date`. + +## Example + +**Use GoogleSQL syntax and project-qualified tables when appropriate.** + +```sql +SELECT + DATE(event_timestamp) AS event_date, + COUNT(DISTINCT user_id) AS active_users +FROM `project.dataset.events` +WHERE DATE(event_timestamp) BETWEEN @start_date AND @end_date +GROUP BY event_date +ORDER BY event_date; +``` diff --git a/.agents/skills/codegen-database/references/clickhouse.md b/.agents/skills/codegen-database/references/clickhouse.md new file mode 100644 index 0000000..64a8898 --- /dev/null +++ b/.agents/skills/codegen-database/references/clickhouse.md @@ -0,0 +1,30 @@ +# ClickHouse Reference + +Use ClickHouse patterns for high-volume analytical tables and queries. 
+ +## Key Patterns + +**Design ClickHouse tables around scan pruning, append-heavy writes, and explicit aggregation.** + +- **Engines:** Choose `MergeTree` family engines deliberately. State the `ORDER BY` key because it drives data skipping and query performance. +- **Partitioning:** Partition by coarse time windows or stable lifecycle boundaries. Avoid high-cardinality partitions. +- **Materialized views:** Use materialized views for incremental aggregation when raw-event scans are too expensive. +- **Primary key:** In ClickHouse, primary key is sparse index metadata, not a uniqueness guarantee. +- **Types:** Prefer concrete numeric and datetime types; use `LowCardinality(String)` for repeated low-cardinality text. +- **Mutations:** Treat updates and deletes as expensive asynchronous mutations. + +## Example + +**Choose partition and order keys from the most common query filters.** + +```sql +CREATE TABLE events +( + occurred_at DateTime, + user_id UUID, + event_name LowCardinality(String) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(occurred_at) +ORDER BY (event_name, occurred_at, user_id); +``` diff --git a/.agents/skills/codegen-database/references/cockroachdb.md b/.agents/skills/codegen-database/references/cockroachdb.md new file mode 100644 index 0000000..2c2ffda --- /dev/null +++ b/.agents/skills/codegen-database/references/cockroachdb.md @@ -0,0 +1,28 @@ +# CockroachDB Reference + +Use CockroachDB-compatible distributed SQL and call out behavior that differs from single-node PostgreSQL. + +## Key Patterns + +**Treat CockroachDB as distributed SQL with PostgreSQL-like syntax, not as a PostgreSQL clone.** + +- **Postgres-like, not Postgres:** CockroachDB supports much PostgreSQL syntax, but extensions, functions, locking behavior, and some types differ. +- **Primary keys:** Prefer keys that avoid hot ranges. Random UUIDs are often safer than monotonic keys for write-heavy tables. +- **Transactions:** Expect serializable isolation. 
Add retry guidance for transaction conflicts. +- **Regional data:** Use regional tables, locality, and follower reads when geo-distribution is part of the requirement. +- **Indexes:** Use secondary indexes carefully because distributed writes pay extra coordination cost. +- **Sequences:** Avoid sequence-heavy designs for high-scale distributed writes unless the tradeoff is acceptable. + +## Example + +**Prefer keys and constraints that avoid distributed hot spots.** + +```sql +CREATE TABLE accounts ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + tenant_id UUID NOT NULL, + email STRING NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + CONSTRAINT accounts_tenant_email_uq UNIQUE (tenant_id, email) +); +``` diff --git a/.agents/skills/writer-sql/references/common.md b/.agents/skills/codegen-database/references/common.md similarity index 60% rename from .agents/skills/writer-sql/references/common.md rename to .agents/skills/codegen-database/references/common.md index f89fa16..606bbd0 100644 --- a/.agents/skills/writer-sql/references/common.md +++ b/.agents/skills/codegen-database/references/common.md @@ -1,9 +1,11 @@ -# DML & Query Writing Reference +# OLTP SQL Reference Write **production-quality SQL** for OLTP databases: queries, DDL, stored procedures, views, transactions, and optimization. ## Variant detection +**Detect dialect from context before choosing syntax.** + Identify the dialect from context. Check in this order: 1. Explicit mention ("postgres", "mysql", "sqlite", etc.) 
@@ -19,9 +21,15 @@ Once identified, load the dialect-specific reference for syntax details: - **MSSQL / SQL Server** → read `references/mssql.md` - **SQLite** → read `references/sqlite.md` - **Oracle** → read `references/oracle.md` +- **BigQuery** → read `references/bigquery.md` for warehouse SQL, usually with `references/analytics.md` +- **Snowflake** → read `references/snowflake.md` for warehouse SQL, usually with `references/analytics.md` +- **ClickHouse** → read `references/clickhouse.md` for analytical tables and queries, usually with `references/analytics.md` +- **CockroachDB** → read `references/cockroachdb.md` for distributed SQL and PostgreSQL-like syntax caveats ## SQL quality standards +**Write readable, parameterized SQL with explicit assumptions.** + ### Formatting ```sql @@ -72,9 +80,9 @@ FROM active_users u ### NULL handling -- Use `IS NULL` / `IS NOT NULL`, never `= NULL` -- Use `COALESCE` for defaults, explain the semantic choice -- Document nullable columns in comments +- **Null predicates:** Use `IS NULL` / `IS NOT NULL`, never `= NULL`. +- **Defaults:** Use `COALESCE` for defaults and explain the semantic choice. +- **Nullable columns:** Document nullable columns in comments. ### Transactions @@ -112,23 +120,27 @@ After writing a query, note which indexes it relies on: ## Common patterns by dialect +**Load dialect details only when syntax or behavior differs.** + Read the relevant `references/{dialect}.md` file for: -- Type system quirks (e.g., Postgres JSONB, MySQL ENUM pitfalls, SQLite type affinity) -- Pagination idioms (OFFSET vs keyset) -- EXPLAIN / execution plan syntax -- Full-text search capabilities -- Date/time functions -- Upsert syntax (`ON CONFLICT`, `ON DUPLICATE KEY UPDATE`, `MERGE`) +- **Type system:** Quirks such as Postgres JSONB, MySQL ENUM pitfalls, and SQLite type affinity. +- **Pagination:** Idioms for paging results, especially OFFSET versus keyset. +- **Plans:** EXPLAIN and execution-plan syntax.
+- **Search:** Full-text search capabilities. +- **Time:** Date/time functions. +- **Upsert:** Upsert syntax such as `ON CONFLICT`, `ON DUPLICATE KEY UPDATE`, and `MERGE`. ## Query optimization checklist +**Optimize from access patterns and execution plans, not guesses.** + When asked to optimize a query, check: -- [ ] Are all JOIN columns indexed? -- [ ] Is there an index on all WHERE clause columns used for filtering (not full-table-scan)? -- [ ] Are there unnecessary subqueries that could be CTEs or JOINs? -- [ ] Is `SELECT *` used where specific columns would suffice? -- [ ] For pagination: is OFFSET used on large tables? (switch to keyset if so) -- [ ] Is `LIKE '%value%'` used? (leading wildcard prevents index use — consider full-text search) -- [ ] Are there implicit type casts in WHERE clauses causing index skips? +- [ ] **Join indexes:** Are all JOIN columns indexed? +- [ ] **Filter indexes:** Is there an index on all WHERE clause columns used for filtering (not full-table-scan)? +- [ ] **Subqueries:** Are there unnecessary subqueries that could be CTEs or JOINs? +- [ ] **Column lists:** Is `SELECT *` used where specific columns would suffice? +- [ ] **Pagination:** Is OFFSET used on large tables? (switch to keyset if so) +- [ ] **Wildcards:** Is `LIKE '%value%'` used? (leading wildcard prevents index use — consider full-text search) +- [ ] **Type casts:** Are there implicit type casts in WHERE clauses causing index skips? diff --git a/.agents/skills/codegen-database/references/migration.md b/.agents/skills/codegen-database/references/migration.md new file mode 100644 index 0000000..944d29b --- /dev/null +++ b/.agents/skills/codegen-database/references/migration.md @@ -0,0 +1,62 @@ +# Migration Reference + +Produce database migrations that are safe to run, clear to review, and aligned with the repository's migration tool.
+ +## Detect the Migration Tool + +**Use the repository's migration tool instead of inventing a new format.** + +Identify tooling from file names, folders, dependencies, or framework conventions: + +| Signal | Tooling | +| --- | --- | +| `V1__name.sql`, `flyway.conf` | Flyway | +| `db/changelog`, XML/YAML change sets | Liquibase | +| `versions/*.py`, `alembic.ini` | Alembic | +| `db/migrate/*.rb` | Rails / ActiveRecord | +| `prisma/migrations`, `schema.prisma` | Prisma | +| `migrations/*.sql`, `up.sql`, `down.sql` | SQL-first migration tool | + +Follow the existing repository style before introducing a new migration shape. + +## Safety Rules + +**Prefer staged, reversible changes that avoid surprise locks and data loss.** + +- **Transactional DDL:** Use transaction-wrapped migrations when the dialect and tool support it. Note exceptions such as PostgreSQL `CREATE INDEX CONCURRENTLY`. +- **Reversibility:** Provide a down migration when the tool expects one. If data loss prevents a true rollback, state the forward-fix path. +- **Expand and contract:** For production schema changes, add nullable columns or compatibility structures first, backfill safely, then enforce constraints or remove old columns in a later migration. +- **Backfills:** Batch large updates, make them idempotent, and avoid long exclusive locks. +- **Indexes:** For large PostgreSQL tables, prefer `CREATE INDEX CONCURRENTLY` outside a transaction. For other dialects, call out lock behavior where relevant. +- **Constraints:** Add constraints after data is valid. For PostgreSQL, consider `NOT VALID` plus later validation for large tables. +- **Destructive changes:** Do not drop columns, tables, or data without making the risk explicit and offering a safer staged alternative. 
+ +## Output Shape + +**Match the project's migration format and make rollback behavior explicit.** + +For SQL-first migrations, produce: + +```sql +-- Up +BEGIN; + +-- schema changes + +COMMIT; + +-- Down +BEGIN; + +-- rollback changes + +COMMIT; +``` + +For framework migrations, use the framework's existing class/function style and include validation notes for generated SQL when the ORM hides important DDL details. + +## Verification + +**Check both schema application and rollback whenever the project supports it.** + +Recommend the narrowest useful checks: migration dry-run, schema diff, rollback test, application tests that touch the changed table, and query-plan review for new indexes. diff --git a/.agents/skills/writer-sql/references/mssql.md b/.agents/skills/codegen-database/references/mssql.md similarity index 65% rename from .agents/skills/writer-sql/references/mssql.md rename to .agents/skills/codegen-database/references/mssql.md index b79c70c..6bd50f2 100644 --- a/.agents/skills/writer-sql/references/mssql.md +++ b/.agents/skills/codegen-database/references/mssql.md @@ -2,17 +2,21 @@ ## Key types -- `UNIQUEIDENTIFIER` — UUID/GUID (`NEWID()` or `NEWSEQUENTIALID()`) -- `BIGINT IDENTITY(1,1)` — auto-increment integer PK -- `NVARCHAR(n)` / `NVARCHAR(MAX)` — Unicode strings (use N prefix for literals) -- `DECIMAL(p,s)` — exact decimals -- `DATETIME2` — preferred over `DATETIME` (higher precision, wider range) -- `DATETIMEOFFSET` — timezone-aware timestamps -- `BIT` — boolean (0/1) -- `VARBINARY(MAX)` — binary data +**Use dialect-native types and call out portability tradeoffs.** + +- **Rule:** `UNIQUEIDENTIFIER` — UUID/GUID (`NEWID()` or `NEWSEQUENTIALID()`) +- **Rule:** `BIGINT IDENTITY(1,1)` — auto-increment integer PK +- **Rule:** `NVARCHAR(n)` / `NVARCHAR(MAX)` — Unicode strings (use N prefix for literals) +- **Rule:** `DECIMAL(p,s)` — exact decimals +- **Rule:** `DATETIME2` — preferred over `DATETIME` (higher precision, wider range) +- **Rule:** 
`DATETIMEOFFSET` — timezone-aware timestamps +- **Rule:** `BIT` — boolean (0/1) +- **Rule:** `VARBINARY(MAX)` — binary data ## T-SQL specific syntax +**Use T-SQL constructs intentionally and keep SQL Server behavior explicit.** + ### Variables and blocks ```sql @@ -89,6 +93,8 @@ SET SHOWPLAN_XML OFF; ## TempDB patterns +**Use temporary objects when they reduce complexity or repeated work.** + ```sql -- Temp table (session-scoped) CREATE TABLE #TempResults ( @@ -103,6 +109,8 @@ DECLARE @Results TABLE (id UNIQUEIDENTIFIER, name NVARCHAR(255)); ## String functions +**Use SQL Server string functions with collation and null behavior in mind.** + ```sql -- Concatenation SELECT CONCAT(first_name, N' ', last_name) AS full_name FROM users; @@ -114,6 +122,8 @@ SELECT value FROM STRING_SPLIT(@csv, ','); ## JSON support (SQL Server 2016+) +**Use SQL Server JSON functions while remembering JSON is stored as text.** + ```sql -- Parse JSON SELECT JSON_VALUE(payload, '$.userId') AS user_id @@ -128,8 +138,10 @@ SELECT * FROM events WHERE ISJSON(payload) = 1; ## Common naming conventions -- Schema-qualify all objects: `dbo.users`, `dbo.orders` -- Stored procedures: `usp_[Description]` -- Views: `vw_[Description]` -- Triggers: `trg_[Table]_[Action]` -- Indexes: `IX_[Table]_[Columns]`, `UX_[Table]_[Columns]` (unique) +**Keep object names schema-qualified and predictable.** + +- **Rule:** Schema-qualify all objects: `dbo.users`, `dbo.orders` +- **Rule:** Stored procedures: `usp_[Description]` +- **Rule:** Views: `vw_[Description]` +- **Rule:** Triggers: `trg_[Table]_[Action]` +- **Rule:** Indexes: `IX_[Table]_[Columns]`, `UX_[Table]_[Columns]` (unique) diff --git a/.agents/skills/codegen-database/references/mysql.md b/.agents/skills/codegen-database/references/mysql.md new file mode 100644 index 0000000..e8b06e3 --- /dev/null +++ b/.agents/skills/codegen-database/references/mysql.md @@ -0,0 +1,131 @@ +# MySQL / MariaDB Reference + +## Key types + +**Use dialect-native types and call 
out portability tradeoffs.** + +- **Rule:** `BIGINT UNSIGNED AUTO_INCREMENT` — primary keys (or `CHAR(36)` for UUID) +- **Rule:** `VARCHAR(n)` — always specify length (required by MySQL) +- **Rule:** `DECIMAL(p,s)` — exact decimals (not FLOAT for money) +- **Rule:** `DATETIME` / `TIMESTAMP` — TIMESTAMP auto-converts to UTC; DATETIME stores as-is +- **Rule:** `JSON` — native JSON type (MySQL 5.7.8+, MariaDB 10.2+) +- **Rule:** `TEXT` / `MEDIUMTEXT` / `LONGTEXT` — for large text (no indexes without prefix) +- **Rule:** `TINYINT(1)` — booleans (MySQL has no native BOOLEAN, maps to TINYINT) + +## Upsert + +**Use the dialect-native upsert form and state the required uniqueness constraint.** + +```sql +INSERT INTO users (email, name, updated_at) +VALUES (?, ?, NOW()) +ON DUPLICATE KEY UPDATE + name = VALUES(name), + updated_at = NOW(); +``` + +## Auto-increment and UUIDs + +**Choose identifiers based on distribution, ordering, and write patterns.** + +```sql +-- Integer PK (simpler, better performance) +CREATE TABLE users ( + id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, + ... +); + +-- UUID PK (distributed-safe) +CREATE TABLE users ( + id CHAR(36) NOT NULL DEFAULT (UUID()) PRIMARY KEY, + ... +) ENGINE=InnoDB; +``` + +## Storage engines + +**Choose storage engines that preserve transactions and referential integrity.** + +- **Rule:** Always use `ENGINE=InnoDB` (transactions, foreign keys, row-level locking) +- **Rule:** `ENGINE=MyISAM` is legacy — never use for new tables + +## Character set + +**Use Unicode-safe defaults unless the repository already requires otherwise.** + +```sql +-- Database level +CREATE DATABASE mydb CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; + +-- Table level (always explicit) +CREATE TABLE articles ( + ... +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +``` + +`utf8mb4` is required for emoji and full Unicode support. `utf8` in MySQL is 3-byte only. 
+ +## JSON columns + +**Use JSON columns for flexible attributes while preserving queryable constraints where needed.** + +```sql +CREATE TABLE events ( + id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, + payload JSON NOT NULL, + -- Virtual columns for JSON key indexing: + event_type VARCHAR(50) AS (JSON_UNQUOTE(payload->>'$.type')) STORED, + INDEX idx_events_type (event_type) +); + +-- Query +SELECT * FROM events WHERE JSON_UNQUOTE(payload->>'$.userId') = ?; +``` + +## Full-text search + +**Use built-in full-text search syntax and indexes for search workloads.** + +```sql +-- Index (MyISAM supports this; InnoDB from 5.6+) +ALTER TABLE articles ADD FULLTEXT(title, body); + +-- Query +SELECT *, MATCH(title, body) AGAINST (? IN BOOLEAN MODE) AS score +FROM articles +WHERE MATCH(title, body) AGAINST (? IN BOOLEAN MODE) +ORDER BY score DESC; +``` + +## EXPLAIN + +**Use execution plans to confirm whether indexes and joins behave as expected.** + +```sql +EXPLAIN FORMAT=JSON +SELECT * FROM orders WHERE user_id = ?; +``` + +## Pagination — keyset preferred + +**Use keyset pagination when offsets would scan too much data.** + +```sql +-- Offset (avoid on large tables) +SELECT * FROM orders ORDER BY created_at DESC LIMIT ? OFFSET ?; + +-- Keyset +SELECT * FROM orders +WHERE created_at < ? 
-- cursor +ORDER BY created_at DESC +LIMIT ?; +``` + +## Common gotchas + +**Call out dialect behavior that commonly changes query results or safety.** + +- **Rule:** `GROUP BY` in MySQL 5.7+ with `ONLY_FULL_GROUP_BY` mode: all non-aggregate SELECT columns must be in GROUP BY +- **Rule:** `ENUM` type: changes to ENUM values require ALTER TABLE (expensive on large tables); prefer VARCHAR + CHECK constraint (enforced only in MySQL 8.0.16+ and MariaDB 10.2.1+; older versions parse but ignore it) or a lookup table +- **Rule:** String comparison is case-insensitive by default (depends on collation) +- **Rule:** No `RETURNING` clause in MySQL — use `LAST_INSERT_ID()` after INSERT (MariaDB 10.5+ supports `INSERT ... RETURNING`) diff --git a/.agents/skills/writer-sql/references/oracle.md b/.agents/skills/codegen-database/references/oracle.md similarity index 61% rename from .agents/skills/writer-sql/references/oracle.md rename to .agents/skills/codegen-database/references/oracle.md index f0d1460..7ded319 100644 --- a/.agents/skills/writer-sql/references/oracle.md +++ b/.agents/skills/codegen-database/references/oracle.md @@ -2,17 +2,21 @@ ## Key types -- `NUMBER(p,s)` — exact numeric (both integers and decimals) -- `VARCHAR2(n CHAR)` — variable-length string (prefer CHAR semantics over BYTE) -- `NVARCHAR2(n)` — Unicode strings -- `DATE` — stores date and time (not just date!)
+- **Rule:** `TIMESTAMP WITH TIME ZONE` — timezone-aware timestamps (prefer over DATE) +- **Rule:** `CLOB` / `NCLOB` — large text +- **Rule:** `BLOB` — binary data +- **Rule:** `RAW(16)` — UUIDs (or use `VARCHAR2(36)` with string format) ## Sequences and identity +**Use the dialect identity mechanism that fits insert and migration behavior.** + ```sql -- Traditional sequence CREATE SEQUENCE user_seq START WITH 1 INCREMENT BY 1 NOCACHE NOCYCLE; @@ -32,6 +36,8 @@ CREATE TABLE users ( ## PL/SQL procedures +**Keep procedural database code focused, parameterized, and testable.** + ```sql CREATE OR REPLACE PROCEDURE create_user( p_email IN VARCHAR2, @@ -55,6 +61,8 @@ END create_user; ## Dual table +**Use Oracle DUAL only for expression queries that need it.** + Used for expressions without a real table: ```sql @@ -65,6 +73,8 @@ SELECT UPPER('hello') FROM DUAL; ## Upsert (MERGE) +**Use MERGE carefully and state match conditions explicitly.** + ```sql MERGE INTO users tgt USING (SELECT :email AS email, :name AS name FROM DUAL) src @@ -78,6 +88,8 @@ WHEN NOT MATCHED THEN ## Pagination (12c+) +**Use modern row limiting syntax for Oracle 12c and newer.** + ```sql -- Row limiting clause (Oracle 12c+) SELECT id, email, created_at @@ -95,6 +107,8 @@ SELECT * FROM ( ## CTEs +**Use CTEs for readable intermediate result sets.** + ```sql WITH active_users AS ( SELECT id, email FROM users WHERE deleted_at IS NULL @@ -109,6 +123,8 @@ LEFT JOIN order_stats o ON o.user_id = u.id; ## Hints +**Use optimizer hints sparingly and explain why statistics or indexes are insufficient.** + Use hints sparingly — prefer fixing statistics or indexes first: ```sql @@ -120,6 +136,8 @@ SELECT /*+ PARALLEL(o, 4) */ * FROM orders o; ## EXPLAIN PLAN +**Inspect Oracle execution plans before asserting performance improvements.** + ```sql EXPLAIN PLAN FOR SELECT * FROM orders WHERE user_id = :uid; @@ -129,6 +147,8 @@ SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY); ## Partitioning +**Partition only when pruning, 
maintenance, or lifecycle management benefits are clear.** + ```sql CREATE TABLE orders ( id RAW(16) DEFAULT SYS_GUID(), @@ -142,8 +162,10 @@ INTERVAL (NUMTOYMINTERVAL(1, 'MONTH')) ## Common gotchas -- Empty string `''` equals NULL in Oracle — there is no distinction -- `VARCHAR2` max is 32767 bytes in PL/SQL, 4000 bytes in SQL (use CLOB beyond that) -- Date arithmetic: `SYSDATE + 1` adds 1 day; `SYSDATE + 1/24` adds 1 hour -- NVL vs COALESCE: both work; COALESCE is ANSI-standard -- String concatenation: use `||` operator +**Call out dialect behavior that commonly changes query results or safety.** + +- **Rule:** Empty string `''` equals NULL in Oracle — there is no distinction +- **Rule:** `VARCHAR2` max is 32767 bytes in PL/SQL, 4000 bytes in SQL (use CLOB beyond that) +- **Rule:** Date arithmetic: `SYSDATE + 1` adds 1 day; `SYSDATE + 1/24` adds 1 hour +- **Rule:** NVL vs COALESCE: both work; COALESCE is ANSI-standard +- **Rule:** String concatenation: use `||` operator diff --git a/.agents/skills/writer-sql/references/postgres.md b/.agents/skills/codegen-database/references/postgres.md similarity index 64% rename from .agents/skills/writer-sql/references/postgres.md rename to .agents/skills/codegen-database/references/postgres.md index 08f12e5..1fa4270 100644 --- a/.agents/skills/writer-sql/references/postgres.md +++ b/.agents/skills/codegen-database/references/postgres.md @@ -2,16 +2,20 @@ ## Key types -- `UUID` — primary/foreign keys (use `uuid_generate_v4()` or `gen_random_uuid()` in PG 13+) -- `TEXT` — variable-length strings (no arbitrary VARCHAR limits) -- `NUMERIC(p,s)` — exact decimal (money, quantities) -- `TIMESTAMPTZ` — always use timezone-aware timestamps -- `JSONB` — structured JSON data with indexing support -- `BOOLEAN` — true/false (not 0/1) -- `BIGINT` — for counts and large IDs when UUID overhead matters +**Use dialect-native types and call out portability tradeoffs.** + +- **Rule:** `UUID` — primary/foreign keys (use `uuid_generate_v4()` or 
`gen_random_uuid()` in PG 13+) +- **Rule:** `TEXT` — variable-length strings (no arbitrary VARCHAR limits) +- **Rule:** `NUMERIC(p,s)` — exact decimal (money, quantities) +- **Rule:** `TIMESTAMPTZ` — always use timezone-aware timestamps +- **Rule:** `JSONB` — structured JSON data with indexing support +- **Rule:** `BOOLEAN` — true/false (not 0/1) +- **Rule:** `BIGINT` — for counts and large IDs when UUID overhead matters ## Upsert +**Use the dialect-native upsert form and state the required uniqueness constraint.** + ```sql INSERT INTO users (email, name) VALUES ($1, $2) @@ -24,6 +28,8 @@ RETURNING *; ## CTEs with modification +**Use writable CTEs only when they clarify transactional data flow.** + ```sql WITH inserted AS ( INSERT INTO audit_log (action, table_name, record_id) @@ -38,6 +44,8 @@ WHERE u.id = $1; ## JSONB queries +**Use JSONB operators with indexes that match the access pattern.** + ```sql -- Index JSONB key CREATE INDEX idx_users_metadata_role ON users ((metadata->>'role')); @@ -55,6 +63,8 @@ WHERE id = $1; ## Full-text search +**Use built-in full-text search syntax and indexes for search workloads.** + ```sql -- Index CREATE INDEX idx_articles_search ON articles @@ -69,6 +79,8 @@ ORDER BY rank DESC; ## Pagination +**Prefer keyset pagination for large or user-facing result sets.** + ```sql -- Offset (simple, degrades on large tables) SELECT * FROM orders ORDER BY created_at DESC LIMIT $1 OFFSET $2; @@ -82,6 +94,8 @@ LIMIT $2; ## EXPLAIN ANALYZE +**Validate performance claims with the dialect execution-plan tool.** + ```sql EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT * FROM orders WHERE user_id = $1; @@ -89,6 +103,8 @@ SELECT * FROM orders WHERE user_id = $1; ## Window functions +**Use window functions for row-relative calculations without collapsing result grain.** + ```sql SELECT user_id, @@ -101,6 +117,8 @@ FROM orders; ## Array operations +**Use arrays for bounded multi-value attributes, not hidden relationships.** + ```sql -- Array column 
CREATE TABLE tags (id UUID PRIMARY KEY, post_id UUID, names TEXT[]); @@ -111,6 +129,8 @@ SELECT * FROM tags WHERE names @> ARRAY['postgresql', 'sql']; ## Constraint naming convention +**Name constraints consistently so errors and migrations stay readable.** + ```sql CONSTRAINT [table]_[col]_[type] -- Examples: diff --git a/.agents/skills/writer-sql/references/design.md b/.agents/skills/codegen-database/references/schema-design.md similarity index 68% rename from .agents/skills/writer-sql/references/design.md rename to .agents/skills/codegen-database/references/schema-design.md index 8d33205..15b9471 100644 --- a/.agents/skills/writer-sql/references/design.md +++ b/.agents/skills/codegen-database/references/schema-design.md @@ -1,13 +1,17 @@ -# Design Database Reference +# Schema Design Reference Produce a **normalized relational database schema** with DDL, relationship documentation, and design rationale. ## What makes a great schema +**Encode business rules in the schema without overcomplicating common queries.** + A good schema encodes business rules structurally so they can't be violated at the application layer. It anticipates common query patterns and pre-optimizes with the right indexes. It's normalized enough to avoid update anomalies, but not over-normalized to the point of making every query a 6-way join. 
## Detect the SQL dialect +**Choose schema syntax from the target engine before writing DDL.** + Identify the target database from context: | Signal | Dialect | @@ -17,10 +21,14 @@ Identify the target database from context: | "sqlite", mobile app, embedded | SQLite | | "mssql", "sql server", T-SQL | MSSQL | | "oracle" | Oracle | -| Ambiguous / not mentioned | Default to PostgreSQL; note the assumption | +| "bigquery", "snowflake", "clickhouse", warehouse terminology | Warehouse / analytics dialect | +| "cockroachdb", distributed SQL, regional tables | CockroachDB | +| Ambiguous / not mentioned | Default to PostgreSQL for OLTP schemas; note the assumption | ## Information gathering +**Extract the domain facts that affect tables, relationships, constraints, and indexes.** + Extract: - **Domain** and core entities (e.g., e-commerce: products, orders, customers) @@ -32,11 +40,15 @@ Extract: ## Output format +**Produce schema output in a reviewable order from overview to executable DDL.** + ### Part 1: Entity-Relationship Summary ``` ## Entity-Relationship Overview +**Summarize entities and relationships before showing DDL.** + ### Entities - **[Entity]** — [1-line description] @@ -47,7 +59,7 @@ Extract: ### Part 2: DDL -Produce `CREATE TABLE` statements in dependency order (referenced tables first). +Produce `CREATE TABLE` statements in dependency order (referenced tables first). For warehouse schemas, define grain, partitioning, clustering or sort keys, and load/update assumptions instead of forcing OLTP normalization. 
**PostgreSQL template:** @@ -124,43 +136,45 @@ Document key choices: **Naming:** -- Table names: `snake_case`, plural (`orders`, `order_items`) -- Column names: `snake_case`, singular -- FK columns: `[referenced_table_singular]_id` (e.g., `user_id`, `order_id`) -- Constraint names: `[table]_[description]_[type]` (e.g., `orders_status_chk`, `users_email_uq`) -- Index names: `[table]_[col(s)]_idx` +- **Rule:** Table names: `snake_case`, plural (`orders`, `order_items`) +- **Rule:** Column names: `snake_case`, singular +- **Rule:** FK columns: `[referenced_table_singular]_id` (e.g., `user_id`, `order_id`) +- **Rule:** Constraint names: `[table]_[description]_[type]` (e.g., `orders_status_chk`, `users_email_uq`) +- **Rule:** Index names: `[table]_[col(s)]_idx` **Type selection:** -- IDs: `UUID` (with `uuid_generate_v4()` default) unless high-volume serial is needed -- Text: `TEXT` for variable length (no magic VARCHAR lengths unless there's a business rule) -- Money: `NUMERIC(15,4)` — never `FLOAT` -- Status/enum: `TEXT` with a CHECK constraint or a proper `ENUM` type, noted -- Timestamps: `TIMESTAMPTZ` (always timezone-aware) +- **Rule:** IDs: `UUID` (with `uuid_generate_v4()` default) unless high-volume serial is needed +- **Rule:** Text: `TEXT` for variable length (no magic VARCHAR lengths unless there's a business rule) +- **Rule:** Money: `NUMERIC(15,4)` — never `FLOAT` +- **Rule:** Status/enum: `TEXT` with a CHECK constraint or a proper `ENUM` type, noted +- **Rule:** Timestamps: `TIMESTAMPTZ` (always timezone-aware) **Standard columns on every table:** -- `id UUID PRIMARY KEY` -- `created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()` -- `updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()` -- `deleted_at TIMESTAMPTZ` — only if soft delete is appropriate +- **Rule:** `id UUID PRIMARY KEY` +- **Rule:** `created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()` +- **Rule:** `updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()` +- **Rule:** `deleted_at TIMESTAMPTZ` — only if soft delete is 
appropriate **Referential integrity:** -- All FKs should be explicit with ON DELETE behavior stated -- CASCADE: child rows are meaningless without the parent -- SET NULL: child rows can exist without the parent (nullable FK) -- RESTRICT: prevent deletion if children exist (default safe choice) +- **Rule:** All FKs should be explicit with ON DELETE behavior stated +- **Rule:** CASCADE: child rows are meaningless without the parent +- **Rule:** SET NULL: child rows can exist without the parent (nullable FK) +- **Rule:** RESTRICT: prevent deletion if children exist (default safe choice) **Indexing strategy:** -- Index every FK column (Postgres doesn't do this automatically) -- Index columns that appear frequently in WHERE clauses on large tables -- Partial indexes for soft-delete patterns: `WHERE deleted_at IS NULL` -- Avoid over-indexing write-heavy tables +- **Rule:** Index every FK column (Postgres doesn't do this automatically) +- **Rule:** Index columns that appear frequently in WHERE clauses on large tables +- **Rule:** Partial indexes for soft-delete patterns: `WHERE deleted_at IS NULL` +- **Rule:** Avoid over-indexing write-heavy tables ## Scale / special patterns +**Add scale patterns only when the requirements justify them.** + Add these sections only if relevant: **Multi-tenancy:** diff --git a/.agents/skills/codegen-database/references/snowflake.md b/.agents/skills/codegen-database/references/snowflake.md new file mode 100644 index 0000000..f9de2a7 --- /dev/null +++ b/.agents/skills/codegen-database/references/snowflake.md @@ -0,0 +1,27 @@ +# Snowflake Reference + +Use Snowflake SQL for warehouse transformations, semi-structured data, tasks, and analytical queries. + +## Key Patterns + +**Write Snowflake SQL with semi-structured data and warehouse cost behavior in mind.** + +- **Semi-structured data:** Use `VARIANT`, `OBJECT`, `ARRAY`, `:` path access, and `LATERAL FLATTEN` for nested values. 
+- **Time travel:** Mention retention and recovery implications when changing or replacing tables. +- **Clustering:** Recommend clustering only when pruning materially improves repeated large-table queries. +- **Tasks and streams:** Use streams for change capture and tasks for scheduled transformations when requested. +- **Identifiers:** Avoid quoted mixed-case identifiers unless the existing warehouse already uses them. +- **Parameters:** Use bind variables or session variables according to the execution context. + +## Example + +**Cast semi-structured values explicitly so downstream types are predictable.** + +```sql +SELECT + payload:user_id::STRING AS user_id, + COUNT(*) AS event_count +FROM analytics.raw_events, + LATERAL FLATTEN(input => payload:events) event +GROUP BY user_id; +``` diff --git a/.agents/skills/writer-sql/references/sqlite.md b/.agents/skills/codegen-database/references/sqlite.md similarity index 83% rename from .agents/skills/writer-sql/references/sqlite.md rename to .agents/skills/codegen-database/references/sqlite.md index ee4f0c1..4f468a4 100644 --- a/.agents/skills/writer-sql/references/sqlite.md +++ b/.agents/skills/codegen-database/references/sqlite.md @@ -2,6 +2,8 @@ ## Key concepts +**Account for SQLite behavior before writing portable-looking SQL.** + SQLite has **type affinity** — not strict types. Declared types are suggestions, not enforcements (unless STRICT mode is used). ### Type affinity rules @@ -28,6 +30,8 @@ Use STRICT for new tables to get actual type enforcement. ## Primary keys and ROWID +**Choose SQLite primary keys with ROWID behavior in mind.** + ```sql -- INTEGER PRIMARY KEY is an alias for ROWID (fast) CREATE TABLE items ( @@ -45,6 +49,8 @@ CREATE TABLE kv_store ( ## UUID simulation +**Represent UUIDs explicitly because SQLite has no native UUID type.** + SQLite has no native UUID type. Use TEXT: ```sql @@ -63,6 +69,8 @@ Or generate UUIDs in application code and insert as TEXT. 
## JSON (SQLite 3.38+) +**Use SQLite JSON functions only when the runtime version supports them.** + ```sql -- JSON functions SELECT json_extract(payload, '$.userId') AS user_id FROM events; @@ -74,6 +82,8 @@ SELECT * FROM events WHERE json_extract(payload, '$.type') = 'login'; ## Full-text search (FTS5) +**Use FTS5 virtual tables for full-text search and order results by `rank`.** + ```sql -- Create virtual FTS table CREATE VIRTUAL TABLE articles_fts USING fts5(title, body, content=articles, content_rowid=id); @@ -92,6 +102,8 @@ ORDER BY rank; ## Limitations to be aware of +**Surface SQLite limits before proposing migration or constraint patterns.** + - **No ALTER TABLE ADD COLUMN with constraints**: only bare ADD COLUMN is supported (no NOT NULL with no default, no UNIQUE) - **No DROP COLUMN** (before 3.35): must recreate the table - **No RIGHT JOIN or FULL OUTER JOIN** (before 3.39) @@ -99,6 +111,8 @@ ORDER BY rank; ## WAL mode (recommended for most apps) +**Use WAL mode when concurrency and durability tradeoffs fit the app.** + ```sql PRAGMA journal_mode=WAL; PRAGMA synchronous=NORMAL; -- good tradeoff for WAL @@ -110,6 +124,8 @@ Always enable `foreign_keys=ON` at connection open — SQLite ignores FK constra ## Upsert +**Use the dialect-native upsert form and state the required uniqueness constraint.** + ```sql INSERT INTO users (email, name, updated_at) VALUES (?, ?, datetime('now')) @@ -121,6 +137,8 @@ DO UPDATE SET ## Date/time +**Store dates consistently because SQLite has no dedicated datetime type.** + SQLite stores dates as TEXT (ISO 8601), REAL (Julian day), or INTEGER (Unix timestamp). ```sql diff --git a/.agents/skills/writer-sql/SKILL.md b/.agents/skills/writer-sql/SKILL.md deleted file mode 100644 index 660563d..0000000 --- a/.agents/skills/writer-sql/SKILL.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -name: writer-sql -description: > - Design database schemas and write or optimize SQL queries.
Routes schema work, query writing, - dialect guidance, normalization, indexing, and troubleshooting to the right reference. -author: Oleg Shulyakov -license: MIT -version: 1.1.0 ---- - -# writer-sql (router) - -A **router** skill to write **production-quality SQL** for OLTP databases. Route to the appropriate SQL sub-skill based on the user's request. - -## Task detection - -### Top-level routing - -| If the user asks... | Route to... | -| --------------------------------------------------- | ---------------------- | -| Design a database schema for an e-commerce platform | `references/design.md` | -| What tables do I need for a blog? | `references/design.md` | -| Normalize this into a schema | `references/design.md` | -| Write a query to find top 10 customers by revenue | `references/common.md` | -| Optimize this slow join | `references/common.md` | -| How do I use CTEs? | `references/common.md` | - -After routing to `references/common.md`, detect the dialect and load the corresponding dialect reference (see below). - -### Dialect-specific routing - -When the question targets a specific database — load its reference directly: - -| "How do I..." example | Read this first | -| ------------------------------------------ | ------------------------ | -| Do full-text search in Postgres? 
| `references/postgres.md` | -| Write a JSONB query | `references/postgres.md` | -| Postgres upsert with ON CONFLICT | `references/postgres.md` | -| Write a MySQL upsert with ON DUPLICATE KEY | `references/mysql.md` | -| MySQL JSON columns and virtual indexes | `references/mysql.md` | -| utf8mb4 charset setup | `references/mysql.md` | -| Write a T-SQL stored procedure | `references/mssql.md` | -| MSSQL pagination with OFFSET-FETCH | `references/mssql.md` | -| SQL Server MERGE upsert | `references/mssql.md` | -| Enable WAL mode in SQLite | `references/sqlite.md` | -| SQLite FTS5 full-text search | `references/sqlite.md` | -| SQLite type affinity | `references/sqlite.md` | -| Write a PL/SQL procedure | `references/oracle.md` | -| Oracle MERGE upsert | `references/oracle.md` | -| Oracle row limiting clause | `references/oracle.md` | - -## Output format - -Always produce: - -1. **The SQL** — formatted, readable, with comments -2. **Explanation** — what it does and why the approach was chosen (1–3 sentences per non-trivial decision) -3. **Performance notes** — indexes this query benefits from, or will create -4. **Edge cases** — NULLs, empty sets, concurrent modification diff --git a/.agents/skills/writer-sql/evals/evals.json b/.agents/skills/writer-sql/evals/evals.json deleted file mode 100644 index 1661e68..0000000 --- a/.agents/skills/writer-sql/evals/evals.json +++ /dev/null @@ -1,117 +0,0 @@ -{ - "skill_name": "writer-sql", - "evals": [ - { - "id": 1, - "prompt": "Write a PostgreSQL query to find the top 10 customers by total order value in the last 90 days, including their email, number of orders, and total spent. Exclude customers who have been deleted (soft delete). 
Use a CTE.", - "expected_output": "A well-formatted PostgreSQL query using CTEs, proper aggregation, soft-delete filtering, and a LIMIT clause, with comments explaining the approach.", - "files": [], - "expectations": [ - "Query uses a WITH clause (CTE)", - "Query filters deleted_at IS NULL for soft-delete support", - "Query uses a date filter for the 90-day window using NOW() or CURRENT_TIMESTAMP", - "Query aggregates with COUNT for order count and SUM for total value", - "Query uses GROUP BY and ORDER BY with LIMIT 10", - "Query includes a comment explaining the index(es) it benefits from", - "SQL keywords are in UPPERCASE", - "No SELECT * is used" - ] - }, - { - "id": 2, - "prompt": "I need a MySQL upsert query. When a user logs in, I want to insert a login_events record or, if a record for that user and date already exists, increment the login_count column.", - "expected_output": "A MySQL INSERT ... ON DUPLICATE KEY UPDATE query for the described upsert behavior.", - "files": [], - "expectations": [ - "Query uses INSERT INTO ... ON DUPLICATE KEY UPDATE syntax", - "The UPDATE clause increments login_count using VALUES() or direct expression", - "Query uses parameterized placeholders (? or :named)", - "Output explains what UNIQUE constraint must exist for the upsert to work", - "SQL keywords are in UPPERCASE" - ] - }, - { - "id": 3, - "prompt": "Design a PostgreSQL schema for a multi-tenant SaaS application with users, organizations, and roles. Users belong to organizations and have roles within them. 
Support soft deletes.", - "expected_output": "PostgreSQL DDL with CREATE TABLE statements for organizations, users, roles, and a junction table, using UUIDs, TIMESTAMPTZ, proper FKs with ON DELETE behavior, and indexes.", - "files": [], - "expectations": [ - "Output contains SQL CREATE TABLE statements", - "Tables use UUID primary keys with DEFAULT uuid_generate_v4() or gen_random_uuid()", - "All tables include created_at and updated_at TIMESTAMPTZ columns", - "At least one table includes deleted_at for soft delete support", - "A junction table exists for the many-to-many user-organization-role relationship", - "Foreign key constraints include explicit ON DELETE behavior", - "Indexes are created on all foreign key columns", - "Constraint names follow a consistent naming convention" - ] - }, - { - "id": 4, - "prompt": "Design a database schema for an e-commerce platform: products, categories (hierarchical), orders, order items, and customers. Use PostgreSQL.", - "expected_output": "PostgreSQL DDL covering all 5 entities with proper relationships, a self-referential FK for hierarchical categories, numeric types for money, and relevant indexes.", - "files": [], - "expectations": [ - "Output contains CREATE TABLE statements for all 5 entities", - "categories table has a self-referential parent_id foreign key for hierarchy", - "Price/amount columns use NUMERIC type (not FLOAT or REAL)", - "orders and order_items tables are properly related with a FK", - "An index exists on order_items.order_id", - "Entity-relationship overview or comment section explains relationships" - ] - }, - { - "id": 5, - "prompt": "Optimize this slow PostgreSQL query that joins orders, customers, and payments, filters by paid_at, and sorts by created_at. 
Explain indexes and rewrite if useful.", - "expected_output": "A query-optimization response with rewritten SQL where useful, index recommendations, and explanation of tradeoffs.", - "files": [], - "expectations": [ - "Routes to query-writing/common guidance rather than schema design", - "Mentions EXPLAIN or execution-plan validation", - "Recommends indexes aligned with joins, filters, or sort order", - "Avoids SELECT * in rewritten query", - "Explains tradeoffs or assumptions" - ] - }, - { - "id": 6, - "prompt": "Write a SQLite query to calculate a 7-day rolling average of daily active users from an events table.", - "expected_output": "SQLite-compatible SQL using date grouping and a window function or clear alternative for rolling average.", - "files": [], - "expectations": [ - "Detects SQLite dialect", - "Uses SQLite-compatible date functions", - "Groups events by day", - "Calculates a 7-day rolling average", - "Uses readable aliases and no dialect-incompatible syntax" - ] - }, - { - "id": 7, - "prompt": "Design a normalized schema for a library system with books, authors, patrons, loans, reservations, and late fees. Use MySQL.", - "expected_output": "MySQL DDL for a normalized library schema with relationships, constraints, indexes, and money/date types appropriate for MySQL.", - "files": [], - "expectations": [ - "Routes to schema design guidance", - "Output contains MySQL-compatible CREATE TABLE statements", - "Many-to-many book/author relationship is normalized", - "Loans and reservations reference patrons and book copies or books", - "Late fees use a decimal money type", - "Indexes exist on foreign keys" - ] - }, - { - "id": 8, - "prompt": "How do I query customers who bought product A but never bought product B? 
Show a portable SQL version.", - "expected_output": "A portable SQL query using EXISTS/NOT EXISTS or equivalent set logic with explanation.", - "files": [], - "expectations": [ - "Routes to common query guidance", - "Uses NOT EXISTS or equivalent anti-join logic", - "Avoids dialect-specific syntax unless noted", - "Explains assumptions about orders/order_items/products tables", - "Uses clear table aliases" - ] - } - ] -} diff --git a/.agents/skills/writer-sql/references/mysql.md b/.agents/skills/writer-sql/references/mysql.md deleted file mode 100644 index 1fac3b2..0000000 --- a/.agents/skills/writer-sql/references/mysql.md +++ /dev/null @@ -1,111 +0,0 @@ -# MySQL / MariaDB Reference - -## Key types - -- `BIGINT UNSIGNED AUTO_INCREMENT` — primary keys (or `CHAR(36)` for UUID) -- `VARCHAR(n)` — always specify length (required by MySQL) -- `DECIMAL(p,s)` — exact decimals (not FLOAT for money) -- `DATETIME` / `TIMESTAMP` — TIMESTAMP auto-converts to UTC; DATETIME stores as-is -- `JSON` — native JSON type (MySQL 5.7.8+, MariaDB 10.2+) -- `TEXT` / `MEDIUMTEXT` / `LONGTEXT` — for large text (no indexes without prefix) -- `TINYINT(1)` — booleans (MySQL has no native BOOLEAN, maps to TINYINT) - -## Upsert - -```sql -INSERT INTO users (email, name, updated_at) -VALUES (?, ?, NOW()) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - updated_at = NOW(); -``` - -## Auto-increment and UUIDs - -```sql --- Integer PK (simpler, better performance) -CREATE TABLE users ( - id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, - ... -); - --- UUID PK (distributed-safe) -CREATE TABLE users ( - id CHAR(36) NOT NULL DEFAULT (UUID()) PRIMARY KEY, - ... 
-) ENGINE=InnoDB; -``` - -## Storage engines - -- Always use `ENGINE=InnoDB` (transactions, foreign keys, row-level locking) -- `ENGINE=MyISAM` is legacy — never use for new tables - -## Character set - -```sql --- Database level -CREATE DATABASE mydb CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; - --- Table level (always explicit) -CREATE TABLE articles ( - ... -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; -``` - -`utf8mb4` is required for emoji and full Unicode support. `utf8` in MySQL is 3-byte only. - -## JSON columns - -```sql -CREATE TABLE events ( - id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, - payload JSON NOT NULL, - -- Virtual columns for JSON key indexing: - event_type VARCHAR(50) AS (JSON_UNQUOTE(payload->>'$.type')) STORED, - INDEX idx_events_type (event_type) -); - --- Query -SELECT * FROM events WHERE JSON_UNQUOTE(payload->>'$.userId') = ?; -``` - -## Full-text search - -```sql --- Index (MyISAM supports this; InnoDB from 5.6+) -ALTER TABLE articles ADD FULLTEXT(title, body); - --- Query -SELECT *, MATCH(title, body) AGAINST (? IN BOOLEAN MODE) AS score -FROM articles -WHERE MATCH(title, body) AGAINST (? IN BOOLEAN MODE) -ORDER BY score DESC; -``` - -## EXPLAIN - -```sql -EXPLAIN FORMAT=JSON -SELECT * FROM orders WHERE user_id = ?; -``` - -## Pagination — keyset preferred - -```sql --- Offset (avoid on large tables) -SELECT * FROM orders ORDER BY created_at DESC LIMIT ? OFFSET ?; - --- Keyset -SELECT * FROM orders -WHERE created_at < ? -- cursor -ORDER BY created_at DESC -LIMIT ?; -``` - -## Common gotchas - -- `GROUP BY` in MySQL 5.7+ with `ONLY_FULL_GROUP_BY` mode: all non-aggregate SELECT columns must be in GROUP BY -- `ENUM` type: changes to ENUM values require ALTER TABLE (expensive on large tables); prefer VARCHAR + CHECK constraint or a lookup table -- String comparison is case-insensitive by default (depends on collation) -- No `RETURNING` clause — use `LAST_INSERT_ID()` after INSERT