diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c8bf9ed..61cb9475 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Changed + +- **Curated index — link to external skills, don't re-host them.** forgekit no longer vendors `SKILL.md` body copies for skills it didn't author. The externally-sourced catalog skills (from `anthropics/skills`, `obra/superpowers`, `alirezarezvani/claude-skills`) now keep only their manifests as curated link-out entries (`source` + `homepage`); forgekit-authored skills keep their bodies. `forge install ` fetches the body from the upstream source on demand instead of serving a copy; skill detail pages render "maintained upstream — view at source" for body-less entries. Rationale in `docs/decisions/2026-06-06-curated-index-link-dont-rehost.md`. + ## [0.28.0] - 2026-05-27 ### Added diff --git a/apps/web/src/pages/skills/[id].astro b/apps/web/src/pages/skills/[id].astro index 22b4a658..ca99f1a8 100644 --- a/apps/web/src/pages/skills/[id].astro +++ b/apps/web/src/pages/skills/[id].astro @@ -20,6 +20,28 @@ const cmd = installCommand("skill", skill.id)!; const tutorials = await getTutorials(); const collections = await getCollections(); const base = import.meta.env.BASE_URL; + +// Externally-sourced skills are not re-hosted here — forgekit links to them. +// `getSkills` leaves `body` empty when no SKILL.md is vendored in the catalog. +const hasBody = skill.body.trim().length > 0; +const src = skill.source; +// Only render source links with an http(s) scheme — metadata is catalog-authored +// (incl. external manifests), so guard against javascript:/data: href injection. Returns the input re-serialized by `URL#toString()` when its protocol is http: or https:, and null when the value is empty/nullish, fails to parse as an absolute URL, or uses any other scheme. +const safeHttpUrl = (value?: string | null): string | null => { + if (!value) return null; + try { + const u = new URL(value); + return u.protocol === "http:" || u.protocol === "https:" ?
u.toString() : null; + } catch { + return null; + } +}; +const sourceUrl = + safeHttpUrl(skill.homepage) ?? + safeHttpUrl(src?.repo && src.path ? `${src.repo.replace(/\.git$/, "")}/blob/${src.ref ?? "main"}/${src.path}` : null) ?? + safeHttpUrl(src?.repo) ?? + null; +const sourceLabel = src?.repo?.replace(/^https?:\/\/(www\.)?github\.com\//, "") ?? src?.path ?? sourceUrl; --- @@ -34,16 +56,31 @@ const base = import.meta.env.BASE_URL; - {tabs ? ( - + {hasBody ? ( + tabs ? ( + + ) : ( +
+ ) ) : ( -
+
+

This skill is maintained upstream — forgekit links to it rather than re-hosting a copy.

+ {sourceUrl && ( +

+ View the full skill at source ↗ +

+ )} +
)} - {(skill.source?.path || skill.license) && ( + {(sourceUrl || skill.source?.path || skill.license) && (

- {skill.source?.path && <>Source: {skill.source.path}} - {skill.license && <>{skill.source?.path ? " · " : "License: "}{skill.license}} + {sourceUrl ? ( + <>Source: {sourceLabel} + ) : ( + skill.source?.path && <>Source: {skill.source.path} + )} + {skill.license && <>{(sourceUrl || skill.source?.path) ? " · " : "License: "}{skill.license}}

)} diff --git a/docs/decisions/2026-06-06-curated-index-link-dont-rehost.md b/docs/decisions/2026-06-06-curated-index-link-dont-rehost.md new file mode 100644 index 00000000..82ecfb3a --- /dev/null +++ b/docs/decisions/2026-06-06-curated-index-link-dont-rehost.md @@ -0,0 +1,100 @@ +# ADR: Curated index — link to external skills, don't re-host them + +**Date:** 2026-06-06 +**Status:** Accepted + +## Context + +forgekit is positioned as a **curated, useful tools list** (inspired by skills.sh and +mcpmarket.com), not an exhaustive mirror of every skill that exists. Yet the catalog had +drifted toward re-hosting: of the ~155 published skill manifests, **37 stored a full +`SKILL.md` body copy vendored from other people's repositories** — 13 from +`anthropics/skills`, 12 from `obra/superpowers`, 12 from `alirezarezvani/claude-skills` +(~365 KB of duplicated content). + +Re-hosting other authors' skills has real costs: the copies go stale the moment upstream +changes, forgekit takes on implicit maintenance/attribution of content it didn't write, +and the catalog reads as "everything" rather than "the curated set worth knowing about." +Every external manifest already carried `source.repo` + `ref` + `path` + `homepage`, so +the provenance to link out was present — only the redundant body needed to go. + +## Alternatives considered + +| Option | Description | Rejection reason | +|--------|-------------|------------------| +| **A — De-vendor externals → link to source (CHOSEN)** | Drop the 37 vendored `SKILL.md` bodies; keep the manifests as curated link-out entries; install fetches the body from upstream on demand; web shows "view at source" | — (chosen) | +| **B — Keep vendoring everything** | Status quo — store a copy of every catalog skill | The drift, staleness, and "list everything" framing this ADR exists to reverse. A copy of someone else's skill is wrong within days of their next commit. 
| +| **C — Remove external skills from the catalog entirely** | Only list forgekit-authored skills | Loses curation value — the point of a curated list is to surface the good external tools too, not hide them. Discovery is the product. | +| **D — Add a bespoke `skills.sh` URL source type + scrape/sync** | Integrate with skills.sh as a content source | Over-engineered. `source.type: git` already resolves to upstream content; a registry-specific integration is new surface area for no extra benefit today. | + +## Decision + +**Adopt Option A.** External skills stay in the catalog as **curated references that link to +their source**; forgekit stores manifests, not bodies, for content it didn't author. + +Scope of "external" = `source.type: git` whose `source.repo` is **not** the forgekit repo. +forgekit-authored skills (86) and unsourced skills (32) keep their bodies unchanged — only +the 37 externally-authored skills were de-vendored. + +### Mechanism + +- **Catalog**: each external skill keeps `<id>/manifest.json` (with `source` + `homepage`); + the `<id>/SKILL.md` body copy is removed. `catalog:validate` passes — bodies are not + required by the schema. +- **CLI (`forge install`)**: when a skill body is not vendored locally, fetch it from the + upstream source on demand — `raw.githubusercontent.com` built from + `source.repo`/`ref`/`path` — instead of erroring. Install still works; forgekit pulls + from upstream rather than serving a copy. +- **Web (`skills/[id]`)**: a body-less skill renders *"maintained upstream — View the full + skill at source ↗"* with a clickable link; forgekit-own skills render their body as before. + +### Invariant + +forgekit re-hosts only content it authored. A skill sourced from another repo is **listed +and linked, never copied**. New external additions to the catalog must follow this pattern +(manifest + `source` + `homepage`, no vendored `SKILL.md`).
+ +## Consequences + +### Positive +- **No stale copies.** External skills always resolve to upstream's current version at + install time; forgekit can't ship a months-old fork of someone's skill. +- **Honest curation + attribution.** The catalog is a curated index that credits and links + to authors, not a re-host. Matches the skills.sh/mcpmarket framing. +- **Smaller, clearer repo.** ~365 KB of duplicated bodies gone; the diff to review when a + skill is added/removed is a manifest, not a vendored markdown blob. +- **Install still works** for external skills (fetched from source), so discoverability is + preserved without the hosting cost. + +### Negative +- **Install now needs network for external skills.** `forge install <id>` fetches from + GitHub raw at install time; offline installs of external skills fail (own skills are + unaffected). Acceptable — installation is inherently an online operation. +- **Upstream can break a link.** If an upstream repo moves/renames the file or deletes the + ref, the fetch 404s. Mitigated: the CLI prints the `homepage` on failure; a future + link-checker could flag dead sources. +- **Two rendering paths in the web detail page** (body vs link-out) to keep in sync. + +### Neutral +- Reversible per skill: re-vendoring a body is just re-adding the `SKILL.md`. +- `source.type: vendored` remains available in the schema for the rare case forgekit + deliberately wants a pinned copy. + +## Revisit when + +1. **External link rot becomes common** (several dead `source` URLs) → add a CI link-checker + over external manifests, and/or a `vendored` fallback for high-value skills that pins a + copy with an explicit "snapshot of <ref>" note. +2. **skills.sh exposes a stable per-skill API/URL** → consider linking `homepage` at the + skills.sh entry rather than the raw source repo, for richer discovery. +3. **An external author requests removal or changes license** → de-listing is a manifest + delete; no body to scrub. +4.
**forge install's on-demand fetch proves too slow/fragile at scale** → reconsider a + pinned `vendored` snapshot model with a refresh command. + +## Affected change + +- Commit de-vendoring the 37 externals + CLI/web support (this branch, + `feat/catalog-v0.24.0-skill-refresh`). +- Verified: `catalog:validate` ✅, `cli:typecheck` ✅, `web:build` ✅ (297 pages; external + pages render the source link, own pages render their body). diff --git a/packages/catalog/catalog/skills/brainstorming/SKILL.md b/packages/catalog/catalog/skills/brainstorming/SKILL.md deleted file mode 100644 index 06cd0a21..00000000 --- a/packages/catalog/catalog/skills/brainstorming/SKILL.md +++ /dev/null @@ -1,164 +0,0 @@ ---- -name: brainstorming -description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements and design before implementation." ---- - -# Brainstorming Ideas Into Designs - -Help turn ideas into fully formed designs and specs through natural collaborative dialogue. - -Start by understanding the current project context, then ask questions one at a time to refine the idea. Once you understand what you're building, present the design and get user approval. - - -Do NOT invoke any implementation skill, write any code, scaffold any project, or take any implementation action until you have presented a design and the user has approved it. This applies to EVERY project regardless of perceived simplicity. - - -## Anti-Pattern: "This Is Too Simple To Need A Design" - -Every project goes through this process. A todo list, a single-function utility, a config change — all of them. "Simple" projects are where unexamined assumptions cause the most wasted work. The design can be short (a few sentences for truly simple projects), but you MUST present it and get approval. - -## Checklist - -You MUST create a task for each of these items and complete them in order: - -1. 
**Explore project context** — check files, docs, recent commits -2. **Offer visual companion** (if topic will involve visual questions) — this is its own message, not combined with a clarifying question. See the Visual Companion section below. -3. **Ask clarifying questions** — one at a time, understand purpose/constraints/success criteria -4. **Propose 2-3 approaches** — with trade-offs and your recommendation -5. **Present design** — in sections scaled to their complexity, get user approval after each section -6. **Write design doc** — save to `docs/superpowers/specs/YYYY-MM-DD--design.md` and commit -7. **Spec self-review** — quick inline check for placeholders, contradictions, ambiguity, scope (see below) -8. **User reviews written spec** — ask user to review the spec file before proceeding -9. **Transition to implementation** — invoke writing-plans skill to create implementation plan - -## Process Flow - -```dot -digraph brainstorming { - "Explore project context" [shape=box]; - "Visual questions ahead?" [shape=diamond]; - "Offer Visual Companion\n(own message, no other content)" [shape=box]; - "Ask clarifying questions" [shape=box]; - "Propose 2-3 approaches" [shape=box]; - "Present design sections" [shape=box]; - "User approves design?" [shape=diamond]; - "Write design doc" [shape=box]; - "Spec self-review\n(fix inline)" [shape=box]; - "User reviews spec?" [shape=diamond]; - "Invoke writing-plans skill" [shape=doublecircle]; - - "Explore project context" -> "Visual questions ahead?"; - "Visual questions ahead?" -> "Offer Visual Companion\n(own message, no other content)" [label="yes"]; - "Visual questions ahead?" -> "Ask clarifying questions" [label="no"]; - "Offer Visual Companion\n(own message, no other content)" -> "Ask clarifying questions"; - "Ask clarifying questions" -> "Propose 2-3 approaches"; - "Propose 2-3 approaches" -> "Present design sections"; - "Present design sections" -> "User approves design?"; - "User approves design?" 
-> "Present design sections" [label="no, revise"]; - "User approves design?" -> "Write design doc" [label="yes"]; - "Write design doc" -> "Spec self-review\n(fix inline)"; - "Spec self-review\n(fix inline)" -> "User reviews spec?"; - "User reviews spec?" -> "Write design doc" [label="changes requested"]; - "User reviews spec?" -> "Invoke writing-plans skill" [label="approved"]; -} -``` - -**The terminal state is invoking writing-plans.** Do NOT invoke frontend-design, mcp-builder, or any other implementation skill. The ONLY skill you invoke after brainstorming is writing-plans. - -## The Process - -**Understanding the idea:** - -- Check out the current project state first (files, docs, recent commits) -- Before asking detailed questions, assess scope: if the request describes multiple independent subsystems (e.g., "build a platform with chat, file storage, billing, and analytics"), flag this immediately. Don't spend questions refining details of a project that needs to be decomposed first. -- If the project is too large for a single spec, help the user decompose into sub-projects: what are the independent pieces, how do they relate, what order should they be built? Then brainstorm the first sub-project through the normal design flow. Each sub-project gets its own spec → plan → implementation cycle. 
-- For appropriately-scoped projects, ask questions one at a time to refine the idea -- Prefer multiple choice questions when possible, but open-ended is fine too -- Only one question per message - if a topic needs more exploration, break it into multiple questions -- Focus on understanding: purpose, constraints, success criteria - -**Exploring approaches:** - -- Propose 2-3 different approaches with trade-offs -- Present options conversationally with your recommendation and reasoning -- Lead with your recommended option and explain why - -**Presenting the design:** - -- Once you believe you understand what you're building, present the design -- Scale each section to its complexity: a few sentences if straightforward, up to 200-300 words if nuanced -- Ask after each section whether it looks right so far -- Cover: architecture, components, data flow, error handling, testing -- Be ready to go back and clarify if something doesn't make sense - -**Design for isolation and clarity:** - -- Break the system into smaller units that each have one clear purpose, communicate through well-defined interfaces, and can be understood and tested independently -- For each unit, you should be able to answer: what does it do, how do you use it, and what does it depend on? -- Can someone understand what a unit does without reading its internals? Can you change the internals without breaking consumers? If not, the boundaries need work. -- Smaller, well-bounded units are also easier for you to work with - you reason better about code you can hold in context at once, and your edits are more reliable when files are focused. When a file grows large, that's often a signal that it's doing too much. - -**Working in existing codebases:** - -- Explore the current structure before proposing changes. Follow existing patterns. 
-- Where existing code has problems that affect the work (e.g., a file that's grown too large, unclear boundaries, tangled responsibilities), include targeted improvements as part of the design - the way a good developer improves code they're working in. -- Don't propose unrelated refactoring. Stay focused on what serves the current goal. - -## After the Design - -**Documentation:** - -- Write the validated design (spec) to `docs/superpowers/specs/YYYY-MM-DD--design.md` - - (User preferences for spec location override this default) -- Use elements-of-style:writing-clearly-and-concisely skill if available -- Commit the design document to git - -**Spec Self-Review:** -After writing the spec document, look at it with fresh eyes: - -1. **Placeholder scan:** Any "TBD", "TODO", incomplete sections, or vague requirements? Fix them. -2. **Internal consistency:** Do any sections contradict each other? Does the architecture match the feature descriptions? -3. **Scope check:** Is this focused enough for a single implementation plan, or does it need decomposition? -4. **Ambiguity check:** Could any requirement be interpreted two different ways? If so, pick one and make it explicit. - -Fix any issues inline. No need to re-review — just fix and move on. - -**User Review Gate:** -After the spec review loop passes, ask the user to review the written spec before proceeding: - -> "Spec written and committed to ``. Please review it and let me know if you want to make any changes before we start writing out the implementation plan." - -Wait for the user's response. If they request changes, make them and re-run the spec review loop. Only proceed once the user approves. - -**Implementation:** - -- Invoke the writing-plans skill to create a detailed implementation plan -- Do NOT invoke any other skill. writing-plans is the next step. 
- -## Key Principles - -- **One question at a time** - Don't overwhelm with multiple questions -- **Multiple choice preferred** - Easier to answer than open-ended when possible -- **YAGNI ruthlessly** - Remove unnecessary features from all designs -- **Explore alternatives** - Always propose 2-3 approaches before settling -- **Incremental validation** - Present design, get approval before moving on -- **Be flexible** - Go back and clarify when something doesn't make sense - -## Visual Companion - -A browser-based companion for showing mockups, diagrams, and visual options during brainstorming. Available as a tool — not a mode. Accepting the companion means it's available for questions that benefit from visual treatment; it does NOT mean every question goes through the browser. - -**Offering the companion:** When you anticipate that upcoming questions will involve visual content (mockups, layouts, diagrams), offer it once for consent: -> "Some of what we're working on might be easier to explain if I can show it to you in a web browser. I can put together mockups, diagrams, comparisons, and other visuals as we go. This feature is still new and can be token-intensive. Want to try it? (Requires opening a local URL)" - -**This offer MUST be its own message.** Do not combine it with clarifying questions, context summaries, or any other content. The message should contain ONLY the offer above and nothing else. Wait for the user's response before continuing. If they decline, proceed with text-only brainstorming. - -**Per-question decision:** Even after the user accepts, decide FOR EACH QUESTION whether to use the browser or the terminal. 
The test: **would the user understand this better by seeing it than reading it?** - -- **Use the browser** for content that IS visual — mockups, wireframes, layout comparisons, architecture diagrams, side-by-side visual designs -- **Use the terminal** for content that is text — requirements questions, conceptual choices, tradeoff lists, A/B/C/D text options, scope decisions - -A question about a UI topic is not automatically a visual question. "What does personality mean in this context?" is a conceptual question — use the terminal. "Which wizard layout works better?" is a visual question — use the browser. - -If they agree to the companion, read the detailed guide before proceeding: -`skills/brainstorming/visual-companion.md` diff --git a/packages/catalog/catalog/skills/claude-api/SKILL.md b/packages/catalog/catalog/skills/claude-api/SKILL.md deleted file mode 100644 index 8531740b..00000000 --- a/packages/catalog/catalog/skills/claude-api/SKILL.md +++ /dev/null @@ -1,324 +0,0 @@ ---- -name: claude-api -description: "Build, debug, and optimize Claude API / Anthropic SDK apps. Apps built with this skill should include prompt caching. Also handles migrating existing Claude API code between Claude model versions (4.5 → 4.6, 4.6 → 4.7, retired-model replacements). TRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`; user asks for the Claude API, Anthropic SDK, or Managed Agents; user adds/modifies/tunes a Claude feature (caching, thinking, compaction, tool use, batch, files, citations, memory) or model (Opus/Sonnet/Haiku) in a file; questions about prompt caching / cache hit rate in an Anthropic SDK project. SKIP: file imports `openai`/other-provider SDK, filename like `*-openai.py`/`*-generic.py`, provider-neutral code, general programming/ML." -license: Complete terms in LICENSE.txt ---- - -# Building LLM-Powered Applications with Claude - -This skill helps you build LLM-powered applications with Claude. 
Choose the right surface based on your needs, detect the project language, then read the relevant language-specific documentation. - -## Before You Start - -Scan the target file (or, if no target file, the prompt and project) for non-Anthropic provider markers — `import openai`, `from openai`, `langchain_openai`, `OpenAI(`, `gpt-4`, `gpt-5`, file names like `agent-openai.py` or `*-generic.py`, or any explicit instruction to keep the code provider-neutral. If you find any, stop and tell the user that this skill produces Claude/Anthropic SDK code; ask whether they want to switch the file to Claude or want a non-Claude implementation. Do not edit a non-Anthropic file with Anthropic SDK calls. - -## Output Requirement - -When the user asks you to add, modify, or implement a Claude feature, your code must call Claude through one of: - -1. **The official Anthropic SDK** for the project's language (`anthropic`, `@anthropic-ai/sdk`, `com.anthropic.*`, etc.). This is the default whenever a supported SDK exists for the project. -2. **Raw HTTP** (`curl`, `requests`, `fetch`, `httpx`, etc.) — only when the user explicitly asks for cURL/REST/raw HTTP, the project is a shell/cURL project, or the language has no official SDK. - -Never mix the two — don't reach for `requests`/`fetch` in a Python or TypeScript project just because it feels lighter. Never fall back to OpenAI-compatible shims. - -**Never guess SDK usage.** Function names, class names, namespaces, method signatures, and import paths must come from explicit documentation — either the `{lang}/` files in this skill or the official SDK repositories or documentation links listed in `shared/live-sources.md`. If the binding you need is not explicitly documented in the skill files, WebFetch the relevant SDK repo from `shared/live-sources.md` before writing code. Do not infer Ruby/Java/Go/PHP/C# APIs from cURL shapes or from another language's SDK. 
- -## Defaults - -Unless the user requests otherwise: - -For the Claude model version, please use Claude Opus 4.7, which you can access via the exact model string `claude-opus-4-7`. Please default to using adaptive thinking (`thinking: {type: "adaptive"}`) for anything remotely complicated. And finally, please default to streaming for any request that may involve long input, long output, or high `max_tokens` — it prevents hitting request timeouts. Use the SDK's `.get_final_message()` / `.finalMessage()` helper to get the complete response if you don't need to handle individual stream events - ---- - -## Subcommands - -If the User Request at the bottom of this prompt is a bare subcommand string (no prose), search every **Subcommands** table in this document — including any in sections appended below — and follow the matching Action column directly. This lets users invoke specific flows via `/claude-api `. If no table in the document matches, treat the request as normal prose. - - ---- - -## Language Detection - -Before reading code examples, determine which language the user is working in: - -1. **Look at project files** to infer the language: - - - `*.py`, `requirements.txt`, `pyproject.toml`, `setup.py`, `Pipfile` → **Python** — read from `python/` - - `*.ts`, `*.tsx`, `package.json`, `tsconfig.json` → **TypeScript** — read from `typescript/` - - `*.js`, `*.jsx` (no `.ts` files present) → **TypeScript** — JS uses the same SDK, read from `typescript/` - - `*.java`, `pom.xml`, `build.gradle` → **Java** — read from `java/` - - `*.kt`, `*.kts`, `build.gradle.kts` → **Java** — Kotlin uses the Java SDK, read from `java/` - - `*.scala`, `build.sbt` → **Java** — Scala uses the Java SDK, read from `java/` - - `*.go`, `go.mod` → **Go** — read from `go/` - - `*.rb`, `Gemfile` → **Ruby** — read from `ruby/` - - `*.cs`, `*.csproj` → **C#** — read from `csharp/` - - `*.php`, `composer.json` → **PHP** — read from `php/` - -2. 
**If multiple languages detected** (e.g., both Python and TypeScript files): - - - Check which language the user's current file or question relates to - - If still ambiguous, ask: "I detected both Python and TypeScript files. Which language are you using for the Claude API integration?" - -3. **If language can't be inferred** (empty project, no source files, or unsupported language): - - - Use AskUserQuestion with options: Python, TypeScript, Java, Go, Ruby, cURL/raw HTTP, C#, PHP - - If AskUserQuestion is unavailable, default to Python examples and note: "Showing Python examples. Let me know if you need a different language." - -4. **If unsupported language detected** (Rust, Swift, C++, Elixir, etc.): - - - Suggest cURL/raw HTTP examples from `curl/` and note that community SDKs may exist - - Offer to show Python or TypeScript examples as reference implementations - -5. **If user needs cURL/raw HTTP examples**, read from `curl/`. - -### Language-Specific Feature Support - -| Language | Tool Runner | Managed Agents | Notes | -| ---------- | ----------- | -------------- | ------------------------------------- | -| Python | Yes (beta) | Yes (beta) | Full support — `@beta_tool` decorator | -| TypeScript | Yes (beta) | Yes (beta) | Full support — `betaZodTool` + Zod | -| Java | Yes (beta) | Yes (beta) | Beta tool use with annotated classes | -| Go | Yes (beta) | Yes (beta) | `BetaToolRunner` in `toolrunner` pkg | -| Ruby | Yes (beta) | Yes (beta) | `BaseTool` + `tool_runner` in beta | -| C# | No | No | Official SDK | -| PHP | Yes (beta) | Yes (beta) | `BetaRunnableTool` + `toolRunner()` | -| cURL | N/A | Yes (beta) | Raw HTTP, no SDK features | - -> **Managed Agents code examples**: dedicated language-specific READMEs are provided for Python, TypeScript, Go, Ruby, PHP, Java, and cURL (`{lang}/managed-agents/README.md`, `curl/managed-agents.md`). Read your language's README plus the language-agnostic `shared/managed-agents-*.md` concept files. 
**Agents are persistent — create once, reference by ID.** Store the agent ID returned by `agents.create` and pass it to every subsequent `sessions.create`; do not call `agents.create` in the request path. The Anthropic CLI is one convenient way to create agents and environments from version-controlled YAML — its URL is in `shared/live-sources.md`. If a binding you need isn't shown in the README, WebFetch the relevant entry from `shared/live-sources.md` rather than guess. C# does not currently have Managed Agents support; use cURL-style raw HTTP requests against the API. - ---- - -## Which Surface Should I Use? - -> **Start simple.** Default to the simplest tier that meets your needs. Single API calls and workflows handle most use cases — only reach for agents when the task genuinely requires open-ended, model-driven exploration. - -| Use Case | Tier | Recommended Surface | Why | -| ----------------------------------------------- | --------------- | ------------------------- | ------------------------------------------------------------ | -| Classification, summarization, extraction, Q&A | Single LLM call | **Claude API** | One request, one response | -| Batch processing or embeddings | Single LLM call | **Claude API** | Specialized endpoints | -| Multi-step pipelines with code-controlled logic | Workflow | **Claude API + tool use** | You orchestrate the loop | -| Custom agent with your own tools | Agent | **Claude API + tool use** | Maximum flexibility | -| Server-managed stateful agent with workspace | Agent | **Managed Agents** | Anthropic runs the loop and hosts the tool-execution sandbox | -| Persisted, versioned agent configs | Agent | **Managed Agents** | Agents are stored objects; sessions pin to a version | -| Long-running multi-turn agent with file mounts | Agent | **Managed Agents** | Per-session containers, SSE event stream, Skills + MCP | - -> **Note:** Managed Agents is the right choice when you want Anthropic to run the agent loop *and* host the 
container where tools execute — file ops, bash, code execution all run in the per-session workspace. If you want to host the compute yourself or run your own custom tool runtime, Claude API + tool use is the right choice — use the tool runner for automatic loop handling, or the manual loop for fine-grained control (approval gates, custom logging, conditional execution). - -> **Third-party providers (Amazon Bedrock, Google Vertex AI, Microsoft Foundry):** Managed Agents is **not available** on Bedrock, Vertex, or Foundry. If you are deploying through any third-party provider, use **Claude API + tool use** for all use cases — including ones where Managed Agents would otherwise be the recommended surface. - -### Decision Tree - -``` -What does your application need? - -0. Are you deploying through Amazon Bedrock, Google Vertex AI, or Microsoft Foundry? - └── Yes → Claude API (+ tool use for agents) — Managed Agents is 1P only. - No → continue. - -1. Single LLM call (classification, summarization, extraction, Q&A) - └── Claude API — one request, one response - -2. Do you want Anthropic to run the agent loop and host a per-session - container where Claude executes tools (bash, file ops, code)? - └── Yes → Managed Agents — server-managed sessions, persisted agent configs, - SSE event stream, Skills + MCP, file mounts. - Examples: "stateful coding agent with a workspace per task", - "long-running research agent that streams events to a UI", - "agent with persisted, versioned config used across many sessions" - -3. Workflow (multi-step, code-orchestrated, with your own tools) - └── Claude API with tool use — you control the loop - -4. Open-ended agent (model decides its own trajectory, your own tools, you host the compute) - └── Claude API agentic loop (maximum flexibility) -``` - -### Should I Build an Agent? - -Before choosing the agent tier, check all four criteria: - -- **Complexity** — Is the task multi-step and hard to fully specify in advance? 
(e.g., "turn this design doc into a PR" vs. "extract the title from this PDF") -- **Value** — Does the outcome justify higher cost and latency? -- **Viability** — Is Claude capable at this task type? -- **Cost of error** — Can errors be caught and recovered from? (tests, review, rollback) - -If the answer is "no" to any of these, stay at a simpler tier (single call or workflow). - ---- - -## Architecture - -Everything goes through `POST /v1/messages`. Tools and output constraints are features of this single endpoint — not separate APIs. - -**User-defined tools** — You define tools (via decorators, Zod schemas, or raw JSON), and the SDK's tool runner handles calling the API, executing your functions, and looping until Claude is done. For full control, you can write the loop manually. - -**Server-side tools** — Anthropic-hosted tools that run on Anthropic's infrastructure. Code execution is fully server-side (declare it in `tools`, Claude runs code automatically). Computer use can be server-hosted or self-hosted. - -**Structured outputs** — Constrains the Messages API response format (`output_config.format`) and/or tool parameter validation (`strict: true`). The recommended approach is `client.messages.parse()` which validates responses against your schema automatically. Note: the old `output_format` parameter is deprecated; use `output_config: {format: {...}}` on `messages.create()`. - -**Supporting endpoints** — Batches (`POST /v1/messages/batches`), Files (`POST /v1/files`), Token Counting, and Models (`GET /v1/models`, `GET /v1/models/{id}` — live capability/context-window discovery) feed into or support Messages API requests. 
- ---- - -## Current Models (cached: 2026-04-15) - -| Model | Model ID | Context | Input $/1M | Output $/1M | -| ----------------- | ------------------- | -------------- | ---------- | ----------- | -| Claude Opus 4.7 | `claude-opus-4-7` | 1M | $5.00 | $25.00 | -| Claude Opus 4.6 | `claude-opus-4-6` | 1M | $5.00 | $25.00 | -| Claude Sonnet 4.6 | `claude-sonnet-4-6` | 1M | $3.00 | $15.00 | -| Claude Haiku 4.5 | `claude-haiku-4-5` | 200K | $1.00 | $5.00 | - -**ALWAYS use `claude-opus-4-7` unless the user explicitly names a different model.** This is non-negotiable. Do not use `claude-sonnet-4-6`, `claude-sonnet-4-5`, or any other model unless the user literally says "use sonnet" or "use haiku". Never downgrade for cost — that's the user's decision, not yours. - -**CRITICAL: Use only the exact model ID strings from the table above — they are complete as-is. Do not append date suffixes.** For example, use `claude-sonnet-4-5`, never `claude-sonnet-4-5-20250514` or any other date-suffixed variant you might recall from training data. If the user requests an older model not in the table (e.g., "opus 4.5", "sonnet 3.7"), read `shared/models.md` for the exact ID — do not construct one yourself. - -A note: if any of the model strings above look unfamiliar to you, that's to be expected — that just means they were released after your training data cutoff. Rest assured they are real models; we wouldn't mess with you like that. - -**Live capability lookup:** The table above is cached. When the user asks "what's the context window for X", "does X support vision/thinking/effort", or "which models support Y", query the Models API (`client.models.retrieve(id)` / `client.models.list()`) — see `shared/models.md` for the field reference and capability-filter examples. - ---- - -## Thinking & Effort (Quick Reference) - -**Opus 4.7 — Adaptive thinking only:** Use `thinking: {type: "adaptive"}`. 
`thinking: {type: "enabled", budget_tokens: N}` returns a 400 on Opus 4.7 — adaptive is the only on-mode. `{type: "disabled"}` and omitting `thinking` both work. Sampling parameters (`temperature`, `top_p`, `top_k`) are also removed and will 400. See `shared/model-migration.md` → Migrating to Opus 4.7 for the full breaking-change list. -**Opus 4.6 — Adaptive thinking (recommended):** Use `thinking: {type: "adaptive"}`. Claude dynamically decides when and how much to think. No `budget_tokens` needed — `budget_tokens` is deprecated on Opus 4.6 and Sonnet 4.6 and should not be used for new code. Adaptive thinking also automatically enables interleaved thinking (no beta header needed). **When the user asks for "extended thinking", a "thinking budget", or `budget_tokens`: always use Opus 4.7 or 4.6 with `thinking: {type: "adaptive"}`. The concept of a fixed token budget for thinking is deprecated — adaptive thinking replaces it. Do NOT use `budget_tokens` for new 4.6/4.7 code and do NOT switch to an older model.** *Gradual-migration carve-out:* `budget_tokens` is still functional on Opus 4.6 and Sonnet 4.6 as a transitional escape hatch — if you're migrating existing code and need a hard token ceiling before you've tuned `effort`, see `shared/model-migration.md` → Transitional escape hatch. Note: this carve-out does **not** apply to Opus 4.7 — `budget_tokens` is fully removed there. -**Effort parameter (GA, no beta header):** Controls thinking depth and overall token spend via `output_config: {effort: "low"|"medium"|"high"|"max"}` (inside `output_config`, not top-level). Default is `high` (equivalent to omitting it). `max` is Opus-tier only (Opus 4.6 and later — not Sonnet or Haiku). Opus 4.7 adds `"xhigh"` (between `high` and `max`) — the best setting for most coding and agentic use cases on 4.7, and the default in Claude Code; use a minimum of `high` for most intelligence-sensitive work. Works on Opus 4.5, Opus 4.6, Opus 4.7, and Sonnet 4.6. 
Will error on Sonnet 4.5 / Haiku 4.5. On Opus 4.7, effort matters more than on any prior Opus — re-tune it when migrating. Combine with adaptive thinking for the best cost-quality tradeoffs. Lower effort means fewer and more-consolidated tool calls, less preamble, and terser confirmations — `high` is often the sweet spot balancing quality and token efficiency; use `max` when correctness matters more than cost; use `low` for subagents or simple tasks. - -**Opus 4.7 — thinking content omitted by default:** `thinking` blocks still stream but their text is empty unless you opt in with `thinking: {type: "adaptive", display: "summarized"}` (default is `"omitted"`). Silent change — no error. If you stream reasoning to users, the default looks like a long pause before output; set `"summarized"` to restore visible progress. - -**Task Budgets (beta, Opus 4.7):** `output_config: {task_budget: {type: "tokens", total: N}}` tells the model how many tokens it has for a full agentic loop — it sees a running countdown and self-moderates (minimum 20,000; beta header `task-budgets-2026-03-13`). Distinct from `max_tokens`, which is an enforced per-response ceiling the model is not aware of. See `shared/model-migration.md` → Task Budgets. - -**Sonnet 4.6:** Supports adaptive thinking (`thinking: {type: "adaptive"}`). `budget_tokens` is deprecated on Sonnet 4.6 — use adaptive thinking instead. - -**Older models (only if explicitly requested):** If the user specifically asks for Sonnet 4.5 or another older model, use `thinking: {type: "enabled", budget_tokens: N}`. `budget_tokens` must be less than `max_tokens` (minimum 1024). Never choose an older model just because the user mentions `budget_tokens` — use Opus 4.7 with adaptive thinking instead. - ---- - -## Compaction (Quick Reference) - -**Beta, Opus 4.7, Opus 4.6, and Sonnet 4.6.** For long-running conversations that may exceed the 1M context window, enable server-side compaction. 
The API automatically summarizes earlier context when it approaches the trigger threshold (default: 150K tokens). Requires beta header `compact-2026-01-12`. - -**Critical:** Append `response.content` (not just the text) back to your messages on every turn. Compaction blocks in the response must be preserved — the API uses them to replace the compacted history on the next request. Extracting only the text string and appending that will silently lose the compaction state. - -See `{lang}/claude-api/README.md` (Compaction section) for code examples. Full docs via WebFetch in `shared/live-sources.md`. - ---- - -## Prompt Caching (Quick Reference) - -**Prefix match.** Any byte change anywhere in the prefix invalidates everything after it. Render order is `tools` → `system` → `messages`. Keep stable content first (frozen system prompt, deterministic tool list), put volatile content (timestamps, per-request IDs, varying questions) after the last `cache_control` breakpoint. - -**Top-level auto-caching** (`cache_control: {type: "ephemeral"}` on `messages.create()`) is the simplest option when you don't need fine-grained placement. Max 4 breakpoints per request. Minimum cacheable prefix is ~1024 tokens — shorter prefixes silently won't cache. - -**Verify with `usage.cache_read_input_tokens`** — if it's zero across repeated requests, a silent invalidator is at work (`datetime.now()` in system prompt, unsorted JSON, varying tool set). - -For placement patterns, architectural guidance, and the silent-invalidator audit checklist: read `shared/prompt-caching.md`. Language-specific syntax: `{lang}/claude-api/README.md` (Prompt Caching section). - ---- - -## Managed Agents (Beta) - -**Managed Agents** is a third surface: server-managed stateful agents with Anthropic-hosted tool execution. You create a persisted, versioned Agent config (`POST /v1/agents`), then start Sessions that reference it. 
Each session provisions a container as the agent's workspace — bash, file ops, and code execution run there; the agent loop itself runs on Anthropic's orchestration layer and acts on the container via tools. The session streams events; you send messages and tool results back. - -**Managed Agents is first-party only.** It is not available on Amazon Bedrock, Google Vertex AI, or Microsoft Foundry. For agents on third-party providers, use Claude API + tool use. - -**Mandatory flow:** Agent (once) → Session (every run). `model`/`system`/`tools` live on the agent, never the session. See `shared/managed-agents-overview.md` for the full reading guide, beta headers, and pitfalls. - -**Beta headers:** `managed-agents-2026-04-01` — the SDK sets this automatically for all `client.beta.{agents,environments,sessions,vaults}.*` calls. Skills API uses `skills-2025-10-02` and Files API uses `files-api-2025-04-14`, but you don't need to explicitly pass those in for endpoints other than `/v1/skills` and `/v1/files`. - -**Subcommands** — invoke directly with `/claude-api <subcommand>`: - -| Subcommand | Action | -|---|---| -| `managed-agents-onboard` | Walk the user through setting up a Managed Agent from scratch. **Read `shared/managed-agents-onboarding.md` immediately** and follow its interview script: mental model → know-or-explore branch → template config → session setup → emit code. Do not summarize — run the interview. | - -**Reading guide:** Start with `shared/managed-agents-overview.md`, then the topical `shared/managed-agents-*.md` files (core, environments, tools, events, client-patterns, onboarding, api-reference). For Python, TypeScript, Go, Ruby, PHP, and Java, read `{lang}/managed-agents/README.md` for code examples. For cURL, read `curl/managed-agents.md`. **Agents are persistent — create once, reference by ID.** Store the agent ID returned by `agents.create` and pass it to every subsequent `sessions.create`; do not call `agents.create` in the request path.
The Anthropic CLI is one convenient way to create agents and environments from version-controlled YAML (URL in `shared/live-sources.md`). If a binding you need isn't shown in the language README, WebFetch the relevant entry from `shared/live-sources.md` rather than guess. C# does not currently have Managed Agents support; use raw HTTP from `curl/managed-agents.md` as a reference. - -**When the user wants to set up a Managed Agent from scratch** (e.g. "how do I get started", "walk me through creating one", "set up a new agent"): read `shared/managed-agents-onboarding.md` and run its interview — same flow as the `managed-agents-onboard` subcommand. - -**When the user asks "how do I write the client code for X":** reach for `shared/managed-agents-client-patterns.md` — covers lossless stream reconnect, `processed_at` queued/processed gate, interrupt, `tool_confirmation` round-trip, the correct idle/terminated break gate, post-idle status race, stream-first ordering, file-mount gotchas, keeping credentials host-side via custom tools, etc. 
- ---- - -## Reading Guide - -After detecting the language, read the relevant files based on what the user needs: - -### Quick Task Reference - -**Single text classification/summarization/extraction/Q&A:** -→ Read only `{lang}/claude-api/README.md` - -**Chat UI or real-time response display:** -→ Read `{lang}/claude-api/README.md` + `{lang}/claude-api/streaming.md` - -**Long-running conversations (may exceed context window):** -→ Read `{lang}/claude-api/README.md` — see Compaction section -**Migrating to a newer model (Opus 4.7 / Opus 4.6 / Sonnet 4.6) or replacing a retired model:** -→ Read `shared/model-migration.md` -**Prompt caching / optimize caching / "why is my cache hit rate low":** -→ Read `shared/prompt-caching.md` + `{lang}/claude-api/README.md` (Prompt Caching section) - -**Function calling / tool use / agents:** -→ Read `{lang}/claude-api/README.md` + `shared/tool-use-concepts.md` + `{lang}/claude-api/tool-use.md` - -**Agent design (tool surface, context management, caching strategy):** -→ Read `shared/agent-design.md` - -**Batch processing (non-latency-sensitive):** -→ Read `{lang}/claude-api/README.md` + `{lang}/claude-api/batches.md` - -**File uploads across multiple requests:** -→ Read `{lang}/claude-api/README.md` + `{lang}/claude-api/files-api.md` - -**Managed Agents (server-managed stateful agents with workspace):** -→ Read `shared/managed-agents-overview.md` + the rest of the `shared/managed-agents-*.md` files. For Python, TypeScript, Go, Ruby, PHP, and Java, read `{lang}/managed-agents/README.md` for code examples. For cURL, read `curl/managed-agents.md`. **Agents are persistent — create once, reference by ID.** Store the agent ID returned by `agents.create` and pass it to every subsequent `sessions.create`; do not call `agents.create` in the request path. The Anthropic CLI is one convenient way to create agents and environments from version-controlled YAML (URL in `shared/live-sources.md`). 
If a binding you need isn't shown in the language README, WebFetch the relevant entry from `shared/live-sources.md` rather than guess. C# does not currently support Managed Agents — use raw HTTP from `curl/managed-agents.md` as a reference. - -### Claude API (Full File Reference) - -Read the **language-specific Claude API folder** (`{language}/claude-api/`): - -1. **`{language}/claude-api/README.md`** — **Read this first.** Installation, quick start, common patterns, error handling. -2. **`shared/tool-use-concepts.md`** — Read when the user needs function calling, code execution, memory, or structured outputs. Covers conceptual foundations. -3. **`shared/agent-design.md`** — Read when designing an agent: bash vs. dedicated tools, programmatic tool calling, tool search/skills, context editing vs. compaction vs. memory, caching principles. -4. **`{language}/claude-api/tool-use.md`** — Read for language-specific tool use code examples (tool runner, manual loop, code execution, memory, structured outputs). -5. **`{language}/claude-api/streaming.md`** — Read when building chat UIs or interfaces that display responses incrementally. -6. **`{language}/claude-api/batches.md`** — Read when processing many requests offline (not latency-sensitive). Runs asynchronously at 50% cost. -7. **`{language}/claude-api/files-api.md`** — Read when sending the same file across multiple requests without re-uploading. -8. **`shared/prompt-caching.md`** — Read when adding or optimizing prompt caching. Covers prefix-stability design, breakpoint placement, and anti-patterns that silently invalidate cache. -9. **`shared/error-codes.md`** — Read when debugging HTTP errors or implementing error handling. -10. **`shared/model-migration.md`** — Read when upgrading to newer models, replacing retired models, or translating `budget_tokens` / prefill patterns to the current API. -11. **`shared/live-sources.md`** — WebFetch URLs for fetching the latest official documentation. 
- -> **Note:** Java, Go, Ruby, C#, PHP, and cURL each have a single file covering all basics. Read that file plus `shared/tool-use-concepts.md` and `shared/error-codes.md` as needed. - -> **Note:** For the Managed Agents file reference, see the `## Managed Agents (Beta)` section above — it lists every `shared/managed-agents-*.md` file and the language-specific READMEs. - ---- - -## When to Use WebFetch - -Use WebFetch to get the latest documentation when: - -- User asks for "latest" or "current" information -- Cached data seems incorrect -- User asks about features not covered here - -Live documentation URLs are in `shared/live-sources.md`. - -## Common Pitfalls - -- Don't truncate inputs when passing files or content to the API. If the content is too long to fit in the context window, notify the user and discuss options (chunking, summarization, etc.) rather than silently truncating. -- **Opus 4.7 thinking:** Adaptive only. `thinking: {type: "enabled", budget_tokens: N}` returns 400 on Opus 4.7 — `budget_tokens` is fully removed there (along with `temperature`, `top_p`, `top_k`). Use `thinking: {type: "adaptive"}`. -- **Opus 4.6 / Sonnet 4.6 thinking:** Use `thinking: {type: "adaptive"}` — do NOT use `budget_tokens` for new 4.6 code (deprecated on both Opus 4.6 and Sonnet 4.6; for gradual migration of existing code, see the transitional escape hatch in `shared/model-migration.md` — note this carve-out does not apply to Opus 4.7). For older models, `budget_tokens` must be less than `max_tokens` (minimum 1024). The API rejects the request if either constraint is violated. -- **4.6/4.7 family prefill removed:** Assistant message prefills (last-assistant-turn prefills) return a 400 error on Opus 4.6, Opus 4.7, and Sonnet 4.6. Use structured outputs (`output_config.format`) or system prompt instructions to control response format instead.
-- **Confirm migration scope before editing:** When a user asks to migrate code to a newer Claude model without naming a specific file, directory, or file list, **ask which scope to apply first** — the entire working directory, a specific subdirectory, or a specific set of files. Do not start editing until the user confirms. Imperative phrasings like "migrate my codebase", "move my project to X", "upgrade to Sonnet 4.6", or bare "migrate to Opus 4.7" are **still ambiguous** — they tell you what to do but not where, so ask. Proceed without asking only when the prompt names an exact file, a specific directory, or an explicit file list ("migrate `app.py`", "migrate everything under `services/`", "update `a.py` and `b.py`"). See `shared/model-migration.md` Step 0. -- **`max_tokens` defaults:** Don't lowball `max_tokens` — hitting the cap truncates output mid-thought and requires a retry. For non-streaming requests, default to `~16000` (keeps responses under SDK HTTP timeouts). For streaming requests, default to `~64000` (timeouts aren't a concern, so give the model room). Only go lower when you have a hard reason: classification (`~256`), cost caps, or deliberately short outputs. -- **128K output tokens:** Opus 4.6 and Opus 4.7 support up to 128K `max_tokens`, but the SDKs require streaming for values that large to avoid HTTP timeouts. Use `.stream()` with `.get_final_message()` / `.finalMessage()`. -- **Tool call JSON parsing (4.6/4.7 family):** Opus 4.6, Opus 4.7, and Sonnet 4.6 may produce different JSON string escaping in tool call `input` fields (e.g., Unicode or forward-slash escaping). Always parse tool inputs with `json.loads()` / `JSON.parse()` — never do raw string matching on the serialized input. -- **Structured outputs (all models):** Use `output_config: {format: {...}}` instead of the deprecated `output_format` parameter on `messages.create()`. This is a general API change, not 4.6-specific. 
-- **Don't reimplement SDK functionality:** The SDK provides high-level helpers — use them instead of building from scratch. Specifically: use `stream.finalMessage()` instead of wrapping `.on()` events in `new Promise()`; use typed exception classes (`Anthropic.RateLimitError`, etc.) instead of string-matching error messages; use SDK types (`Anthropic.MessageParam`, `Anthropic.Tool`, `Anthropic.Message`, etc.) instead of redefining equivalent interfaces. -- **Don't define custom types for SDK data structures:** The SDK exports types for all API objects. Use `Anthropic.MessageParam` for messages, `Anthropic.Tool` for tool definitions, `Anthropic.ToolUseBlock` / `Anthropic.ToolResultBlockParam` for tool results, `Anthropic.Message` for responses. Defining your own `interface ChatMessage { role: string; content: unknown }` duplicates what the SDK already provides and loses type safety. -- **Report and document output:** For tasks that produce reports, documents, or visualizations, the code execution sandbox has `python-docx`, `python-pptx`, `matplotlib`, `pillow`, and `pypdf` pre-installed. Claude can generate formatted files (DOCX, PDF, charts) and return them via the Files API — consider this for "report" or "document" type requests instead of plain stdout text. diff --git a/packages/catalog/catalog/skills/doc-coauthoring/SKILL.md b/packages/catalog/catalog/skills/doc-coauthoring/SKILL.md deleted file mode 100644 index a5a69839..00000000 --- a/packages/catalog/catalog/skills/doc-coauthoring/SKILL.md +++ /dev/null @@ -1,375 +0,0 @@ ---- -name: doc-coauthoring -description: Guide users through a structured workflow for co-authoring documentation. Use when user wants to write documentation, proposals, technical specs, decision docs, or similar structured content. This workflow helps users efficiently transfer context, refine content through iteration, and verify the doc works for readers. 
Trigger when user mentions writing docs, creating proposals, drafting specs, or similar documentation tasks. ---- - -# Doc Co-Authoring Workflow - -This skill provides a structured workflow for guiding users through collaborative document creation. Act as an active guide, walking users through three stages: Context Gathering, Refinement & Structure, and Reader Testing. - -## When to Offer This Workflow - -**Trigger conditions:** -- User mentions writing documentation: "write a doc", "draft a proposal", "create a spec", "write up" -- User mentions specific doc types: "PRD", "design doc", "decision doc", "RFC" -- User seems to be starting a substantial writing task - -**Initial offer:** -Offer the user a structured workflow for co-authoring the document. Explain the three stages: - -1. **Context Gathering**: User provides all relevant context while Claude asks clarifying questions -2. **Refinement & Structure**: Iteratively build each section through brainstorming and editing -3. **Reader Testing**: Test the doc with a fresh Claude (no context) to catch blind spots before others read it - -Explain that this approach helps ensure the doc works well when others read it (including when they paste it into Claude). Ask if they want to try this workflow or prefer to work freeform. - -If user declines, work freeform. If user accepts, proceed to Stage 1. - -## Stage 1: Context Gathering - -**Goal:** Close the gap between what the user knows and what Claude knows, enabling smart guidance later. - -### Initial Questions - -Start by asking the user for meta-context about the document: - -1. What type of document is this? (e.g., technical spec, decision doc, proposal) -2. Who's the primary audience? -3. What's the desired impact when someone reads this? -4. Is there a template or specific format to follow? -5. Any other constraints or context to know? - -Inform them they can answer in shorthand or dump information however works best for them. 
- -**If user provides a template or mentions a doc type:** -- Ask if they have a template document to share -- If they provide a link to a shared document, use the appropriate integration to fetch it -- If they provide a file, read it - -**If user mentions editing an existing shared document:** -- Use the appropriate integration to read the current state -- Check for images without alt-text -- If images exist without alt-text, explain that when others use Claude to understand the doc, Claude won't be able to see them. Ask if they want alt-text generated. If so, request they paste each image into chat for descriptive alt-text generation. - -### Info Dumping - -Once initial questions are answered, encourage the user to dump all the context they have. Request information such as: -- Background on the project/problem -- Related team discussions or shared documents -- Why alternative solutions aren't being used -- Organizational context (team dynamics, past incidents, politics) -- Timeline pressures or constraints -- Technical architecture or dependencies -- Stakeholder concerns - -Advise them not to worry about organizing it - just get it all out. Offer multiple ways to provide context: -- Info dump stream-of-consciousness -- Point to team channels or threads to read -- Link to shared documents - -**If integrations are available** (e.g., Slack, Teams, Google Drive, SharePoint, or other MCP servers), mention that these can be used to pull in context directly. - -**If no integrations are detected and in Claude.ai or Claude app:** Suggest they can enable connectors in their Claude settings to allow pulling context from messaging apps and document storage directly. - -Inform them clarifying questions will be asked once they've done their initial dump. 
- -**During context gathering:** - -- If user mentions team channels or shared documents: - - If integrations available: Inform them the content will be read now, then use the appropriate integration - - If integrations not available: Explain lack of access. Suggest they enable connectors in Claude settings, or paste the relevant content directly. - -- If user mentions entities/projects that are unknown: - - Ask if connected tools should be searched to learn more - - Wait for user confirmation before searching - -- As user provides context, track what's being learned and what's still unclear - -**Asking clarifying questions:** - -When user signals they've done their initial dump (or after substantial context provided), ask clarifying questions to ensure understanding: - -Generate 5-10 numbered questions based on gaps in the context. - -Inform them they can use shorthand to answer (e.g., "1: yes, 2: see #channel, 3: no because backwards compat"), link to more docs, point to channels to read, or just keep info-dumping. Whatever's most efficient for them. - -**Exit condition:** -Sufficient context has been gathered when questions show understanding - when edge cases and trade-offs can be asked about without needing basics explained. - -**Transition:** -Ask if there's any more context they want to provide at this stage, or if it's time to move on to drafting the document. - -If user wants to add more, let them. When ready, proceed to Stage 2. - -## Stage 2: Refinement & Structure - -**Goal:** Build the document section by section through brainstorming, curation, and iterative refinement. - -**Instructions to user:** -Explain that the document will be built section by section. For each section: -1. Clarifying questions will be asked about what to include -2. 5-20 options will be brainstormed -3. User will indicate what to keep/remove/combine -4. The section will be drafted -5. 
It will be refined through surgical edits - -Start with whichever section has the most unknowns (usually the core decision/proposal), then work through the rest. - -**Section ordering:** - -If the document structure is clear: -Ask which section they'd like to start with. - -Suggest starting with whichever section has the most unknowns. For decision docs, that's usually the core proposal. For specs, it's typically the technical approach. Summary sections are best left for last. - -If user doesn't know what sections they need: -Based on the type of document and template, suggest 3-5 sections appropriate for the doc type. - -Ask if this structure works, or if they want to adjust it. - -**Once structure is agreed:** - -Create the initial document structure with placeholder text for all sections. - -**If access to artifacts is available:** -Use `create_file` to create an artifact. This gives both Claude and the user a scaffold to work from. - -Inform them that the initial structure with placeholders for all sections will be created. - -Create artifact with all section headers and brief placeholder text like "[To be written]" or "[Content here]". - -Provide the scaffold link and indicate it's time to fill in each section. - -**If no access to artifacts:** -Create a markdown file in the working directory. Name it appropriately (e.g., `decision-doc.md`, `technical-spec.md`). - -Inform them that the initial structure with placeholders for all sections will be created. - -Create file with all section headers and placeholder text. - -Confirm the filename has been created and indicate it's time to fill in each section. - -**For each section:** - -### Step 1: Clarifying Questions - -Announce work will begin on the [SECTION NAME] section. Ask 5-10 clarifying questions about what should be included: - -Generate 5-10 specific questions based on context and section purpose. - -Inform them they can answer in shorthand or just indicate what's important to cover. 
- -### Step 2: Brainstorming - -For the [SECTION NAME] section, brainstorm [5-20] things that might be included, depending on the section's complexity. Look for: -- Context shared that might have been forgotten -- Angles or considerations not yet mentioned - -Generate 5-20 numbered options based on section complexity. At the end, offer to brainstorm more if they want additional options. - -### Step 3: Curation - -Ask which points should be kept, removed, or combined. Request brief justifications to help learn priorities for the next sections. - -Provide examples: -- "Keep 1,4,7,9" -- "Remove 3 (duplicates 1)" -- "Remove 6 (audience already knows this)" -- "Combine 11 and 12" - -**If user gives freeform feedback** (e.g., "looks good" or "I like most of it but...") instead of numbered selections, extract their preferences and proceed. Parse what they want kept/removed/changed and apply it. - -### Step 4: Gap Check - -Based on what they've selected, ask if there's anything important missing for the [SECTION NAME] section. - -### Step 5: Drafting - -Use `str_replace` to replace the placeholder text for this section with the actual drafted content. - -Announce the [SECTION NAME] section will be drafted now based on what they've selected. - -**If using artifacts:** -After drafting, provide a link to the artifact. - -Ask them to read through it and indicate what to change. Note that being specific helps learning for the next sections. - -**If using a file (no artifacts):** -After drafting, confirm completion. - -Inform them the [SECTION NAME] section has been drafted in [filename]. Ask them to read through it and indicate what to change. Note that being specific helps learning for the next sections. - -**Key instruction for user (include when drafting the first section):** -Provide a note: Instead of editing the doc directly, ask them to indicate what to change. This helps learning of their style for future sections. 
For example: "Remove the X bullet - already covered by Y" or "Make the third paragraph more concise". - -### Step 6: Iterative Refinement - -As user provides feedback: -- Use `str_replace` to make edits (never reprint the whole doc) -- **If using artifacts:** Provide link to artifact after each edit -- **If using files:** Just confirm edits are complete -- If user edits doc directly and asks to read it: mentally note the changes they made and keep them in mind for future sections (this shows their preferences) - -**Continue iterating** until user is satisfied with the section. - -### Quality Checking - -After 3 consecutive iterations with no substantial changes, ask if anything can be removed without losing important information. - -When section is done, confirm [SECTION NAME] is complete. Ask if ready to move to the next section. - -**Repeat for all sections.** - -### Near Completion - -As approaching completion (80%+ of sections done), announce intention to re-read the entire document and check for: -- Flow and consistency across sections -- Redundancy or contradictions -- Anything that feels like "slop" or generic filler -- Whether every sentence carries weight - -Read entire document and provide feedback. - -**When all sections are drafted and refined:** -Announce all sections are drafted. Indicate intention to review the complete document one more time. - -Review for overall coherence, flow, completeness. - -Provide any final suggestions. - -Ask if ready to move to Reader Testing, or if they want to refine anything else. - -## Stage 3: Reader Testing - -**Goal:** Test the document with a fresh Claude (no context bleed) to verify it works for readers. - -**Instructions to user:** -Explain that testing will now occur to see if the document actually works for readers. This catches blind spots - things that make sense to the authors but might confuse others. 
- -### Testing Approach - -**If access to sub-agents is available (e.g., in Claude Code):** - -Perform the testing directly without user involvement. - -### Step 1: Predict Reader Questions - -Announce intention to predict what questions readers might ask when trying to discover this document. - -Generate 5-10 questions that readers would realistically ask. - -### Step 2: Test with Sub-Agent - -Announce that these questions will be tested with a fresh Claude instance (no context from this conversation). - -For each question, invoke a sub-agent with just the document content and the question. - -Summarize what Reader Claude got right/wrong for each question. - -### Step 3: Run Additional Checks - -Announce additional checks will be performed. - -Invoke sub-agent to check for ambiguity, false assumptions, contradictions. - -Summarize any issues found. - -### Step 4: Report and Fix - -If issues found: -Report that Reader Claude struggled with specific issues. - -List the specific issues. - -Indicate intention to fix these gaps. - -Loop back to refinement for problematic sections. - ---- - -**If no access to sub-agents (e.g., claude.ai web interface):** - -The user will need to do the testing manually. - -### Step 1: Predict Reader Questions - -Ask what questions people might ask when trying to discover this document. What would they type into Claude.ai? - -Generate 5-10 questions that readers would realistically ask. - -### Step 2: Setup Testing - -Provide testing instructions: -1. Open a fresh Claude conversation: https://claude.ai -2. Paste or share the document content (if using a shared doc platform with connectors enabled, provide the link) -3. Ask Reader Claude the generated questions - -For each question, instruct Reader Claude to provide: -- The answer -- Whether anything was ambiguous or unclear -- What knowledge/context the doc assumes is already known - -Check if Reader Claude gives correct answers or misinterprets anything. 
- -### Step 3: Additional Checks - -Also ask Reader Claude: -- "What in this doc might be ambiguous or unclear to readers?" -- "What knowledge or context does this doc assume readers already have?" -- "Are there any internal contradictions or inconsistencies?" - -### Step 4: Iterate Based on Results - -Ask what Reader Claude got wrong or struggled with. Indicate intention to fix those gaps. - -Loop back to refinement for any problematic sections. - ---- - -### Exit Condition (Both Approaches) - -When Reader Claude consistently answers questions correctly and doesn't surface new gaps or ambiguities, the doc is ready. - -## Final Review - -When Reader Testing passes: -Announce the doc has passed Reader Claude testing. Before completion: - -1. Recommend they do a final read-through themselves - they own this document and are responsible for its quality -2. Suggest double-checking any facts, links, or technical details -3. Ask them to verify it achieves the impact they wanted - -Ask if they want one more review, or if the work is done. - -**If user wants final review, provide it. Otherwise:** -Announce document completion. Provide a few final tips: -- Consider linking this conversation in an appendix so readers can see how the doc was developed -- Use appendices to provide depth without bloating the main doc -- Update the doc as feedback is received from real readers - -## Tips for Effective Guidance - -**Tone:** -- Be direct and procedural -- Explain rationale briefly when it affects user behavior -- Don't try to "sell" the approach - just execute it - -**Handling Deviations:** -- If user wants to skip a stage: Ask if they want to skip this and write freeform -- If user seems frustrated: Acknowledge this is taking longer than expected. 
Suggest ways to move faster -- Always give user agency to adjust the process - -**Context Management:** -- Throughout, if context is missing on something mentioned, proactively ask -- Don't let gaps accumulate - address them as they come up - -**Artifact Management:** -- Use `create_file` for drafting full sections -- Use `str_replace` for all edits -- Provide artifact link after every change -- Never use artifacts for brainstorming lists - that's just conversation - -**Quality over Speed:** -- Don't rush through stages -- Each iteration should make meaningful improvements -- The goal is a document that actually works for readers diff --git a/packages/catalog/catalog/skills/docx/SKILL.md b/packages/catalog/catalog/skills/docx/SKILL.md deleted file mode 100644 index 2951e559..00000000 --- a/packages/catalog/catalog/skills/docx/SKILL.md +++ /dev/null @@ -1,590 +0,0 @@ ---- -name: docx -description: "Use this skill whenever the user wants to create, read, edit, or manipulate Word documents (.docx files). Triggers include: any mention of 'Word doc', 'word document', '.docx', or requests to produce professional documents with formatting like tables of contents, headings, page numbers, or letterheads. Also use when extracting or reorganizing content from .docx files, inserting or replacing images in documents, performing find-and-replace in Word files, working with tracked changes or comments, or converting content into a polished Word document. If the user asks for a 'report', 'memo', 'letter', 'template', or similar deliverable as a Word or .docx file, use this skill. Do NOT use for PDFs, spreadsheets, Google Docs, or general coding tasks unrelated to document generation." -license: Proprietary. LICENSE.txt has complete terms ---- - -# DOCX creation, editing, and analysis - -## Overview - -A .docx file is a ZIP archive containing XML files. 
- -## Quick Reference - -| Task | Approach | -|------|----------| -| Read/analyze content | `pandoc` or unpack for raw XML | -| Create new document | Use `docx-js` - see Creating New Documents below | -| Edit existing document | Unpack → edit XML → repack - see Editing Existing Documents below | - -### Converting .doc to .docx - -Legacy `.doc` files must be converted before editing: - -```bash -python scripts/office/soffice.py --headless --convert-to docx document.doc -``` - -### Reading Content - -```bash -# Text extraction with tracked changes -pandoc --track-changes=all document.docx -o output.md - -# Raw XML access -python scripts/office/unpack.py document.docx unpacked/ -``` - -### Converting to Images - -```bash -python scripts/office/soffice.py --headless --convert-to pdf document.docx -pdftoppm -jpeg -r 150 document.pdf page -``` - -### Accepting Tracked Changes - -To produce a clean document with all tracked changes accepted (requires LibreOffice): - -```bash -python scripts/accept_changes.py input.docx output.docx -``` - ---- - -## Creating New Documents - -Generate .docx files with JavaScript, then validate. Install: `npm install -g docx` - -### Setup -```javascript -const { Document, Packer, Paragraph, TextRun, Table, TableRow, TableCell, ImageRun, - Header, Footer, AlignmentType, PageOrientation, LevelFormat, ExternalHyperlink, - InternalHyperlink, Bookmark, FootnoteReferenceRun, PositionalTab, - PositionalTabAlignment, PositionalTabRelativeTo, PositionalTabLeader, - TabStopType, TabStopPosition, Column, SectionType, - TableOfContents, HeadingLevel, BorderStyle, WidthType, ShadingType, - VerticalAlign, PageNumber, PageBreak } = require('docx'); - -const doc = new Document({ sections: [{ children: [/* content */] }] }); -Packer.toBuffer(doc).then(buffer => fs.writeFileSync("doc.docx", buffer)); -``` - -### Validation -After creating the file, validate it. If validation fails, unpack, fix the XML, and repack. 
-```bash -python scripts/office/validate.py doc.docx -``` - -### Page Size - -```javascript -// CRITICAL: docx-js defaults to A4, not US Letter -// Always set page size explicitly for consistent results -sections: [{ - properties: { - page: { - size: { - width: 12240, // 8.5 inches in DXA - height: 15840 // 11 inches in DXA - }, - margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } // 1 inch margins - } - }, - children: [/* content */] -}] -``` - -**Common page sizes (DXA units, 1440 DXA = 1 inch):** - -| Paper | Width | Height | Content Width (1" margins) | -|-------|-------|--------|---------------------------| -| US Letter | 12,240 | 15,840 | 9,360 | -| A4 (default) | 11,906 | 16,838 | 9,026 | - -**Landscape orientation:** docx-js swaps width/height internally, so pass portrait dimensions and let it handle the swap: -```javascript -size: { - width: 12240, // Pass SHORT edge as width - height: 15840, // Pass LONG edge as height - orientation: PageOrientation.LANDSCAPE // docx-js swaps them in the XML -}, -// Content width = 15840 - left margin - right margin (uses the long edge) -``` - -### Styles (Override Built-in Headings) - -Use Arial as the default font (universally supported). Keep titles black for readability. 
- -```javascript -const doc = new Document({ - styles: { - default: { document: { run: { font: "Arial", size: 24 } } }, // 12pt default - paragraphStyles: [ - // IMPORTANT: Use exact IDs to override built-in styles - { id: "Heading1", name: "Heading 1", basedOn: "Normal", next: "Normal", quickFormat: true, - run: { size: 32, bold: true, font: "Arial" }, - paragraph: { spacing: { before: 240, after: 240 }, outlineLevel: 0 } }, // outlineLevel required for TOC - { id: "Heading2", name: "Heading 2", basedOn: "Normal", next: "Normal", quickFormat: true, - run: { size: 28, bold: true, font: "Arial" }, - paragraph: { spacing: { before: 180, after: 180 }, outlineLevel: 1 } }, - ] - }, - sections: [{ - children: [ - new Paragraph({ heading: HeadingLevel.HEADING_1, children: [new TextRun("Title")] }), - ] - }] -}); -``` - -### Lists (NEVER use unicode bullets) - -```javascript -// ❌ WRONG - never manually insert bullet characters -new Paragraph({ children: [new TextRun("• Item")] }) // BAD -new Paragraph({ children: [new TextRun("\u2022 Item")] }) // BAD - -// ✅ CORRECT - use numbering config with LevelFormat.BULLET -const doc = new Document({ - numbering: { - config: [ - { reference: "bullets", - levels: [{ level: 0, format: LevelFormat.BULLET, text: "•", alignment: AlignmentType.LEFT, - style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] }, - { reference: "numbers", - levels: [{ level: 0, format: LevelFormat.DECIMAL, text: "%1.", alignment: AlignmentType.LEFT, - style: { paragraph: { indent: { left: 720, hanging: 360 } } } }] }, - ] - }, - sections: [{ - children: [ - new Paragraph({ numbering: { reference: "bullets", level: 0 }, - children: [new TextRun("Bullet item")] }), - new Paragraph({ numbering: { reference: "numbers", level: 0 }, - children: [new TextRun("Numbered item")] }), - ] - }] -}); - -// ⚠️ Each reference creates INDEPENDENT numbering -// Same reference = continues (1,2,3 then 4,5,6) -// Different reference = restarts (1,2,3 then 1,2,3) -``` - 
-### Tables - -**CRITICAL: Tables need dual widths** - set both `columnWidths` on the table AND `width` on each cell. Without both, tables render incorrectly on some platforms. - -```javascript -// CRITICAL: Always set table width for consistent rendering -// CRITICAL: Use ShadingType.CLEAR (not SOLID) to prevent black backgrounds -const border = { style: BorderStyle.SINGLE, size: 1, color: "CCCCCC" }; -const borders = { top: border, bottom: border, left: border, right: border }; - -new Table({ - width: { size: 9360, type: WidthType.DXA }, // Always use DXA (percentages break in Google Docs) - columnWidths: [4680, 4680], // Must sum to table width (DXA: 1440 = 1 inch) - rows: [ - new TableRow({ - children: [ - new TableCell({ - borders, - width: { size: 4680, type: WidthType.DXA }, // Also set on each cell - shading: { fill: "D5E8F0", type: ShadingType.CLEAR }, // CLEAR not SOLID - margins: { top: 80, bottom: 80, left: 120, right: 120 }, // Cell padding (internal, not added to width) - children: [new Paragraph({ children: [new TextRun("Cell")] })] - }) - ] - }) - ] -}) -``` - -**Table width calculation:** - -Always use `WidthType.DXA` — `WidthType.PERCENTAGE` breaks in Google Docs. 
- -```javascript -// Table width = sum of columnWidths = content width -// US Letter with 1" margins: 12240 - 2880 = 9360 DXA -width: { size: 9360, type: WidthType.DXA }, -columnWidths: [7000, 2360] // Must sum to table width -``` - -**Width rules:** -- **Always use `WidthType.DXA`** — never `WidthType.PERCENTAGE` (incompatible with Google Docs) -- Table width must equal the sum of `columnWidths` -- Cell `width` must match corresponding `columnWidth` -- Cell `margins` are internal padding - they reduce content area, not add to cell width -- For full-width tables: use content width (page width minus left and right margins) - -### Images - -```javascript -// CRITICAL: type parameter is REQUIRED -new Paragraph({ - children: [new ImageRun({ - type: "png", // Required: png, jpg, jpeg, gif, bmp, svg - data: fs.readFileSync("image.png"), - transformation: { width: 200, height: 150 }, - altText: { title: "Title", description: "Desc", name: "Name" } // All three required - })] -}) -``` - -### Page Breaks - -```javascript -// CRITICAL: PageBreak must be inside a Paragraph -new Paragraph({ children: [new PageBreak()] }) - -// Or use pageBreakBefore -new Paragraph({ pageBreakBefore: true, children: [new TextRun("New page")] }) -``` - -### Hyperlinks - -```javascript -// External link -new Paragraph({ - children: [new ExternalHyperlink({ - children: [new TextRun({ text: "Click here", style: "Hyperlink" })], - link: "https://example.com", - })] -}) - -// Internal link (bookmark + reference) -// 1. Create bookmark at destination -new Paragraph({ heading: HeadingLevel.HEADING_1, children: [ - new Bookmark({ id: "chapter1", children: [new TextRun("Chapter 1")] }), -]}) -// 2. 
Link to it -new Paragraph({ children: [new InternalHyperlink({ - children: [new TextRun({ text: "See Chapter 1", style: "Hyperlink" })], - anchor: "chapter1", -})]}) -``` - -### Footnotes - -```javascript -const doc = new Document({ - footnotes: { - 1: { children: [new Paragraph("Source: Annual Report 2024")] }, - 2: { children: [new Paragraph("See appendix for methodology")] }, - }, - sections: [{ - children: [new Paragraph({ - children: [ - new TextRun("Revenue grew 15%"), - new FootnoteReferenceRun(1), - new TextRun(" using adjusted metrics"), - new FootnoteReferenceRun(2), - ], - })] - }] -}); -``` - -### Tab Stops - -```javascript -// Right-align text on same line (e.g., date opposite a title) -new Paragraph({ - children: [ - new TextRun("Company Name"), - new TextRun("\tJanuary 2025"), - ], - tabStops: [{ type: TabStopType.RIGHT, position: TabStopPosition.MAX }], -}) - -// Dot leader (e.g., TOC-style) -new Paragraph({ - children: [ - new TextRun("Introduction"), - new TextRun({ children: [ - new PositionalTab({ - alignment: PositionalTabAlignment.RIGHT, - relativeTo: PositionalTabRelativeTo.MARGIN, - leader: PositionalTabLeader.DOT, - }), - "3", - ]}), - ], -}) -``` - -### Multi-Column Layouts - -```javascript -// Equal-width columns -sections: [{ - properties: { - column: { - count: 2, // number of columns - space: 720, // gap between columns in DXA (720 = 0.5 inch) - equalWidth: true, - separate: true, // vertical line between columns - }, - }, - children: [/* content flows naturally across columns */] -}] - -// Custom-width columns (equalWidth must be false) -sections: [{ - properties: { - column: { - equalWidth: false, - children: [ - new Column({ width: 5400, space: 720 }), - new Column({ width: 3240 }), - ], - }, - }, - children: [/* content */] -}] -``` - -Force a column break with a new section using `type: SectionType.NEXT_COLUMN`. 
- -### Table of Contents - -```javascript -// CRITICAL: Headings must use HeadingLevel ONLY - no custom styles -new TableOfContents("Table of Contents", { hyperlink: true, headingStyleRange: "1-3" }) -``` - -### Headers/Footers - -```javascript -sections: [{ - properties: { - page: { margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 } } // 1440 = 1 inch - }, - headers: { - default: new Header({ children: [new Paragraph({ children: [new TextRun("Header")] })] }) - }, - footers: { - default: new Footer({ children: [new Paragraph({ - children: [new TextRun("Page "), new TextRun({ children: [PageNumber.CURRENT] })] - })] }) - }, - children: [/* content */] -}] -``` - -### Critical Rules for docx-js - -- **Set page size explicitly** - docx-js defaults to A4; use US Letter (12240 x 15840 DXA) for US documents -- **Landscape: pass portrait dimensions** - docx-js swaps width/height internally; pass short edge as `width`, long edge as `height`, and set `orientation: PageOrientation.LANDSCAPE` -- **Never use `\n`** - use separate Paragraph elements -- **Never use unicode bullets** - use `LevelFormat.BULLET` with numbering config -- **PageBreak must be in Paragraph** - standalone creates invalid XML -- **ImageRun requires `type`** - always specify png/jpg/etc -- **Always set table `width` with DXA** - never use `WidthType.PERCENTAGE` (breaks in Google Docs) -- **Tables need dual widths** - `columnWidths` array AND cell `width`, both must match -- **Table width = sum of columnWidths** - for DXA, ensure they add up exactly -- **Always add cell margins** - use `margins: { top: 80, bottom: 80, left: 120, right: 120 }` for readable padding -- **Use `ShadingType.CLEAR`** - never SOLID for table shading -- **Never use tables as dividers/rules** - cells have minimum height and render as empty boxes (including in headers/footers); use `border: { bottom: { style: BorderStyle.SINGLE, size: 6, color: "2E75B6", space: 1 } }` on a Paragraph instead. 
For two-column footers, use tab stops (see Tab Stops section), not tables -- **TOC requires HeadingLevel only** - no custom styles on heading paragraphs -- **Override built-in styles** - use exact IDs: "Heading1", "Heading2", etc. -- **Include `outlineLevel`** - required for TOC (0 for H1, 1 for H2, etc.) - ---- - -## Editing Existing Documents - -**Follow all 3 steps in order.** - -### Step 1: Unpack -```bash -python scripts/office/unpack.py document.docx unpacked/ -``` -Extracts XML, pretty-prints, merges adjacent runs, and converts smart quotes to XML entities (`&#x201C;` etc.) so they survive editing. Use `--merge-runs false` to skip run merging. - -### Step 2: Edit XML - -Edit files in `unpacked/word/`. See XML Reference below for patterns. - -**Use "Claude" as the author** for tracked changes and comments, unless the user explicitly requests use of a different name. - -**Use the Edit tool directly for string replacement. Do not write Python scripts.** Scripts introduce unnecessary complexity. The Edit tool shows exactly what is being replaced. - -**CRITICAL: Use smart quotes for new content.** When adding text with apostrophes or quotes, use XML entities to produce smart quotes: -```xml - -<w:t>Here&#x2019;s a quote: &#x201C;Hello&#x201D;</w:t> -``` -| Entity | Character | -|--------|-----------| -| `&#x2018;` | ‘ (left single) | -| `&#x2019;` | ’ (right single / apostrophe) | -| `&#x201C;` | “ (left double) | -| `&#x201D;` | ” (right double) | - -**Adding comments:** Use `comment.py` to handle boilerplate across multiple XML files (text must be pre-escaped XML): -```bash -python scripts/comment.py unpacked/ 0 "Comment text with &amp; and &#x2019;" -python scripts/comment.py unpacked/ 1 "Reply text" --parent 0 # reply to comment 0 -python scripts/comment.py unpacked/ 0 "Text" --author "Custom Author" # custom author name -``` -Then add markers to document.xml (see Comments in XML Reference).
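- -### Step 3: Pack -```bash -python scripts/office/pack.py unpacked/ output.docx --original document.docx -``` -Validates with auto-repair, condenses XML, and creates DOCX. Use `--validate false` to skip. - -**Auto-repair will fix:** -- `durableId` >= 0x7FFFFFFF (regenerates valid ID) -- Missing `xml:space="preserve"` on `<w:t>` with whitespace - -**Auto-repair won't fix:** -- Malformed XML, invalid element nesting, missing relationships, schema violations - -### Common Pitfalls - -- **Replace entire `<w:r>` elements**: When adding tracked changes, replace the whole `<w:r>...</w:r>` block with `<w:del>...<w:ins>...` as siblings. Don't inject tracked change tags inside a run. -- **Preserve `<w:rPr>` formatting**: Copy the original run's `<w:rPr>` block into your tracked change runs to maintain bold, font size, etc. - ---- - -## XML Reference - -### Schema Compliance - -- **Element order in `<w:pPr>`**: `<w:pStyle>`, `<w:numPr>`, `<w:spacing>`, `<w:ind>`, `<w:jc>`, `<w:rPr>` last -- **Whitespace**: Add `xml:space="preserve"` to `<w:t>` with leading/trailing spaces -- **RSIDs**: Must be 8-digit hex (e.g., `00AB1234`) - -### Tracked Changes - -**Insertion:** -```xml -<w:ins w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z"> - <w:r><w:t>inserted text</w:t></w:r> -</w:ins> -``` - -**Deletion:** -```xml -<w:del w:id="2" w:author="Claude" w:date="2025-01-01T00:00:00Z"> - <w:r><w:delText>deleted text</w:delText></w:r> -</w:del> -``` - -**Inside `<w:del>`**: Use `<w:delText>` instead of `<w:t>`, and `<w:delInstrText>` instead of `<w:instrText>`. - -**Minimal edits** - only mark what changes: -```xml -<!-- Change "30 days" to "60 days" --> -<w:r><w:t xml:space="preserve">The term is </w:t></w:r> -<w:del w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z"> - <w:r><w:delText>30</w:delText></w:r> -</w:del> -<w:ins w:id="2" w:author="Claude" w:date="2025-01-01T00:00:00Z"> - <w:r><w:t>60</w:t></w:r> -</w:ins> -<w:r><w:t xml:space="preserve"> days.</w:t></w:r> -``` - -**Deleting entire paragraphs/list items** - when removing ALL content from a paragraph, also mark the paragraph mark as deleted so it merges with the next paragraph. Add `<w:del/>` inside `<w:pPr><w:rPr>`: -```xml -<w:p> - <w:pPr> - <w:numPr>...</w:numPr> <!-- list numbering if present --> - <w:rPr> - <w:del w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z"/> - </w:rPr> - </w:pPr> - <w:del w:id="2" w:author="Claude" w:date="2025-01-01T00:00:00Z"> - <w:r><w:delText>Entire paragraph content being deleted...</w:delText></w:r> - </w:del> -</w:p> -``` -Without the `<w:del/>` in `<w:pPr><w:rPr>`, accepting changes leaves an empty paragraph/list item.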
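- -**Rejecting another author's insertion** - nest deletion inside their insertion: -```xml -<w:ins w:id="5" w:author="Jane" w:date="2025-01-01T00:00:00Z"> - <w:del w:id="10" w:author="Claude" w:date="2025-01-01T00:00:00Z"> - <w:r><w:delText>their inserted text</w:delText></w:r> - </w:del> -</w:ins> -``` - -**Restoring another author's deletion** - add insertion after (don't modify their deletion): -```xml -<w:del w:id="5" w:author="Jane" w:date="2025-01-01T00:00:00Z"> - <w:r><w:delText>deleted text</w:delText></w:r> -</w:del> -<w:ins w:id="10" w:author="Claude" w:date="2025-01-01T00:00:00Z"> - <w:r><w:t>deleted text</w:t></w:r> -</w:ins> -``` - -### Comments - -After running `comment.py` (see Step 2), add markers to document.xml. For replies, use `--parent` flag and nest markers inside the parent's. - -**CRITICAL: `<w:commentRangeStart>` and `<w:commentRangeEnd>` are siblings of `<w:r>`, never inside `<w:r>`.** - -```xml -<!-- Comment markers are direct children of w:p, never inside w:r --> -<w:commentRangeStart w:id="0"/> -<w:del w:id="1" w:author="Claude" w:date="2025-01-01T00:00:00Z"> - <w:r><w:delText>deleted</w:delText></w:r> -</w:del> -<w:r><w:t xml:space="preserve"> more text</w:t></w:r> -<w:commentRangeEnd w:id="0"/> -<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r> - -<!-- Comment 0 with reply 1 nested inside --> -<w:commentRangeStart w:id="0"/> - <w:commentRangeStart w:id="1"/> - <w:r><w:t>text</w:t></w:r> - <w:commentRangeEnd w:id="1"/> -<w:commentRangeEnd w:id="0"/> -<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r> -<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="1"/></w:r> -``` - -### Images - -1. Add image file to `word/media/` -2. Add relationship to `word/_rels/document.xml.rels`: -```xml -<Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image1.png"/> -``` -3. Add content type to `[Content_Types].xml`: -```xml -<Default Extension="png" ContentType="image/png"/> -``` -4. Reference in document.xml: -```xml -<w:drawing> - <wp:inline> - <wp:extent cx="914400" cy="914400"/> <!-- EMUs: 914400 = 1 inch --> - <a:graphic> - <a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture"> - <pic:pic> - <pic:blipFill><a:blip r:embed="rId5"/></pic:blipFill> - </pic:pic> - </a:graphicData> - </a:graphic> - </wp:inline> -</w:drawing> -``` - ---- - -## Dependencies - -- **pandoc**: Text extraction -- **docx**: `npm install -g docx` (new documents) -- **LibreOffice**: PDF conversion (auto-configured for sandboxed environments via `scripts/office/soffice.py`) -- **Poppler**: `pdftoppm` for images diff --git a/packages/catalog/catalog/skills/eng-api-design-reviewer/SKILL.md b/packages/catalog/catalog/skills/eng-api-design-reviewer/SKILL.md deleted file mode 100644 index 23094463..00000000 --- a/packages/catalog/catalog/skills/eng-api-design-reviewer/SKILL.md +++ /dev/null @@ -1,421 +0,0 @@ ---- -name: "api-design-reviewer" -description: "API Design Reviewer" ---- - -# API Design Reviewer - -**Tier:** POWERFUL -**Category:** Engineering / Architecture -**Maintainer:** Claude Skills Team - -## Overview - -The API Design Reviewer skill provides comprehensive analysis and review of API designs, focusing on REST conventions, best practices, and industry standards. This skill helps engineering teams build consistent, maintainable, and well-designed APIs through automated linting, breaking change detection, and design scorecards. - -## Core Capabilities - -### 1.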
API Linting and Convention Analysis -- **Resource Naming Conventions**: Enforces kebab-case for resources, camelCase for fields -- **HTTP Method Usage**: Validates proper use of GET, POST, PUT, PATCH, DELETE -- **URL Structure**: Analyzes endpoint patterns for consistency and RESTful design -- **Status Code Compliance**: Ensures appropriate HTTP status codes are used -- **Error Response Formats**: Validates consistent error response structures -- **Documentation Coverage**: Checks for missing descriptions and documentation gaps - -### 2. Breaking Change Detection -- **Endpoint Removal**: Detects removed or deprecated endpoints -- **Response Shape Changes**: Identifies modifications to response structures -- **Field Removal**: Tracks removed or renamed fields in API responses -- **Type Changes**: Catches field type modifications that could break clients -- **Required Field Additions**: Flags new required fields that could break existing integrations -- **Status Code Changes**: Detects changes to expected status codes - -### 3. 
API Design Scoring and Assessment -- **Consistency Analysis** (30%): Evaluates naming conventions, response patterns, and structural consistency -- **Documentation Quality** (20%): Assesses completeness and clarity of API documentation -- **Security Implementation** (20%): Reviews authentication, authorization, and security headers -- **Usability Design** (15%): Analyzes ease of use, discoverability, and developer experience -- **Performance Patterns** (15%): Evaluates caching, pagination, and efficiency patterns - -## REST Design Principles - -### Resource Naming Conventions -``` -✅ Good Examples: -- /api/v1/users -- /api/v1/user-profiles -- /api/v1/orders/123/line-items - -❌ Bad Examples: -- /api/v1/getUsers -- /api/v1/user_profiles -- /api/v1/orders/123/lineItems -``` - -### HTTP Method Usage -- **GET**: Retrieve resources (safe, idempotent) -- **POST**: Create new resources (not idempotent) -- **PUT**: Replace entire resources (idempotent) -- **PATCH**: Partial resource updates (not necessarily idempotent) -- **DELETE**: Remove resources (idempotent) - -### URL Structure Best Practices -``` -Collection Resources: /api/v1/users -Individual Resources: /api/v1/users/123 -Nested Resources: /api/v1/users/123/orders -Actions: /api/v1/users/123/activate (POST) -Filtering: /api/v1/users?status=active&role=admin -``` - -## Versioning Strategies - -### 1. URL Versioning (Recommended) -``` -/api/v1/users -/api/v2/users -``` -**Pros**: Clear, explicit, easy to route -**Cons**: URL proliferation, caching complexity - -### 2. Header Versioning -``` -GET /api/users -Accept: application/vnd.api+json;version=1 -``` -**Pros**: Clean URLs, content negotiation -**Cons**: Less visible, harder to test manually - -### 3. Media Type Versioning -``` -GET /api/users -Accept: application/vnd.myapi.v1+json -``` -**Pros**: RESTful, supports multiple representations -**Cons**: Complex, harder to implement - -### 4. 
Query Parameter Versioning -``` -/api/users?version=1 -``` -**Pros**: Simple to implement -**Cons**: Not RESTful, can be ignored - -## Pagination Patterns - -### Offset-Based Pagination -```json -{ - "data": [...], - "pagination": { - "offset": 20, - "limit": 10, - "total": 150, - "hasMore": true - } -} -``` - -### Cursor-Based Pagination -```json -{ - "data": [...], - "pagination": { - "nextCursor": "eyJpZCI6MTIzfQ==", - "hasMore": true - } -} -``` - -### Page-Based Pagination -```json -{ - "data": [...], - "pagination": { - "page": 3, - "pageSize": 10, - "totalPages": 15, - "totalItems": 150 - } -} -``` - -## Error Response Formats - -### Standard Error Structure -```json -{ - "error": { - "code": "VALIDATION_ERROR", - "message": "The request contains invalid parameters", - "details": [ - { - "field": "email", - "code": "INVALID_FORMAT", - "message": "Email address is not valid" - } - ], - "requestId": "req-123456", - "timestamp": "2024-02-16T13:00:00Z" - } -} -``` - -### HTTP Status Code Usage -- **400 Bad Request**: Invalid request syntax or parameters -- **401 Unauthorized**: Authentication required -- **403 Forbidden**: Access denied (authenticated but not authorized) -- **404 Not Found**: Resource not found -- **409 Conflict**: Resource conflict (duplicate, version mismatch) -- **422 Unprocessable Entity**: Valid syntax but semantic errors -- **429 Too Many Requests**: Rate limit exceeded -- **500 Internal Server Error**: Unexpected server error - -## Authentication and Authorization Patterns - -### Bearer Token Authentication -``` -Authorization: Bearer <token> -``` - -### API Key Authentication -``` -X-API-Key: <api-key> -Authorization: Api-Key <api-key> -``` - -### OAuth 2.0 Flow -``` -Authorization: Bearer <oauth-access-token> -``` - -### Role-Based Access Control (RBAC) -```json -{ - "user": { - "id": "123", - "roles": ["admin", "editor"], - "permissions": ["read:users", "write:orders"] - } -} -``` - -## Rate Limiting Implementation - -### Headers -``` -X-RateLimit-Limit: 1000 -X-RateLimit-Remaining:
999 -X-RateLimit-Reset: 1640995200 -``` - -### Response on Limit Exceeded -```json -{ - "error": { - "code": "RATE_LIMIT_EXCEEDED", - "message": "Too many requests", - "retryAfter": 3600 - } -} -``` - -## HATEOAS (Hypermedia as the Engine of Application State) - -### Example Implementation -```json -{ - "id": "123", - "name": "John Doe", - "email": "john@example.com", - "_links": { - "self": { "href": "/api/v1/users/123" }, - "orders": { "href": "/api/v1/users/123/orders" }, - "profile": { "href": "/api/v1/users/123/profile" }, - "deactivate": { - "href": "/api/v1/users/123/deactivate", - "method": "POST" - } - } -} -``` - -## Idempotency - -### Idempotent Methods -- **GET**: Always safe and idempotent -- **PUT**: Should be idempotent (replace entire resource) -- **DELETE**: Should be idempotent (same result) -- **PATCH**: May or may not be idempotent - -### Idempotency Keys -``` -POST /api/v1/payments -Idempotency-Key: 123e4567-e89b-12d3-a456-426614174000 -``` - -## Backward Compatibility Guidelines - -### Safe Changes (Non-Breaking) -- Adding optional fields to requests -- Adding fields to responses -- Adding new endpoints -- Making required fields optional -- Adding new enum values (with graceful handling) - -### Breaking Changes (Require Version Bump) -- Removing fields from responses -- Making optional fields required -- Changing field types -- Removing endpoints -- Changing URL structures -- Modifying error response formats - -## OpenAPI/Swagger Validation - -### Required Components -- **API Information**: Title, description, version -- **Server Information**: Base URLs and descriptions -- **Path Definitions**: All endpoints with methods -- **Parameter Definitions**: Query, path, header parameters -- **Request/Response Schemas**: Complete data models -- **Security Definitions**: Authentication schemes -- **Error Responses**: Standard error formats - -### Best Practices -- Use consistent naming conventions -- Provide detailed descriptions for all components -- 
Include examples for complex objects -- Define reusable components and schemas -- Validate against OpenAPI specification - -## Performance Considerations - -### Caching Strategies -``` -Cache-Control: public, max-age=3600 -ETag: "123456789" -Last-Modified: Wed, 21 Oct 2015 07:28:00 GMT -``` - -### Efficient Data Transfer -- Use appropriate HTTP methods -- Implement field selection (`?fields=id,name,email`) -- Support compression (gzip) -- Implement efficient pagination -- Use ETags for conditional requests - -### Resource Optimization -- Avoid N+1 queries -- Implement batch operations -- Use async processing for heavy operations -- Support partial updates (PATCH) - -## Security Best Practices - -### Input Validation -- Validate all input parameters -- Sanitize user data -- Use parameterized queries -- Implement request size limits - -### Authentication Security -- Use HTTPS everywhere -- Implement secure token storage -- Support token expiration and refresh -- Use strong authentication mechanisms - -### Authorization Controls -- Implement principle of least privilege -- Use resource-based permissions -- Support fine-grained access control -- Audit access patterns - -## Tools and Scripts - -### api_linter.py -Analyzes API specifications for compliance with REST conventions and best practices. - -**Features:** -- OpenAPI/Swagger spec validation -- Naming convention checks -- HTTP method usage validation -- Error format consistency -- Documentation completeness analysis - -### breaking_change_detector.py -Compares API specification versions to identify breaking changes. - -**Features:** -- Endpoint comparison -- Schema change detection -- Field removal/modification tracking -- Migration guide generation -- Impact severity assessment - -### api_scorecard.py -Provides comprehensive scoring of API design quality. 
- -**Features:** -- Multi-dimensional scoring -- Detailed improvement recommendations -- Letter grade assessment (A-F) -- Benchmark comparisons -- Progress tracking - -## Integration Examples - -### CI/CD Integration -```yaml -- name: "api-linting" - run: python scripts/api_linter.py openapi.json - -- name: "breaking-change-detection" - run: python scripts/breaking_change_detector.py openapi-v1.json openapi-v2.json - -- name: "api-scorecard" - run: python scripts/api_scorecard.py openapi.json -``` - -### Pre-commit Hooks -```bash -#!/bin/bash -python engineering/api-design-reviewer/scripts/api_linter.py api/openapi.json -if [ $? -ne 0 ]; then - echo "API linting failed. Please fix the issues before committing." - exit 1 -fi -``` - -## Best Practices Summary - -1. **Consistency First**: Maintain consistent naming, response formats, and patterns -2. **Documentation**: Provide comprehensive, up-to-date API documentation -3. **Versioning**: Plan for evolution with clear versioning strategies -4. **Error Handling**: Implement consistent, informative error responses -5. **Security**: Build security into every layer of the API -6. **Performance**: Design for scale and efficiency from the start -7. **Backward Compatibility**: Minimize breaking changes and provide migration paths -8. **Testing**: Implement comprehensive testing including contract testing -9. **Monitoring**: Add observability for API usage and performance -10. **Developer Experience**: Prioritize ease of use and clear documentation - -## Common Anti-Patterns to Avoid - -1. **Verb-based URLs**: Use nouns for resources, not actions -2. **Inconsistent Response Formats**: Maintain standard response structures -3. **Over-nesting**: Avoid deeply nested resource hierarchies -4. **Ignoring HTTP Status Codes**: Use appropriate status codes for different scenarios -5. **Poor Error Messages**: Provide actionable, specific error information -6. **Missing Pagination**: Always paginate list endpoints -7. 
**No Versioning Strategy**: Plan for API evolution from day one -8. **Exposing Internal Structure**: Design APIs for external consumption, not internal convenience -9. **Missing Rate Limiting**: Protect your API from abuse and overload -10. **Inadequate Testing**: Test all aspects including error cases and edge conditions - -## Conclusion - -The API Design Reviewer skill provides a comprehensive framework for building, reviewing, and maintaining high-quality REST APIs. By following these guidelines and using the provided tools, development teams can create APIs that are consistent, well-documented, secure, and maintainable. - -Regular use of the linting, breaking change detection, and scoring tools ensures continuous improvement and helps maintain API quality throughout the development lifecycle. diff --git a/packages/catalog/catalog/skills/eng-api-test-suite-builder/SKILL.md b/packages/catalog/catalog/skills/eng-api-test-suite-builder/SKILL.md deleted file mode 100644 index e95da10d..00000000 --- a/packages/catalog/catalog/skills/eng-api-test-suite-builder/SKILL.md +++ /dev/null @@ -1,177 +0,0 @@ ---- -name: "api-test-suite-builder" -description: "Use when the user asks to generate API tests, create integration test suites, test REST endpoints, or build contract tests." ---- - -# API Test Suite Builder - -**Tier:** POWERFUL -**Category:** Engineering -**Domain:** Testing / API Quality - ---- - -## Overview - -Scans API route definitions across frameworks (Next.js App Router, Express, FastAPI, Django REST) and -auto-generates comprehensive test suites covering auth, input validation, error codes, pagination, file -uploads, and rate limiting. Outputs ready-to-run test files for Vitest+Supertest (Node) or Pytest+httpx -(Python). 
- ---- - -## Core Capabilities - -- **Route detection** — scan source files to extract all API endpoints -- **Auth coverage** — valid/invalid/expired tokens, missing auth header -- **Input validation** — missing fields, wrong types, boundary values, injection attempts -- **Error code matrix** — 400/401/403/404/422/500 for each route -- **Pagination** — first/last/empty/oversized pages -- **File uploads** — valid, oversized, wrong MIME type, empty -- **Rate limiting** — burst detection, per-user vs global limits - ---- - -## When to Use - -- New API added — generate test scaffold before writing implementation (TDD) -- Legacy API with no tests — scan and generate baseline coverage -- API contract review — verify existing tests match current route definitions -- Pre-release regression check — ensure all routes have at least smoke tests -- Security audit prep — generate adversarial input tests - ---- - -## Route Detection - -### Next.js App Router -```bash -# Find all route handlers -find ./app/api -name "route.ts" -o -name "route.js" | sort - -# Extract HTTP methods from each route file -grep -rn "export async function\|export function" app/api/**/route.ts | \ - grep -oE "(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)" | sort -u - -# Full route map -find ./app/api -name "route.ts" | while read f; do - route=$(echo $f | sed 's|./app||' | sed 's|/route.ts||') - methods=$(grep -oE "export (async )?function (GET|POST|PUT|PATCH|DELETE)" "$f" | \ - grep -oE "(GET|POST|PUT|PATCH|DELETE)") - echo "$methods $route" -done -``` - -### Express -```bash -# Find all router files -find ./src -name "*.ts" -o -name "*.js" | xargs grep -l "router\.\(get\|post\|put\|delete\|patch\)" 2>/dev/null - -# Extract routes with line numbers -grep -rn "router\.\(get\|post\|put\|delete\|patch\)\|app\.\(get\|post\|put\|delete\|patch\)" \ - src/ --include="*.ts" | grep -oE "(get|post|put|delete|patch)\(['\"][^'\"]*['\"]" - -# Generate route map -grep -rn "router\.\|app\." 
src/ --include="*.ts" | \ - grep -oE "\.(get|post|put|delete|patch)\(['\"][^'\"]+['\"]" | \ - sed "s/\.\(.*\)('\(.*\)'/\U\1 \2/" -``` - -### FastAPI -```bash -# Find all route decorators -grep -rn "@app\.\|@router\." . --include="*.py" | \ - grep -E "@(app|router)\.(get|post|put|delete|patch)" - -# Extract with path and function name -grep -rn "@\(app\|router\)\.\(get\|post\|put\|delete\|patch\)" . --include="*.py" | \ - grep -oE "@(app|router)\.(get|post|put|delete|patch)\(['\"][^'\"]*['\"]" -``` - -### Django REST Framework -```bash -# urlpatterns extraction -grep -rn "path\|re_path\|url(" . --include="*.py" | grep "urlpatterns" -A 50 | \ - grep -E "path\(['\"]" | grep -oE "['\"][^'\"]+['\"]" | head -40 - -# ViewSet router registration -grep -rn "router\.register\|DefaultRouter\|SimpleRouter" . --include="*.py" -``` - ---- - -## Test Generation Patterns - -### Auth Test Matrix - -For every authenticated endpoint, generate: - -| Test Case | Expected Status | -|-----------|----------------| -| No Authorization header | 401 | -| Invalid token format | 401 | -| Valid token, wrong user role | 403 | -| Expired JWT token | 401 | -| Valid token, correct role | 2xx | -| Token from deleted user | 401 | - -### Input Validation Matrix - -For every POST/PUT/PATCH endpoint with a request body: - -| Test Case | Expected Status | -|-----------|----------------| -| Empty body `{}` | 400 or 422 | -| Missing required fields (one at a time) | 400 or 422 | -| Wrong type (string where int expected) | 400 or 422 | -| Boundary: value at min-1 | 400 or 422 | -| Boundary: value at min | 2xx | -| Boundary: value at max | 2xx | -| Boundary: value at max+1 | 400 or 422 | -| SQL injection in string field | 400 or 200 (sanitized) | -| XSS payload in string field | 400 or 200 (sanitized) | -| Null values for required fields | 400 or 422 | - ---- - -## Example Test Files -→ See references/example-test-files.md for details - -## Generating Tests from Route Scan - -When given a codebase, follow 
this process: - -1. **Scan routes** using the detection commands above -2. **Read each route handler** to understand: - - Expected request body schema - - Auth requirements (middleware, decorators) - - Return types and status codes - - Business rules (ownership, role checks) -3. **Generate test file** per route group using the patterns above -4. **Name tests descriptively**: `"returns 401 when token is expired"` not `"auth test 3"` -5. **Use factories/fixtures** for test data — never hardcode IDs -6. **Assert response shape**, not just status code - ---- - -## Common Pitfalls - -- **Testing only happy paths** — 80% of bugs live in error paths; test those first -- **Hardcoded test data IDs** — use factories/fixtures; IDs change between environments -- **Shared state between tests** — always clean up in afterEach/afterAll -- **Testing implementation, not behavior** — test what the API returns, not how it does it -- **Missing boundary tests** — off-by-one errors are extremely common in pagination and limits -- **Not testing token expiry** — expired tokens behave differently from invalid ones -- **Ignoring Content-Type** — test that API rejects wrong content types (xml when json expected) - ---- - -## Best Practices - -1. One describe block per endpoint — keeps failures isolated and readable -2. Seed minimal data — don't load the entire DB; create only what the test needs -3. Use `beforeAll` for shared setup, `afterAll` for cleanup — not `beforeEach` for expensive ops -4. Assert specific error messages/fields, not just status codes -5. Test that sensitive fields (password, secret) are never in responses -6. For auth tests, always test the "missing header" case separately from "invalid token" -7. 
Add rate limit tests last — they can interfere with other test suites if run in parallel diff --git a/packages/catalog/catalog/skills/eng-ci-cd-pipeline-builder/SKILL.md b/packages/catalog/catalog/skills/eng-ci-cd-pipeline-builder/SKILL.md deleted file mode 100644 index 07fb2b16..00000000 --- a/packages/catalog/catalog/skills/eng-ci-cd-pipeline-builder/SKILL.md +++ /dev/null @@ -1,147 +0,0 @@ ---- -name: "ci-cd-pipeline-builder" -description: "CI/CD Pipeline Builder" ---- - -# CI/CD Pipeline Builder - -**Tier:** POWERFUL -**Category:** Engineering -**Domain:** DevOps / Automation - -## Overview - -Use this skill to generate pragmatic CI/CD pipelines from detected project stack signals, not guesswork. It focuses on fast baseline generation, repeatable checks, and environment-aware deployment stages. - -## Core Capabilities - -- Detect language/runtime/tooling from repository files -- Recommend CI stages (`lint`, `test`, `build`, `deploy`) -- Generate GitHub Actions or GitLab CI starter pipelines -- Include caching and matrix strategy based on detected stack -- Emit machine-readable detection output for automation -- Keep pipeline logic aligned with project lockfiles and build commands - -## When to Use - -- Bootstrapping CI for a new repository -- Replacing brittle copied pipeline files -- Migrating between GitHub Actions and GitLab CI -- Auditing whether pipeline steps match actual stack -- Creating a reproducible baseline before custom hardening - -## Key Workflows - -### 1. Detect Stack - -```bash -python3 scripts/stack_detector.py --repo . --format text -python3 scripts/stack_detector.py --repo . --format json > detected-stack.json -``` - -Supports input via stdin or `--input` file for offline analysis payloads. - -### 2. 
Generate Pipeline From Detection - -```bash -python3 scripts/pipeline_generator.py \ - --input detected-stack.json \ - --platform github \ - --output .github/workflows/ci.yml \ - --format text -``` - -Or end-to-end from repo directly: - -```bash -python3 scripts/pipeline_generator.py --repo . --platform gitlab --output .gitlab-ci.yml -``` - -### 3. Validate Before Merge - -1. Confirm commands exist in project (`test`, `lint`, `build`). -2. Run generated pipeline locally where possible. -3. Ensure required secrets/env vars are documented. -4. Keep deploy jobs gated by protected branches/environments. - -### 4. Add Deployment Stages Safely - -- Start with CI-only (`lint/test/build`). -- Add staging deploy with explicit environment context. -- Add production deploy with manual gate/approval. -- Keep rollout/rollback commands explicit and auditable. - -## Script Interfaces - -- `python3 scripts/stack_detector.py --help` - - Detects stack signals from repository files - - Reads optional JSON input from stdin/`--input` -- `python3 scripts/pipeline_generator.py --help` - - Generates GitHub/GitLab YAML from detection payload - - Writes to stdout or `--output` - -## Common Pitfalls - -1. Copying a Node pipeline into Python/Go repos -2. Enabling deploy jobs before stable tests -3. Forgetting dependency cache keys -4. Running expensive matrix builds for every trivial branch -5. Missing branch protections around prod deploy jobs -6. Hardcoding secrets in YAML instead of CI secret stores - -## Best Practices - -1. Detect stack first, then generate pipeline. -2. Keep generated baseline under version control. -3. Add one optimization at a time (cache, matrix, split jobs). -4. Require green CI before deployment jobs. -5. Use protected environments for production credentials. -6. Regenerate pipeline when stack changes significantly. 
- -## References - -- [references/github-actions-templates.md](references/github-actions-templates.md) -- [references/gitlab-ci-templates.md](references/gitlab-ci-templates.md) -- [references/deployment-gates.md](references/deployment-gates.md) -- [README.md](README.md) - -## Detection Heuristics - -The stack detector prioritizes deterministic file signals over heuristics: - -- Lockfiles determine package manager preference -- Language manifests determine runtime families -- Script commands (if present) drive lint/test/build commands -- Missing scripts trigger conservative placeholder commands - -## Generation Strategy - -Start with a minimal, reliable pipeline: - -1. Checkout and setup runtime -2. Install dependencies with cache strategy -3. Run lint, test, build in separate steps -4. Publish artifacts only after passing checks - -Then layer advanced behavior (matrix builds, security scans, deploy gates). - -## Platform Decision Notes - -- GitHub Actions for tight GitHub ecosystem integration -- GitLab CI for integrated SCM + CI in self-hosted environments -- Keep one canonical pipeline source per repo to reduce drift - -## Validation Checklist - -1. Generated YAML parses successfully. -2. All referenced commands exist in the repo. -3. Cache strategy matches package manager. -4. Required secrets are documented, not embedded. -5. Branch/protected-environment rules match org policy. - -## Scaling Guidance - -- Split long jobs by stage when runtime exceeds 10 minutes. -- Introduce test matrix only when compatibility truly requires it. -- Separate deploy jobs from CI jobs to keep feedback fast. -- Track pipeline duration and flakiness as first-class metrics. 
diff --git a/packages/catalog/catalog/skills/eng-code-tour/SKILL.md b/packages/catalog/catalog/skills/eng-code-tour/SKILL.md deleted file mode 100644 index b9aa98c4..00000000 --- a/packages/catalog/catalog/skills/eng-code-tour/SKILL.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -name: "code-tour" -description: "Use when the user asks to create a CodeTour .tour file — persona-targeted, step-by-step walkthroughs that link to real files and line numbers. Trigger for: create a tour, onboarding tour, architecture tour, PR review tour, explain how X works, vibe check, RCA tour, contributor guide, or any structured code walkthrough request." ---- - -# Code Tour - -Create **CodeTour** files — persona-targeted, step-by-step walkthroughs of a codebase that link directly to files and line numbers. CodeTour files live in `.tours/` and work with the [VS Code CodeTour extension](https://github.com/microsoft/codetour). - -## Overview - -A great tour is a **narrative** — a story told to a specific person about what matters, why it matters, and what to do next. Only create `.tour` JSON files. Never modify source code. - -## When to Use This Skill - -- User asks to create a code tour, onboarding tour, or architecture walkthrough -- User says "tour for this PR", "explain how X works", "vibe check", "RCA tour" -- User wants a contributor guide, security review, or bug investigation walkthrough -- Any request for a structured walkthrough with file/line anchors - -## Core Workflow - -### 1. Discover the repo - -Before asking anything, explore the codebase: - -In parallel: list root directory, read README, check config files. -Then: identify language(s), framework(s), project purpose. Map folder structure 1-2 levels deep. Find entry points — every path in the tour must be real. - -If the repo has fewer than 5 source files, create a quick-depth tour regardless of persona — there's not enough to warrant a deep one. - -### 2. Infer the intent - -One message should be enough. 
Infer persona, depth, and focus silently. - -| User says | Persona | Depth | -|-----------|---------|-------| -| "tour for this PR" | pr-reviewer | standard | -| "why did X break" / "RCA" | rca-investigator | standard | -| "onboarding" / "new joiner" | new-joiner | standard | -| "quick tour" / "vibe check" | vibecoder | quick | -| "architecture" | architect | deep | -| "security" / "auth review" | security-reviewer | standard | -| (no qualifier) | new-joiner | standard | - -When intent is ambiguous, default to **new-joiner** persona at **standard** depth — it's the most generally useful. - -### 3. Read actual files - -**Every file path and line number must be verified.** A tour pointing to the wrong line is worse than no tour. - -### 4. Write the tour - -Save to `.tours/-.tour`. - -```json -{ - "$schema": "https://aka.ms/codetour-schema", - "title": "Descriptive Title — Persona / Goal", - "description": "Who this is for and what they'll understand after.", - "ref": "", - "steps": [] -} -``` - -### Step types - -| Type | When to use | Example | -|------|-------------|---------| -| **Content** | Intro/closing only (max 2) | `{ "title": "Welcome", "description": "..." }` | -| **Directory** | Orient to a module | `{ "directory": "src/services", "title": "..." }` | -| **File + line** | The workhorse | `{ "file": "src/auth.ts", "line": 42, "title": "..." }` | -| **Selection** | Highlight a code block | `{ "file": "...", "selection": {...}, "title": "..." }` | -| **Pattern** | Regex match (volatile files) | `{ "file": "...", "pattern": "class App", "title": "..." }` | -| **URI** | Link to PR, issue, doc | `{ "uri": "https://...", "title": "..." }` | - -### Step count - -| Depth | Steps | Use for | -|-------|-------|---------| -| Quick | 5-8 | Vibecoder, fast exploration | -| Standard | 9-13 | Most personas | -| Deep | 14-18 | Architect, RCA | - -### Writing descriptions — SMIG formula - -- **S — Situation**: What is the reader looking at? 
-- **M — Mechanism**: How does this code work? -- **I — Implication**: Why does this matter for this persona? -- **G — Gotcha**: What would a smart person get wrong? - -### 5. Validate - -- [ ] Every `file` path relative to repo root (no leading `/` or `./`) -- [ ] Every `file` confirmed to exist -- [ ] Every `line` verified by reading the file -- [ ] First step has `file` or `directory` anchor -- [ ] At most 2 content-only steps -- [ ] `nextTour` matches another tour's `title` exactly if set - -## Personas - -| Persona | Goal | Must cover | -|---------|------|------------| -| **Vibecoder** | Get the vibe fast | Entry point, main modules. Max 8 steps. | -| **New joiner** | Structured ramp-up | Directories, setup, business context | -| **Bug fixer** | Root cause fast | Trigger -> fault points -> tests | -| **RCA investigator** | Why did it fail | Causality chain, observability anchors | -| **Feature explainer** | End-to-end | UI -> API -> backend -> storage | -| **PR reviewer** | Review correctly | Change story, invariants, risky areas | -| **Architect** | Shape and rationale | Boundaries, tradeoffs, extension points | -| **Security reviewer** | Trust boundaries | Auth flow, validation, secret handling | -| **Refactorer** | Safe restructuring | Seams, hidden deps, extraction order | -| **External contributor** | Contribute safely | Safe areas, conventions, landmines | - -## Narrative Arc - -1. **Orientation** — `file` or `directory` step (never content-only first step — blank in VS Code) -2. **High-level map** — 1-3 directory steps showing major modules -3. **Core path** — file/line steps, the heart of the tour -4. **Closing** — what the reader can now do, suggested follow-ups - -## Anti-Patterns - -| Anti-pattern | Fix | -|---|---| -| **File listing** — "this file contains the models" | Tell a story. Each step depends on the previous. | -| **Generic descriptions** | Name the specific pattern unique to this codebase. 
| -| **Line number guessing** | Never write a line you didn't verify by reading. | -| **Too many steps** for quick depth | Actually cut steps. | -| **Hallucinated files** | If it doesn't exist, skip the step. | -| **Recap closing** — "we covered X, Y, Z" | Tell the reader what they can now *do*. | -| **Content-only first step** | Anchor step 1 to a file or directory. | - -## Cross-References - -- Related: `engineering/codebase-onboarding` — for broader onboarding beyond tours -- Related: `engineering/pr-review-expert` — for automated PR review workflows -- CodeTour extension: [microsoft/codetour](https://github.com/microsoft/codetour) -- Real-world tours: [coder/code-server](https://github.com/coder/code-server/blob/main/.tours/contributing.tour) diff --git a/packages/catalog/catalog/skills/eng-database-designer/SKILL.md b/packages/catalog/catalog/skills/eng-database-designer/SKILL.md deleted file mode 100644 index 9fa36ca8..00000000 --- a/packages/catalog/catalog/skills/eng-database-designer/SKILL.md +++ /dev/null @@ -1,289 +0,0 @@ ---- -name: "database-designer" -description: "Use when the user asks to design database schemas, plan data migrations, optimize queries, choose between SQL and NoSQL, or model data relationships." ---- - -# Database Designer - POWERFUL Tier Skill - -## Overview - -A comprehensive database design skill that provides expert-level analysis, optimization, and migration capabilities for modern database systems. This skill combines theoretical principles with practical tools to help architects and developers create scalable, performant, and maintainable database schemas. 
- -## Core Competencies - -### Schema Design & Analysis -- **Normalization Analysis**: Automated detection of normalization levels (1NF through BCNF) -- **Denormalization Strategy**: Smart recommendations for performance optimization -- **Data Type Optimization**: Identification of inappropriate types and size issues -- **Constraint Analysis**: Missing foreign keys, unique constraints, and null checks -- **Naming Convention Validation**: Consistent table and column naming patterns -- **ERD Generation**: Automatic Mermaid diagram creation from DDL - -### Index Optimization -- **Index Gap Analysis**: Identification of missing indexes on foreign keys and query patterns -- **Composite Index Strategy**: Optimal column ordering for multi-column indexes -- **Index Redundancy Detection**: Elimination of overlapping and unused indexes -- **Performance Impact Modeling**: Selectivity estimation and query cost analysis -- **Index Type Selection**: B-tree, hash, partial, covering, and specialized indexes - -### Migration Management -- **Zero-Downtime Migrations**: Expand-contract pattern implementation -- **Schema Evolution**: Safe column additions, deletions, and type changes -- **Data Migration Scripts**: Automated data transformation and validation -- **Rollback Strategy**: Complete reversal capabilities with validation -- **Execution Planning**: Ordered migration steps with dependency resolution - -## Database Design Principles -→ See references/database-design-reference.md for details - -## Best Practices - -### Schema Design -1. **Use meaningful names**: Clear, consistent naming conventions -2. **Choose appropriate data types**: Right-sized columns for storage efficiency -3. **Define proper constraints**: Foreign keys, check constraints, unique indexes -4. **Consider future growth**: Plan for scale from the beginning -5. **Document relationships**: Clear foreign key relationships and business rules - -### Performance Optimization -1. 
**Index strategically**: Cover common query patterns without over-indexing -2. **Monitor query performance**: Regular analysis of slow queries -3. **Partition large tables**: Improve query performance and maintenance -4. **Use appropriate isolation levels**: Balance consistency with performance -5. **Implement connection pooling**: Efficient resource utilization - -### Security Considerations -1. **Principle of least privilege**: Grant minimal necessary permissions -2. **Encrypt sensitive data**: At rest and in transit -3. **Audit access patterns**: Monitor and log database access -4. **Validate inputs**: Prevent SQL injection attacks -5. **Regular security updates**: Keep database software current - -## Query Generation Patterns - -### SELECT with JOINs - -```sql --- INNER JOIN: only matching rows -SELECT o.id, c.name, o.total -FROM orders o -INNER JOIN customers c ON c.id = o.customer_id; - --- LEFT JOIN: all left rows, NULLs for non-matches -SELECT c.name, COUNT(o.id) AS order_count -FROM customers c -LEFT JOIN orders o ON o.customer_id = c.id -GROUP BY c.name; - --- Self-join: hierarchical data (employees/managers) -SELECT e.name AS employee, m.name AS manager -FROM employees e -LEFT JOIN employees m ON m.id = e.manager_id; -``` - -### Common Table Expressions (CTEs) - -```sql --- Recursive CTE for org chart -WITH RECURSIVE org AS ( - SELECT id, name, manager_id, 1 AS depth - FROM employees WHERE manager_id IS NULL - UNION ALL - SELECT e.id, e.name, e.manager_id, o.depth + 1 - FROM employees e INNER JOIN org o ON o.id = e.manager_id -) -SELECT * FROM org ORDER BY depth, name; -``` - -### Window Functions - -```sql --- ROW_NUMBER for pagination / dedup -SELECT *, ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY created_at DESC) AS rn -FROM orders; - --- RANK with gaps, DENSE_RANK without gaps -SELECT name, score, RANK() OVER (ORDER BY score DESC) AS rank FROM leaderboard; - --- LAG/LEAD for comparing adjacent rows -SELECT date, revenue, - revenue - 
LAG(revenue) OVER (ORDER BY date) AS daily_change -FROM daily_sales; -``` - -### Aggregation Patterns - -```sql --- FILTER clause (PostgreSQL) for conditional aggregation -SELECT - COUNT(*) AS total, - COUNT(*) FILTER (WHERE status = 'active') AS active, - AVG(amount) FILTER (WHERE amount > 0) AS avg_positive -FROM accounts; - --- GROUPING SETS for multi-level rollups -SELECT region, product, SUM(revenue) -FROM sales -GROUP BY GROUPING SETS ((region, product), (region), ()); -``` - ---- - -## Migration Patterns - -### Up/Down Migration Scripts - -Every migration must have a reversible counterpart. Name files with a timestamp prefix for ordering: - -``` -migrations/ -├── 20260101_000001_create_users.up.sql -├── 20260101_000001_create_users.down.sql -├── 20260115_000002_add_users_email_index.up.sql -└── 20260115_000002_add_users_email_index.down.sql -``` - -### Zero-Downtime Migrations (Expand/Contract) - -Use the expand-contract pattern to avoid locking or breaking running code: - -1. **Expand** — add the new column/table (nullable, with default) -2. **Migrate data** — backfill in batches; dual-write from application -3. **Transition** — application reads from new column; stop writing to old -4. 
**Contract** — drop old column in a follow-up migration - -### Data Backfill Strategies - -```sql --- Batch update to avoid long-running locks -UPDATE users SET email_normalized = LOWER(email) -WHERE id IN (SELECT id FROM users WHERE email_normalized IS NULL LIMIT 5000); --- Repeat in a loop until 0 rows affected -``` - -### Rollback Procedures - -- Always test the `down.sql` in staging before deploying `up.sql` to production -- Keep rollback window short — if the contract step has run, rollback requires a new forward migration -- For irreversible changes (dropping columns with data), take a logical backup first - ---- - -## Performance Optimization - -### Indexing Strategies - -| Index Type | Use Case | Example | -|------------|----------|---------| -| **B-tree** (default) | Equality, range, ORDER BY | `CREATE INDEX idx_users_email ON users(email);` | -| **GIN** | Full-text search, JSONB, arrays | `CREATE INDEX idx_docs_body ON docs USING gin(to_tsvector('english', body));` | -| **GiST** | Geometry, range types, nearest-neighbor | `CREATE INDEX idx_locations ON places USING gist(coords);` | -| **Partial** | Subset of rows (reduce size) | `CREATE INDEX idx_active ON users(email) WHERE active = true;` | -| **Covering** | Index-only scans | `CREATE INDEX idx_cov ON orders(customer_id) INCLUDE (total, created_at);` | - -### EXPLAIN Plan Reading - -```sql -EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT ...; -``` - -Key signals to watch: -- **Seq Scan** on large tables — missing index -- **Nested Loop** with high row estimates — consider hash/merge join or add index -- **Buffers shared read** much higher than **hit** — working set exceeds memory - -### N+1 Query Detection - -Symptoms: application issues one query per row (e.g., fetching related records in a loop). 
- -Fixes: -- Use `JOIN` or subquery to fetch in one round-trip -- ORM eager loading (`select_related` / `includes` / `with`) -- DataLoader pattern for GraphQL resolvers - -### Connection Pooling - -| Tool | Protocol | Best For | -|------|----------|----------| -| **PgBouncer** | PostgreSQL | Transaction/statement pooling, low overhead | -| **ProxySQL** | MySQL | Query routing, read/write splitting | -| **Built-in pool** (HikariCP, SQLAlchemy pool) | Any | Application-level pooling | - -**Rule of thumb:** Set pool size to `(2 * CPU cores) + disk spindles`. For cloud SSDs, start with `2 * vCPUs` and tune. - -### Read Replicas and Query Routing - -- Route all `SELECT` queries to replicas; writes to primary -- Account for replication lag (typically <1s for async, 0 for sync) -- Use `pg_last_wal_replay_lsn()` to detect lag before reading critical data - ---- - -## Multi-Database Decision Matrix - -| Criteria | PostgreSQL | MySQL | SQLite | SQL Server | -|----------|-----------|-------|--------|------------| -| **Best for** | Complex queries, JSONB, extensions | Web apps, read-heavy workloads | Embedded, dev/test, edge | Enterprise .NET stacks | -| **JSON support** | Excellent (JSONB + GIN) | Good (JSON type) | Minimal | Good (OPENJSON) | -| **Replication** | Streaming, logical | Group replication, InnoDB cluster | N/A | Always On AG | -| **Licensing** | Open source (PostgreSQL License) | Open source (GPL) / commercial | Public domain | Commercial | -| **Max practical size** | Multi-TB | Multi-TB | ~1 TB (single-writer) | Multi-TB | - -**When to choose:** -- **PostgreSQL** — default choice for new projects; best extensibility and standards compliance -- **MySQL** — existing MySQL ecosystem; simple read-heavy web applications -- **SQLite** — mobile apps, CLI tools, unit test databases, IoT/edge -- **SQL Server** — mandated by enterprise policy; deep .NET/Azure integration - -### NoSQL Considerations - -| Database | Model | Use When | -|----------|-------|----------| -| 
**MongoDB** | Document | Schema flexibility, rapid prototyping, content management | -| **Redis** | Key-value / cache | Session store, rate limiting, leaderboards, pub/sub | -| **DynamoDB** | Wide-column | Serverless AWS apps, single-digit-ms latency at any scale | - -> Use SQL as default. Reach for NoSQL only when the access pattern clearly benefits from it. - ---- - -## Sharding & Replication - -### Horizontal vs Vertical Partitioning - -- **Vertical partitioning**: Split columns across tables (e.g., separate BLOB columns). Reduces I/O for narrow queries. -- **Horizontal partitioning (sharding)**: Split rows across databases/servers. Required when a single node cannot hold the dataset or handle the throughput. - -### Sharding Strategies - -| Strategy | How It Works | Pros | Cons | -|----------|-------------|------|------| -| **Hash** | `shard = hash(key) % N` | Even distribution | Resharding is expensive | -| **Range** | Shard by date or ID range | Simple, good for time-series | Hot spots on latest shard | -| **Geographic** | Shard by user region | Data locality, compliance | Cross-region queries are hard | - -### Replication Patterns - -| Pattern | Consistency | Latency | Use Case | -|---------|------------|---------|----------| -| **Synchronous** | Strong | Higher write latency | Financial transactions | -| **Asynchronous** | Eventual | Low write latency | Read-heavy web apps | -| **Semi-synchronous** | At-least-one replica confirmed | Moderate | Balance of safety and speed | - ---- - -## Cross-References - -- **sql-database-assistant** — query writing, optimization, and debugging for day-to-day SQL work -- **database-schema-designer** — ERD modeling, normalization analysis, and schema generation -- **migration-architect** — large-scale migration planning across database engines or major schema overhauls -- **senior-backend** — application-layer patterns (connection pooling, ORM best practices) -- **senior-devops** — infrastructure provisioning for database 
clusters and replicas - ---- - -## Conclusion - -Effective database design requires balancing multiple competing concerns: performance, scalability, maintainability, and business requirements. This skill provides the tools and knowledge to make informed decisions throughout the database lifecycle, from initial schema design through production optimization and evolution. - -The included tools automate common analysis and optimization tasks, while the comprehensive guides provide the theoretical foundation for making sound architectural decisions. Whether building a new system or optimizing an existing one, these resources provide expert-level guidance for creating robust, scalable database solutions. diff --git a/packages/catalog/catalog/skills/eng-dependency-auditor/SKILL.md b/packages/catalog/catalog/skills/eng-dependency-auditor/SKILL.md deleted file mode 100644 index 68b4a16b..00000000 --- a/packages/catalog/catalog/skills/eng-dependency-auditor/SKILL.md +++ /dev/null @@ -1,338 +0,0 @@ ---- -name: "dependency-auditor" -description: "Dependency Auditor" ---- - -# Dependency Auditor - -> **Skill Type:** POWERFUL -> **Category:** Engineering -> **Domain:** Dependency Management & Security - -## Overview - -The **Dependency Auditor** is a comprehensive toolkit for analyzing, auditing, and managing dependencies across multi-language software projects. This skill provides deep visibility into your project's dependency ecosystem, enabling teams to identify vulnerabilities, ensure license compliance, optimize dependency trees, and plan safe upgrades. - -In modern software development, dependencies form complex webs that can introduce significant security, legal, and maintenance risks. A single project might have hundreds of direct and transitive dependencies, each potentially introducing vulnerabilities, license conflicts, or maintenance burden. This skill addresses these challenges through automated analysis and actionable recommendations. 
- -## Core Capabilities - -### 1. Vulnerability Scanning & CVE Matching - -**Comprehensive Security Analysis** -- Scans dependencies against built-in vulnerability databases -- Matches Common Vulnerabilities and Exposures (CVE) patterns -- Identifies known security issues across multiple ecosystems -- Analyzes transitive dependency vulnerabilities -- Provides CVSS scores and exploit assessments -- Tracks vulnerability disclosure timelines -- Maps vulnerabilities to dependency paths - -**Multi-Language Support** -- **JavaScript/Node.js**: package.json, package-lock.json, yarn.lock -- **Python**: requirements.txt, pyproject.toml, Pipfile.lock, poetry.lock -- **Go**: go.mod, go.sum -- **Rust**: Cargo.toml, Cargo.lock -- **Ruby**: Gemfile, Gemfile.lock -- **Java/Maven**: pom.xml, gradle.lockfile -- **PHP**: composer.json, composer.lock -- **C#/.NET**: packages.config, project.assets.json - -### 2. License Compliance & Legal Risk Assessment - -**License Classification System** -- **Permissive Licenses**: MIT, Apache 2.0, BSD (2-clause, 3-clause), ISC -- **Copyleft (Strong)**: GPL (v2, v3), AGPL (v3) -- **Copyleft (Weak)**: LGPL (v2.1, v3), MPL (v2.0) -- **Proprietary**: Commercial, custom, or restrictive licenses -- **Dual Licensed**: Multi-license scenarios and compatibility -- **Unknown/Ambiguous**: Missing or unclear licensing - -**Conflict Detection** -- Identifies incompatible license combinations -- Warns about GPL contamination in permissive projects -- Analyzes license inheritance through dependency chains -- Provides compliance recommendations for distribution -- Generates legal risk matrices for decision-making - -### 3. 
Outdated Dependency Detection - -**Version Analysis** -- Identifies dependencies with available updates -- Categorizes updates by severity (patch, minor, major) -- Detects pinned versions that may be outdated -- Analyzes semantic versioning patterns -- Identifies floating version specifiers -- Tracks release frequencies and maintenance status - -**Maintenance Status Assessment** -- Identifies abandoned or unmaintained packages -- Analyzes commit frequency and contributor activity -- Tracks last release dates and security patch availability -- Identifies packages with known end-of-life dates -- Assesses upstream maintenance quality - -### 4. Dependency Bloat Analysis - -**Unused Dependency Detection** -- Identifies dependencies that aren't actually imported/used -- Analyzes import statements and usage patterns -- Detects redundant dependencies with overlapping functionality -- Identifies oversized packages for simple use cases -- Maps actual vs. declared dependency usage - -**Redundancy Analysis** -- Identifies multiple packages providing similar functionality -- Detects version conflicts in transitive dependencies -- Analyzes bundle size impact of dependencies -- Identifies opportunities for dependency consolidation -- Maps dependency overlap and duplication - -### 5. 
Upgrade Path Planning & Breaking Change Risk - -**Semantic Versioning Analysis** -- Analyzes semver patterns to predict breaking changes -- Identifies safe upgrade paths (patch/minor versions) -- Flags major version updates requiring attention -- Tracks breaking changes across dependency updates -- Provides rollback strategies for failed upgrades - -**Risk Assessment Matrix** -- Low Risk: Patch updates, security fixes -- Medium Risk: Minor updates with new features -- High Risk: Major version updates, API changes -- Critical Risk: Dependencies with known breaking changes - -**Upgrade Prioritization** -- Security patches: Highest priority -- Bug fixes: High priority -- Feature updates: Medium priority -- Major rewrites: Planned priority -- Deprecated features: Immediate attention - -### 6. Supply Chain Security - -**Dependency Provenance** -- Verifies package signatures and checksums -- Analyzes package download sources and mirrors -- Identifies suspicious or compromised packages -- Tracks package ownership changes and maintainer shifts -- Detects typosquatting and malicious packages - -**Transitive Risk Analysis** -- Maps complete dependency trees -- Identifies high-risk transitive dependencies -- Analyzes dependency depth and complexity -- Tracks influence of indirect dependencies -- Provides supply chain risk scoring - -### 7. 
Lockfile Analysis & Deterministic Builds - -**Lockfile Validation** -- Ensures lockfiles are up-to-date with manifests -- Validates integrity hashes and version consistency -- Identifies drift between environments -- Analyzes lockfile conflicts and resolution strategies -- Ensures deterministic, reproducible builds - -**Environment Consistency** -- Compares dependencies across environments (dev/staging/prod) -- Identifies version mismatches between team members -- Validates CI/CD environment consistency -- Tracks dependency resolution differences - -## Technical Architecture - -### Scanner Engine (`dep_scanner.py`) -- Multi-format parser supporting 8+ package ecosystems -- Built-in vulnerability database with 500+ CVE patterns -- Transitive dependency resolution from lockfiles -- JSON and human-readable output formats -- Configurable scanning depth and exclusion patterns - -### License Analyzer (`license_checker.py`) -- License detection from package metadata and files -- Compatibility matrix with 20+ license types -- Conflict detection engine with remediation suggestions -- Risk scoring based on distribution and usage context -- Export capabilities for legal review - -### Upgrade Planner (`upgrade_planner.py`) -- Semantic version analysis with breaking change prediction -- Dependency ordering based on risk and interdependence -- Migration checklists with testing recommendations -- Rollback procedures for failed upgrades -- Timeline estimation for upgrade cycles - -## Use Cases & Applications - -### Security Teams -- **Vulnerability Management**: Continuous scanning for security issues -- **Incident Response**: Rapid assessment of vulnerable dependencies -- **Supply Chain Monitoring**: Tracking third-party security posture -- **Compliance Reporting**: Automated security compliance documentation - -### Legal & Compliance Teams -- **License Auditing**: Comprehensive license compliance verification -- **Risk Assessment**: Legal risk analysis for software distribution 
-- **Due Diligence**: Dependency licensing for M&A activities -- **Policy Enforcement**: Automated license policy compliance - -### Development Teams -- **Dependency Hygiene**: Regular cleanup of unused dependencies -- **Upgrade Planning**: Strategic dependency update scheduling -- **Performance Optimization**: Bundle size optimization through dep analysis -- **Technical Debt**: Identifying and prioritizing dependency technical debt - -### DevOps & Platform Teams -- **Build Optimization**: Faster builds through dependency optimization -- **Security Automation**: Automated vulnerability scanning in CI/CD -- **Environment Consistency**: Ensuring consistent dependencies across environments -- **Release Management**: Dependency-aware release planning - -## Integration Patterns - -### CI/CD Pipeline Integration -```bash -# Security gate in CI -python dep_scanner.py /project --format json --fail-on-high -python license_checker.py /project --policy strict --format json -``` - -### Scheduled Audits -```bash -# Weekly dependency audit -./audit_dependencies.sh > weekly_report.html -python upgrade_planner.py deps.json --timeline 30days -``` - -### Development Workflow -```bash -# Pre-commit dependency check -python dep_scanner.py . --quick-scan -python license_checker.py . 
--warn-conflicts -``` - -## Advanced Features - -### Custom Vulnerability Databases -- Support for internal/proprietary vulnerability feeds -- Custom CVE pattern definitions -- Organization-specific risk scoring -- Integration with enterprise security tools - -### Policy-Based Scanning -- Configurable license policies by project type -- Custom risk thresholds and escalation rules -- Automated policy enforcement and notifications -- Exception management for approved violations - -### Reporting & Dashboards -- Executive summaries for management -- Technical reports for development teams -- Trend analysis and dependency health metrics -- Integration with project management tools - -### Multi-Project Analysis -- Portfolio-level dependency analysis -- Shared dependency impact analysis -- Organization-wide license compliance -- Cross-project vulnerability propagation - -## Best Practices - -### Scanning Frequency -- **Security Scans**: Daily or on every commit -- **License Audits**: Weekly or monthly -- **Upgrade Planning**: Monthly or quarterly -- **Full Dependency Audit**: Quarterly - -### Risk Management -1. **Prioritize Security**: Address high/critical CVEs immediately -2. **License First**: Ensure compliance before functionality -3. **Gradual Updates**: Incremental dependency updates -4. **Test Thoroughly**: Comprehensive testing after updates -5. **Monitor Continuously**: Automated monitoring and alerting - -### Team Workflows -1. **Security Champions**: Designate dependency security owners -2. **Review Process**: Mandatory review for new dependencies -3. **Update Cycles**: Regular, scheduled dependency updates -4. **Documentation**: Maintain dependency rationale and decisions -5. 
**Training**: Regular team education on dependency security - -## Metrics & KPIs - -### Security Metrics -- Mean Time to Patch (MTTP) for vulnerabilities -- Number of high/critical vulnerabilities -- Percentage of dependencies with known vulnerabilities -- Security debt accumulation rate - -### Compliance Metrics -- License compliance percentage -- Number of license conflicts -- Time to resolve compliance issues -- Policy violation frequency - -### Maintenance Metrics -- Percentage of up-to-date dependencies -- Average dependency age -- Number of abandoned dependencies -- Upgrade success rate - -### Efficiency Metrics -- Bundle size reduction percentage -- Unused dependency elimination rate -- Build time improvement -- Developer productivity impact - -## Troubleshooting Guide - -### Common Issues -1. **False Positives**: Tuning vulnerability detection sensitivity -2. **License Ambiguity**: Resolving unclear or multiple licenses -3. **Breaking Changes**: Managing major version upgrades -4. 
**Performance Impact**: Optimizing scanning for large codebases - -### Resolution Strategies -- Whitelist false positives with documentation -- Contact maintainers for license clarification -- Implement feature flags for risky upgrades -- Use incremental scanning for large projects - -## Future Enhancements - -### Planned Features -- Machine learning for vulnerability prediction -- Automated dependency update pull requests -- Integration with container image scanning -- Real-time dependency monitoring dashboards -- Natural language policy definition - -### Ecosystem Expansion -- Additional language support (Swift, Kotlin, Dart) -- Container and infrastructure dependencies -- Development tool and build system dependencies -- Cloud service and SaaS dependency tracking - ---- - -## Quick Start - -```bash -# Scan project for vulnerabilities and licenses -python scripts/dep_scanner.py /path/to/project - -# Check license compliance -python scripts/license_checker.py /path/to/project --policy strict - -# Plan dependency upgrades -python scripts/upgrade_planner.py deps.json --risk-threshold medium -``` - -For detailed usage instructions, see [README.md](README.md). - ---- - -*This skill provides comprehensive dependency management capabilities essential for maintaining secure, compliant, and efficient software projects. Regular use helps teams stay ahead of security threats, maintain legal compliance, and optimize their dependency ecosystems.* diff --git a/packages/catalog/catalog/skills/eng-docker-development/SKILL.md b/packages/catalog/catalog/skills/eng-docker-development/SKILL.md deleted file mode 100644 index bd65d708..00000000 --- a/packages/catalog/catalog/skills/eng-docker-development/SKILL.md +++ /dev/null @@ -1,366 +0,0 @@ ---- -name: "docker-development" -description: "Docker and container development agent skill and plugin for Dockerfile optimization, docker-compose orchestration, multi-stage builds, and container security hardening. 
Use when: user wants to optimize a Dockerfile, create or improve docker-compose configurations, implement multi-stage builds, audit container security, reduce image size, or follow container best practices. Covers build performance, layer caching, secret management, and production-ready container patterns." -license: MIT -metadata: - version: 1.0.0 - author: Alireza Rezvani - category: engineering - updated: 2026-03-16 ---- - -# Docker Development - -> Smaller images. Faster builds. Secure containers. No guesswork. - -Opinionated Docker workflow that turns bloated Dockerfiles into production-grade containers. Covers optimization, multi-stage builds, compose orchestration, and security hardening. - -Not a Docker tutorial — a set of concrete decisions about how to build containers that don't waste time, space, or attack surface. - ---- - -## Slash Commands - -| Command | What it does | -|---------|-------------| -| `/docker:optimize` | Analyze and optimize a Dockerfile for size, speed, and layer caching | -| `/docker:compose` | Generate or improve docker-compose.yml with best practices | -| `/docker:security` | Audit a Dockerfile or running container for security issues | - ---- - -## When This Skill Activates - -Recognize these patterns from the user: - -- "Optimize this Dockerfile" -- "My Docker build is slow" -- "Create a docker-compose for this project" -- "Is this Dockerfile secure?" -- "Reduce my Docker image size" -- "Set up multi-stage builds" -- "Docker best practices for [language/framework]" -- Any request involving: Dockerfile, docker-compose, container, image size, build cache, Docker security - -If the user has a Dockerfile or wants to containerize something → this skill applies. - ---- - -## Workflow - -### `/docker:optimize` — Dockerfile Optimization - -1. **Analyze current state** - - Read the Dockerfile - - Identify base image and its size - - Count layers (each RUN/COPY/ADD = 1 layer) - - Check for common anti-patterns - -2. 
**Apply optimization checklist** - - ``` - BASE IMAGE - ├── Use specific tags, never :latest in production - ├── Prefer slim/alpine variants (debian-slim > ubuntu > debian) - ├── Pin digest for reproducibility in CI: image@sha256:... - └── Match base to runtime needs (don't use python:3.12 for a compiled binary) - - LAYER OPTIMIZATION - ├── Combine related RUN commands with && \ - ├── Order layers: least-changing first (deps before source code) - ├── Clean package manager cache in the same RUN layer - ├── Use .dockerignore to exclude unnecessary files - └── Separate build deps from runtime deps - - BUILD CACHE - ├── COPY dependency files before source code (package.json, requirements.txt, go.mod) - ├── Install deps in a separate layer from code copy - ├── Use BuildKit cache mounts: --mount=type=cache,target=/root/.cache - └── Avoid COPY . . before dependency installation - - MULTI-STAGE BUILDS - ├── Stage 1: build (full SDK, build tools, dev deps) - ├── Stage 2: runtime (minimal base, only production artifacts) - ├── COPY --from=builder only what's needed - └── Final image should have NO build tools, NO source code, NO dev deps - ``` - -3. **Generate optimized Dockerfile** - - Apply all relevant optimizations - - Add inline comments explaining each decision - - Report estimated size reduction - -4. **Validate** - ```bash - python3 scripts/dockerfile_analyzer.py Dockerfile - ``` - -### `/docker:compose` — Docker Compose Configuration - -1. **Identify services** - - Application (web, API, worker) - - Database (postgres, mysql, redis, mongo) - - Cache (redis, memcached) - - Queue (rabbitmq, kafka) - - Reverse proxy (nginx, traefik, caddy) - -2. 
**Apply compose best practices** - - ``` - SERVICES - ├── Use depends_on with condition: service_healthy - ├── Add healthchecks for every service - ├── Set resource limits (mem_limit, cpus) - ├── Use named volumes for persistent data - └── Pin image versions - - NETWORKING - ├── Create explicit networks (don't rely on default) - ├── Separate frontend and backend networks - ├── Only expose ports that need external access - └── Use internal: true for backend-only networks - - ENVIRONMENT - ├── Use env_file for secrets, not inline environment - ├── Never commit .env files (add to .gitignore) - ├── Use variable substitution: ${VAR:-default} - └── Document all required env vars - - DEVELOPMENT vs PRODUCTION - ├── Use compose profiles or override files - ├── Dev: bind mounts for hot reload, debug ports exposed - ├── Prod: named volumes, no debug ports, restart: unless-stopped - └── docker-compose.override.yml for dev-only config - ``` - -3. **Generate compose file** - - Output docker-compose.yml with healthchecks, networks, volumes - - Generate .env.example with all required variables documented - - Add dev/prod profile annotations - -### `/docker:security` — Container Security Audit - -1. **Dockerfile audit** - - | Check | Severity | Fix | - |-------|----------|-----| - | Running as root | Critical | Add `USER nonroot` after creating user | - | Using :latest tag | High | Pin to specific version | - | Secrets in ENV/ARG | Critical | Use BuildKit secrets: `--mount=type=secret` | - | COPY with broad glob | Medium | Use specific paths, add .dockerignore | - | Unnecessary EXPOSE | Low | Only expose ports the app uses | - | No HEALTHCHECK | Medium | Add HEALTHCHECK with appropriate interval | - | Privileged instructions | High | Avoid `--privileged`, drop capabilities | - | Package manager cache retained | Low | Clean in same RUN layer | - -2. 
**Runtime security checks** - - | Check | Severity | Fix | - |-------|----------|-----| - | Container running as root | Critical | Set user in Dockerfile or compose | - | Writable root filesystem | Medium | Use `read_only: true` in compose | - | All capabilities retained | High | Drop all, add only needed: `cap_drop: [ALL]` | - | No resource limits | Medium | Set `mem_limit` and `cpus` | - | Host network mode | High | Use bridge or custom network | - | Sensitive mounts | Critical | Never mount /etc, /var/run/docker.sock in prod | - | No log driver configured | Low | Set `logging:` with size limits | - -3. **Generate security report** - ``` - SECURITY AUDIT — [Dockerfile/Image name] - Date: [timestamp] - - CRITICAL: [count] - HIGH: [count] - MEDIUM: [count] - LOW: [count] - - [Detailed findings with fix recommendations] - ``` - ---- - -## Tooling - -### `scripts/dockerfile_analyzer.py` - -CLI utility for static analysis of Dockerfiles. - -**Features:** -- Layer count and optimization suggestions -- Base image analysis with size estimates -- Anti-pattern detection (15+ rules) -- Security issue flagging -- Multi-stage build detection and validation -- JSON and text output - -**Usage:** -```bash -# Analyze a Dockerfile -python3 scripts/dockerfile_analyzer.py Dockerfile - -# JSON output -python3 scripts/dockerfile_analyzer.py Dockerfile --output json - -# Analyze with security focus -python3 scripts/dockerfile_analyzer.py Dockerfile --security - -# Check a specific directory -python3 scripts/dockerfile_analyzer.py path/to/Dockerfile -``` - -### `scripts/compose_validator.py` - -CLI utility for validating docker-compose files. 
- -**Features:** -- Service dependency validation -- Healthcheck presence detection -- Network configuration analysis -- Volume mount validation -- Environment variable audit -- Port conflict detection -- Best practice scoring - -**Usage:** -```bash -# Validate a compose file -python3 scripts/compose_validator.py docker-compose.yml - -# JSON output -python3 scripts/compose_validator.py docker-compose.yml --output json - -# Strict mode (fail on warnings) -python3 scripts/compose_validator.py docker-compose.yml --strict -``` - ---- - -## Multi-Stage Build Patterns - -### Pattern 1: Compiled Language (Go, Rust, C++) - -```dockerfile -# Build stage -FROM golang:1.22-alpine AS builder -WORKDIR /app -COPY go.mod go.sum ./ -RUN go mod download -COPY . . -RUN CGO_ENABLED=0 go build -ldflags="-s -w" -o /app/server ./cmd/server - -# Runtime stage -FROM gcr.io/distroless/static-debian12 -COPY --from=builder /app/server /server -USER nonroot:nonroot -ENTRYPOINT ["/server"] -``` - -### Pattern 2: Node.js / TypeScript - -```dockerfile -# Dependencies stage -FROM node:20-alpine AS deps -WORKDIR /app -COPY package.json package-lock.json ./ -RUN npm ci --production=false - -# Build stage -FROM deps AS builder -COPY . . -RUN npm run build - -# Runtime stage -FROM node:20-alpine -WORKDIR /app -RUN addgroup -g 1001 -S appgroup && adduser -S appuser -u 1001 -COPY --from=builder /app/dist ./dist -COPY --from=deps /app/node_modules ./node_modules -COPY package.json ./ -USER appuser -EXPOSE 3000 -CMD ["node", "dist/index.js"] -``` - -### Pattern 3: Python - -```dockerfile -# Build stage -FROM python:3.12-slim AS builder -WORKDIR /app -COPY requirements.txt . -RUN pip install --no-cache-dir --prefix=/install -r requirements.txt - -# Runtime stage -FROM python:3.12-slim -WORKDIR /app -RUN groupadd -r appgroup && useradd -r -g appgroup appuser -COPY --from=builder /install /usr/local -COPY . . 
-USER appuser -EXPOSE 8000 -CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] -``` - ---- - -## Base Image Decision Tree - -``` -Is it a compiled binary (Go, Rust, C)? -├── Yes → distroless/static or scratch -└── No - ├── Need a shell for debugging? - │ ├── Yes → alpine variant (e.g., node:20-alpine) - │ └── No → distroless variant - ├── Need glibc (not musl)? - │ ├── Yes → slim variant (e.g., python:3.12-slim) - │ └── No → alpine variant - └── Need specific OS packages? - ├── Many → debian-slim - └── Few → alpine + apk add -``` - ---- - -## Proactive Triggers - -Flag these without being asked: - -- **Dockerfile uses :latest** → Suggest pinning to a specific version tag. -- **No .dockerignore** → Create one. At minimum: `.git`, `node_modules`, `__pycache__`, `.env`. -- **COPY . . before dependency install** → Cache bust. Reorder to install deps first. -- **Running as root** → Add USER instruction. No exceptions for production. -- **Secrets in ENV or ARG** → Use BuildKit secret mounts. Never bake secrets into layers. -- **Image over 1GB** → Multi-stage build required. No reason for a production image this large. -- **No healthcheck** → Add one. Orchestrators (Compose, K8s) need it for proper lifecycle management. -- **apt-get without cleanup in same layer** → `rm -rf /var/lib/apt/lists/*` in the same RUN. - ---- - -## Installation - -### One-liner (any tool) -```bash -git clone https://github.com/alirezarezvani/claude-skills.git -cp -r claude-skills/engineering/docker-development ~/.claude/skills/ -``` - -### Multi-tool install -```bash -./scripts/convert.sh --skill docker-development --tool codex|gemini|cursor|windsurf|openclaw -``` - -### OpenClaw -```bash -clawhub install cs-docker-development -``` - ---- - -## Related Skills - -- **senior-devops** — Broader DevOps scope (CI/CD, IaC, monitoring). Complementary — use docker-development for container-specific work, senior-devops for pipeline and infrastructure. 
-- **senior-security** — Application security. Complementary — docker-development covers container security, senior-security covers application-level threats. -- **autoresearch-agent** — Can optimize Docker build times or image sizes as measurable experiments. -- **ci-cd-pipeline-builder** — Pipeline construction. Complementary — docker-development builds the containers, ci-cd-pipeline-builder deploys them. diff --git a/packages/catalog/catalog/skills/eng-env-secrets-manager/SKILL.md b/packages/catalog/catalog/skills/eng-env-secrets-manager/SKILL.md deleted file mode 100644 index 21217a4e..00000000 --- a/packages/catalog/catalog/skills/eng-env-secrets-manager/SKILL.md +++ /dev/null @@ -1,260 +0,0 @@ ---- -name: "env-secrets-manager" -description: "Env & Secrets Manager" ---- - -# Env & Secrets Manager - -**Tier:** POWERFUL -**Category:** Engineering -**Domain:** Security / DevOps / Configuration Management - ---- - -## Overview - -Manage environment-variable hygiene and secrets safety across local development and production workflows. This skill focuses on practical auditing, drift awareness, and rotation readiness. - -## Core Capabilities - -- `.env` and `.env.example` lifecycle guidance -- Secret leak detection for repository working trees -- Severity-based findings for likely credentials -- Operational pointers for rotation and containment -- Integration-ready outputs for CI checks - ---- - -## When to Use - -- Before pushing commits that touched env/config files -- During security audits and incident triage -- When onboarding contributors who need safe env conventions -- When validating that no obvious secrets are hardcoded - ---- - -## Quick Start - -```bash -# Scan a repository for likely secret leaks -python3 scripts/env_auditor.py /path/to/repo - -# JSON output for CI pipelines -python3 scripts/env_auditor.py /path/to/repo --json -``` - ---- - -## Recommended Workflow - -1. Run `scripts/env_auditor.py` on the repository root. -2. 
Prioritize `critical` and `high` findings first. -3. Rotate real credentials and remove exposed values. -4. Update `.env.example` and `.gitignore` as needed. -5. Add or tighten pre-commit/CI secret scanning gates. - ---- - -## Reference Docs - -- `references/validation-detection-rotation.md` -- `references/secret-patterns.md` - ---- - -## Common Pitfalls - -- Committing real values in `.env.example` -- Rotating one system but missing downstream consumers -- Logging secrets during debugging or incident response -- Treating suspected leaks as low urgency without validation - -## Best Practices - -1. Use a secret manager as the production source of truth. -2. Keep dev env files local and gitignored. -3. Enforce detection in CI before merge. -4. Re-test application paths immediately after credential rotation. - ---- - -## Cloud Secret Store Integration - -Production applications should never read secrets from `.env` files or environment variables baked into container images. Use a dedicated secret store instead. - -### Provider Comparison - -| Provider | Best For | Key Feature | -|----------|----------|-------------| -| **HashiCorp Vault** | Multi-cloud / hybrid | Dynamic secrets, policy engine, pluggable backends | -| **AWS Secrets Manager** | AWS-native workloads | Native Lambda/ECS/EKS integration, automatic RDS rotation | -| **Azure Key Vault** | Azure-native workloads | Managed HSM, Azure AD RBAC, certificate management | -| **GCP Secret Manager** | GCP-native workloads | IAM-based access, automatic replication, versioning | - -### Selection Guidance - -- **Single cloud provider** — use the cloud-native secret manager. It integrates tightly with IAM, reduces operational overhead, and costs less than self-hosting. -- **Multi-cloud or hybrid** — use HashiCorp Vault. It provides a uniform API across environments and supports dynamic secret generation (database credentials, cloud IAM keys) that expire automatically. 
-- **Kubernetes-heavy** — combine External Secrets Operator with any backend above to sync secrets into K8s `Secret` objects without hardcoding. - -### Application Access Patterns - -1. **SDK/API pull** — application fetches secret at startup or on-demand via provider SDK. -2. **Sidecar injection** — a sidecar container (e.g., Vault Agent) writes secrets to a shared volume or injects them as environment variables. -3. **Init container** — a Kubernetes init container fetches secrets before the main container starts. -4. **CSI driver** — secrets mount as a filesystem volume via the Secrets Store CSI Driver. - -> **Cross-reference:** See `engineering/secrets-vault-manager` for production vault infrastructure patterns, HA deployment, and disaster recovery procedures. - ---- - -## Secret Rotation Workflow - -Stale secrets are a liability. Rotation ensures that even if a credential leaks, its useful lifetime is bounded. - -### Phase 1: Detection - -- Track secret creation and expiry dates in your secret store metadata. -- Set alerts at 30, 14, and 7 days before expiry. -- Use `scripts/env_auditor.py` to flag secrets with no recorded rotation date. - -### Phase 2: Rotation - -1. **Generate** a new credential (API key, database password, certificate). -2. **Deploy** the new credential to all consumers (apps, services, pipelines) in parallel. -3. **Verify** each consumer can authenticate using the new credential. -4. **Revoke** the old credential only after all consumers are confirmed healthy. -5. **Update** metadata with the new rotation timestamp and next rotation date. - -### Phase 3: Automation - -- **AWS Secrets Manager** — use built-in Lambda-based rotation for RDS, Redshift, and DocumentDB. -- **HashiCorp Vault** — configure dynamic secrets with TTLs; credentials are generated on-demand and auto-expire. -- **Azure Key Vault** — use Event Grid notifications to trigger rotation functions. 
-- **GCP Secret Manager** — use Pub/Sub notifications tied to Cloud Functions for rotation logic. - -### Emergency Rotation Checklist - -When a secret is confirmed leaked: - -1. **Immediately revoke** the compromised credential at the provider level. -2. Generate and deploy a replacement credential to all consumers. -3. Audit access logs for unauthorized usage during the exposure window. -4. Scan git history, CI logs, and artifact registries for the leaked value. -5. File an incident report documenting scope, timeline, and remediation steps. -6. Review and tighten detection controls to prevent recurrence. - ---- - -## CI/CD Secret Injection - -Secrets in CI/CD pipelines require careful handling to avoid exposure in logs, artifacts, or pull request contexts. - -### GitHub Actions - -- Use **repository secrets** or **environment secrets** via `${{ secrets.SECRET_NAME }}`. -- Prefer **OIDC federation** (`aws-actions/configure-aws-credentials` with `role-to-assume`) over long-lived access keys. -- Environment secrets with required reviewers add approval gates for production deployments. -- GitHub automatically masks secrets in logs, but avoid `echo` or `toJSON()` on secret values. - -### GitLab CI - -- Store secrets as **CI/CD variables** with the `masked` and `protected` flags enabled. -- Use **HashiCorp Vault integration** (`secrets:vault`) for dynamic secret injection without storing values in GitLab. -- Scope variables to specific environments (`production`, `staging`) to enforce least privilege. - -### Universal Patterns - -- **Never echo or print** secret values in pipeline output, even for debugging. -- **Use short-lived tokens** (OIDC, STS AssumeRole) instead of static credentials wherever possible. -- **Restrict PR access** — do not expose secrets to pipelines triggered by forks or untrusted branches. -- **Rotate CI secrets** on the same schedule as application secrets; pipeline credentials are attack vectors too. 
-- **Audit pipeline logs** periodically for accidental secret exposure that masking may have missed. - ---- - -## Pre-Commit Secret Detection - -Catching secrets before they reach version control is the most cost-effective defense. Two leading tools cover this space. - -### gitleaks - -```toml -# .gitleaks.toml — minimal configuration -[extend] -useDefault = true - -[[rules]] -id = "custom-internal-token" -description = "Internal service token pattern" -regex = '''INTERNAL_TOKEN_[A-Za-z0-9]{32}''' -secretGroup = 0 -``` - -- Install: `brew install gitleaks` or download from GitHub releases. -- Pre-commit hook: `gitleaks git --pre-commit --staged` -- Baseline scanning: `gitleaks detect --source . --report-path gitleaks-report.json` -- Manage false positives in `.gitleaksignore` (one fingerprint per line). - -### detect-secrets - -```bash -# Generate baseline -detect-secrets scan --all-files > .secrets.baseline - -# Pre-commit hook (via pre-commit framework) -# .pre-commit-config.yaml -repos: - - repo: https://github.com/Yelp/detect-secrets - rev: v1.5.0 - hooks: - - id: detect-secrets - args: ['--baseline', '.secrets.baseline'] -``` - -- Supports **custom plugins** for organization-specific patterns. -- Audit workflow: `detect-secrets audit .secrets.baseline` interactively marks true/false positives. - -### False Positive Management - -- Maintain `.gitleaksignore` or `.secrets.baseline` in version control so the whole team shares exclusions. -- Review false positive lists during security audits — patterns may mask real leaks over time. -- Prefer tightening regex patterns over broadly ignoring files. - ---- - -## Audit Logging - -Knowing who accessed which secret and when is critical for incident investigation and compliance. 
- -### Cloud-Native Audit Trails - -| Provider | Service | What It Captures | -|----------|---------|-----------------| -| **AWS** | CloudTrail | Every `GetSecretValue`, `DescribeSecret`, `RotateSecret` API call | -| **Azure** | Activity Log + Diagnostic Logs | Key Vault access events, including caller identity and IP | -| **GCP** | Cloud Audit Logs | Data access logs for Secret Manager with principal and timestamp | -| **Vault** | Audit Backend | Full request/response logging (file, syslog, or socket backend) | - -### Alerting Strategy - -- Alert on **access from unknown IP ranges** or service accounts outside the expected set. -- Alert on **bulk secret reads** (more than N secrets accessed within a time window). -- Alert on **access outside deployment windows** when no CI/CD pipeline is running. -- Feed audit logs into your SIEM (Splunk, Datadog, Elastic) for correlation with other security events. -- Review audit logs quarterly as part of access recertification. - ---- - -## Cross-References - -This skill covers env hygiene and secret detection. 
For deeper coverage of related domains, see: - -| Skill | Path | Relationship | -|-------|------|-------------| -| **Secrets Vault Manager** | `engineering/secrets-vault-manager` | Production vault infrastructure, HA deployment, DR | -| **Senior SecOps** | `engineering/senior-secops` | Security operations perspective, incident response | -| **CI/CD Pipeline Builder** | `engineering/ci-cd-pipeline-builder` | Pipeline architecture, secret injection patterns | -| **Infrastructure as Code** | `engineering/infrastructure-as-code` | Terraform/Pulumi secret backend configuration | -| **Container Orchestration** | `engineering/container-orchestration` | Kubernetes secret mounting, sealed secrets | diff --git a/packages/catalog/catalog/skills/eng-focused-fix/SKILL.md b/packages/catalog/catalog/skills/eng-focused-fix/SKILL.md deleted file mode 100644 index b936b5ab..00000000 --- a/packages/catalog/catalog/skills/eng-focused-fix/SKILL.md +++ /dev/null @@ -1,318 +0,0 @@ ---- -name: "focused-fix" -description: "Use when the user asks to fix, debug, or make a specific feature/module/area work end-to-end. Triggers: 'make X work', 'fix the Y feature', 'the Z module is broken', 'focus on [area]'. Not for quick single-bug fixes — this is for systematic deep-dive repair across all files and dependencies." ---- - -# Focused Fix — Deep-Dive Feature Repair - -## When to Use - -Activate when the user asks to fix, debug, or make a specific feature/module/area work. Key triggers: -- "make X work" -- "fix the Y feature" -- "the Z module is broken" -- "focus on [area]" -- "this feature needs to work properly" - -This is NOT for quick single-bug fixes (use systematic-debugging for that). This is for when an entire feature or module needs systematic repair — tracing every dependency, reading logs, checking tests, mapping the full dependency graph. - -```dot -digraph when_to_use { - "User reports feature broken" [shape=diamond]; - "Single bug or symptom?" 
[shape=diamond]; - "Use systematic-debugging" [shape=box]; - "Entire feature/module needs repair?" [shape=diamond]; - "Use focused-fix" [shape=box]; - "Something else" [shape=box]; - - "User reports feature broken" -> "Single bug or symptom?"; - "Single bug or symptom?" -> "Use systematic-debugging" [label="yes"]; - "Single bug or symptom?" -> "Entire feature/module needs repair?" [label="no"]; - "Entire feature/module needs repair?" -> "Use focused-fix" [label="yes"]; - "Entire feature/module needs repair?" -> "Something else" [label="no"]; -} -``` - -## The Iron Law - -``` -NO FIXES WITHOUT COMPLETING SCOPE → TRACE → DIAGNOSE FIRST -``` - -If you haven't finished Phase 3, you cannot propose fixes. Period. - -**Violating the letter of these phases is violating the spirit of focused repair.** - -## Protocol — STRICTLY follow these 5 phases IN ORDER - -```dot -digraph phases { - rankdir=LR; - SCOPE [shape=box, label="Phase 1\nSCOPE"]; - TRACE [shape=box, label="Phase 2\nTRACE"]; - DIAGNOSE [shape=box, label="Phase 3\nDIAGNOSE"]; - FIX [shape=box, label="Phase 4\nFIX"]; - VERIFY [shape=box, label="Phase 5\nVERIFY"]; - - SCOPE -> TRACE -> DIAGNOSE -> FIX -> VERIFY; - FIX -> DIAGNOSE [label="fix broke\nsomething else"]; - FIX -> ESCALATE [label="3+ fixes\ncreate new issues"]; - ESCALATE [shape=doubleoctagon, label="STOP\nQuestion Architecture\nDiscuss with User"]; -} -``` - -### Phase 1: SCOPE — Map the Feature Boundary - -Before touching any code, understand the full scope of the feature. - -1. Ask the user: "Which feature/folder should I focus on?" if not already clear -2. Identify the PRIMARY folder/files for this feature -3. Map EVERY file in that folder — read each one, understand its purpose -4. 
Create a feature manifest: - -``` -FEATURE SCOPE: - Primary path: src/features/auth/ - Entry points: [files that are imported by other parts of the app] - Internal files: [files only used within this feature] - Total files: N - Total lines: N -``` - -### Phase 2: TRACE — Map All Dependencies (Inside AND Outside) - -Trace every connection this feature has to the rest of the codebase. - -**INBOUND (what this feature imports):** -1. For every import statement in every file in the feature folder: - - Trace it to its source - - Verify the source file exists - - Verify the imported entity (function, type, component) exists and is exported - - Check if the types/signatures match what the feature expects -2. Check for: - - Environment variables used (grep for process.env, import.meta.env, os.environ, etc.) - - Config files referenced - - Database models/schemas used - - API endpoints called - - Third-party packages imported - -**OUTBOUND (what imports this feature):** -1. Search the entire codebase for imports from this feature folder -2. 
For each consumer: - - Verify they're importing entities that actually exist - - Check if they're using the correct API/interface - - Note if any consumers are using deprecated patterns - -Output format: -``` -DEPENDENCY MAP: - Inbound (this feature depends on): - src/lib/db.ts → used in auth/repository.ts (getUserById, createUser) - src/lib/jwt.ts → used in auth/service.ts (signToken, verifyToken) - @prisma/client → used in auth/repository.ts - process.env.JWT_SECRET → used in auth/service.ts - process.env.DATABASE_URL → used via prisma - - Outbound (depends on this feature): - src/app/api/login/route.ts → imports { login } from auth/service - src/app/api/register/route.ts → imports { register } from auth/service - src/middleware.ts → imports { verifyToken } from auth/service - - Env vars required: JWT_SECRET, DATABASE_URL - Config files: prisma/schema.prisma (User model) -``` - -### Phase 3: DIAGNOSE — Find Every Issue - -Systematically check for problems. Run ALL of these checks: - -**CODE QUALITY:** -- [ ] Every import resolves to a real file/export -- [ ] No circular dependencies within the feature -- [ ] Types are consistent across boundaries (no `any` at interfaces) -- [ ] Error handling exists for all async operations -- [ ] No TODO/FIXME/HACK comments indicating known issues - -**RUNTIME:** -- [ ] All required environment variables are set (check .env) -- [ ] Database migrations are up to date (if applicable) -- [ ] API endpoints return expected shapes -- [ ] No hardcoded values that should be configurable - -**TESTS:** -- [ ] Run ALL tests related to this feature: find them by searching for imports from the feature folder -- [ ] Record every failure with full error output -- [ ] Check test coverage — are there untested code paths? 
- -**LOGS & ERRORS:** -- [ ] Search for any log files, error reports, or Sentry-style error tracking -- [ ] Check git log for recent changes to this feature: `git log --oneline -20 -- <feature-path>` -- [ ] Check if any recent commits might have broken something: `git log --oneline -5 --all -- <feature-path>` - -**CONFIGURATION:** -- [ ] Verify all config files this feature depends on are valid -- [ ] Check for mismatches between development and production configs -- [ ] Verify third-party service credentials are valid (if testable) - -**ROOT-CAUSE CONFIRMATION:** -For each CRITICAL issue found, confirm root cause before adding it to the fix list: -- State clearly: "I think X is the root cause because Y" -- Trace the data/control flow backward to verify — don't trust surface-level symptoms -- If the issue spans multiple components, add diagnostic logging at each boundary to identify which layer fails -- **REQUIRED SUB-SKILL:** For complex bugs found during diagnosis, apply `superpowers:systematic-debugging` Phase 1 (Root Cause Investigation) to confirm before proceeding - -**RISK LABELING:** -Assign each issue a risk label: - -| Risk | Criteria | -|---|---| -| HIGH | Public API surface / breaking interface contract / DB schema / auth or security logic / widely imported module (>3 callers) / git hotspot | -| MED | Internal module with tests / shared utility / config with runtime impact / internal callers of changed functions | -| LOW | Leaf module / isolated file / test-only change / single-purpose helper with no callers | - -Output format: -``` -DIAGNOSIS REPORT: - Issues found: N - - CRITICAL: - 1. [HIGH] [file:line] — description of issue. Root cause: [confirmed explanation] - 2. [HIGH] [file:line] — description of issue. Root cause: [confirmed explanation] - - WARNINGS: - 1. [MED] [file:line] — description of issue - 2.
[LOW] [file:line] — description of issue - - TESTS: - Ran: N tests - Passed: N - Failed: N - [list each failure with one-line summary] -``` - -### Phase 4: FIX — Repair Everything Systematically - -Fix issues in this EXACT order: - -1. **DEPENDENCIES FIRST** — fix broken imports, missing packages, wrong versions -2. **TYPES SECOND** — fix type mismatches at feature boundaries -3. **LOGIC THIRD** — fix actual business logic bugs -4. **TESTS FOURTH** — fix or create tests for each fix -5. **INTEGRATION LAST** — verify the feature works end-to-end with its consumers - -Rules: -- Fix ONE issue at a time -- After each fix, run the related test to confirm it works -- If a fix breaks something else, STOP and re-evaluate (go back to DIAGNOSE) -- Keep a running log of every change made -- Never change code outside the feature folder without explicitly stating why -- Fix HIGH-risk issues before MED, MED before LOW - -**ESCALATION RULE — 3-Strike Architecture Check:** -If 3+ fixes in this phase create NEW issues (not pre-existing ones), STOP immediately. - -This pattern indicates an architectural problem, not a bug collection: -- Each fix reveals new shared state / coupling / problem in a different place -- Fixes require "massive refactoring" to implement -- Each fix creates new symptoms elsewhere - -**Action:** Stop fixing. Tell the user: "3+ fixes have cascaded into new issues. This suggests the feature's architecture may need rethinking, not patching. Here's what I've found: [summary]. Should we continue fixing symptoms or discuss restructuring?" - -Do NOT attempt fix #4 without this discussion. - -Output after each fix: -``` -FIX #1: - File: auth/service.ts:45 - Issue: signToken called with wrong argument order - Change: swapped (expiresIn, payload) to (payload, expiresIn) - Test: auth.test.ts → PASSES -``` - -### Phase 5: VERIFY — Confirm Everything Works - -After all fixes are applied: - -1. Run ALL tests in the feature folder — every single one must pass -2. 
Run ALL tests in files that IMPORT from this feature — must pass -3. Run the full test suite if available — check for regressions -4. If the feature has a UI, describe how to manually verify it -5. Summarize all changes made - -Final output: -``` -FOCUSED FIX COMPLETE: - Feature: auth - Files changed: 4 - Total fixes: 7 - Tests: 23/23 passing - Regressions: 0 - - Changes: - 1. auth/service.ts — fixed token signing argument order - 2. auth/repository.ts — added null check for user lookup - 3. auth/middleware.ts — fixed async error handling - 4. auth/types.ts — aligned UserResponse type with actual DB schema - - Consumers verified: - - src/app/api/login/route.ts ✅ - - src/app/api/register/route.ts ✅ - - src/middleware.ts ✅ -``` - -## Red Flags — STOP and Return to Current Phase - -If you catch yourself thinking any of these, you are skipping phases: - -- "I can see the bug, let me just fix it" → STOP. You haven't traced dependencies yet. -- "Scoping is overkill, it's obviously just this file" → STOP. That's always wrong for feature-level fixes. -- "I'll map dependencies after I fix the obvious stuff" → STOP. You'll miss root causes. -- "The user said fix X, so I only need to look at X" → STOP. Features have dependencies. -- "Tests are passing so I'm done" → STOP. Did you run consumer tests too? -- "I don't need to check env vars for this" → STOP. Config issues masquerade as code bugs. -- "One more fix should do it" (after 2+ cascading failures) → STOP. Escalate. -- "I'll skip the diagnosis report, the fixes are obvious" → STOP. Write it down. - -**ALL of these mean: Return to the phase you're supposed to be in.** - -## Common Rationalizations - -| Excuse | Reality | -|---|---| -| "The feature is small, I don't need all 5 phases" | Small features have dependencies too. Phases 1-2 take minutes for small features — do them. | -| "I already know this codebase" | Knowledge decays. Trace the actual imports, don't rely on memory. 
| -| "The user wants speed, not process" | Skipping phases causes rework. Systematic is faster than thrashing. | -| "Only one file is broken" | If only one file were broken, the user would say "fix this bug", not "make the feature work." | -| "I fixed the tests, so it works" | Tests can pass while consumers are broken. Verify Phase 5 fully. | -| "The dependency map is too big to trace" | Then the feature is too big to fix without tracing. That's exactly why you need it. | -| "Root cause is obvious, I don't need to confirm" | "Obvious" root causes are wrong 40% of the time. Confirm with evidence. | -| "3 cascading failures is normal for a big fix" | 3 cascading failures means you're patching symptoms of an architectural problem. | - -## Anti-Patterns — NEVER do these - -| Anti-Pattern | Why It's Wrong | -|---|---| -| Starting to fix code before mapping all dependencies | You'll miss root causes and create whack-a-mole fixes | -| Fixing only the file the user mentioned | Related files likely have issues too | -| Ignoring environment variables and configuration | Many "code bugs" are actually config issues | -| Skipping the test run phase | You can't verify fixes without running tests | -| Making changes outside the feature folder without explaining why | Unexpected side effects confuse the user | -| Fixing symptoms in consumer files instead of root cause in feature | Band-aids that break when the next consumer appears | -| Declaring "done" without running verification tests | Untested fixes are unverified fixes | -| Changing the public API without updating all consumers | Breaks everything that depends on the feature | - -## Related Skills - -- **`superpowers:systematic-debugging`** — Use within Phase 3 for root-cause tracing of individual complex bugs -- **`superpowers:verification-before-completion`** — Use within Phase 5 before claiming the feature is fixed -- **`scope`** — If you need to understand blast radius before starting, run scope first then focused-fix - 
-## Quick Reference - -| Phase | Key Action | Output | -|---|---|---| -| SCOPE | Read every file, map entry points | Feature manifest | -| TRACE | Map inbound + outbound dependencies | Dependency map | -| DIAGNOSE | Check code, runtime, tests, logs, config | Diagnosis report | -| FIX | Fix in order: deps → types → logic → tests → integration | Fix log per issue | -| VERIFY | Run all tests, check consumers, summarize | Completion report | diff --git a/packages/catalog/catalog/skills/eng-git-worktree-manager/SKILL.md b/packages/catalog/catalog/skills/eng-git-worktree-manager/SKILL.md deleted file mode 100644 index 6e6815a9..00000000 --- a/packages/catalog/catalog/skills/eng-git-worktree-manager/SKILL.md +++ /dev/null @@ -1,193 +0,0 @@ ---- -name: "git-worktree-manager" -description: "Git Worktree Manager" ---- - -# Git Worktree Manager - -**Tier:** POWERFUL -**Category:** Engineering -**Domain:** Parallel Development & Branch Isolation - -## Overview - -Use this skill to run parallel feature work safely with Git worktrees. It standardizes branch isolation, port allocation, environment sync, and cleanup so each worktree behaves like an independent local app without stepping on another branch. - -This skill is optimized for multi-agent workflows where each agent or terminal session owns one worktree. 
- -## Core Capabilities - -- Create worktrees from new or existing branches with deterministic naming -- Auto-allocate non-conflicting ports per worktree and persist assignments -- Copy local environment files (`.env*`) from main repo to new worktree -- Optionally install dependencies based on lockfile detection -- Detect stale worktrees and uncommitted changes before cleanup -- Identify merged branches and safely remove outdated worktrees - -## When to Use - -- You need 2+ concurrent branches open locally -- You want isolated dev servers for feature, hotfix, and PR validation -- You are working with multiple agents that must not share a branch -- Your current branch is blocked but you need to ship a quick fix now -- You want repeatable cleanup instead of ad-hoc `rm -rf` operations - -## Key Workflows - -### 1. Create a Fully-Prepared Worktree - -1. Pick a branch name and worktree name. -2. Run the manager script (creates branch if missing). -3. Review generated port map. -4. Start app using allocated ports. - -```bash -python scripts/worktree_manager.py \ - --repo . \ - --branch feature/new-auth \ - --name wt-auth \ - --base-branch main \ - --install-deps \ - --format text -``` - -If you use JSON automation input: - -```bash -cat config.json | python scripts/worktree_manager.py --format json -# or -python scripts/worktree_manager.py --input config.json --format json -``` - -### 2. Run Parallel Sessions - -Recommended convention: - -- Main repo: integration branch (`main`/`develop`) on default port -- Worktree A: feature branch + offset ports -- Worktree B: hotfix branch + next offset - -Each worktree contains `.worktree-ports.json` with assigned ports. - -### 3. Cleanup with Safety Checks - -1. Scan all worktrees and stale age. -2. Inspect dirty trees and branch merge status. -3. Remove only merged + clean worktrees, or force explicitly. - -```bash -python scripts/worktree_cleanup.py --repo . 
--stale-days 14 --format text -python scripts/worktree_cleanup.py --repo . --remove-merged --format text -``` - -### 4. Docker Compose Pattern - -Use per-worktree override files mapped from allocated ports. The script outputs a deterministic port map; apply it to `docker-compose.worktree.yml`. - -See [docker-compose-patterns.md](references/docker-compose-patterns.md) for concrete templates. - -### 5. Port Allocation Strategy - -Default strategy is `base + (index * stride)` with collision checks: - -- App: `3000` -- Postgres: `5432` -- Redis: `6379` -- Stride: `10` - -See [port-allocation-strategy.md](references/port-allocation-strategy.md) for the full strategy and edge cases. - -## Script Interfaces - -- `python scripts/worktree_manager.py --help` - - Create/list worktrees - - Allocate/persist ports - - Copy `.env*` files - - Optional dependency installation -- `python scripts/worktree_cleanup.py --help` - - Stale detection by age - - Dirty-state detection - - Merged-branch detection - - Optional safe removal - -Both tools support stdin JSON and `--input` file mode for automation pipelines. - -## Common Pitfalls - -1. Creating worktrees inside the main repo directory -2. Reusing `localhost:3000` across all branches -3. Sharing one database URL across isolated feature branches -4. Removing a worktree with uncommitted changes -5. Forgetting to prune old metadata after branch deletion -6. Assuming merged status without checking against the target branch - -## Best Practices - -1. One branch per worktree, one agent per worktree. -2. Keep worktrees short-lived; remove after merge. -3. Use a deterministic naming pattern (`wt-<topic>`). -4. Persist port mappings in a file, not memory or terminal notes. -5. Run cleanup scan weekly in active repos. -6. Use `--format json` for machine flows and `--format text` for human review. -7. Never force-remove dirty worktrees unless changes are intentionally discarded. - -## Validation Checklist - -Before claiming setup complete: - -1.
`git worktree list` shows expected path + branch. -2. `.worktree-ports.json` exists and contains unique ports. -3. `.env` files copied successfully (if present in source repo). -4. Dependency install command exits with code `0` (if enabled). -5. Cleanup scan reports no unintended stale dirty trees. - -## References - -- [port-allocation-strategy.md](references/port-allocation-strategy.md) -- [docker-compose-patterns.md](references/docker-compose-patterns.md) -- [README.md](README.md) for quick start and installation details - -## Decision Matrix - -Use this quick selector before creating a new worktree: - -- Need isolated dependencies and server ports -> create a new worktree -- Need only a quick local diff review -> stay on current tree -- Need hotfix while feature branch is dirty -> create dedicated hotfix worktree -- Need ephemeral reproduction branch for bug triage -> create temporary worktree and cleanup same day - -## Operational Checklist - -### Before Creation - -1. Confirm main repo has clean baseline or intentional WIP commits. -2. Confirm target branch naming convention. -3. Confirm required base branch exists (`main`/`develop`). -4. Confirm no reserved local ports are already occupied by non-repo services. - -### After Creation - -1. Verify `git status` branch matches expected branch. -2. Verify `.worktree-ports.json` exists. -3. Verify app boots on allocated app port. -4. Verify DB and cache endpoints target isolated ports. - -### Before Removal - -1. Verify branch has upstream and is merged when intended. -2. Verify no uncommitted files remain. -3. Verify no running containers/processes depend on this worktree path. - -## CI and Team Integration - -- Use worktree path naming that maps to task ID (`wt-1234-auth`). -- Include the worktree path in terminal title to avoid wrong-window commits. -- In automated setups, persist creation metadata in CI artifacts/logs. -- Trigger cleanup report in scheduled jobs and post summary to team channel. 
- -## Failure Recovery - -- If `git worktree add` fails due to existing path: inspect path, do not overwrite. -- If dependency install fails: keep worktree created, mark status and continue manual recovery. -- If env copy fails: continue with warning and explicit missing file list. -- If port allocation collides with external service: rerun with adjusted base ports. diff --git a/packages/catalog/catalog/skills/frontend-design/SKILL.md b/packages/catalog/catalog/skills/frontend-design/SKILL.md deleted file mode 100644 index 5be498e2..00000000 --- a/packages/catalog/catalog/skills/frontend-design/SKILL.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -name: frontend-design -description: Create distinctive, production-grade frontend interfaces with high design quality. Use this skill when the user asks to build web components, pages, artifacts, posters, or applications (examples include websites, landing pages, dashboards, React components, HTML/CSS layouts, or when styling/beautifying any web UI). Generates creative, polished code and UI design that avoids generic AI aesthetics. -license: Complete terms in LICENSE.txt ---- - -This skill guides creation of distinctive, production-grade frontend interfaces that avoid generic "AI slop" aesthetics. Implement real working code with exceptional attention to aesthetic details and creative choices. - -The user provides frontend requirements: a component, page, application, or interface to build. They may include context about the purpose, audience, or technical constraints. - -## Design Thinking - -Before coding, understand the context and commit to a BOLD aesthetic direction: -- **Purpose**: What problem does this interface solve? Who uses it? -- **Tone**: Pick an extreme: brutally minimal, maximalist chaos, retro-futuristic, organic/natural, luxury/refined, playful/toy-like, editorial/magazine, brutalist/raw, art deco/geometric, soft/pastel, industrial/utilitarian, etc. There are so many flavors to choose from. 
Use these for inspiration but design one that is true to the aesthetic direction. -- **Constraints**: Technical requirements (framework, performance, accessibility). -- **Differentiation**: What makes this UNFORGETTABLE? What's the one thing someone will remember? - -**CRITICAL**: Choose a clear conceptual direction and execute it with precision. Bold maximalism and refined minimalism both work - the key is intentionality, not intensity. - -Then implement working code (HTML/CSS/JS, React, Vue, etc.) that is: -- Production-grade and functional -- Visually striking and memorable -- Cohesive with a clear aesthetic point-of-view -- Meticulously refined in every detail - -## Frontend Aesthetics Guidelines - -Focus on: -- **Typography**: Choose fonts that are beautiful, unique, and interesting. Avoid generic fonts like Arial and Inter; opt instead for distinctive choices that elevate the frontend's aesthetics; unexpected, characterful font choices. Pair a distinctive display font with a refined body font. -- **Color & Theme**: Commit to a cohesive aesthetic. Use CSS variables for consistency. Dominant colors with sharp accents outperform timid, evenly-distributed palettes. -- **Motion**: Use animations for effects and micro-interactions. Prioritize CSS-only solutions for HTML. Use Motion library for React when available. Focus on high-impact moments: one well-orchestrated page load with staggered reveals (animation-delay) creates more delight than scattered micro-interactions. Use scroll-triggering and hover states that surprise. -- **Spatial Composition**: Unexpected layouts. Asymmetry. Overlap. Diagonal flow. Grid-breaking elements. Generous negative space OR controlled density. -- **Backgrounds & Visual Details**: Create atmosphere and depth rather than defaulting to solid colors. Add contextual effects and textures that match the overall aesthetic. 
Apply creative forms like gradient meshes, noise textures, geometric patterns, layered transparencies, dramatic shadows, decorative borders, custom cursors, and grain overlays. - -NEVER use generic AI-generated aesthetics like overused font families (Inter, Roboto, Arial, system fonts), cliched color schemes (particularly purple gradients on white backgrounds), predictable layouts and component patterns, and cookie-cutter design that lacks context-specific character. - -Interpret creatively and make unexpected choices that feel genuinely designed for the context. No design should be the same. Vary between light and dark themes, different fonts, different aesthetics. NEVER converge on common choices (Space Grotesk, for example) across generations. - -**IMPORTANT**: Match implementation complexity to the aesthetic vision. Maximalist designs need elaborate code with extensive animations and effects. Minimalist or refined designs need restraint, precision, and careful attention to spacing, typography, and subtle details. Elegance comes from executing the vision well. - -Remember: Claude is capable of extraordinary creative work. Don't hold back, show what can truly be created when thinking outside the box and committing fully to a distinctive vision. diff --git a/packages/catalog/catalog/skills/internal-comms/SKILL.md b/packages/catalog/catalog/skills/internal-comms/SKILL.md deleted file mode 100644 index 63003abf..00000000 --- a/packages/catalog/catalog/skills/internal-comms/SKILL.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -name: internal-comms -description: A set of resources to help me write all kinds of internal communications, using the formats that my company likes to use. Claude should use this skill whenever asked to write some sort of internal communications (status reports, leadership updates, 3P updates, company newsletters, FAQs, incident reports, project updates, etc.). 
-license: Complete terms in LICENSE.txt ---- - -## When to use this skill -To write internal communications, use this skill for: -- 3P updates (Progress, Plans, Problems) -- Company newsletters -- FAQ responses -- Status reports -- Leadership updates -- Project updates -- Incident reports - -## How to use this skill - -To write any internal communication: - -1. **Identify the communication type** from the request -2. **Load the appropriate guideline file** from the `packages/core/examples/` directory: - - `packages/core/examples/3p-updates.md` - For Progress/Plans/Problems team updates - - `packages/core/examples/company-newsletter.md` - For company-wide newsletters - - `packages/core/examples/faq-answers.md` - For answering frequently asked questions - - `packages/core/examples/general-comms.md` - For anything else that doesn't explicitly match one of the above -3. **Follow the specific instructions** in that file for formatting, tone, and content gathering - -If the communication type doesn't match any existing guideline, ask for clarification or more context about the desired format. - -## Keywords -3P updates, company newsletter, company comms, weekly update, faqs, common questions, updates, internal comms diff --git a/packages/catalog/catalog/skills/mcp-builder/SKILL.md b/packages/catalog/catalog/skills/mcp-builder/SKILL.md deleted file mode 100644 index 8a1a77a4..00000000 --- a/packages/catalog/catalog/skills/mcp-builder/SKILL.md +++ /dev/null @@ -1,236 +0,0 @@ ---- -name: mcp-builder -description: Guide for creating high-quality MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. Use when building MCP servers to integrate external APIs or services, whether in Python (FastMCP) or Node/TypeScript (MCP SDK). 
-license: Complete terms in LICENSE.txt ---- - -# MCP Server Development Guide - -## Overview - -Create MCP (Model Context Protocol) servers that enable LLMs to interact with external services through well-designed tools. The quality of an MCP server is measured by how well it enables LLMs to accomplish real-world tasks. - ---- - -# Process - -## 🚀 High-Level Workflow - -Creating a high-quality MCP server involves four main phases: - -### Phase 1: Deep Research and Planning - -#### 1.1 Understand Modern MCP Design - -**API Coverage vs. Workflow Tools:** -Balance comprehensive API endpoint coverage with specialized workflow tools. Workflow tools can be more convenient for specific tasks, while comprehensive coverage gives agents flexibility to compose operations. Performance varies by client—some clients benefit from code execution that combines basic tools, while others work better with higher-level workflows. When uncertain, prioritize comprehensive API coverage. - -**Tool Naming and Discoverability:** -Clear, descriptive tool names help agents find the right tools quickly. Use consistent prefixes (e.g., `github_create_issue`, `github_list_repos`) and action-oriented naming. - -**Context Management:** -Agents benefit from concise tool descriptions and the ability to filter/paginate results. Design tools that return focused, relevant data. Some clients support code execution which can help agents filter and process data efficiently. - -**Actionable Error Messages:** -Error messages should guide agents toward solutions with specific suggestions and next steps. - -#### 1.2 Study MCP Protocol Documentation - -**Navigate the MCP specification:** - -Start with the sitemap to find relevant pages: `https://modelcontextprotocol.io/sitemap.xml` - -Then fetch specific pages with `.md` suffix for markdown format (e.g., `https://modelcontextprotocol.io/specification/draft.md`). 
- -Key pages to review: -- Specification overview and architecture -- Transport mechanisms (streamable HTTP, stdio) -- Tool, resource, and prompt definitions - -#### 1.3 Study Framework Documentation - -**Recommended stack:** -- **Language**: TypeScript (high-quality SDK support and good compatibility in many execution environments e.g. MCPB. Plus AI models are good at generating TypeScript code, benefiting from its broad usage, static typing and good linting tools) -- **Transport**: Streamable HTTP for remote servers, using stateless JSON (simpler to scale and maintain, as opposed to stateful sessions and streaming responses). stdio for local servers. - -**Load framework documentation:** - -- **MCP Best Practices**: [📋 View Best Practices](./reference/mcp_best_practices.md) - Core guidelines - -**For TypeScript (recommended):** -- **TypeScript SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` -- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - TypeScript patterns and examples - -**For Python:** -- **Python SDK**: Use WebFetch to load `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` -- [🐍 Python Guide](./reference/python_mcp_server.md) - Python patterns and examples - -#### 1.4 Plan Your Implementation - -**Understand the API:** -Review the service's API documentation to identify key endpoints, authentication requirements, and data models. Use web search and WebFetch as needed. - -**Tool Selection:** -Prioritize comprehensive API coverage. List endpoints to implement, starting with the most common operations. 
- ---- - -### Phase 2: Implementation - -#### 2.1 Set Up Project Structure - -See language-specific guides for project setup: -- [⚡ TypeScript Guide](./reference/node_mcp_server.md) - Project structure, package.json, tsconfig.json -- [🐍 Python Guide](./reference/python_mcp_server.md) - Module organization, dependencies - -#### 2.2 Implement Core Infrastructure - -Create shared utilities: -- API client with authentication -- Error handling helpers -- Response formatting (JSON/Markdown) -- Pagination support - -#### 2.3 Implement Tools - -For each tool: - -**Input Schema:** -- Use Zod (TypeScript) or Pydantic (Python) -- Include constraints and clear descriptions -- Add examples in field descriptions - -**Output Schema:** -- Define `outputSchema` where possible for structured data -- Use `structuredContent` in tool responses (TypeScript SDK feature) -- Helps clients understand and process tool outputs - -**Tool Description:** -- Concise summary of functionality -- Parameter descriptions -- Return type schema - -**Implementation:** -- Async/await for I/O operations -- Proper error handling with actionable messages -- Support pagination where applicable -- Return both text content and structured data when using modern SDKs - -**Annotations:** -- `readOnlyHint`: true/false -- `destructiveHint`: true/false -- `idempotentHint`: true/false -- `openWorldHint`: true/false - ---- - -### Phase 3: Review and Test - -#### 3.1 Code Quality - -Review for: -- No duplicated code (DRY principle) -- Consistent error handling -- Full type coverage -- Clear tool descriptions - -#### 3.2 Build and Test - -**TypeScript:** -- Run `npm run build` to verify compilation -- Test with MCP Inspector: `npx @modelcontextprotocol/inspector` - -**Python:** -- Verify syntax: `python -m py_compile your_server.py` -- Test with MCP Inspector - -See language-specific guides for detailed testing approaches and quality checklists. 
- ---- - -### Phase 4: Create Evaluations - -After implementing your MCP server, create comprehensive evaluations to test its effectiveness. - -**Load [✅ Evaluation Guide](./reference/evaluation.md) for complete evaluation guidelines.** - -#### 4.1 Understand Evaluation Purpose - -Use evaluations to test whether LLMs can effectively use your MCP server to answer realistic, complex questions. - -#### 4.2 Create 10 Evaluation Questions - -To create effective evaluations, follow the process outlined in the evaluation guide: - -1. **Tool Inspection**: List available tools and understand their capabilities -2. **Content Exploration**: Use READ-ONLY operations to explore available data -3. **Question Generation**: Create 10 complex, realistic questions -4. **Answer Verification**: Solve each question yourself to verify answers - -#### 4.3 Evaluation Requirements - -Ensure each question is: -- **Independent**: Not dependent on other questions -- **Read-only**: Only non-destructive operations required -- **Complex**: Requiring multiple tool calls and deep exploration -- **Realistic**: Based on real use cases humans would care about -- **Verifiable**: Single, clear answer that can be verified by string comparison -- **Stable**: Answer won't change over time - -#### 4.4 Output Format - -Create an XML file with this structure: - -```xml -<evaluation> - <qa_pair> - <question>Find discussions about AI model launches with animal codenames. One model needed a specific safety designation that uses the format ASL-X. What number X was being determined for the model named after a spotted wild cat?</question>
- <answer>3</answer> - </qa_pair> -</evaluation> -``` - ---- - -# Reference Files - -## 📚 Documentation Library - -Load these resources as needed during development: - -### Core MCP Documentation (Load First) -- **MCP Protocol**: Start with sitemap at `https://modelcontextprotocol.io/sitemap.xml`, then fetch specific pages with `.md` suffix -- [📋 MCP Best Practices](./reference/mcp_best_practices.md) - Universal MCP guidelines including: - - Server and tool naming conventions - - Response format guidelines (JSON vs Markdown) - - Pagination best practices - - Transport selection (streamable HTTP vs stdio) - - Security and error handling standards - -### SDK Documentation (Load During Phase 1/2) -- **Python SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/python-sdk/main/README.md` -- **TypeScript SDK**: Fetch from `https://raw.githubusercontent.com/modelcontextprotocol/typescript-sdk/main/README.md` - -### Language-Specific Implementation Guides (Load During Phase 2) -- [🐍 Python Implementation Guide](./reference/python_mcp_server.md) - Complete Python/FastMCP guide with: - - Server initialization patterns - - Pydantic model examples - - Tool registration with `@mcp.tool` - - Complete working examples - - Quality checklist - -- [⚡ TypeScript Implementation Guide](./reference/node_mcp_server.md) - Complete TypeScript guide with: - - Project structure - - Zod schema patterns - - Tool registration with `server.registerTool` - - Complete working examples - - Quality checklist - -### Evaluation Guide (Load During Phase 4) -- [✅ Evaluation Guide](./reference/evaluation.md) - Complete evaluation creation guide with: - - Question creation guidelines - - Answer verification strategies - - XML format specifications - - Example questions and answers - - Running an evaluation with the provided scripts diff --git a/packages/catalog/catalog/skills/pdf/SKILL.md b/packages/catalog/catalog/skills/pdf/SKILL.md deleted file mode 100644 index d3e046a5..00000000 ---
a/packages/catalog/catalog/skills/pdf/SKILL.md +++ /dev/null @@ -1,314 +0,0 @@ ---- -name: pdf -description: Use this skill whenever the user wants to do anything with PDF files. This includes reading or extracting text/tables from PDFs, combining or merging multiple PDFs into one, splitting PDFs apart, rotating pages, adding watermarks, creating new PDFs, filling PDF forms, encrypting/decrypting PDFs, extracting images, and OCR on scanned PDFs to make them searchable. If the user mentions a .pdf file or asks to produce one, use this skill. -license: Proprietary. LICENSE.txt has complete terms ---- - -# PDF Processing Guide - -## Overview - -This guide covers essential PDF processing operations using Python libraries and command-line tools. For advanced features, JavaScript libraries, and detailed examples, see REFERENCE.md. If you need to fill out a PDF form, read FORMS.md and follow its instructions. - -## Quick Start - -```python -from pypdf import PdfReader, PdfWriter - -# Read a PDF -reader = PdfReader("document.pdf") -print(f"Pages: {len(reader.pages)}") - -# Extract text -text = "" -for page in reader.pages: - text += page.extract_text() -``` - -## Python Libraries - -### pypdf - Basic Operations - -#### Merge PDFs -```python -from pypdf import PdfWriter, PdfReader - -writer = PdfWriter() -for pdf_file in ["doc1.pdf", "doc2.pdf", "doc3.pdf"]: - reader = PdfReader(pdf_file) - for page in reader.pages: - writer.add_page(page) - -with open("merged.pdf", "wb") as output: - writer.write(output) -``` - -#### Split PDF -```python -reader = PdfReader("input.pdf") -for i, page in enumerate(reader.pages): - writer = PdfWriter() - writer.add_page(page) - with open(f"page_{i+1}.pdf", "wb") as output: - writer.write(output) -``` - -#### Extract Metadata -```python -reader = PdfReader("document.pdf") -meta = reader.metadata -print(f"Title: {meta.title}") -print(f"Author: {meta.author}") -print(f"Subject: {meta.subject}") -print(f"Creator: {meta.creator}") -``` - -#### 
Rotate Pages -```python -reader = PdfReader("input.pdf") -writer = PdfWriter() - -page = reader.pages[0] -page.rotate(90) # Rotate 90 degrees clockwise -writer.add_page(page) - -with open("rotated.pdf", "wb") as output: - writer.write(output) -``` - -### pdfplumber - Text and Table Extraction - -#### Extract Text with Layout -```python -import pdfplumber - -with pdfplumber.open("document.pdf") as pdf: - for page in pdf.pages: - text = page.extract_text() - print(text) -``` - -#### Extract Tables -```python -with pdfplumber.open("document.pdf") as pdf: - for i, page in enumerate(pdf.pages): - tables = page.extract_tables() - for j, table in enumerate(tables): - print(f"Table {j+1} on page {i+1}:") - for row in table: - print(row) -``` - -#### Advanced Table Extraction -```python -import pandas as pd - -with pdfplumber.open("document.pdf") as pdf: - all_tables = [] - for page in pdf.pages: - tables = page.extract_tables() - for table in tables: - if table: # Check if table is not empty - df = pd.DataFrame(table[1:], columns=table[0]) - all_tables.append(df) - -# Combine all tables -if all_tables: - combined_df = pd.concat(all_tables, ignore_index=True) - combined_df.to_excel("extracted_tables.xlsx", index=False) -``` - -### reportlab - Create PDFs - -#### Basic PDF Creation -```python -from reportlab.lib.pagesizes import letter -from reportlab.pdfgen import canvas - -c = canvas.Canvas("hello.pdf", pagesize=letter) -width, height = letter - -# Add text -c.drawString(100, height - 100, "Hello World!") -c.drawString(100, height - 120, "This is a PDF created with reportlab") - -# Add a line -c.line(100, height - 140, 400, height - 140) - -# Save -c.save() -``` - -#### Create PDF with Multiple Pages -```python -from reportlab.lib.pagesizes import letter -from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak -from reportlab.lib.styles import getSampleStyleSheet - -doc = SimpleDocTemplate("report.pdf", pagesize=letter) -styles = 
getSampleStyleSheet() -story = [] - -# Add content -title = Paragraph("Report Title", styles['Title']) -story.append(title) -story.append(Spacer(1, 12)) - -body = Paragraph("This is the body of the report. " * 20, styles['Normal']) -story.append(body) -story.append(PageBreak()) - -# Page 2 -story.append(Paragraph("Page 2", styles['Heading1'])) -story.append(Paragraph("Content for page 2", styles['Normal'])) - -# Build PDF -doc.build(story) -``` - -#### Subscripts and Superscripts - -**IMPORTANT**: Never use Unicode subscript/superscript characters (₀₁₂₃₄₅₆₇₈₉, ⁰¹²³⁴⁵⁶⁷⁸⁹) in ReportLab PDFs. The built-in fonts do not include these glyphs, causing them to render as solid black boxes. - -Instead, use ReportLab's XML markup tags in Paragraph objects: -```python -from reportlab.platypus import Paragraph -from reportlab.lib.styles import getSampleStyleSheet - -styles = getSampleStyleSheet() - -# Subscripts: use <sub> tag -chemical = Paragraph("H<sub>2</sub>O", styles['Normal']) - -# Superscripts: use <super> tag -squared = Paragraph("x<super>2</super> + y<super>2</super>", styles['Normal']) -``` - -For canvas-drawn text (not Paragraph objects), manually adjust the font size and position rather than using Unicode subscripts/superscripts. - -## Command-Line Tools - -### pdftotext (poppler-utils) -```bash -# Extract text -pdftotext input.pdf output.txt - -# Extract text preserving layout -pdftotext -layout input.pdf output.txt - -# Extract specific pages -pdftotext -f 1 -l 5 input.pdf output.txt # Pages 1-5 -``` - -### qpdf -```bash -# Merge PDFs -qpdf --empty --pages file1.pdf file2.pdf -- merged.pdf - -# Split pages -qpdf input.pdf --pages . 1-5 -- pages1-5.pdf -qpdf input.pdf --pages .
6-10 -- pages6-10.pdf - -# Rotate pages -qpdf input.pdf output.pdf --rotate=+90:1 # Rotate page 1 by 90 degrees - -# Remove password -qpdf --password=mypassword --decrypt encrypted.pdf decrypted.pdf -``` - -### pdftk (if available) -```bash -# Merge -pdftk file1.pdf file2.pdf cat output merged.pdf - -# Split -pdftk input.pdf burst - -# Rotate -pdftk input.pdf rotate 1east output rotated.pdf -``` - -## Common Tasks - -### Extract Text from Scanned PDFs -```python -# Requires: pip install pytesseract pdf2image -import pytesseract -from pdf2image import convert_from_path - -# Convert PDF to images -images = convert_from_path('scanned.pdf') - -# OCR each page -text = "" -for i, image in enumerate(images): - text += f"Page {i+1}:\n" - text += pytesseract.image_to_string(image) - text += "\n\n" - -print(text) -``` - -### Add Watermark -```python -from pypdf import PdfReader, PdfWriter - -# Create watermark (or load existing) -watermark = PdfReader("watermark.pdf").pages[0] - -# Apply to all pages -reader = PdfReader("document.pdf") -writer = PdfWriter() - -for page in reader.pages: - page.merge_page(watermark) - writer.add_page(page) - -with open("watermarked.pdf", "wb") as output: - writer.write(output) -``` - -### Extract Images -```bash -# Using pdfimages (poppler-utils) -pdfimages -j input.pdf output_prefix - -# This extracts all images as output_prefix-000.jpg, output_prefix-001.jpg, etc. 
-``` - -### Password Protection -```python -from pypdf import PdfReader, PdfWriter - -reader = PdfReader("input.pdf") -writer = PdfWriter() - -for page in reader.pages: - writer.add_page(page) - -# Add password -writer.encrypt("userpassword", "ownerpassword") - -with open("encrypted.pdf", "wb") as output: - writer.write(output) -``` - -## Quick Reference - -| Task | Best Tool | Command/Code | -|------|-----------|--------------| -| Merge PDFs | pypdf | `writer.add_page(page)` | -| Split PDFs | pypdf | One page per file | -| Extract text | pdfplumber | `page.extract_text()` | -| Extract tables | pdfplumber | `page.extract_tables()` | -| Create PDFs | reportlab | Canvas or Platypus | -| Command line merge | qpdf | `qpdf --empty --pages ...` | -| OCR scanned PDFs | pytesseract | Convert to image first | -| Fill PDF forms | pdf-lib or pypdf (see FORMS.md) | See FORMS.md | - -## Next Steps - -- For advanced pypdfium2 usage, see REFERENCE.md -- For JavaScript libraries (pdf-lib), see REFERENCE.md -- If you need to fill out a PDF form, follow the instructions in FORMS.md -- For troubleshooting guides, see REFERENCE.md diff --git a/packages/catalog/catalog/skills/skill-creator/SKILL.md b/packages/catalog/catalog/skills/skill-creator/SKILL.md deleted file mode 100644 index 65b3a402..00000000 --- a/packages/catalog/catalog/skills/skill-creator/SKILL.md +++ /dev/null @@ -1,485 +0,0 @@ ---- -name: skill-creator -description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch, edit, or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy. ---- - -# Skill Creator - -A skill for creating new skills and iteratively improving them. 
- -At a high level, the process of creating a skill goes like this: - -- Decide what you want the skill to do and roughly how it should do it -- Write a draft of the skill -- Create a few test prompts and run claude-with-access-to-the-skill on them -- Help the user evaluate the results both qualitatively and quantitatively - - While the runs happen in the background, draft some quantitative evals if there aren't any (if there are some, you can either use as is or modify if you feel something needs to change about them). Then explain them to the user (or if they already existed, explain the ones that already exist) - - Use the `eval-viewer/generate_review.py` script to show the user the results for them to look at, and also let them look at the quantitative metrics -- Rewrite the skill based on feedback from the user's evaluation of the results (and also if there are any glaring flaws that become apparent from the quantitative benchmarks) -- Repeat until you're satisfied -- Expand the test set and try again at larger scale - -Your job when using this skill is to figure out where the user is in this process and then jump in and help them progress through these stages. So for instance, maybe they're like "I want to make a skill for X". You can help narrow down what they mean, write a draft, write the test cases, figure out how they want to evaluate, run all the prompts, and repeat. - -On the other hand, maybe they already have a draft of the skill. In this case you can go straight to the eval/iterate part of the loop. - -Of course, you should always be flexible and if the user is like "I don't need to run a bunch of evaluations, just vibe with me", you can do that instead. - -Then after the skill is done (but again, the order is flexible), you can also run the skill description improver, which we have a whole separate script for, to optimize the triggering of the skill. - -Cool? Cool. 
- -## Communicating with the user - -The skill creator is liable to be used by people across a wide range of familiarity with coding jargon. If you haven't heard (and how could you, it's only very recently that it started), there's a trend now where the power of Claude is inspiring plumbers to open up their terminals, parents and grandparents to google "how to install npm". On the other hand, the bulk of users are probably fairly computer-literate. - -So please pay attention to context cues to understand how to phrase your communication! In the default case, just to give you some idea: - -- "evaluation" and "benchmark" are borderline, but OK -- for "JSON" and "assertion" you want to see serious cues from the user that they know what those things are before using them without explaining them - -It's OK to briefly explain terms if you're in doubt, and feel free to clarify terms with a short definition if you're unsure if the user will get it. - ---- - -## Creating a skill - -### Capture Intent - -Start by understanding the user's intent. The current conversation might already contain a workflow the user wants to capture (e.g., they say "turn this into a skill"). If so, extract answers from the conversation history first — the tools used, the sequence of steps, corrections the user made, input/output formats observed. The user may need to fill the gaps, and should confirm before proceeding to the next step. - -1. What should this skill enable Claude to do? -2. When should this skill trigger? (what user phrases/contexts) -3. What's the expected output format? -4. Should we set up test cases to verify the skill works? Skills with objectively verifiable outputs (file transforms, data extraction, code generation, fixed workflow steps) benefit from test cases. Skills with subjective outputs (writing style, art) often don't need them. Suggest the appropriate default based on the skill type, but let the user decide. 
- -### Interview and Research - -Proactively ask questions about edge cases, input/output formats, example files, success criteria, and dependencies. Wait to write test prompts until you've got this part ironed out. - -Check available MCPs - if useful for research (searching docs, finding similar skills, looking up best practices), research in parallel via subagents if available, otherwise inline. Come prepared with context to reduce burden on the user. - -### Write the SKILL.md - -Based on the user interview, fill in these components: - -- **name**: Skill identifier -- **description**: When to trigger, what it does. This is the primary triggering mechanism - include both what the skill does AND specific contexts for when to use it. All "when to use" info goes here, not in the body. Note: currently Claude has a tendency to "undertrigger" skills -- to not use them when they'd be useful. To combat this, please make the skill descriptions a little bit "pushy". So for instance, instead of "How to build a simple fast dashboard to display internal Anthropic data.", you might write "How to build a simple fast dashboard to display internal Anthropic data. Make sure to use this skill whenever the user mentions dashboards, data visualization, internal metrics, or wants to display any kind of company data, even if they don't explicitly ask for a 'dashboard.'" -- **compatibility**: Required tools, dependencies (optional, rarely needed) -- **the rest of the skill :)** - -### Skill Writing Guide - -#### Anatomy of a Skill - -``` -skill-name/ -├── SKILL.md (required) -│ ├── YAML frontmatter (name, description required) -│ └── Markdown instructions -└── Bundled Resources (optional) - ├── scripts/ - Executable code for deterministic/repetitive tasks - ├── references/ - Docs loaded into context as needed - └── assets/ - Files used in output (templates, icons, fonts) -``` - -#### Progressive Disclosure - -Skills use a three-level loading system: -1. 
**Metadata** (name + description) - Always in context (~100 words) -2. **SKILL.md body** - In context whenever skill triggers (<500 lines ideal) -3. **Bundled resources** - As needed (unlimited, scripts can execute without loading) - -These word counts are approximate and you can feel free to go longer if needed. - -**Key patterns:** -- Keep SKILL.md under 500 lines; if you're approaching this limit, add an additional layer of hierarchy along with clear pointers about where the model using the skill should go next to follow up. -- Reference files clearly from SKILL.md with guidance on when to read them -- For large reference files (>300 lines), include a table of contents - -**Domain organization**: When a skill supports multiple domains/frameworks, organize by variant: -``` -cloud-deploy/ -├── SKILL.md (workflow + selection) -└── references/ - ├── aws.md - ├── gcp.md - └── azure.md -``` -Claude reads only the relevant reference file. - -#### Principle of Lack of Surprise - -This goes without saying, but skills must not contain malware, exploit code, or any content that could compromise system security. A skill's contents should not surprise the user in their intent if described. Don't go along with requests to create misleading skills or skills designed to facilitate unauthorized access, data exfiltration, or other malicious activities. Things like a "roleplay as an XYZ" are OK though. - -#### Writing Patterns - -Prefer using the imperative form in instructions. - -**Defining output formats** - You can do it like this: -```markdown -## Report structure -ALWAYS use this exact template: -# [Title] -## Executive summary -## Key findings -## Recommendations -``` - -**Examples pattern** - It's useful to include examples. 
You can format them like this (but if "Input" and "Output" are in the examples you might want to deviate a little): -```markdown -## Commit message format -**Example 1:** -Input: Added user authentication with JWT tokens -Output: feat(auth): implement JWT-based authentication -``` - -### Writing Style - -Try to explain to the model why things are important in lieu of heavy-handed musty MUSTs. Use theory of mind and try to make the skill general and not super-narrow to specific examples. Start by writing a draft and then look at it with fresh eyes and improve it. - -### Test Cases - -After writing the skill draft, come up with 2-3 realistic test prompts — the kind of thing a real user would actually say. Share them with the user: [you don't have to use this exact language] "Here are a few test cases I'd like to try. Do these look right, or do you want to add more?" Then run them. - -Save test cases to `evals/evals.json`. Don't write assertions yet — just the prompts. You'll draft assertions in the next step while the runs are in progress. - -```json -{ - "skill_name": "example-skill", - "evals": [ - { - "id": 1, - "prompt": "User's task prompt", - "expected_output": "Description of expected result", - "files": [] - } - ] -} -``` - -See `references/schemas.md` for the full schema (including the `assertions` field, which you'll add later). - -## Running and evaluating test cases - -This section is one continuous sequence — don't stop partway through. Do NOT use `/skill-test` or any other testing skill. - -Put results in `<skill-name>-workspace/` as a sibling to the skill directory. Within the workspace, organize results by iteration (`iteration-1/`, `iteration-2/`, etc.) and within that, each test case gets a directory (`eval-0/`, `eval-1/`, etc.). Don't create all of this upfront — just create directories as you go.
- -### Step 1: Spawn all runs (with-skill AND baseline) in the same turn - -For each test case, spawn two subagents in the same turn — one with the skill, one without. This is important: don't spawn the with-skill runs first and then come back for baselines later. Launch everything at once so it all finishes around the same time. - -**With-skill run:** - -``` -Execute this task: -- Skill path: -- Task: -- Input files: -- Save outputs to: /iteration-/eval-/with_skill/outputs/ -- Outputs to save: -``` - -**Baseline run** (same prompt, but the baseline depends on context): -- **Creating a new skill**: no skill at all. Same prompt, no skill path, save to `without_skill/outputs/`. -- **Improving an existing skill**: the old version. Before editing, snapshot the skill (`cp -r /skill-snapshot/`), then point the baseline subagent at the snapshot. Save to `old_skill/outputs/`. - -Write an `eval_metadata.json` for each test case (assertions can be empty for now). Give each eval a descriptive name based on what it's testing — not just "eval-0". Use this name for the directory too. If this iteration uses new or modified eval prompts, create these files for each new eval directory — don't assume they carry over from previous iterations. - -```json -{ - "eval_id": 0, - "eval_name": "descriptive-name-here", - "prompt": "The user's task prompt", - "assertions": [] -} -``` - -### Step 2: While runs are in progress, draft assertions - -Don't just wait for the runs to finish — you can use this time productively. Draft quantitative assertions for each test case and explain them to the user. If assertions already exist in `evals/evals.json`, review them and explain what they check. - -Good assertions are objectively verifiable and have descriptive names — they should read clearly in the benchmark viewer so someone glancing at the results immediately understands what each one checks. 
Subjective skills (writing style, design quality) are better evaluated qualitatively — don't force assertions onto things that need human judgment. - -Update the `eval_metadata.json` files and `evals/evals.json` with the assertions once drafted. Also explain to the user what they'll see in the viewer — both the qualitative outputs and the quantitative benchmark. - -### Step 3: As runs complete, capture timing data - -When each subagent task completes, you receive a notification containing `total_tokens` and `duration_ms`. Save this data immediately to `timing.json` in the run directory: - -```json -{ - "total_tokens": 84852, - "duration_ms": 23332, - "total_duration_seconds": 23.3 -} -``` - -This is the only opportunity to capture this data — it comes through the task notification and isn't persisted elsewhere. Process each notification as it arrives rather than trying to batch them. - -### Step 4: Grade, aggregate, and launch the viewer - -Once all runs are done: - -1. **Grade each run** — spawn a grader subagent (or grade inline) that reads `agents/grader.md` and evaluates each assertion against the outputs. Save results to `grading.json` in each run directory. The grading.json expectations array must use the fields `text`, `passed`, and `evidence` (not `name`/`met`/`details` or other variants) — the viewer depends on these exact field names. For assertions that can be checked programmatically, write and run a script rather than eyeballing it — scripts are faster, more reliable, and can be reused across iterations. - -2. **Aggregate into benchmark** — run the aggregation script from the skill-creator directory: - ```bash - python -m scripts.aggregate_benchmark /iteration-N --skill-name - ``` - This produces `benchmark.json` and `benchmark.md` with pass_rate, time, and tokens for each configuration, with mean ± stddev and the delta. If generating benchmark.json manually, see `references/schemas.md` for the exact schema the viewer expects. 
-Put each with_skill version before its baseline counterpart. - -3. **Do an analyst pass** — read the benchmark data and surface patterns the aggregate stats might hide. See `agents/analyzer.md` (the "Analyzing Benchmark Results" section) for what to look for — things like assertions that always pass regardless of skill (non-discriminating), high-variance evals (possibly flaky), and time/token tradeoffs. - -4. **Launch the viewer** with both qualitative outputs and quantitative data: - ```bash - nohup python /eval-viewer/generate_review.py \ - /iteration-N \ - --skill-name "my-skill" \ - --benchmark /iteration-N/benchmark.json \ - > /dev/null 2>&1 & - VIEWER_PID=$! - ``` - For iteration 2+, also pass `--previous-workspace /iteration-`. - - **Cowork / headless environments:** If `webbrowser.open()` is not available or the environment has no display, use `--static ` to write a standalone HTML file instead of starting a server. Feedback will be downloaded as a `feedback.json` file when the user clicks "Submit All Reviews". After download, copy `feedback.json` into the workspace directory for the next iteration to pick up. - -Note: please use generate_review.py to create the viewer; there's no need to write custom HTML. - -5. **Tell the user** something like: "I've opened the results in your browser. There are two tabs — 'Outputs' lets you click through each test case and leave feedback, 'Benchmark' shows the quantitative comparison. When you're done, come back here and let me know." 
- -### What the user sees in the viewer - -The "Outputs" tab shows one test case at a time: -- **Prompt**: the task that was given -- **Output**: the files the skill produced, rendered inline where possible -- **Previous Output** (iteration 2+): collapsed section showing last iteration's output -- **Formal Grades** (if grading was run): collapsed section showing assertion pass/fail -- **Feedback**: a textbox that auto-saves as they type -- **Previous Feedback** (iteration 2+): their comments from last time, shown below the textbox - -The "Benchmark" tab shows the stats summary: pass rates, timing, and token usage for each configuration, with per-eval breakdowns and analyst observations. - -Navigation is via prev/next buttons or arrow keys. When done, they click "Submit All Reviews" which saves all feedback to `feedback.json`. - -### Step 5: Read the feedback - -When the user tells you they're done, read `feedback.json`: - -```json -{ - "reviews": [ - {"run_id": "eval-0-with_skill", "feedback": "the chart is missing axis labels", "timestamp": "..."}, - {"run_id": "eval-1-with_skill", "feedback": "", "timestamp": "..."}, - {"run_id": "eval-2-with_skill", "feedback": "perfect, love this", "timestamp": "..."} - ], - "status": "complete" -} -``` - -Empty feedback means the user thought it was fine. Focus your improvements on the test cases where the user had specific complaints. - -Kill the viewer server when you're done with it: - -```bash -kill $VIEWER_PID 2>/dev/null -``` - ---- - -## Improving the skill - -This is the heart of the loop. You've run the test cases, the user has reviewed the results, and now you need to make the skill better based on their feedback. - -### How to think about improvements - -1. **Generalize from the feedback.** The big picture thing that's happening here is that we're trying to create skills that can be used a million times (maybe literally, maybe even more who knows) across many different prompts. 
Here you and the user are iterating on only a few examples over and over again because it helps move faster. The user knows these examples in and out and it's quick for them to assess new outputs. But if the skill you and the user are codeveloping works only for those examples, it's useless. Rather than put in fiddly overfitty changes, or oppressively constrictive MUSTs, if there's some stubborn issue, you might try branching out and using different metaphors, or recommending different patterns of working. It's relatively cheap to try and maybe you'll land on something great. - -2. **Keep the prompt lean.** Remove things that aren't pulling their weight. Make sure to read the transcripts, not just the final outputs — if it looks like the skill is making the model waste a bunch of time doing things that are unproductive, you can try getting rid of the parts of the skill that are making it do that and seeing what happens. - -3. **Explain the why.** Try hard to explain the **why** behind everything you're asking the model to do. Today's LLMs are *smart*. They have good theory of mind and when given a good harness can go beyond rote instructions and really make things happen. Even if the feedback from the user is terse or frustrated, try to actually understand the task and why the user is writing what they wrote, and what they actually wrote, and then transmit this understanding into the instructions. If you find yourself writing ALWAYS or NEVER in all caps, or using super rigid structures, that's a yellow flag — if possible, reframe and explain the reasoning so that the model understands why the thing you're asking for is important. That's a more humane, powerful, and effective approach. - -4. **Look for repeated work across test cases.** Read the transcripts from the test runs and notice if the subagents all independently wrote similar helper scripts or took the same multi-step approach to something. 
If all 3 test cases resulted in the subagent writing a `create_docx.py` or a `build_chart.py`, that's a strong signal the skill should bundle that script. Write it once, put it in `scripts/`, and tell the skill to use it. This saves every future invocation from reinventing the wheel. - -This task is pretty important (we are trying to create billions a year in economic value here!) and your thinking time is not the blocker; take your time and really mull things over. I'd suggest writing a draft revision and then looking at it anew and making improvements. Really do your best to get into the head of the user and understand what they want and need. - -### The iteration loop - -After improving the skill: - -1. Apply your improvements to the skill -2. Rerun all test cases into a new `iteration-/` directory, including baseline runs. If you're creating a new skill, the baseline is always `without_skill` (no skill) — that stays the same across iterations. If you're improving an existing skill, use your judgment on what makes sense as the baseline: the original version the user came in with, or the previous iteration. -3. Launch the reviewer with `--previous-workspace` pointing at the previous iteration -4. Wait for the user to review and tell you they're done -5. Read the new feedback, improve again, repeat - -Keep going until: -- The user says they're happy -- The feedback is all empty (everything looks good) -- You're not making meaningful progress - ---- - -## Advanced: Blind comparison - -For situations where you want a more rigorous comparison between two versions of a skill (e.g., the user asks "is the new version actually better?"), there's a blind comparison system. Read `agents/comparator.md` and `agents/analyzer.md` for the details. The basic idea is: give two outputs to an independent agent without telling it which is which, and let it judge quality. Then analyze why the winner won. - -This is optional, requires subagents, and most users won't need it. 
The human review loop is usually sufficient. - ---- - -## Description Optimization - -The description field in SKILL.md frontmatter is the primary mechanism that determines whether Claude invokes a skill. After creating or improving a skill, offer to optimize the description for better triggering accuracy. - -### Step 1: Generate trigger eval queries - -Create 20 eval queries — a mix of should-trigger and should-not-trigger. Save as JSON: - -```json -[ - {"query": "the user prompt", "should_trigger": true}, - {"query": "another prompt", "should_trigger": false} -] -``` - -The queries must be realistic and something a Claude Code or Claude.ai user would actually type. Not abstract requests, but requests that are concrete and specific and have a good amount of detail. For instance, file paths, personal context about the user's job or situation, column names and values, company names, URLs. A little bit of backstory. Some might be in lowercase or contain abbreviations or typos or casual speech. Use a mix of different lengths, and focus on edge cases rather than making them clear-cut (the user will get a chance to sign off on them). - -Bad: `"Format this data"`, `"Extract text from PDF"`, `"Create a chart"` - -Good: `"ok so my boss just sent me this xlsx file (its in my downloads, called something like 'Q4 sales final FINAL v2.xlsx') and she wants me to add a column that shows the profit margin as a percentage. The revenue is in column C and costs are in column D i think"` - -For the **should-trigger** queries (8-10), think about coverage. You want different phrasings of the same intent — some formal, some casual. Include cases where the user doesn't explicitly name the skill or file type but clearly needs it. Throw in some uncommon use cases and cases where this skill competes with another but should win. 
- -For the **should-not-trigger** queries (8-10), the most valuable ones are the near-misses — queries that share keywords or concepts with the skill but actually need something different. Think adjacent domains, ambiguous phrasing where a naive keyword match would trigger but shouldn't, and cases where the query touches on something the skill does but in a context where another tool is more appropriate. - -The key thing to avoid: don't make should-not-trigger queries obviously irrelevant. "Write a fibonacci function" as a negative test for a PDF skill is too easy — it doesn't test anything. The negative cases should be genuinely tricky. - -### Step 2: Review with user - -Present the eval set to the user for review using the HTML template: - -1. Read the template from `assets/eval_review.html` -2. Replace the placeholders: - - `__EVAL_DATA_PLACEHOLDER__` → the JSON array of eval items (no quotes around it — it's a JS variable assignment) - - `__SKILL_NAME_PLACEHOLDER__` → the skill's name - - `__SKILL_DESCRIPTION_PLACEHOLDER__` → the skill's current description -3. Write to a temp file (e.g., `/tmp/eval_review_.html`) and open it: `open /tmp/eval_review_.html` -4. The user can edit queries, toggle should-trigger, add/remove entries, then click "Export Eval Set" -5. The file downloads to `~/Downloads/eval_set.json` — check the Downloads folder for the most recent version in case there are multiple (e.g., `eval_set (1).json`) - -This step matters — bad eval queries lead to bad descriptions. - -### Step 3: Run the optimization loop - -Tell the user: "This will take some time — I'll run the optimization loop in the background and check on it periodically." 
- -Save the eval set to the workspace, then run in the background: - -```bash -python -m scripts.run_loop \ - --eval-set <path-to-trigger-eval.json> \ - --skill-path <path-to-skill> \ - --model <model-id-powering-this-session> \ - --max-iterations 5 \ - --verbose -``` - -Use the model ID from your system prompt (the one powering the current session) so the triggering test matches what the user actually experiences. - -While it runs, periodically tail the output to give the user updates on which iteration it's on and what the scores look like. - -This handles the full optimization loop automatically. It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting. - -### How skill triggering works - -Understanding the triggering mechanism helps design better eval queries. Skills appear in Claude's `available_skills` list with their name + description, and Claude decides whether to consult a skill based on that description. The important thing to know is that Claude only consults skills for tasks it can't easily handle on its own — simple, one-step queries like "read this PDF" may not trigger a skill even if the description matches perfectly, because Claude can handle them directly with basic tools. Complex, multi-step, or specialized queries reliably trigger skills when the description matches. - -This means your eval queries should be substantive enough that Claude would actually benefit from consulting a skill. Simple queries like "read file X" are poor test cases — they won't trigger skills regardless of description quality.
- -### Step 4: Apply the result - -Take `best_description` from the JSON output and update the skill's SKILL.md frontmatter. Show the user before/after and report the scores. - ---- - -### Package and Present (only if `present_files` tool is available) - -Check whether you have access to the `present_files` tool. If you don't, skip this step. If you do, package the skill and present the .skill file to the user: - -```bash -python -m scripts.package_skill -``` - -After packaging, direct the user to the resulting `.skill` file path so they can install it. - ---- - -## Claude.ai-specific instructions - -In Claude.ai, the core workflow is the same (draft → test → review → improve → repeat), but because Claude.ai doesn't have subagents, some mechanics change. Here's what to adapt: - -**Running test cases**: No subagents means no parallel execution. For each test case, read the skill's SKILL.md, then follow its instructions to accomplish the test prompt yourself. Do them one at a time. This is less rigorous than independent subagents (you wrote the skill and you're also running it, so you have full context), but it's a useful sanity check — and the human review step compensates. Skip the baseline runs — just use the skill to complete the task as requested. - -**Reviewing results**: If you can't open a browser (e.g., Claude.ai's VM has no display, or you're on a remote server), skip the browser reviewer entirely. Instead, present results directly in the conversation. For each test case, show the prompt and the output. If the output is a file the user needs to see (like a .docx or .xlsx), save it to the filesystem and tell them where it is so they can download and inspect it. Ask for feedback inline: "How does this look? Anything you'd change?" - -**Benchmarking**: Skip the quantitative benchmarking — it relies on baseline comparisons which aren't meaningful without subagents. Focus on qualitative feedback from the user. 
- -**The iteration loop**: Same as before — improve the skill, rerun the test cases, ask for feedback — just without the browser reviewer in the middle. You can still organize results into iteration directories on the filesystem if you have one. - -**Description optimization**: This section requires the `claude` CLI tool (specifically `claude -p`) which is only available in Claude Code. Skip it if you're on Claude.ai. - -**Blind comparison**: Requires subagents. Skip it. - -**Packaging**: The `package_skill.py` script works anywhere with Python and a filesystem. On Claude.ai, you can run it and the user can download the resulting `.skill` file. - -**Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. In this case: -- **Preserve the original name.** Note the skill's directory name and `name` frontmatter field -- use them unchanged. E.g., if the installed skill is `research-helper`, output `research-helper.skill` (not `research-helper-v2`). -- **Copy to a writeable location before editing.** The installed skill path may be read-only. Copy to `/tmp/skill-name/`, edit there, and package from the copy. -- **If packaging manually, stage in `/tmp/` first**, then copy to the output directory -- direct writes may fail due to permissions. - ---- - -## Cowork-Specific Instructions - -If you're in Cowork, the main things to know are: - -- You have subagents, so the main workflow (spawn test cases in parallel, run baselines, grade, etc.) all works. (However, if you run into severe problems with timeouts, it's OK to run the test prompts in series rather than parallel.) -- You don't have a browser or display, so when generating the eval viewer, use `--static ` to write a standalone HTML file instead of starting a server. Then proffer a link that the user can click to open the HTML in their browser. 
-- For whatever reason, the Cowork setup seems to disincline Claude from generating the eval viewer after running the tests, so just to reiterate: whether you're in Cowork or in Claude Code, after running tests, you should always generate the eval viewer for the human to look at examples before revising the skill yourself and trying to make corrections, using `generate_review.py` (not writing your own boutique html code). Sorry in advance but I'm gonna go all caps here: GENERATE THE EVAL VIEWER *BEFORE* evaluating inputs yourself. You want to get them in front of the human ASAP! -- Feedback works differently: since there's no running server, the viewer's "Submit All Reviews" button will download `feedback.json` as a file. You can then read it from there (you may have to request access first). -- Packaging works — `package_skill.py` just needs Python and a filesystem. -- Description optimization (`run_loop.py` / `run_eval.py`) should work in Cowork just fine since it uses `claude -p` via subprocess, not a browser, but please save it until you've fully finished making the skill and the user agrees it's in good shape. -- **Updating an existing skill**: The user might be asking you to update an existing skill, not create a new one. Follow the update guidance in the claude.ai section above. - ---- - -## Reference files - -The agents/ directory contains instructions for specialized subagents. Read them when you need to spawn the relevant subagent. - -- `agents/grader.md` — How to evaluate assertions against outputs -- `agents/comparator.md` — How to do blind A/B comparison between two outputs -- `agents/analyzer.md` — How to analyze why one version beat another - -The references/ directory has additional documentation: -- `references/schemas.md` — JSON structures for evals.json, grading.json, etc. 
- ---- - -Repeating one more time the core loop here for emphasis: - -- Figure out what the skill is about -- Draft or edit the skill -- Run claude-with-access-to-the-skill on test prompts -- With the user, evaluate the outputs: - - Create benchmark.json and run `eval-viewer/generate_review.py` to help the user review them - - Run quantitative evals -- Repeat until you and the user are satisfied -- Package the final skill and return it to the user. - -Please add steps to your TodoList, if you have such a thing, to make sure you don't forget. If you're in Cowork, please specifically put "Create evals JSON and run `eval-viewer/generate_review.py` so human can review test cases" in your TodoList to make sure it happens. - -Good luck! diff --git a/packages/catalog/catalog/skills/slack-gif-creator/SKILL.md b/packages/catalog/catalog/skills/slack-gif-creator/SKILL.md deleted file mode 100644 index 16660d8c..00000000 --- a/packages/catalog/catalog/skills/slack-gif-creator/SKILL.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -name: slack-gif-creator -description: Knowledge and utilities for creating animated GIFs optimized for Slack. Provides constraints, validation tools, and animation concepts. Use when users request animated GIFs for Slack like "make me a GIF of X doing Y for Slack." -license: Complete terms in LICENSE.txt ---- - -# Slack GIF Creator - -A toolkit providing utilities and knowledge for creating animated GIFs optimized for Slack. - -## Slack Requirements - -**Dimensions:** -- Emoji GIFs: 128x128 (recommended) -- Message GIFs: 480x480 - -**Parameters:** -- FPS: 10-30 (lower is smaller file size) -- Colors: 48-128 (fewer = smaller file size) -- Duration: Keep under 3 seconds for emoji GIFs - -## Core Workflow - -```python -from core.gif_builder import GIFBuilder -from PIL import Image, ImageDraw - -# 1. Create builder -builder = GIFBuilder(width=128, height=128, fps=10) - -# 2. 
Generate frames -for i in range(12): - frame = Image.new('RGB', (128, 128), (240, 248, 255)) - draw = ImageDraw.Draw(frame) - - # Draw your animation using PIL primitives - # (circles, polygons, lines, etc.) - - builder.add_frame(frame) - -# 3. Save with optimization -builder.save('output.gif', num_colors=48, optimize_for_emoji=True) -``` - -## Drawing Graphics - -### Working with User-Uploaded Images -If a user uploads an image, consider whether they want to: -- **Use it directly** (e.g., "animate this", "split this into frames") -- **Use it as inspiration** (e.g., "make something like this") - -Load and work with images using PIL: -```python -from PIL import Image - -uploaded = Image.open('file.png') -# Use directly, or just as reference for colors/style -``` - -### Drawing from Scratch -When drawing graphics from scratch, use PIL ImageDraw primitives: - -```python -from PIL import ImageDraw - -draw = ImageDraw.Draw(frame) - -# Circles/ovals -draw.ellipse([x1, y1, x2, y2], fill=(r, g, b), outline=(r, g, b), width=3) - -# Stars, triangles, any polygon -points = [(x1, y1), (x2, y2), (x3, y3), ...] -draw.polygon(points, fill=(r, g, b), outline=(r, g, b), width=3) - -# Lines -draw.line([(x1, y1), (x2, y2)], fill=(r, g, b), width=5) - -# Rectangles -draw.rectangle([x1, y1, x2, y2], fill=(r, g, b), outline=(r, g, b), width=3) -``` - -**Don't use:** Emoji fonts (unreliable across platforms) or assume pre-packaged graphics exist in this skill. - -### Making Graphics Look Good - -Graphics should look polished and creative, not basic. Here's how: - -**Use thicker lines** - Always set `width=2` or higher for outlines and lines. Thin lines (width=1) look choppy and amateurish. 
- -**Add visual depth**: -- Use gradients for backgrounds (`create_gradient_background`) -- Layer multiple shapes for complexity (e.g., a star with a smaller star inside) - -**Make shapes more interesting**: -- Don't just draw a plain circle - add highlights, rings, or patterns -- Stars can have glows (draw larger, semi-transparent versions behind) -- Combine multiple shapes (stars + sparkles, circles + rings) - -**Pay attention to colors**: -- Use vibrant, complementary colors -- Add contrast (dark outlines on light shapes, light outlines on dark shapes) -- Consider the overall composition - -**For complex shapes** (hearts, snowflakes, etc.): -- Use combinations of polygons and ellipses -- Calculate points carefully for symmetry -- Add details (a heart can have a highlight curve, snowflakes have intricate branches) - -Be creative and detailed! A good Slack GIF should look polished, not like placeholder graphics. - -## Available Utilities - -### GIFBuilder (`core.gif_builder`) -Assembles frames and optimizes for Slack: -```python -builder = GIFBuilder(width=128, height=128, fps=10) -builder.add_frame(frame) # Add PIL Image -builder.add_frames(frames) # Add list of frames -builder.save('out.gif', num_colors=48, optimize_for_emoji=True, remove_duplicates=True) -``` - -### Validators (`core.validators`) -Check if GIF meets Slack requirements: -```python -from core.validators import validate_gif, is_slack_ready - -# Detailed validation -passes, info = validate_gif('my.gif', is_emoji=True, verbose=True) - -# Quick check -if is_slack_ready('my.gif'): - print("Ready!") -``` - -### Easing Functions (`core.easing`) -Smooth motion instead of linear: -```python -from core.easing import interpolate - -# Progress from 0.0 to 1.0 -t = i / (num_frames - 1) - -# Apply easing -y = interpolate(start=0, end=400, t=t, easing='ease_out') - -# Available: linear, ease_in, ease_out, ease_in_out, -# bounce_out, elastic_out, back_out -``` - -### Frame Helpers (`core.frame_composer`) 
-Convenience functions for common needs: -```python -from core.frame_composer import ( - create_blank_frame, # Solid color background - create_gradient_background, # Vertical gradient - draw_circle, # Helper for circles - draw_text, # Simple text rendering - draw_star # 5-pointed star -) -``` - -## Animation Concepts - -### Shake/Vibrate -Offset object position with oscillation: -- Use `math.sin()` or `math.cos()` with frame index -- Add small random variations for natural feel -- Apply to x and/or y position - -### Pulse/Heartbeat -Scale object size rhythmically: -- Use `math.sin(t * frequency * 2 * math.pi)` for smooth pulse -- For heartbeat: two quick pulses then pause (adjust sine wave) -- Scale between 0.8 and 1.2 of base size - -### Bounce -Object falls and bounces: -- Use `interpolate()` with `easing='bounce_out'` for landing -- Use `easing='ease_in'` for falling (accelerating) -- Apply gravity by increasing y velocity each frame - -### Spin/Rotate -Rotate object around center: -- PIL: `image.rotate(angle, resample=Image.BICUBIC)` -- For wobble: use sine wave for angle instead of linear - -### Fade In/Out -Gradually appear or disappear: -- Create RGBA image, adjust alpha channel -- Or use `Image.blend(image1, image2, alpha)` -- Fade in: alpha from 0 to 1 -- Fade out: alpha from 1 to 0 - -### Slide -Move object from off-screen to position: -- Start position: outside frame bounds -- End position: target location -- Use `interpolate()` with `easing='ease_out'` for smooth stop -- For overshoot: use `easing='back_out'` - -### Zoom -Scale and position for zoom effect: -- Zoom in: scale from 0.1 to 2.0, crop center -- Zoom out: scale from 2.0 to 1.0 -- Can add motion blur for drama (PIL filter) - -### Explode/Particle Burst -Create particles radiating outward: -- Generate particles with random angles and velocities -- Update each particle: `x += vx`, `y += vy` -- Add gravity: `vy += gravity_constant` -- Fade out particles over time (reduce alpha) - -## Optimization 
Strategies - -Only when asked to make the file size smaller, implement a few of the following methods: - -1. **Fewer frames** - Lower FPS (10 instead of 20) or shorter duration -2. **Fewer colors** - `num_colors=48` instead of 128 -3. **Smaller dimensions** - 128x128 instead of 480x480 -4. **Remove duplicates** - `remove_duplicates=True` in save() -5. **Emoji mode** - `optimize_for_emoji=True` auto-optimizes - -```python -# Maximum optimization for emoji -builder.save( - 'emoji.gif', - num_colors=48, - optimize_for_emoji=True, - remove_duplicates=True -) -``` - -## Philosophy - -This skill provides: -- **Knowledge**: Slack's requirements and animation concepts -- **Utilities**: GIFBuilder, validators, easing functions -- **Flexibility**: Create the animation logic using PIL primitives - -It does NOT provide: -- Rigid animation templates or pre-made functions -- Emoji font rendering (unreliable across platforms) -- A library of pre-packaged graphics built into the skill - -**Note on user uploads**: This skill doesn't include pre-built graphics, but if a user uploads an image, use PIL to load and work with it - interpret based on their request whether they want it used directly or just as inspiration. - -Be creative! Combine concepts (bouncing + rotating, pulsing + sliding, etc.) and use PIL's full capabilities. - -## Dependencies - -```bash -pip install pillow imageio numpy -``` diff --git a/packages/catalog/catalog/skills/web-artifacts-builder/SKILL.md b/packages/catalog/catalog/skills/web-artifacts-builder/SKILL.md deleted file mode 100644 index 8b39b19f..00000000 --- a/packages/catalog/catalog/skills/web-artifacts-builder/SKILL.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -name: web-artifacts-builder -description: Suite of tools for creating elaborate, multi-component claude.ai HTML artifacts using modern frontend web technologies (React, Tailwind CSS, shadcn/ui). 
Use for complex artifacts requiring state management, routing, or shadcn/ui components - not for simple single-file HTML/JSX artifacts. -license: Complete terms in LICENSE.txt ---- - -# Web Artifacts Builder - -To build powerful frontend claude.ai artifacts, follow these steps: -1. Initialize the frontend repo using `scripts/init-artifact.sh` -2. Develop your artifact by editing the generated code -3. Bundle all code into a single HTML file using `scripts/bundle-artifact.sh` -4. Display artifact to user -5. (Optional) Test the artifact - -**Stack**: React 18 + TypeScript + Vite + Parcel (bundling) + Tailwind CSS + shadcn/ui - -## Design & Style Guidelines - -VERY IMPORTANT: To avoid what is often referred to as "AI slop", avoid using excessive centered layouts, purple gradients, uniform rounded corners, and Inter font. - -## Quick Start - -### Step 1: Initialize Project - -Run the initialization script to create a new React project: -```bash -bash scripts/init-artifact.sh <project-name> -cd <project-name> -``` - -This creates a fully configured project with: -- ✅ React + TypeScript (via Vite) -- ✅ Tailwind CSS 3.4.1 with shadcn/ui theming system -- ✅ Path aliases (`@/`) configured -- ✅ 40+ shadcn/ui components pre-installed -- ✅ All Radix UI dependencies included -- ✅ Parcel configured for bundling (via .parcelrc) -- ✅ Node 18+ compatibility (auto-detects and pins Vite version) - -### Step 2: Develop Your Artifact - -To build the artifact, edit the generated files. See **Common Development Tasks** below for guidance. - -### Step 3: Bundle to Single HTML File - -To bundle the React app into a single HTML artifact: -```bash -bash scripts/bundle-artifact.sh -``` - -This creates `bundle.html` - a self-contained artifact with all JavaScript, CSS, and dependencies inlined. This file can be directly shared in Claude conversations as an artifact. - -**Requirements**: Your project must have an `index.html` in the root directory.
- -**What the script does**: -- Installs bundling dependencies (parcel, @parcel/config-default, parcel-resolver-tspaths, html-inline) -- Creates `.parcelrc` config with path alias support -- Builds with Parcel (no source maps) -- Inlines all assets into single HTML using html-inline - -### Step 4: Share Artifact with User - -Finally, share the bundled HTML file in conversation with the user so they can view it as an artifact. - -### Step 5: Testing/Visualizing the Artifact (Optional) - -Note: This is a completely optional step. Only perform if necessary or requested. - -To test/visualize the artifact, use available tools (including other Skills or built-in tools like Playwright or Puppeteer). In general, avoid testing the artifact upfront as it adds latency between the request and when the finished artifact can be seen. Test later, after presenting the artifact, if requested or if issues arise. - -## Reference - -- **shadcn/ui components**: https://ui.shadcn.com/docs/components \ No newline at end of file diff --git a/packages/catalog/catalog/skills/webapp-testing/SKILL.md b/packages/catalog/catalog/skills/webapp-testing/SKILL.md deleted file mode 100644 index 8c873308..00000000 --- a/packages/catalog/catalog/skills/webapp-testing/SKILL.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -name: webapp-testing -description: Toolkit for interacting with and testing local web applications using Playwright. Supports verifying frontend functionality, debugging UI behavior, capturing browser screenshots, and viewing browser logs. -license: Complete terms in LICENSE.txt ---- - -# Web Application Testing - -To test local web applications, write native Python Playwright scripts. - -**Helper Scripts Available**: -- `scripts/with_server.py` - Manages server lifecycle (supports multiple servers) - -**Always run scripts with `--help` first** to see usage. DO NOT read the source until you try running the script first and find that a customized solution is absolutely necessary.
These scripts can be very large and thus pollute your context window. They exist to be called directly as black-box scripts rather than ingested into your context window. - -## Decision Tree: Choosing Your Approach - -``` -User task → Is it static HTML? - ├─ Yes → Read HTML file directly to identify selectors - │ ├─ Success → Write Playwright script using selectors - │ └─ Fails/Incomplete → Treat as dynamic (below) - │ - └─ No (dynamic webapp) → Is the server already running? - ├─ No → Run: python scripts/with_server.py --help - │ Then use the helper + write simplified Playwright script - │ - └─ Yes → Reconnaissance-then-action: - 1. Navigate and wait for networkidle - 2. Take screenshot or inspect DOM - 3. Identify selectors from rendered state - 4. Execute actions with discovered selectors -``` - -## Example: Using with_server.py - -To start a server, run `--help` first, then use the helper: - -**Single server:** -```bash -python scripts/with_server.py --server "npm run dev" --port 5173 -- python your_automation.py -``` - -**Multiple servers (e.g., backend + frontend):** -```bash -python scripts/with_server.py \ - --server "cd backend && python server.py" --port 3000 \ - --server "cd frontend && npm run dev" --port 5173 \ - -- python your_automation.py -``` - -To create an automation script, include only Playwright logic (servers are managed automatically): -```python -from playwright.sync_api import sync_playwright - -with sync_playwright() as p: - browser = p.chromium.launch(headless=True) # Always launch chromium in headless mode - page = browser.new_page() - page.goto('http://localhost:5173') # Server already running and ready - page.wait_for_load_state('networkidle') # CRITICAL: Wait for JS to execute - # ... your automation logic - browser.close() -``` - -## Reconnaissance-Then-Action Pattern - -1. 
**Inspect rendered DOM**: - ```python - page.screenshot(path='/tmp/inspect.png', full_page=True) - content = page.content() - page.locator('button').all() - ``` - -2. **Identify selectors** from inspection results - -3. **Execute actions** using discovered selectors - -## Common Pitfall - -❌ **Don't** inspect the DOM before waiting for `networkidle` on dynamic apps -✅ **Do** wait for `page.wait_for_load_state('networkidle')` before inspection - -## Best Practices - -- **Use bundled scripts as black boxes** - To accomplish a task, consider whether one of the scripts available in `scripts/` can help. These scripts handle common, complex workflows reliably without cluttering the context window. Use `--help` to see usage, then invoke directly. -- Use `sync_playwright()` for synchronous scripts -- Always close the browser when done -- Use descriptive selectors: `text=`, `role=`, CSS selectors, or IDs -- Add appropriate waits: `page.wait_for_selector()` or `page.wait_for_timeout()` - -## Reference Files - -- **packages/core/examples/** - Examples showing common patterns: - - `element_discovery.py` - Discovering buttons, links, and inputs on a page - - `static_html_automation.py` - Using file:// URLs for local HTML - - `console_logging.py` - Capturing console logs during automation diff --git a/packages/catalog/catalog/skills/xlsx/SKILL.md b/packages/catalog/catalog/skills/xlsx/SKILL.md deleted file mode 100644 index ba3bdc86..00000000 --- a/packages/catalog/catalog/skills/xlsx/SKILL.md +++ /dev/null @@ -1,292 +0,0 @@ ---- -name: xlsx -description: "Use this skill any time a spreadsheet file is the primary input or output. This means any task where the user wants to: open, read, edit, or fix an existing .xlsx, .xlsm, .csv, or .tsv file (e.g., adding columns, computing formulas, formatting, charting, cleaning messy data); create a new spreadsheet from scratch or from other data sources; or convert between tabular file formats. 
Trigger especially when the user references a spreadsheet file by name or path — even casually (like \"the xlsx in my downloads\") — and wants something done to it or produced from it. Also trigger for cleaning or restructuring messy tabular data files (malformed rows, misplaced headers, junk data) into proper spreadsheets. The deliverable must be a spreadsheet file. Do NOT trigger when the primary deliverable is a Word document, HTML report, standalone Python script, database pipeline, or Google Sheets API integration, even if tabular data is involved." -license: Proprietary. LICENSE.txt has complete terms ---- - -# Requirements for Outputs - -## All Excel files - -### Professional Font -- Use a consistent, professional font (e.g., Arial, Times New Roman) for all deliverables unless otherwise instructed by the user - -### Zero Formula Errors -- Every Excel model MUST be delivered with ZERO formula errors (#REF!, #DIV/0!, #VALUE!, #N/A, #NAME?) - -### Preserve Existing Templates (when updating templates) -- Study and EXACTLY match existing format, style, and conventions when modifying files -- Never impose standardized formatting on files with established patterns -- Existing template conventions ALWAYS override these guidelines - -## Financial models - -### Color Coding Standards -Unless otherwise stated by the user or existing template - -#### Industry-Standard Color Conventions -- **Blue text (RGB: 0,0,255)**: Hardcoded inputs, and numbers users will change for scenarios -- **Black text (RGB: 0,0,0)**: ALL formulas and calculations -- **Green text (RGB: 0,128,0)**: Links pulling from other worksheets within same workbook -- **Red text (RGB: 255,0,0)**: External links to other files -- **Yellow background (RGB: 255,255,0)**: Key assumptions needing attention or cells that need to be updated - -### Number Formatting Standards - -#### Required Format Rules -- **Years**: Format as text strings (e.g., "2024" not "2,024") -- **Currency**: Use $#,##0 format; ALWAYS 
specify units in headers ("Revenue ($mm)") -- **Zeros**: Use number formatting to make all zeros "-", including percentages (e.g., "$#,##0;($#,##0);-") -- **Percentages**: Default to 0.0% format (one decimal) -- **Multiples**: Format as 0.0x for valuation multiples (EV/EBITDA, P/E) -- **Negative numbers**: Use parentheses (123) not minus -123 - -### Formula Construction Rules - -#### Assumptions Placement -- Place ALL assumptions (growth rates, margins, multiples, etc.) in separate assumption cells -- Use cell references instead of hardcoded values in formulas -- Example: Use =B5*(1+$B$6) instead of =B5*1.05 - -#### Formula Error Prevention -- Verify all cell references are correct -- Check for off-by-one errors in ranges -- Ensure consistent formulas across all projection periods -- Test with edge cases (zero values, negative numbers) -- Verify no unintended circular references - -#### Documentation Requirements for Hardcodes -- Comment or in cells beside (if end of table). Format: "Source: [System/Document], [Date], [Specific Reference], [URL if applicable]" -- Examples: - - "Source: Company 10-K, FY2024, Page 45, Revenue Note, [SEC EDGAR URL]" - - "Source: Company 10-Q, Q2 2025, Exhibit 99.1, [SEC EDGAR URL]" - - "Source: Bloomberg Terminal, 8/15/2025, AAPL US Equity" - - "Source: FactSet, 8/20/2025, Consensus Estimates Screen" - -# XLSX creation, editing, and analysis - -## Overview - -A user may ask you to create, edit, or analyze the contents of an .xlsx file. You have different tools and workflows available for different tasks. - -## Important Requirements - -**LibreOffice Required for Formula Recalculation**: You can assume LibreOffice is installed for recalculating formula values using the `scripts/recalc.py` script. 
The script automatically configures LibreOffice on first run, including in sandboxed environments where Unix sockets are restricted (handled by `scripts/office/soffice.py`) - -## Reading and analyzing data - -### Data analysis with pandas -For data analysis, visualization, and basic operations, use **pandas** which provides powerful data manipulation capabilities: - -```python -import pandas as pd - -# Read Excel -df = pd.read_excel('file.xlsx') # Default: first sheet -all_sheets = pd.read_excel('file.xlsx', sheet_name=None) # All sheets as dict - -# Analyze -df.head() # Preview data -df.info() # Column info -df.describe() # Statistics - -# Write Excel -df.to_excel('output.xlsx', index=False) -``` - -## Excel File Workflows - -## CRITICAL: Use Formulas, Not Hardcoded Values - -**Always use Excel formulas instead of calculating values in Python and hardcoding them.** This ensures the spreadsheet remains dynamic and updateable. - -### ❌ WRONG - Hardcoding Calculated Values -```python -# Bad: Calculating in Python and hardcoding result -total = df['Sales'].sum() -sheet['B10'] = total # Hardcodes 5000 - -# Bad: Computing growth rate in Python -growth = (df.iloc[-1]['Revenue'] - df.iloc[0]['Revenue']) / df.iloc[0]['Revenue'] -sheet['C5'] = growth # Hardcodes 0.15 - -# Bad: Python calculation for average -avg = sum(values) / len(values) -sheet['D20'] = avg # Hardcodes 42.5 -``` - -### ✅ CORRECT - Using Excel Formulas -```python -# Good: Let Excel calculate the sum -sheet['B10'] = '=SUM(B2:B9)' - -# Good: Growth rate as Excel formula -sheet['C5'] = '=(C4-C2)/C2' - -# Good: Average using Excel function -sheet['D20'] = '=AVERAGE(D2:D19)' -``` - -This applies to ALL calculations - totals, percentages, ratios, differences, etc. The spreadsheet should be able to recalculate when source data changes. - -## Common Workflow -1. **Choose tool**: pandas for data, openpyxl for formulas/formatting -2. **Create/Load**: Create new workbook or load existing file -3. 
**Modify**: Add/edit data, formulas, and formatting -4. **Save**: Write to file -5. **Recalculate formulas (MANDATORY IF USING FORMULAS)**: Use the scripts/recalc.py script - ```bash - python scripts/recalc.py output.xlsx - ``` -6. **Verify and fix any errors**: - - The script returns JSON with error details - - If `status` is `errors_found`, check `error_summary` for specific error types and locations - - Fix the identified errors and recalculate again - - Common errors to fix: - - `#REF!`: Invalid cell references - - `#DIV/0!`: Division by zero - - `#VALUE!`: Wrong data type in formula - - `#NAME?`: Unrecognized formula name - -### Creating new Excel files - -```python -# Using openpyxl for formulas and formatting -from openpyxl import Workbook -from openpyxl.styles import Font, PatternFill, Alignment - -wb = Workbook() -sheet = wb.active - -# Add data -sheet['A1'] = 'Hello' -sheet['B1'] = 'World' -sheet.append(['Row', 'of', 'data']) - -# Add formula -sheet['B2'] = '=SUM(A1:A10)' - -# Formatting -sheet['A1'].font = Font(bold=True, color='FF0000') -sheet['A1'].fill = PatternFill('solid', start_color='FFFF00') -sheet['A1'].alignment = Alignment(horizontal='center') - -# Column width -sheet.column_dimensions['A'].width = 20 - -wb.save('output.xlsx') -``` - -### Editing existing Excel files - -```python -# Using openpyxl to preserve formulas and formatting -from openpyxl import load_workbook - -# Load existing file -wb = load_workbook('existing.xlsx') -sheet = wb.active # or wb['SheetName'] for specific sheet - -# Working with multiple sheets -for sheet_name in wb.sheetnames: - sheet = wb[sheet_name] - print(f"Sheet: {sheet_name}") - -# Modify cells -sheet['A1'] = 'New Value' -sheet.insert_rows(2) # Insert row at position 2 -sheet.delete_cols(3) # Delete column 3 - -# Add new sheet -new_sheet = wb.create_sheet('NewSheet') -new_sheet['A1'] = 'Data' - -wb.save('modified.xlsx') -``` - -## Recalculating formulas - -Excel files created or modified by openpyxl contain 
formulas as strings but not calculated values. Use the provided `scripts/recalc.py` script to recalculate formulas: - -```bash -python scripts/recalc.py <excel_file> [timeout_seconds] -``` - -Example: -```bash -python scripts/recalc.py output.xlsx 30 -``` - -The script: -- Automatically sets up LibreOffice macro on first run -- Recalculates all formulas in all sheets -- Scans ALL cells for Excel errors (#REF!, #DIV/0!, etc.) -- Returns JSON with detailed error locations and counts -- Works on both Linux and macOS - -## Formula Verification Checklist - -Quick checks to ensure formulas work correctly: - -### Essential Verification -- [ ] **Test 2-3 sample references**: Verify they pull correct values before building full model -- [ ] **Column mapping**: Confirm Excel columns match (e.g., column 64 = BL, not BK) -- [ ] **Row offset**: Remember Excel rows are 1-indexed (DataFrame row 5 = Excel row 6) - -### Common Pitfalls -- [ ] **NaN handling**: Check for null values with `pd.notna()` -- [ ] **Far-right columns**: FY data often in columns 50+ -- [ ] **Multiple matches**: Search all occurrences, not just first -- [ ] **Division by zero**: Check denominators before using `/` in formulas (#DIV/0!) -- [ ] **Wrong references**: Verify all cell references point to intended cells (#REF!)
-- [ ] **Cross-sheet references**: Use correct format (Sheet1!A1) for linking sheets - -### Formula Testing Strategy -- [ ] **Start small**: Test formulas on 2-3 cells before applying broadly -- [ ] **Verify dependencies**: Check all cells referenced in formulas exist -- [ ] **Test edge cases**: Include zero, negative, and very large values - -### Interpreting scripts/recalc.py Output -The script returns JSON with error details: -```json -{ - "status": "success", // or "errors_found" - "total_errors": 0, // Total error count - "total_formulas": 42, // Number of formulas in file - "error_summary": { // Only present if errors found - "#REF!": { - "count": 2, - "locations": ["Sheet1!B5", "Sheet1!C10"] - } - } -} -``` - -## Best Practices - -### Library Selection -- **pandas**: Best for data analysis, bulk operations, and simple data export -- **openpyxl**: Best for complex formatting, formulas, and Excel-specific features - -### Working with openpyxl -- Cell indices are 1-based (row=1, column=1 refers to cell A1) -- Use `data_only=True` to read calculated values: `load_workbook('file.xlsx', data_only=True)` -- **Warning**: If opened with `data_only=True` and saved, formulas are replaced with values and permanently lost -- For large files: Use `read_only=True` for reading or `write_only=True` for writing -- Formulas are preserved but not evaluated - use scripts/recalc.py to update values - -### Working with pandas -- Specify data types to avoid inference issues: `pd.read_excel('file.xlsx', dtype={'id': str})` -- For large files, read specific columns: `pd.read_excel('file.xlsx', usecols=['A', 'C', 'E'])` -- Handle dates properly: `pd.read_excel('file.xlsx', parse_dates=['date_column'])` - -## Code Style Guidelines -**IMPORTANT**: When generating Python code for Excel operations: -- Write minimal, concise Python code without unnecessary comments -- Avoid verbose variable names and redundant operations -- Avoid unnecessary print statements - -**For Excel files 
themselves**: -- Add comments to cells with complex formulas or important assumptions -- Document data sources for hardcoded values -- Include notes for key calculations and model sections diff --git a/packages/cli/src/commands/install.ts b/packages/cli/src/commands/install.ts index f3c95ca2..aabf1fa8 100644 --- a/packages/cli/src/commands/install.ts +++ b/packages/cli/src/commands/install.ts @@ -57,16 +57,50 @@ async function installSkill(id: string, force: boolean) { await ensureSkillsDir(); const target = skillInstallPath(id); if (existsSync(target) && !force) return abortExists(target); - await mkdir(target, { recursive: true }); - for (const name of ["manifest.json", "SKILL.md"]) { - const content = await loadFile(`catalog/skills/${id}/${name}`); - await writeFile(path.join(target, name), content); + + const manifestRaw = await loadFile(`catalog/skills/${id}/manifest.json`); + // forgekit does not vendor bodies for externally-sourced skills — it links to + // them. When the body isn't in the catalog, fetch it from the skill's upstream + // source on demand (so `install` still works without re-hosting a copy). + let body = await tryLoadFile(`catalog/skills/${id}/SKILL.md`); + if (body === null) { + const manifest = JSON.parse(manifestRaw) as { + source?: { repo?: string; ref?: string; path?: string }; + homepage?: string; + }; + const url = rawSourceUrl(manifest.source); + if (!url) { + console.error(kleur.red(`✗ '${id}' is an external skill with no resolvable source`)); + if (manifest.homepage) console.error(kleur.dim(` get it from: ${manifest.homepage}`)); + process.exit(1); + } + const res = await fetch(url); + if (!res.ok) { + console.error(kleur.red(`✗ could not fetch '${id}' from source (GET ${url} → ${res.status})`)); + console.error(kleur.dim(` get it from: ${manifest.homepage ?? 
manifest.source?.repo}`)); process.exit(1); } + body = await res.text(); + console.log(kleur.dim(` ↗ external skill — fetched from source: ${url}`)); } + + await mkdir(target, { recursive: true }); + await writeFile(path.join(target, "manifest.json"), manifestRaw); + await writeFile(path.join(target, "SKILL.md"), body); const refContent = await tryLoadFile(`catalog/skills/${id}/REFERENCE.md`); if (refContent) await writeFile(path.join(target, "REFERENCE.md"), refContent); success("skill", id, target, "Claude Code picks this up on next session start."); } +/** Build the raw.githubusercontent.com URL for source.path at source.ref (falling back to "main" when ref is missing or empty) in a github.com repository. Returns null when repo or path is missing, or when repo is not an http(s) github.com owner/name URL (an optional ".git" suffix and a trailing slash are accepted). NOTE(review): ref and path are interpolated without URL-encoding - confirm manifests never carry characters that need escaping. */ +function rawSourceUrl(source?: { repo?: string; ref?: string; path?: string }): string | null { + if (!source?.repo || !source.path) return null; + const ref = source.ref || "main"; + const gh = source.repo.match(/^https?:\/\/github\.com\/([^/]+)\/([^/]+?)(?:\.git)?\/?$/); + if (gh) return `https://raw.githubusercontent.com/${gh[1]}/${gh[2]}/${ref}/${source.path}`; + return null; +} + async function installAgent(id: string, force: boolean) { await ensureAgentsDir(); const target = agentInstallPath(id);