diff --git a/evaluations/canhelp.json b/evaluations/canhelp.json new file mode 100644 index 0000000..613c535 --- /dev/null +++ b/evaluations/canhelp.json @@ -0,0 +1,116 @@ +{ + "skill": "canhelp", + "description": "Evaluation cases for the canhelp skill. Tests whether agents correctly resolve canister names/IDs, fetch Candid interfaces, and present structured summaries grouped by query vs update methods.", + + "output_evals": [ + { + "name": "Lookup by canister ID", + "prompt": "What can canister ryjl3-tyaaa-aaaaa-aaaba-cai do?", + "expected_behaviors": [ + "Runs resolve-canister-id.sh with the provided principal", + "Runs fetch-candid.sh with the resolved canister ID", + "Reads the downloaded .did file", + "Groups methods into Query and Update sections", + "Sorts methods alphabetically within each group", + "Lists key custom types (records, variants) defined in the interface" + ] + }, + { + "name": "Lookup by human-readable name", + "prompt": "Show me the interface for the NNS governance canister", + "expected_behaviors": [ + "Runs resolve-canister-id.sh with the name (not a hardcoded canister ID)", + "Displays the resolved canister ID before proceeding", + "Runs fetch-candid.sh with the resolved ID", + "Presents a structured summary with Query and Update method groups" + ] + }, + { + "name": "Ambiguous name with multiple results", + "prompt": "What does the SNS canister do?", + "expected_behaviors": [ + "Runs resolve-canister-id.sh with the name", + "When multiple results are returned, presents the list to the user", + "Asks the user to pick one before continuing", + "Does NOT arbitrarily pick one and proceed without asking" + ] + }, + { + "name": "Large interface summarization", + "prompt": "Show me what the NNS governance canister can do", + "expected_behaviors": [ + "Resolves 'NNS governance' to a canister ID", + "Fetches and reads the Candid interface", + "Groups methods by query vs update", + "Sorts methods alphabetically within each group", + "Shows method 
signatures with argument types and return types", + "Lists key types like Neuron, ProposalInfo, ManageNeuronRequest" + ] + }, + { + "name": "Output format structure", + "prompt": "Tell me about canister rrkah-fqaaa-aaaaa-aaaaq-cai", + "expected_behaviors": [ + "Starts the summary with 'Canister '", + "Has a Query methods section", + "Has an Update methods section", + "Has a Types section listing custom record/variant types", + "Method signatures include argument names/types and return types", + "Includes brief one-line descriptions where inferable from method names" + ] + }, + { + "name": "Empty Candid interface", + "prompt": "What methods does canister uf6dk-hyaaa-aaaaq-qaaaq-cai expose?", + "expected_behaviors": [ + "Runs fetch-candid.sh with the canister ID", + "If the Candid interface is empty, explains that the canister is not exposing its Candid interface in the wasm metadata", + "Does NOT fabricate or hallucinate methods" + ] + }, + { + "name": "Adversarial: local canister", + "prompt": "Show me the interface for my local canister bkyz2-fmaaa-aaaaa-qaaaq-cai", + "expected_behaviors": [ + "Attempts to resolve and fetch the canister", + "If the fetch fails, suggests verifying the canister ID and that icp is installed", + "Does NOT hallucinate a Candid interface" + ] + }, + { + "name": "Adversarial: invalid input", + "prompt": "Show me the canister interface for notavalidid", + "expected_behaviors": [ + "Runs resolve-canister-id.sh with the input", + "If no results are found, clearly communicates the failure", + "Does NOT fabricate a canister ID or interface" + ] + } + ], + + "trigger_evals": { + "description": "Queries to test whether the skill activates correctly. 
'should_trigger' queries should cause the skill to load; 'should_not_trigger' queries should NOT activate this skill.", + "should_trigger": [ + "What can canister ryjl3-tyaaa-aaaaa-aaaba-cai do?", + "Show me the interface for the NNS ledger", + "What methods does the ICP ledger canister expose?", + "canhelp ckBTC minter", + "Describe the API of canister rrkah-fqaaa-aaaaa-aaaaq-cai", + "What's the Candid interface for the cycles minting canister?", + "How do I call the Internet Identity canister? What methods are available?", + "List the methods on the SNS governance canister" + ], + "should_not_trigger": [ + "Deploy my canister to mainnet", + "How do I write a Candid interface file?", + "Set up a new ICP project with Rust", + "How does inter-canister communication work?", + "Explain how the NNS governance works", + "Write a Motoko function that transfers ICP", + "How do I upgrade my canister without losing state?", + "What is the Internet Computer?", + "Add ICRC-1 support to my token canister", + "How do I test my canister locally?" + ] + } +} \ No newline at end of file diff --git a/skills/canhelp/SKILL.md b/skills/canhelp/SKILL.md new file mode 100644 index 0000000..c655ee8 --- /dev/null +++ b/skills/canhelp/SKILL.md @@ -0,0 +1,54 @@ +--- +name: canhelp +description: Display a human-readable summary of a canister's interface given its mainnet canister ID or a human-readable name. Like --help but for canisters. Only for mainnet canisters — for local canisters, read the generated .did file in your project directly. +license: Apache-2.0 +compatibility: "icp-cli >= 0.1.0" +allowed-tools: Bash(./scripts/resolve-canister-id.sh *), Bash(./scripts/fetch-candid.sh *), Read, Grep, Glob +argument-hint: +metadata: + title: Canister Help + category: Infrastructure +--- + +Given a canister ID or name in `$ARGUMENTS`, fetch and summarize its Candid interface. + +## Steps + +1. 
Resolve the canister ID by running the resolve script from the skill's base directory: + ```bash + ./scripts/resolve-canister-id.sh "$ARGUMENTS" + ``` + If `$ARGUMENTS` is already a valid principal, the script echoes it back. + Otherwise, it queries the IC Dashboard API and outputs matches as `<canister-id> <name>` (one per line). + - If there is a single result, clearly display the resolved canister ID and use it directly. + - If there are multiple results, present the list to the user and ask them to pick one before continuing. + +2. Fetch the Candid interface using the resolved canister ID: + ```bash + ./scripts/fetch-candid.sh <canister-id> + ``` + The script outputs the path to the downloaded `.did` file. + +3. Read the file using the `Read` tool. + +4. Present the output as a readable summary with the following structure: + + **Canister `<canister-id>`** + + **Query methods:** + - `method_name(arg1: type1, arg2: type2) → return_type` — one-line description if inferable from the name + + **Update methods:** + - `method_name(arg1: type1) → return_type` + + **Types:** + - List any custom record/variant types defined in the interface, with their fields + +## Guidelines + +- Group methods by query vs update +- Sort methods alphabetically within each group +- For complex nested types, show the top-level structure and note nesting +- If the Candid is very large (>100 methods), show a summary count and list only the most important-looking methods, offering to show the full list on request +- If the fetch succeeds but the Candid interface is empty, explain that the canister is not exposing its Candid interface in the wasm metadata +- If the fetch fails, suggest the user verify the canister ID and that `icp` is installed diff --git a/skills/canhelp/scripts/fetch-candid.sh b/skills/canhelp/scripts/fetch-candid.sh new file mode 100755 index 0000000..435b26d --- /dev/null +++ b/skills/canhelp/scripts/fetch-candid.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -euo pipefail + +if ! 
command -v icp &>/dev/null; then
+  echo "Error: 'icp' CLI not found. Install it from https://dfinity.github.io/icp-cli" >&2
+  exit 1
+fi
+
+CANISTER_ID="${1:?Usage: fetch-candid.sh <canister-id>}"
+OUT="/tmp/candid_${CANISTER_ID}.did"
+
+icp canister metadata "$CANISTER_ID" candid:service --network ic > "$OUT"
+echo "$OUT"
diff --git a/skills/canhelp/scripts/resolve-canister-id.sh b/skills/canhelp/scripts/resolve-canister-id.sh
new file mode 100755
index 0000000..2661f73
--- /dev/null
+++ b/skills/canhelp/scripts/resolve-canister-id.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# Resolve a canister principal or human-readable name to mainnet canister ID(s).
+#
+# Usage:  resolve-canister-id.sh <canister-id-or-name>
+# Output: the input itself when it is already a principal; otherwise one
+#         "<canister-id> <name>" line per match from the IC Dashboard API.
+# Exits non-zero (message on stderr) if the lookup fails or nothing matches.
+set -euo pipefail
+
+INPUT="${1:?Usage: resolve-canister-id.sh <canister-id-or-name>}"
+
+# Principal: Base32(CRC32 · blob) grouped into 5-char chunks separated by dashes.
+# Each group is exactly 5 lowercase base32 chars (a-z, 2-7), except the last which is 1-5.
+# Max 63 chars (29-byte blob → 53 base32 chars + 10 dashes). Must have at least 2 groups.
+# The regex cannot express the overall length cap, so it is checked separately.
+if (( ${#INPUT} <= 63 )) && [[ "$INPUT" =~ ^[a-z2-7]{5}(-[a-z2-7]{5})*(-[a-z2-7]{1,5})$ ]]; then
+  echo "$INPUT"
+  exit 0
+fi
+
+# Otherwise, query IC Dashboard API for name-based lookup.
+# The name is percent-encoded so spaces, '&', '#', etc. cannot break the query string.
+QUERY=$(python3 -c "import urllib.parse, sys; print(urllib.parse.quote(sys.argv[1]))" "$INPUT")
+# -S keeps curl quiet on success but prints the reason on failure; --max-time avoids hanging forever.
+if ! RESPONSE=$(curl -fsS --max-time 30 "https://ic-api.internetcomputer.org/api/v4/canisters?format=json&has_name=true&query=${QUERY}&limit=50"); then
+  echo "Error: failed to query the IC Dashboard API" >&2
+  exit 1
+fi
+
+# The search term is passed as argv rather than interpolated into the Python source:
+# a name containing quotes or backslashes must never be parsed as code.
+python3 -c '
+import json, sys
+
+name = sys.argv[1]
+try:
+    data = json.load(sys.stdin)
+except ValueError as exc:
+    print("Error: IC Dashboard API returned invalid JSON: {}".format(exc), file=sys.stderr)
+    sys.exit(1)
+entries = data.get("data", [])
+if not entries:
+    print("Error: no canister found matching \"{}\"".format(name), file=sys.stderr)
+    sys.exit(1)
+for e in entries:
+    print(e["canister_id"], e.get("name", "N/A"))
+' "$INPUT" <<< "$RESPONSE"