diff --git a/.cursor/agents/consent-triage-inspector.md b/.cursor/agents/consent-triage-inspector.md new file mode 100644 index 00000000..c123b951 --- /dev/null +++ b/.cursor/agents/consent-triage-inspector.md @@ -0,0 +1,39 @@ +--- +name: consent-triage-inspector +description: >- + Inspects how trackers load on a live website using Chrome DevTools MCP. + Checks ad infrastructure, consent state, and airgap classification. +model: fast +readonly: false +is_background: true +--- + +You are a web inspector specialist. Your job is to analyze how trackers +load on live websites using Chrome DevTools MCP tools. + +## Setup + +Fetch the **`consent-inspect-site`** MCP prompt from the `transcend-consent` +server for your full investigation methodology. It contains: + +- URL override parameters for debug mode +- Consent state verification steps +- JS evaluation snippets for performance entries, HTML search, ad + infrastructure, window globals, and airgap classification +- Output format for tracker findings and site summary + +Follow every step in the prompt. Use Chrome DevTools MCP tools (`navigate`, +`evaluate`) to execute the JS snippets on the live site. + +## Input + +You will receive: + +- A target site URL with regime override parameters +- A list of tracker domains to look for + +## Important + +The bundle name (e.g. "acme-platform") may be a platform provider, not +the site with actual trackers. If the primary domain is a corporate landing +page without ad trackers, find a real client site from links on the homepage. diff --git a/.cursor/agents/consent-triage-researcher.md b/.cursor/agents/consent-triage-researcher.md new file mode 100644 index 00000000..da446097 --- /dev/null +++ b/.cursor/agents/consent-triage-researcher.md @@ -0,0 +1,38 @@ +--- +name: consent-triage-researcher +description: >- + Researches trackers, cookies, and data flows for consent classification. 
+ Identifies companies, fetches privacy policies, checks CMP databases, + and determines the correct consent purpose. +model: fast +readonly: false +is_background: true +--- + +You are a privacy research specialist. Your job is to research trackers and +data flows to determine their correct consent classification. + +## Setup + +Fetch the **`consent-research-tracker`** MCP prompt from the `transcend-consent` +server for your full research methodology. It contains: + +- Company identification steps +- Privacy policy lookup guidance +- CMP database URLs (CookieDatabase.org, Ghostery, etc.) +- Essential vs non-essential determination criteria +- Junk indicators and confidence levels +- Output JSON format + +Follow every step in the prompt for each tracker you're assigned. + +## Input + +You will receive: + +- A table of trackers/data flows with columns: + id, domain/name, type, auto-service, auto-purposes, occurrences +- The customer's available tracking purposes (from `consent_list_purposes`) +- Which purposes are actively used in which regimes + +Only recommend purposes from the provided list. diff --git a/.cursor/skills/consent-triage/SKILL.md b/.cursor/skills/consent-triage/SKILL.md new file mode 100644 index 00000000..ff662f73 --- /dev/null +++ b/.cursor/skills/consent-triage/SKILL.md @@ -0,0 +1,27 @@ +--- +name: consent-triage +description: >- + Triage cookies and data flows in Transcend Consent Manager. Use when the user + mentions cookie triage, data flow triage, consent triage, classify trackers, + or consent manager cleanup. +--- + +# Consent Triage + +Invoke the **`consent-triage`** MCP prompt for the full workflow. It covers +setup, batch fetching, research, review, classification push, and looping. + +For Phase 3 (research), spawn subagents in parallel via the Task tool: + +- **consent-triage-researcher** agents (2+): split items into groups of 3-5. + Pass each group as a table with the customer's available purposes. 
+- **consent-triage-inspector** agent (1): provide all tracker domains + the + site URL with regime override parameters. + +Additional MCP prompts for subagent methodology: + +- **`consent-research-tracker`** -- research methodology for a single tracker +- **`consent-inspect-site`** -- live site investigation via browser DevTools + +MCP resources (live from docs.transcend.io) provide reference material on +tracking purposes, triage workflow, debugging, and telemetry. diff --git a/packages/mcp/mcp-server-consent/src/cli.ts b/packages/mcp/mcp-server-consent/src/cli.ts index 6ec914fe..cb99e9c3 100644 --- a/packages/mcp/mcp-server-consent/src/cli.ts +++ b/packages/mcp/mcp-server-consent/src/cli.ts @@ -1,10 +1,14 @@ #!/usr/bin/env node import { createMCPServer } from '@transcend-io/mcp-server-core'; +import { getConsentPrompts } from './prompts/index.js'; +import { getConsentResources } from './resources/index.js'; import { getConsentTools } from './tools/index.js'; createMCPServer({ name: 'transcend-mcp-consent', version: '1.0.0', getTools: getConsentTools, + getPrompts: getConsentPrompts, + getResources: getConsentResources, }); diff --git a/packages/mcp/mcp-server-consent/src/index.ts b/packages/mcp/mcp-server-consent/src/index.ts index bfbc85e8..f2e391dc 100644 --- a/packages/mcp/mcp-server-consent/src/index.ts +++ b/packages/mcp/mcp-server-consent/src/index.ts @@ -1,4 +1,6 @@ export { getConsentTools } from './tools/index.js'; +export { getConsentPrompts } from './prompts/index.js'; +export { getConsentResources } from './resources/index.js'; export { resolveAirgapBundleId } from './resolveAirgapBundleId.js'; export { GetPreferencesSchema, type GetPreferencesInput } from './tools/consent_get_preferences.js'; diff --git a/packages/mcp/mcp-server-consent/src/prompts/consent_inspect_site.ts b/packages/mcp/mcp-server-consent/src/prompts/consent_inspect_site.ts new file mode 100644 index 00000000..f093bbbf --- /dev/null +++ 
b/packages/mcp/mcp-server-consent/src/prompts/consent_inspect_site.ts @@ -0,0 +1,287 @@ +import type { PromptDefinition } from '@transcend-io/mcp-server-core'; + +export const consentInspectSitePrompt: PromptDefinition = { + name: 'consent-inspect-site', + description: + 'Live site investigation methodology for consent triage using browser DevTools. ' + + 'Covers regime overrides, consent verification, performance entries, HTML search, ' + + 'ad infrastructure checks, and airgap classification queries.', + arguments: [ + { + name: 'site_url', + description: 'The site to investigate (e.g. "https://example.com")', + required: true, + }, + { + name: 'tracker_domains', + description: + 'Comma-separated tracker domains to look for (e.g. "doubleclick.net,google-analytics.com")', + required: true, + }, + { + name: 'regime', + description: + 'The most permissive regime name for URL override (e.g. "us"). ' + + 'Choose the regime with fewest opted-out purposes so trackers fire.', + required: false, + }, + ], + handler: (args) => { + const siteUrl = args.site_url || '(not specified)'; + const trackerDomains = args.tracker_domains || '(not specified)'; + const regime = args.regime || 'us'; + const domainList = trackerDomains + .split(',') + .map((d) => d.trim()) + .filter(Boolean); + const domainArrayLiteral = JSON.stringify(domainList); + + return [ + { + role: 'user', + content: { + type: 'text', + text: + `Investigate how these trackers load on ${siteUrl}: ${trackerDomains}. ` + + `Use regime "${regime}" for debug overrides.`, + }, + }, + { + role: 'assistant', + content: { + type: 'text', + text: `## Live Site Investigation + +### Important: Platform vs Client Sites + +The bundle name (e.g. "acme-platform") may be a platform provider, not the actual +site with trackers. If the main domain is a corporate page without ad trackers, find a +real client site from links on the homepage and use that instead. 
+ +### Step 1: Navigate with Debug Overrides + +Load the page with hash parameters to control consent behavior: + +\`\`\` +${siteUrl}#tcm-regime=${regime}&tcm-prompt=Hidden&log=* +\`\`\` + +| Parameter | Purpose | +|-----------|---------| +| \`tcm-regime=${regime}\` | Force the most permissive privacy regime | +| \`tcm-prompt=Hidden\` | Suppress the consent banner | +| \`log=*\` | Enable verbose airgap debug logging | + +Reference: https://docs.transcend.io/docs/articles/consent-management/reference/debugging-and-testing + +### Step 2: Verify Consent State + +\`\`\`javascript +(() => { + if (!window.airgap) return 'airgap not loaded'; + return JSON.stringify({ + regimes: Array.from(airgap.getRegimes()), + purposes: airgap.getConsent().purposes, + regimePurposes: Array.from(airgap.getRegimePurposes()), + }, null, 2); +})() +\`\`\` + +All purposes should be \`true\` or \`"Auto"\`. If not, opt in manually: + +\`\`\`javascript +(() => { + airgap.optIn(Object.fromEntries( + Array.from(airgap.getRegimePurposes()).map(p => [p, true]) + )); + return JSON.stringify(airgap.getConsent().purposes); +})() +\`\`\` + +### Step 3: Check Performance Entries for Tracker Domains + +\`\`\`javascript +(() => { + const domains = ${domainArrayLiteral}; + const entries = performance.getEntriesByType('resource'); + const results = {}; + for (const d of domains) { + results[d] = entries.filter(e => e.name.includes(d)).map(e => ({ + url: e.name, + initiator: e.initiatorType, + duration: Math.round(e.duration), + size: e.transferSize, + })); + } + return JSON.stringify(results, null, 2); +})() +\`\`\` + +### Step 4: Search Page HTML + +\`\`\`javascript +(() => { + const terms = ${domainArrayLiteral}; + const html = document.documentElement.outerHTML; + const results = {}; + for (const term of terms) { + const matches = []; + let i = 0; + while ((i = html.indexOf(term, i)) !== -1) { + matches.push(html.substring(Math.max(0, i - 100), Math.min(html.length, i + 100))); + i += term.length; + if (matches.length > 3) break; + } +
results[term] = { count: matches.length, samples: matches }; + } + return JSON.stringify(results, null, 2); +})() +\`\`\` + +### Step 5: Identify Ad Infrastructure + +\`\`\`javascript +(() => { + const scripts = Array.from(document.querySelectorAll('script[src]')).map(s => s.src); + // Non-exhaustive list of common ad tech scripts; look for any third-party ad scripts beyond these + const adScripts = scripts.filter(s => + s.includes('prebid') || s.includes('gpt.js') || s.includes('googletag') || + s.includes('taboola') || s.includes('criteo') || s.includes('amazon-adsystem') || + s.includes('adsbygoogle') || s.includes('doubleclick') + ); + const adDivs = Array.from(document.querySelectorAll( + '[data-prebid], [data-ad], [data-ad-slot], [data-ad-unit], [id*="ad-slot"], [id*="ad-unit"], [class*="ad-container"]' + )); + const adSlots = adDivs.map(d => ({ + tag: d.tagName, id: d.id, class: d.className?.substring(0, 60), + dataSizes: d.getAttribute('data-sizes'), + dataPrebid: d.getAttribute('data-prebid'), + dataTargeting: d.getAttribute('data-targeting'), + })); + const iframes = Array.from(document.querySelectorAll('iframe')); + const adIframes = iframes.filter(f => f.title?.includes('ad') || f.id?.includes('ad')); + return JSON.stringify({ + adScripts, + adSlotCount: adSlots.length, + adSlotSamples: adSlots.slice(0, 5), + adIframes: adIframes.map(f => ({ + id: f.id, src: f.src?.substring(0, 150), title: f.title, + })), + }, null, 2); +})() +\`\`\` + +### Step 6: Check Inline Initialization Scripts + +\`\`\`javascript +(() => { + const scripts = Array.from(document.querySelectorAll('script:not([src])')); + const adInline = scripts.filter(s => + s.textContent.includes('prebid') || s.textContent.includes('googletag') || + s.textContent.includes('adsbygoogle') || s.textContent.includes('criteo') || + s.textContent.includes('taboola') + ); + return JSON.stringify(adInline.map(s => ({ + parent: s.parentElement?.tagName, + preview: s.textContent.substring(0, 500), + })), 
null, 2); +})() +\`\`\` + +### Step 7: Check Window Globals and Ad Config + +\`\`\`javascript +(() => { + const knownAdGlobals = ['pbjs', 'googletag', '__tcfapi', '__gpp', '__cmp', + 'adsbygoogle', '_taboola', 'criteo_q', 'apstag']; + const adGlobals = Object.keys(window).filter(k => + knownAdGlobals.some(g => k.toLowerCase().includes(g.toLowerCase())) + ); + const configs = {}; + for (const g of adGlobals) { + try { + const val = window[g]; + if (val && typeof val === 'object') { + configs[g] = JSON.stringify(val).substring(0, 500); + } + } catch {} + } + return JSON.stringify({ adGlobals, configs }, null, 2); +})() +\`\`\` + +### Step 8: Check Airgap Classification Per Tracker + +\`\`\`javascript +(async () => { + if (!window.airgap) return 'airgap not loaded'; + const domains = ${domainArrayLiteral}; + const results = {}; + for (const d of domains) { + try { + const purposes = await airgap.getPurposes('https://' + d + '/'); + const allowed = await airgap.isAllowed('https://' + d + '/'); + results[d] = { purposes: Array.from(purposes ?? []), allowed }; + } catch (e) { results[d] = { error: e.message }; } + } + return JSON.stringify(results, null, 2); +})() +\`\`\` + +### Step 9: Read Console Logs + +Read the browser console output. The \`log=*\` override makes airgap emit detailed +allow/block decisions for every request, including purpose lookups. Search these logs +for each tracker domain to see how airgap classifies and handles it.
+ +## Useful Console Commands Reference + +| Command | Purpose | +|---------|---------| +| \`airgap.getConsent().purposes\` | Current consent state per purpose | +| \`airgap.getRegimes()\` | Active regime(s) for this session | +| \`airgap.getRegimePurposes()\` | Purposes regulated under current regime | +| \`await airgap.getPurposes('{url}')\` | What purposes a URL is classified under | +| \`await airgap.isAllowed('{url}')\` | Whether a URL is currently allowed | +| \`await airgap.isCookieAllowed({name:'{name}'})\` | Whether a cookie is allowed | +| \`await airgap.getCookiePurposes({name:'{name}'})\` | Cookie's assigned purposes | +| \`airgap.export().requests\` | Quarantined requests | +| \`airgap.export().cookies\` | Quarantined cookies | +| \`airgap.version\` | Current airgap version | + +## Output Format + +For each tracker return: + +\`\`\`json +{ + "domain": "", + "found_on_page": true, + "loading_method": "direct_script|tag_manager|iframe|dynamic|not_found", + "loaded_by": "", + "in_main_document": true, + "airgap_purposes": ["Advertising"], + "airgap_allowed": true, + "ad_infrastructure": " GPT>", + "related_config": "", + "notes": "" +} +\`\`\` + +Also return a site summary: + +\`\`\`json +{ + "site_investigated": "", + "ad_stack": " Google Publisher Tags>", + "consent_manager": "Transcend CMP", + "total_ad_slots": "", + "total_scripts": "", + "total_iframes": "" +} +\`\`\``, + }, + }, + ]; + }, +}; diff --git a/packages/mcp/mcp-server-consent/src/prompts/consent_research_tracker.ts b/packages/mcp/mcp-server-consent/src/prompts/consent_research_tracker.ts new file mode 100644 index 00000000..459e6f6f --- /dev/null +++ b/packages/mcp/mcp-server-consent/src/prompts/consent_research_tracker.ts @@ -0,0 +1,147 @@ +import type { PromptDefinition } from '@transcend-io/mcp-server-core'; + +export const consentResearchTrackerPrompt: PromptDefinition = { + name: 'consent-research-tracker', + description: + 'Research methodology for classifying cookies and data 
flows. ' + + 'Covers company identification, privacy policy lookup, CMP database checks, ' + + 'and structured evidence gathering for consent purpose assignment.', + arguments: [ + { + name: 'domain', + description: 'The tracker domain or cookie name to research (e.g. "doubleclick.net", "_ga")', + required: true, + }, + { + name: 'type', + description: 'Whether this is a "cookie" or "data_flow" (default: "data_flow")', + required: false, + }, + { + name: 'available_purposes', + description: + "Comma-separated list of the customer's configured purposes " + + '(e.g. "Essential,Functional,Analytics,Advertising,SaleOfInfo"). ' + + 'Only recommend purposes from this list.', + required: false, + }, + ], + handler: (args) => { + const domain = args.domain || '(not specified)'; + const type = args.type || 'data_flow'; + const purposes = args.available_purposes || '(fetch from consent_list_purposes)'; + + return [ + { + role: 'user', + content: { + type: 'text', + text: `Research the ${type} "${domain}" to determine its consent classification. Available purposes: ${purposes}`, + }, + }, + { + role: 'assistant', + content: { + type: 'text', + text: `## Research Methodology + +For each tracker or cookie, follow these steps in order: + +### Step 1: Company Identification + +Search the root domain (strip subdomains for broader matches) to find the operating company. +Check for recent acquisitions or rebrands — ad tech companies frequently change ownership. + +### Step 2: First-Party Privacy Docs + +Find and read the company's privacy policy and/or cookie policy. Look for: +- How they classify their own tracking +- What data they collect +- Stated purposes for data processing +- Data retention periods + +### Step 3: Service Description + +Understand the business model: +- Ad tech (DSP, SSP, ad exchange, header bidding)? +- Analytics (pageview counters, session recording, A/B testing)? +- CMP (consent management platform)? 
+- CDN / performance (content delivery, image optimization)? +- Functional (chat, support, preferences, authentication)? +- Data broker (selling/sharing data with third parties)? + +### Step 4: CMP Database Lookups + +Search these databases for existing classifications: + +| Database | URL | Use For | +|----------|-----|---------| +| CookieDatabase.org | https://cookiedatabase.org/ | Cookie name lookup | +| better.fyi trackers | https://better.fyi/trackers/ | Domain-to-company lookup | +| Ghostery TrackerDB | https://www.ghostery.com/trackerdb | Tracker classification | +| Cookiepedia | https://cookiepedia.co.uk/ | Cookie purpose database | +| BuiltWith | https://builtwith.com/ | Site technology stack | +| urlscan.io | https://urlscan.io/ | Domain/infrastructure analysis | + +### Step 5: Third-Party Cookie Policies + +Find other companies' published cookie policies that classify this same tracker/service. +Multiple independent classifications strengthen confidence. + +### Step 6: Essential vs Non-Essential Determination + +Based on all evidence: +- Would the site break without this tracker? (Essential) +- Is it required for core functionality like auth, security, or the CMP itself? (Essential) +- Does it enhance features without being required? (Functional) +- Does it measure usage or behavior? (Analytics) +- Does it serve, target, or retarget ads? (Advertising) +- Is data sold or shared with third parties for their own use? (SaleOfInfo) + +Items can have multiple purposes (e.g. ["Advertising", "Analytics"] for an ad platform +that also tracks impressions). + +IMPORTANT: Only recommend purposes from the customer's configured list. If research +suggests a purpose that doesn't exist for this customer, flag it and suggest the closest +available match. 
+ +## Junk Indicators + +Mark as JUNK (not a real tracker to classify) if: +- From a browser extension (Grammarly, LastPass, ad blockers injecting scripts) +- Malware or unwanted injection not placed by the site operator +- A development/testing artifact (localhost, staging URLs) +- A subdomain variant of an already-approved regex rule + +## Confidence Levels + +- **High**: First-party docs confirm, OR multiple CMPs agree, OR well-known tracker +- **Medium**: Some evidence but no definitive first-party documentation +- **Low**: No docs found, best-guess only — flag for manual review + +## Output Format + +Return a structured finding for each item: + +\`\`\`json +{ + "domain": "", + "company_name": "", + "company_description": "", + "service_url": "", + "specific_product": "", + "recommended_purposes": ["Advertising"], + "confidence": "High", + "is_junk": false, + "evidence_summary": "<2-3 sentence summary with key facts>", + "sources": ["", ""], + "suggested_description": "", + "first_party_privacy_url": "", + "other_cmps_classify_as": "" +} +\`\`\``, + }, + }, + ]; + }, +}; diff --git a/packages/mcp/mcp-server-consent/src/prompts/consent_triage.ts b/packages/mcp/mcp-server-consent/src/prompts/consent_triage.ts new file mode 100644 index 00000000..48d7ca4a --- /dev/null +++ b/packages/mcp/mcp-server-consent/src/prompts/consent_triage.ts @@ -0,0 +1,151 @@ +import type { PromptDefinition } from '@transcend-io/mcp-server-core'; + +export const consentTriagePrompt: PromptDefinition = { + name: 'consent-triage', + description: + 'Systematically triage cookies and data flows discovered by Transcend consent telemetry. 
' + + 'Walks through setup, batch fetching, research, review, and classification push.', + arguments: [ + { + name: 'triage_type', + description: 'What to triage: "cookies", "data_flows", or "both" (default: "both")', + required: false, + }, + { + name: 'batch_size', + description: 'Number of items per batch (default: 10)', + required: false, + }, + ], + handler: (args) => { + const triageType = args.triage_type || 'both'; + const batchSize = args.batch_size || '10'; + + return [ + { + role: 'user', + content: { + type: 'text', + text: `Triage ${triageType === 'both' ? 'cookies and data flows' : triageType} in batches of ${batchSize}, sorted by highest traffic first.`, + }, + }, + { + role: 'assistant', + content: { + type: 'text', + text: `I'll walk through the consent triage workflow. Here's how it works: + +## Phase 1: Setup + +Gather the customer's consent configuration by calling these tools in parallel: + +1. \`consent_list_airgap_bundles\` — get the consent manager info (bundle ID is auto-resolved) +2. \`consent_get_triage_stats\` — backlog overview +3. \`consent_list_purposes\` — the customer's configured tracking purposes +4. \`consent_list_regimes\` — consent experiences with regions, purposes, and opt-out defaults + +CRITICAL: Each customer configures their own purposes. Do NOT assume defaults exist. Only use purposes returned by \`consent_list_purposes\` for classification. 
+ +From the regimes data, determine: +- Which purposes can be opted out of per experience +- Which purposes default to opted-out +- The most permissive regime (fewest opted-out purposes) — needed for live site investigation + +Present the customer's setup: + +| Purpose | Slug | Used in Regimes | +|---------|------|-----------------| +| (from API) | (from API) | (cross-ref with regimes) | + +Present triage stats: + +| Metric | Cookies | Data Flows | +|--------|---------|------------| +| Needs Review | X | Y | +| Live (Approved) | X | Y | +| Junk | X | Y | + +## Phase 2: Fetch Batch + +Fetch the next batch of items needing review, sorted by highest traffic: + +${[ + triageType === 'cookies' || triageType === 'both' + ? '- `consent_list_cookies { status: "NEEDS_REVIEW", limit: ' + + batchSize + + ', order_field: "occurrences", order_direction: "DESC" }`' + : '', + triageType === 'data_flows' || triageType === 'both' + ? '- `consent_list_data_flows { status: "NEEDS_REVIEW", limit: ' + + batchSize + + ', order_field: "occurrences", order_direction: "DESC" }`' + : '', +] + .filter(Boolean) + .join('\n')} + +Present in this table format: + +| # | Name/Domain | Type | Service | Auto-Purposes | Occurrences | Sites | First Seen | +|---|-------------|------|---------|---------------|-------------|-------|------------| + +## Phase 3: Research + +For each item in the batch, research its purpose using web search and CMP databases. +Use the \`consent-research-tracker\` prompt for detailed research methodology. +If browser/DevTools access is available, use the \`consent-inspect-site\` prompt for live site investigation. + +Split items into parallel research groups of 3–5 items each for efficiency. 
+ +## Phase 4: Present Findings + +For each researched item, present: + +### {name/domain} +| Field | Value | +|-------|-------| +| Type | Cookie / Data Flow (HOST/REGEX) | +| Domain | \`example.com\` | +| Service | Service Name (or "Unknown") | +| Current Purposes | What Transcend auto-classified (if any) | +| Recommended Purpose | Research-based recommendation | +| Confidence | High / Medium / Low | +| How Loaded | Direct script / Tag manager / iframe / Dynamic | +| Occurrences | N | +| Evidence | Brief summary + source URLs | +| Recommended Action | APPROVE with purposes / JUNK / NEEDS MANUAL REVIEW | +| Suggested Note | Description to save to Transcend | + +Then show a summary action table: + +| # | Name/Domain | Action | Purposes | Service | Note | +|---|-------------|--------|----------|---------|------| + +Ask the user to confirm, modify, or reject each recommendation before proceeding. + +## Phase 5: Push Classifications + +For confirmed items, update Transcend: + +- Individual updates with notes: \`consent_update_data_flows\` / \`consent_update_cookies\` with id, tracking_purposes, description, service, status: "LIVE" +- Bulk approve/junk: \`consent_bulk_triage\` with items array containing type, id, action, tracking_purposes +- Mark junk items with action "JUNK" (no purposes needed) + +After pushing, report what was updated and show the remaining triage count. + +## Phase 6: Loop + +Ask the user if they want to continue with the next batch. Repeat from Phase 2. 
+ +## Key References + +- Triage guide: https://docs.transcend.io/docs/articles/consent-management/configuration/triage-cookies-and-dataflows-guide +- Data flows & cookies: https://docs.transcend.io/docs/articles/consent-management/concepts/data-flows-and-cookies +- Tracking purposes: https://docs.transcend.io/docs/articles/consent-management/concepts/tracking-purposes +- Regional experiences: https://docs.transcend.io/docs/articles/consent-management/configuration/regional-experiences +- Telemetry overview: https://docs.transcend.io/docs/articles/consent-management/configuration/telemetry-overview`, + }, + }, + ]; + }, +}; diff --git a/packages/mcp/mcp-server-consent/src/prompts/index.ts b/packages/mcp/mcp-server-consent/src/prompts/index.ts new file mode 100644 index 00000000..6695415f --- /dev/null +++ b/packages/mcp/mcp-server-consent/src/prompts/index.ts @@ -0,0 +1,14 @@ +import type { PromptDefinition, ToolClients } from '@transcend-io/mcp-server-core'; + +import { consentInspectSitePrompt } from './consent_inspect_site.js'; +import { consentResearchTrackerPrompt } from './consent_research_tracker.js'; +import { consentTriagePrompt } from './consent_triage.js'; + +/** + * Returns all consent prompt definitions. + * The clients arg is accepted for API consistency with getTools, + * but prompts don't currently need API access (they return static guidance). 
+ */ +export function getConsentPrompts(_clients: ToolClients): PromptDefinition[] { + return [consentTriagePrompt, consentResearchTrackerPrompt, consentInspectSitePrompt]; +} diff --git a/packages/mcp/mcp-server-consent/src/resources/docs_resource.ts b/packages/mcp/mcp-server-consent/src/resources/docs_resource.ts new file mode 100644 index 00000000..f1cace8d --- /dev/null +++ b/packages/mcp/mcp-server-consent/src/resources/docs_resource.ts @@ -0,0 +1,89 @@ +import type { ResourceDefinition } from '@transcend-io/mcp-server-core'; + +const FETCH_TIMEOUT_MS = 8_000; + +/** + * Creates a ResourceDefinition that fetches content from a Transcend docs page. + * Uses the actual docs URL as the resource URI so MCP clients can open it directly. + * Falls back to a static description if the fetch fails (offline, timeout, etc.) or yields no readable content. + */ +export function createDocsResource(options: { + /** Full URL of the docs page (becomes the resource URI) */ + url: string; + /** Human-readable name shown in resources/list */ + name: string; + /** Short description of what this docs page covers */ + description: string; + /** Static markdown returned when the live fetch fails */ + fallback: string; +}): ResourceDefinition { + return { + uri: options.url, + name: options.name, + description: options.description, + mimeType: 'text/markdown', + handler: async () => { + try { + const response = await fetch(options.url, { + headers: { Accept: 'text/markdown, text/plain, text/html' }, + signal: AbortSignal.timeout(FETCH_TIMEOUT_MS), + }); + if (!response.ok) { + return withSourceLink(options.fallback, options.url); + } + const text = await response.text(); + const markdown = extractMarkdown(text); + return withSourceLink(markdown || options.fallback, options.url); // empty extraction: serve the fallback + } catch { + return withSourceLink(options.fallback, options.url); + } + }, + }; +} + +function withSourceLink(content: string, url: string): string { + return `${content}\n\n---\n*Source: ${url}*\n`; +} + +/** + * Extracts readable markdown from a docs page
response. + * Mintlify docs return HTML by default; we strip tags for a usable text version. + * If the content is already markdown-like (starts with # or has no HTML), return as-is. + */ +function extractMarkdown(raw: string): string { + const trimmed = raw.trim(); + if (!trimmed.includes('/gi, '') + .replace(//gi, '') + .replace(//gi, '') + .replace(//gi, '') + .replace(//gi, '') + .replace(//gi, '\n') + .replace(/<\/p>/gi, '\n\n') + .replace(/<\/h[1-6]>/gi, '\n\n') + .replace(/<\/li>/gi, '\n') + .replace(/<\/tr>/gi, '\n') + .replace(/<[^>]+>/g, '') + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/ /g, ' ') + .replace(/\n{3,}/g, '\n\n') + .trim(); + + const lines = text.split('\n'); + const contentStart = lines.findIndex( + (l) => l.trim().length > 20 && !l.includes('Skip to') && !l.includes('Search...'), + ); + if (contentStart > 0) { + text = lines.slice(contentStart).join('\n').trim(); + } + + return text; +} diff --git a/packages/mcp/mcp-server-consent/src/resources/index.ts b/packages/mcp/mcp-server-consent/src/resources/index.ts new file mode 100644 index 00000000..7b00a47b --- /dev/null +++ b/packages/mcp/mcp-server-consent/src/resources/index.ts @@ -0,0 +1,98 @@ +import type { ResourceDefinition, ToolClients } from '@transcend-io/mcp-server-core'; + +import { createDocsResource } from './docs_resource.js'; + +const DOCS_BASE = 'https://docs.transcend.io/docs/articles/consent-management'; + +/** + * Returns all consent resource definitions. + * Each resource fetches live content from docs.transcend.io with a static fallback. 
+ */ +export function getConsentResources(_clients: ToolClients): ResourceDefinition[] { + return [ + createDocsResource({ + url: `${DOCS_BASE}/concepts/tracking-purposes`, + name: 'Tracking Purposes', + description: + 'List of all available tracking purposes for consent management — ' + + 'Essential, Functional, Advertising, Analytics, Sale/Sharing.', + fallback: TRACKING_PURPOSES_FALLBACK, + }), + createDocsResource({ + url: `${DOCS_BASE}/configuration/triage-cookies-and-dataflows-guide`, + name: 'Triage Guide', + description: + 'How to triage and classify data flows and cookies from telemetry — ' + + 'researching, classifying, regex rules, junk handling, and approval workflow.', + fallback: TRIAGE_GUIDE_FALLBACK, + }), + createDocsResource({ + url: `${DOCS_BASE}/concepts/data-flows-and-cookies`, + name: 'Data Flows & Cookies', + description: + 'Overview of how Transcend discovers and regulates data flows and cookies ' + + 'via the airgap.js consent manager script.', + fallback: DATA_FLOWS_FALLBACK, + }), + createDocsResource({ + url: `${DOCS_BASE}/reference/debugging-and-testing`, + name: 'Debugging & Testing', + description: + 'URL override parameters (tcm-regime, tcm-prompt, log), console commands, ' + + 'and testing methodology for consent manager debugging.', + fallback: DEBUGGING_FALLBACK, + }), + createDocsResource({ + url: `${DOCS_BASE}/configuration/telemetry-overview`, + name: 'Telemetry Overview', + description: + 'How consent telemetry discovers trackers on your site — ' + + 'collection methods, data flow types, and cookie detection.', + fallback: TELEMETRY_FALLBACK, + }), + ]; +} + +const TRACKING_PURPOSES_FALLBACK = `# Tracking Purposes + +| Purpose | Description | +|---------|-------------| +| Essential | No consent required — essential site functionality and flows that don't transmit user data | +| Functional | Non-essential but helpful — support chat, error logging, preferences | +| Advertising | Data flows that collect or share data for 
marketing or advertising | +| Analytics | Data flows that collect or share information for analytics purposes | +| Sale/Sharing of Personal Information | Data sold or shared with third parties for cross-context behavioral advertising | + +Data flows can have multiple tracking purposes.`; + +const TRIAGE_GUIDE_FALLBACK = `# Guide to Triaging Data Flows & Cookies + +1. Review auto-classified data flows (confirm service and purpose) +2. Research unclassified flows (check cookie policies, CookieDatabase.org, better.fyi) +3. Create regex rules for recurring cookies (e.g. _ga{{UUID}}) +4. Mark browser extension / malware injections as junk +5. Approve classified flows to add them to the airgap.js bundle`; + +const DATA_FLOWS_FALLBACK = `# Data Flows & Cookies + +Data flows are network requests made by your site that Transcend discovers via telemetry. +Cookies are browser cookies set by scripts on your site. +Both are regulated by the airgap.js consent manager based on assigned tracking purposes.`; + +const DEBUGGING_FALLBACK = `# Debugging & Testing + +URL override parameters: +- \`#tcm-regime={name}\` — force a specific privacy regime +- \`#tcm-prompt=Hidden\` — suppress the consent banner +- \`#log=*\` — enable verbose airgap debug logging + +Console commands: +- \`airgap.getConsent().purposes\` — current consent state +- \`airgap.getRegimes()\` — active regimes +- \`await airgap.getPurposes('{url}')\` — URL classification +- \`await airgap.isAllowed('{url}')\` — whether a URL is allowed`; + +const TELEMETRY_FALLBACK = `# Telemetry Overview + +Consent telemetry discovers trackers by monitoring network requests and cookies +on your site. 
Discovered items appear in the Triage view for classification.`; diff --git a/packages/mcp/mcp-server-core/src/index.ts b/packages/mcp/mcp-server-core/src/index.ts index 0ce8d67d..b4c35bf8 100644 --- a/packages/mcp/mcp-server-core/src/index.ts +++ b/packages/mcp/mcp-server-core/src/index.ts @@ -11,6 +11,15 @@ export { EmptySchema, PaginationSchema } from './validation/schemas.js'; export type { ToolAnnotations, ToolDefinition, ToolClients } from './tools/types.js'; export { defineTool } from './tools/types.js'; +export type { + PromptDefinition, + PromptMessage, + PromptMessageContent, + PromptArgument, +} from './prompts/types.js'; + +export type { ResourceDefinition } from './resources/types.js'; + export { createToolResult, createErrorResult, createListResult, groupBy } from './tools/helpers.js'; export { createMCPServer } from './server/create-server.js'; diff --git a/packages/mcp/mcp-server-core/src/prompts/types.ts b/packages/mcp/mcp-server-core/src/prompts/types.ts new file mode 100644 index 00000000..19d2e467 --- /dev/null +++ b/packages/mcp/mcp-server-core/src/prompts/types.ts @@ -0,0 +1,47 @@ +/** + * Content block within a prompt message. Text-only for now; + * the MCP spec also supports image/audio/resource but we don't need those yet. + */ +export interface PromptMessageContent { + /** Content type */ + type: 'text'; + /** Text body */ + text: string; +} + +/** Single message in a prompt's output sequence. */ +export interface PromptMessage { + /** Whose turn this message represents */ + role: 'user' | 'assistant'; + /** Content block */ + content: PromptMessageContent; +} + +/** Declared argument a prompt accepts. */ +export interface PromptArgument { + /** Argument name (used as key in the args record) */ + name: string; + /** Human-readable description */ + description: string; + /** Whether the caller must supply this argument */ + required?: boolean; +} + +/** + * A reusable prompt template registered with the MCP server. 
+ * Prompts encode workflow guidance that any MCP client can discover + * and invoke without embedding it in tool descriptions. + */ +export interface PromptDefinition { + /** Unique prompt name (kebab-case by convention) */ + name: string; + /** Short description shown in prompts/list */ + description: string; + /** Arguments the prompt accepts */ + arguments?: PromptArgument[]; + /** + * Returns the message sequence for this prompt. + * May be async if it needs to fetch dynamic data (e.g. customer purposes). + */ + handler: (args: Record) => PromptMessage[] | Promise; +} diff --git a/packages/mcp/mcp-server-core/src/resources/types.ts b/packages/mcp/mcp-server-core/src/resources/types.ts new file mode 100644 index 00000000..218967b0 --- /dev/null +++ b/packages/mcp/mcp-server-core/src/resources/types.ts @@ -0,0 +1,17 @@ +/** + * A read-only resource registered with the MCP server. + * Resources expose reference data that clients can pull + * into context on demand via resources/read. + */ +export interface ResourceDefinition { + /** Stable URI (e.g. "consent://classification-guide") */ + uri: string; + /** Human-readable name shown in resources/list */ + name: string; + /** Description of what the resource contains */ + description: string; + /** MIME type of the content (defaults to "text/plain") */ + mimeType?: string; + /** Returns the resource text content. May be async for dynamic data. 
*/ + handler: () => string | Promise; +} diff --git a/packages/mcp/mcp-server-core/src/server/create-server.ts b/packages/mcp/mcp-server-core/src/server/create-server.ts index 27302cd9..90900eb8 100644 --- a/packages/mcp/mcp-server-core/src/server/create-server.ts +++ b/packages/mcp/mcp-server-core/src/server/create-server.ts @@ -1,10 +1,19 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import { toJsonSchemaCompat } from '@modelcontextprotocol/sdk/server/zod-json-schema-compat.js'; -import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'; +import { + CallToolRequestSchema, + GetPromptRequestSchema, + ListPromptsRequestSchema, + ListResourcesRequestSchema, + ListToolsRequestSchema, + ReadResourceRequestSchema, +} from '@modelcontextprotocol/sdk/types.js'; import { SimpleLogger } from '../clients/graphql/base.js'; import { TranscendRestClient } from '../clients/rest-client.js'; +import type { PromptDefinition } from '../prompts/types.js'; +import type { ResourceDefinition } from '../resources/types.js'; import { createErrorResult, createToolResult } from '../tools/helpers.js'; import type { ToolDefinition, ToolClients } from '../tools/types.js'; @@ -15,6 +24,10 @@ export interface MCPServerOptions { version: string; /** Factory that returns tool definitions given API clients */ getTools: (clients: ToolClients) => ToolDefinition[]; + /** Optional factory that returns prompt definitions (workflow templates) */ + getPrompts?: (clients: ToolClients) => PromptDefinition[]; + /** Optional factory that returns resource definitions (reference data) */ + getResources?: (clients: ToolClients) => ResourceDefinition[]; /** Optional custom client factory */ createClients?: (apiKey: string, sombraUrl: string, graphqlUrl: string) => ToolClients; } @@ -61,10 +74,38 @@ export async function createMCPServer(options: MCPServerOptions): 
Promise logger.info(`Registered ${toolMap.size} tools`, { toolCount: toolMap.size }); - const server = new Server( - { name: options.name, version: options.version }, - { capabilities: { tools: {} } }, - ); + const prompts = options.getPrompts?.(clients) ?? []; + const promptMap = new Map(); + for (const prompt of prompts) { + if (promptMap.has(prompt.name)) { + logger.warn(`Duplicate prompt name "${prompt.name}" — skipping`); + continue; + } + promptMap.set(prompt.name, prompt); + } + + const resources = options.getResources?.(clients) ?? []; + const resourceMap = new Map(); + for (const resource of resources) { + if (resourceMap.has(resource.uri)) { + logger.warn(`Duplicate resource URI "${resource.uri}" — skipping`); + continue; + } + resourceMap.set(resource.uri, resource); + } + + if (promptMap.size > 0) { + logger.info(`Registered ${promptMap.size} prompts`, { promptCount: promptMap.size }); + } + if (resourceMap.size > 0) { + logger.info(`Registered ${resourceMap.size} resources`, { resourceCount: resourceMap.size }); + } + + const capabilities: Record> = { tools: {} }; + if (promptMap.size > 0) capabilities.prompts = {}; + if (resourceMap.size > 0) capabilities.resources = {}; + + const server = new Server({ name: options.name, version: options.version }, { capabilities }); server.setRequestHandler(ListToolsRequestSchema, async () => { logger.debug('Listing MCP tools'); @@ -118,6 +159,57 @@ export async function createMCPServer(options: MCPServerOptions): Promise } }); + if (promptMap.size > 0) { + server.setRequestHandler(ListPromptsRequestSchema, async () => { + logger.debug('Listing MCP prompts'); + return { + prompts: Array.from(promptMap.values()).map((p) => ({ + name: p.name, + description: p.description, + arguments: p.arguments, + })), + }; + }); + + server.setRequestHandler(GetPromptRequestSchema, async (request) => { + const { name, arguments: args } = request.params; + logger.info(`Getting prompt: ${name}`); + const prompt = promptMap.get(name); 
+ if (!prompt) { + throw new Error(`Unknown prompt: ${name}`); + } + const messages = await prompt.handler(args ?? {}); + return { description: prompt.description, messages }; + }); + } + + if (resourceMap.size > 0) { + server.setRequestHandler(ListResourcesRequestSchema, async () => { + logger.debug('Listing MCP resources'); + return { + resources: Array.from(resourceMap.values()).map((r) => ({ + uri: r.uri, + name: r.name, + description: r.description, + mimeType: r.mimeType ?? 'text/plain', + })), + }; + }); + + server.setRequestHandler(ReadResourceRequestSchema, async (request) => { + const { uri } = request.params; + logger.info(`Reading resource: ${uri}`); + const resource = resourceMap.get(uri); + if (!resource) { + throw new Error(`Unknown resource: ${uri}`); + } + const text = await resource.handler(); + return { + contents: [{ uri, text, mimeType: resource.mimeType ?? 'text/plain' }], + }; + }); + } + logger.info(`Starting ${options.name} v${options.version}...`, { toolCount: toolMap.size }); const transport = new StdioServerTransport(); @@ -127,5 +219,7 @@ export async function createMCPServer(options: MCPServerOptions): Promise sombraUrl, graphqlUrl, tools: toolMap.size, + prompts: promptMap.size, + resources: resourceMap.size, }); }