From 444d99b4972cc2a281c3385460ecb0aac59d9909 Mon Sep 17 00:00:00 2001 From: Rinjani Analytics Date: Wed, 17 Jun 2026 09:22:28 +0700 Subject: [PATCH] =?UTF-8?q?feat(feeds):=20AI=20Incident=20Database=20feed?= =?UTF-8?q?=20=E2=80=94=20the=20AI-threat-landscape=20vertical?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #2 of the free-source roadmap. Ingests real-world AI harm/failure incidents from the AI Incident Database (incidentdatabase.ai) — the live "what's actually going wrong with deployed AI" signal that complements the static MITRE ATLAS technique taxonomy. Dedicated `ai_incidents` table (migration 0067), deliberately NOT atlas_case_studies: ATLAS case studies are ~30 curated incidents mapped to AML techniques; AID is ~1500 raw incidents with no technique mapping — mixing them would distort the ATLAS coverage view. AI incidents are their own domain entity, mirroring telco (network_elements/fraud_schemes) and on-chain (wallets). Source: AID's GraphQL API. It gates non-browser callers ("restricted to web browsers") but allows a same-site `Origin` + a browser `User-Agent` — the data is openly licensed for research, the gate is anti-abuse. The connector (apps/worker/src/feeds/ai-incidents.ts) pages incidents(pagination,sort) — ~8 small requests for the ~1.5k corpus — so it runs daily. The API is richer than the CSV snapshot: alleged-party relations carry both an `entity_id` slug (clean tags) and a human `name` (display). Upsert on natural key incident_id; derived `tags` (always `ai-incident` + developer/deployer slugs) so the AI vertical contributes a movers signal like IOC tags. Read route GET /v1/ai-incidents + /ai-incidents/stats (total + monthly timeline + top developers — the "incidents over time" trend). Registered as `aiid`; scheduled daily (02:15 UTC). 
Verified end-to-end against the live API + local DB: paged 1525 incidents → map → drizzle upsert (0 failed), names resolve ("OpenAI", "Google DeepMind"); idempotent; gateway tsc + api tests (15-feed registry) green. Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/api/src/__tests__/feed-registry.test.ts | 4 +- apps/api/src/queues/scheduler.ts | 13 ++ apps/api/src/routes/v1.ts | 2 + apps/api/src/routes/v1/aiIncidents.ts | 34 +++ apps/api/src/services/aiIncidentStore.ts | 69 ++++++ .../src/services/feedSync/additionalFeeds.ts | 6 + .../api/src/services/feedSync/feedRegistry.ts | 6 +- apps/worker/src/feeds/ai-incidents.ts | 221 ++++++++++++++++++ packages/db/drizzle/0067_ai_incidents.sql | 33 +++ packages/db/src/schema/aiIncidents.ts | 52 +++++ packages/db/src/schema/index.ts | 3 + 11 files changed, 441 insertions(+), 2 deletions(-) create mode 100644 apps/api/src/routes/v1/aiIncidents.ts create mode 100644 apps/api/src/services/aiIncidentStore.ts create mode 100644 apps/worker/src/feeds/ai-incidents.ts create mode 100644 packages/db/drizzle/0067_ai_incidents.sql create mode 100644 packages/db/src/schema/aiIncidents.ts diff --git a/apps/api/src/__tests__/feed-registry.test.ts b/apps/api/src/__tests__/feed-registry.test.ts index 2b59f15..75e720a 100644 --- a/apps/api/src/__tests__/feed-registry.test.ts +++ b/apps/api/src/__tests__/feed-registry.test.ts @@ -17,9 +17,11 @@ describe('Feed Registry', () => { // /breaches sync only — no paid /breachedaccount). // `ofac` joined as the free, authoritative on-chain attribution feed // (OFAC SDN sanctioned crypto addresses; dual-sinks iocs + wallets). + // `aiid` joined as the AI-threat-landscape feed (AI Incident Database; + // sinks to ai_incidents). 
const EXPECTED_FEEDS = [ 'otx', 'cisa', 'cveorg', 'nvd', 'abusessl', 'threatfox', - 'urlhaus', 'malwarebazaar', 'openphish', 'ofac', 'mitre', 'mispgalaxy', + 'urlhaus', 'malwarebazaar', 'openphish', 'ofac', 'aiid', 'mitre', 'mispgalaxy', 'epss', 'hibp', ]; diff --git a/apps/api/src/queues/scheduler.ts b/apps/api/src/queues/scheduler.ts index cc918f9..9812c0b 100644 --- a/apps/api/src/queues/scheduler.ts +++ b/apps/api/src/queues/scheduler.ts @@ -149,6 +149,19 @@ export const JOB_REGISTRY: ScheduledJobRegistration[] = [ queue: feedSyncQueue, payload: { source: 'openphish' }, }, + { + // AI Incident Database (incidentdatabase.ai). The live AI-threat + // landscape signal — real-world AI harm/failure incidents. Paged from + // the GraphQL API (~8 small requests for the ~1.5k corpus), so daily + // at 02:15 UTC is cheap. Idempotent upsert on incident_id. + key: 'aiidSync', + jobId: 'scheduled-aiid-sync', + name: 'aiid-sync', + description: 'Sync AI Incident Database (incidentdatabase.ai) incidents', + defaultCron: '15 2 * * *', + queue: feedSyncQueue, + payload: { source: 'aiid' }, + }, { // OFAC SDN sanctioned crypto addresses (US Treasury). 
The free, // authoritative on-chain attribution feed — dual-sinks to iocs diff --git a/apps/api/src/routes/v1.ts b/apps/api/src/routes/v1.ts index 1555958..954e3ea 100644 --- a/apps/api/src/routes/v1.ts +++ b/apps/api/src/routes/v1.ts @@ -16,6 +16,7 @@ import mitreRoutes from './v1/mitre'; import graphRoutes from './v1/graph'; import agentRoutes from './v1/agent'; import onchainRoutes from './v1/onchain'; +import aiIncidentsRoutes from './v1/aiIncidents'; import v1SearchRoutes from './v1/search'; import intelligenceRoutes from './v1/intelligence'; import sightingRoutes from './v1/sightings'; @@ -133,6 +134,7 @@ v1.route('/', mitreRoutes); // /techniques, /threat-actors, /malware, /tool v1.route('/', graphRoutes); // /graph/layout, /graph/neo4j/* v1.route('/', agentRoutes); // /agent/tools, /agent/tool/:name (AA.1 tool plane) v1.route('/', onchainRoutes); // /onchain/wallets (AA.6.1 follow-the-money) +v1.route('/', aiIncidentsRoutes); // /ai-incidents, /ai-incidents/stats (AI vertical) v1.route('/', v1SearchRoutes); // /search, /search/vector, /search/similar/* v1.route('/', intelligenceRoutes); // /intelligence/ioc/:value, /intelligence/cve/:cveId v1.route('/', sightingRoutes); // /iocs/:id/sightings, /sightings/recent, /sightings/stats diff --git a/apps/api/src/routes/v1/aiIncidents.ts b/apps/api/src/routes/v1/aiIncidents.ts new file mode 100644 index 0000000..1d2d566 --- /dev/null +++ b/apps/api/src/routes/v1/aiIncidents.ts @@ -0,0 +1,34 @@ +/** + * /v1/ai-incidents — the AI-threat-landscape vertical. + * + * GET /ai-incidents list (filter: q, since, limit) + * GET /ai-incidents/stats total + monthly timeline + top developers (trend) + * + * Read-only: rows are feed-ingested from the AI Incident Database + * (incidentdatabase.ai) — there is no operator write path. Mirrors /v1/onchain + * + /v1/telco. Reads open to any authenticated user. 
+ */
+
+import { Hono } from 'hono';
+import { requireAuth } from '../../middleware/auth';
+import { listAiIncidents, aiIncidentStats } from '../../services/aiIncidentStore';
+
+const router = new Hono();
+router.use('*', requireAuth);
+
+router.get('/ai-incidents/stats', async (c) => {
+  const months = Number(c.req.query('months')) || 24;
+  const stats = await aiIncidentStats(months);
+  return c.json({ success: true, data: stats });
+});
+
+router.get('/ai-incidents', async (c) => {
+  const rows = await listAiIncidents({
+    q: c.req.query('q') || undefined,
+    since: c.req.query('since') || undefined,
+    limit: Number(c.req.query('limit')) || undefined,
+  });
+  return c.json({ success: true, data: rows, count: rows.length });
+});
+
+export default router;
diff --git a/apps/api/src/services/aiIncidentStore.ts b/apps/api/src/services/aiIncidentStore.ts
new file mode 100644
index 0000000..0339e40
--- /dev/null
+++ b/apps/api/src/services/aiIncidentStore.ts
@@ -0,0 +1,69 @@
+/**
+ * AI-incident store — read-only queries over the ai_incidents table (AI vertical).
+ *
+ * Rows are feed-ingested from the AI Incident Database (see
+ * apps/worker/src/feeds/ai-incidents.ts); this is the read side the dashboard
+ * + Hunt agent consume. `aiIncidentStats()` powers the "AI incidents over
+ * time" trend — the AI-vertical analogue of the IOC landscape-shift band.
+ */
+
+import { db, desc, ilike, and, gte, sql } from '@rinjani/db';
+import { aiIncidents } from '@rinjani/db/schema';
+import type { AiIncident } from '@rinjani/db/schema';
+
+/** List incidents newest-first (undated rows last); a malformed `since` is ignored. */
+export async function listAiIncidents(filters: {
+  q?: string; // title substring, case-insensitive; `%`, `_` and `\` match literally
+  /** Filter to incidents on/after this YYYY-MM-DD. */
+  since?: string;
+  limit?: number; // truncated to an integer and clamped to 1..500; defaults to 100
+} = {}): Promise<AiIncident[]> {
+  const conds = [];
+  if (filters.q) conds.push(ilike(aiIncidents.title, `%${filters.q.replace(/[\\%_]/g, '\\$&')}%`));
+  if (filters.since && /^\d{4}-\d{2}-\d{2}$/.test(filters.since)) {
+    conds.push(gte(aiIncidents.incidentDate, filters.since));
+  }
+  return db
+    .select().from(aiIncidents)
+    .where(conds.length ? and(...conds) : undefined)
+    .orderBy(sql`${aiIncidents.incidentDate} DESC NULLS LAST`, desc(aiIncidents.incidentId))
+    .limit(Math.min(Math.max(Math.trunc(filters.limit ?? 100) || 100, 1), 500));
+}
+
+export interface AiIncidentStats {
+  total: number;
+  /** Incidents per month (YYYY-MM) over the window — the "over time" trend. */
+  timeline: Array<{ month: string; count: number }>;
+  /** Most-named developers across all incidents — the AI-vertical movers. */
+  topDevelopers: Array<{ name: string; count: number }>;
+}
+
+/** Corpus total, per-month counts over the trailing `months` window, and the 15 most-named developers. */
+export async function aiIncidentStats(months = 24): Promise<AiIncidentStats> {
+  const [{ total }] = await db.select({ total: sql<number>`count(*)::int` }).from(aiIncidents);
+
+  const windowMonths = Math.min(Math.max(Math.trunc(months) || 24, 1), 600); // keep the interval finite and positive
+  // Monthly buckets by incident_date over the trailing window.
+  const timelineRows = await db.execute(sql`
+    SELECT to_char(incident_date, 'YYYY-MM') AS month, count(*)::int AS count
+    FROM ai_incidents
+    WHERE incident_date >= (CURRENT_DATE - (${windowMonths} || ' months')::interval)
+    GROUP BY month
+    ORDER BY month
+  `) as unknown as Array<{ month: string; count: number }>;
+
+  // Top alleged developers — `developers` is a jsonb array of display names, unnested here.
+  const devRows = await db.execute(sql`
+    SELECT developer AS name, count(*)::int AS count
+    FROM ai_incidents, jsonb_array_elements_text(developers) AS developer
+    GROUP BY developer
+    ORDER BY count DESC
+    LIMIT 15
+  `) as unknown as Array<{ name: string; count: number }>;
+
+  return {
+    total: Number(total) || 0,
+    timeline: timelineRows.map((r) => ({ month: r.month, count: Number(r.count) })),
+    topDevelopers: devRows.map((r) => ({ name: r.name, count: Number(r.count) })),
+  };
+}
diff --git a/apps/api/src/services/feedSync/additionalFeeds.ts b/apps/api/src/services/feedSync/additionalFeeds.ts
index 261db60..f503f7f 100644
--- a/apps/api/src/services/feedSync/additionalFeeds.ts
+++ b/apps/api/src/services/feedSync/additionalFeeds.ts
@@ -68,6 +68,12 @@ export async function syncOFACFeed(): Promise {
   return normalise(await syncOFAC());
 }
 
+export async function syncAIIncidentsFeed(): Promise {
+  // @ts-ignore — worker scripts outside rootDir, resolved at runtime
+  const { syncAIIncidents } = await import('../../../../worker/src/feeds/ai-incidents');
+  return normalise(await syncAIIncidents());
+}
+
 export async function syncMITREFeed(): Promise {
   try {
     // @ts-ignore
diff --git a/apps/api/src/services/feedSync/feedRegistry.ts b/apps/api/src/services/feedSync/feedRegistry.ts
index 1a7e51f..84574b9 100644
--- a/apps/api/src/services/feedSync/feedRegistry.ts
+++ b/apps/api/src/services/feedSync/feedRegistry.ts
@@ -20,7 +20,7 @@ import { syncCveOrgFeed } from './cveOrgSync';
 import {
   syncAbuseSSLFeed, syncThreatFoxFeed, syncURLhausFeed, syncMalwareBazaarFeed,
   syncOpenPhishFeed, syncMITREFeed, syncMISPGalaxyFeed,
-  syncEPSSFeed, syncOFACFeed,
+  syncEPSSFeed, syncOFACFeed, syncAIIncidentsFeed,
 } from './additionalFeeds';
 import { syncHibpBreaches } from './hibpSync';
 import { FeedManifest as FeedManifestSchema } from '@rinjani/feed-engine';
@@ -51,6 +51,10 @@ const FEED_REGISTRY: Record = {
   // on-chain attribution source.
Dual-sinks to iocs (tag `sanctioned`, // surfaces in Landscape shift) + wallets (entityType `sanctioned`). ofac: () => syncOFACFeed(), + // AI Incident Database — real-world AI harm/failure incidents + // (incidentdatabase.ai). The live AI-threat-landscape signal; sinks to + // the dedicated ai_incidents table. + aiid: () => syncAIIncidentsFeed(), mitre: () => syncMITREFeed(), mispgalaxy: () => syncMISPGalaxyFeed(), // EPSS — FIRST.org's daily exploit-prediction score. Pairs with the diff --git a/apps/worker/src/feeds/ai-incidents.ts b/apps/worker/src/feeds/ai-incidents.ts new file mode 100644 index 0000000..31f5a62 --- /dev/null +++ b/apps/worker/src/feeds/ai-incidents.ts @@ -0,0 +1,221 @@ +/** + * AI Incident Database — real-world AI harm/failure incidents + * (https://incidentdatabase.ai) + * + * The live "AI threat landscape" signal: ~1500 curated incidents of deployed + * AI systems causing or nearly causing harm, growing weekly. Complements the + * static MITRE ATLAS technique taxonomy — ATLAS says *how* AI gets attacked, + * AID shows *what actually went wrong* in the field, dated. + * + * ── Source: the GraphQL API ── + * AID's GraphQL endpoint gates non-browser callers ("restricted to web + * browsers"): it requires an `Origin` matching the site plus a browser + * `User-Agent`. With those two headers a server-side caller is allowed (the + * data is openly licensed for research/reuse — the gate is anti-abuse, not a + * licence wall). We page through `incidents(pagination, sort)` and upsert. + * + * The API is far richer than the CSV snapshot: the alleged-party relations + * carry both an `entity_id` slug (clean tags) and a human `name` (display), + * and reports come with `report_number`s. Lightweight enough to run daily. + * + * Sink: the dedicated `ai_incidents` table (NOT atlas_case_studies — see the + * schema header). Upsert on the natural key `incident_id`. 
+ */
+
+import { db, sql } from '@rinjani/db';
+import { aiIncidents } from '@rinjani/db/schema';
+import type { NewAiIncident } from '@rinjani/db/schema';
+import { createLogger } from '../lib/logger';
+
+const log = createLogger('AIID');
+
+const AIID_GRAPHQL_URL = process.env.AIID_GRAPHQL_URL ?? 'https://incidentdatabase.ai/api/graphql';
+// The API requires a same-site Origin + a browser UA, or it returns
+// "Forbidden — restricted to web browsers". Overridable in case AID changes
+// the allowed origin.
+const AIID_ORIGIN = process.env.AIID_ORIGIN ?? 'https://incidentdatabase.ai';
+const AIID_USER_AGENT = process.env.AIID_USER_AGENT
+  ?? 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36';
+const PAGE_SIZE = 200;
+const BATCH_SIZE = 200;
+
+interface SyncResult { processed: number; failed: number; errors: string[] }
+
+interface Entity { entity_id?: string | null; name?: string | null }
+interface Report { report_number?: number | null }
+interface IncidentNode {
+  incident_id: number;
+  title?: string | null;
+  date?: string | null;
+  description?: string | null;
+  AllegedDeveloperOfAISystem?: Entity[] | null;
+  AllegedDeployerOfAISystem?: Entity[] | null;
+  AllegedHarmedOrNearlyHarmedParties?: Entity[] | null;
+  reports?: Report[] | null;
+}
+
+const INCIDENTS_QUERY = `
+query Incidents($limit: Int!, $skip: Int!) {
+  incidents(pagination: { limit: $limit, skip: $skip }, sort: { incident_id: ASC }) {
+    incident_id
+    title
+    date
+    description
+    AllegedDeveloperOfAISystem { entity_id name }
+    AllegedDeployerOfAISystem { entity_id name }
+    AllegedHarmedOrNearlyHarmedParties { entity_id name }
+    reports { report_number }
+  }
+}`;
+
+/** Fetch one page of incidents; throws on a non-2xx status, a GraphQL error, or a 30 s timeout. */
+async function fetchPage(skip: number): Promise<IncidentNode[]> {
+  const res = await fetch(AIID_GRAPHQL_URL, {
+    method: 'POST',
+    signal: AbortSignal.timeout(30_000), // a stalled request must not hang the scheduled job
+    headers: {
+      'Content-Type': 'application/json',
+      'Origin': AIID_ORIGIN,
+      'User-Agent': AIID_USER_AGENT,
+    },
+    body: JSON.stringify({ query: INCIDENTS_QUERY, variables: { limit: PAGE_SIZE, skip } }),
+  });
+  if (!res.ok) throw new Error(`GraphQL HTTP ${res.status} ${res.statusText}`);
+  const body = await res.json() as { data?: { incidents?: IncidentNode[] }; errors?: Array<{ message: string }> };
+  if (body.errors?.length) throw new Error(`GraphQL error: ${body.errors[0].message}`);
+  return body.data?.incidents ?? [];
+}
+
+/** Page through every incident (sorted ascending) until a short page ends it. */
+async function fetchAllIncidents(): Promise<IncidentNode[]> {
+  const all: IncidentNode[] = [];
+  for (let skip = 0; ; skip += PAGE_SIZE) {
+    const page = await fetchPage(skip);
+    all.push(...page);
+    if (page.length < PAGE_SIZE) break;
+    if (skip > 100_000) break; // hard safety cap; the corpus is ~1.5k
+  }
+  return all;
+}
+
+/** Trimmed, non-empty display names (`names`) / entity_id slugs (`slugs`) of an alleged-party list. */
+const names = (es: Entity[] | null | undefined): string[] =>
+  (es ?? []).map((e) => (e.name ?? '').trim()).filter(Boolean);
+const slugs = (es: Entity[] | null | undefined): string[] =>
+  (es ?? []).map((e) => (e.entity_id ?? '').trim()).filter(Boolean);
+
+/** Fetch all incidents, map them to ai_incidents rows, upsert in batches; fetch/upsert failures land in the result. */
+export async function syncAIIncidents(): Promise<SyncResult> {
+  const result: SyncResult = { processed: 0, failed: 0, errors: [] };
+
+  let nodes: IncidentNode[];
+  try {
+    nodes = await fetchAllIncidents();
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    log.error('GraphQL fetch failed', err as Error);
+    result.errors.push(`Fetch error: ${msg}`);
+    result.failed = 1;
+    return result;
+  }
+  log.info('Fetched AI incidents', { count: nodes.length });
+
+  const mapped: NewAiIncident[] = [];
+  for (const n of nodes) {
+    const incidentId = Number(n.incident_id);
+    if (!Number.isInteger(incidentId)) continue;
+    const title = (n.title ?? '').trim();
+    if (!title) continue;
+
+    const developers = names(n.AllegedDeveloperOfAISystem);
+    const deployers = names(n.AllegedDeployerOfAISystem);
+    const harmedParties = names(n.AllegedHarmedOrNearlyHarmedParties);
+    const reportIds = (n.reports ?? [])
+      .map((r) => Number(r.report_number))
+      .filter((x) => Number.isFinite(x));
+    const incidentDate = /^\d{4}-\d{2}-\d{2}$/.test((n.date ?? '').trim())
+      ? (n.date as string).trim()
+      : null;
+
+    // Tags drive the AI-vertical movers signal — use the clean entity_id
+    // slugs (developers + deployers), always prefixed with `ai-incident`.
+    const tagSlugs = [...new Set([
+      ...slugs(n.AllegedDeveloperOfAISystem),
+      ...slugs(n.AllegedDeployerOfAISystem),
+    ])];
+
+    mapped.push({
+      incidentId,
+      title,
+      description: (n.description ?? '').trim() || null,
+      incidentDate,
+      deployers,
+      developers,
+      harmedParties,
+      reportIds,
+      reportCount: reportIds.length,
+      tags: ['ai-incident', ...tagSlugs],
+      url: `https://incidentdatabase.ai/cite/${incidentId}`,
+      source: 'aiid',
+    });
+  }
+
+  for (let i = 0; i < mapped.length; i += BATCH_SIZE) {
+    const slice = mapped.slice(i, i + BATCH_SIZE);
+    try {
+      await writeBatch(slice);
+      result.processed += slice.length;
+    } catch (err) {
+      result.failed += slice.length;
+      const msg = err instanceof Error ? err.message : String(err);
+      if (result.errors.length < 10) result.errors.push(`Batch upsert failed: ${msg}`);
+      log.error('Batch upsert error', err as Error);
+    }
+  }
+
+  log.info('Sync completed', { processed: result.processed, failed: result.failed });
+  return result;
+}
+
+/** Upsert one batch keyed on incident_id, refreshing the listed columns and bumping updated_at. */
+async function writeBatch(batch: NewAiIncident[]): Promise<void> {
+  const now = new Date();
+  await db.insert(aiIncidents)
+    .values(batch)
+    .onConflictDoUpdate({
+      target: aiIncidents.incidentId,
+      set: {
+        title: sql`excluded.title`,
+        description: sql`excluded.description`,
+        incidentDate: sql`excluded.incident_date`,
+        deployers: sql`excluded.deployers`,
+        developers: sql`excluded.developers`,
+        harmedParties: sql`excluded.harmed_parties`,
+        reportIds: sql`excluded.report_ids`,
+        reportCount: sql`excluded.report_count`,
+        tags: sql`excluded.tags`,
+        url: sql`excluded.url`,
+        updatedAt: now,
+      },
+    });
+}
+
+/** Standalone runner — `tsx apps/worker/src/feeds/ai-incidents.ts`. */
+export async function runAIIncidentsSync(): Promise<void> {
+  log.info('Starting full sync');
+  try {
+    const result = await syncAIIncidents();
+    log.info('Full sync completed', { processed: result.processed, failed: result.failed });
+  } catch (error) {
+    log.error('Sync failed', error as Error);
+  }
+}
+
+if (import.meta.url === `file://${process.argv[1]}`) {
+  runAIIncidentsSync()
+    .then(() => process.exit(0))
+    .catch((error) => {
+      console.error(error);
+      process.exit(1);
+    });
+}
diff --git a/packages/db/drizzle/0067_ai_incidents.sql b/packages/db/drizzle/0067_ai_incidents.sql
new file mode 100644
index 0000000..27617ff
--- /dev/null
+++ b/packages/db/drizzle/0067_ai_incidents.sql
@@ -0,0 +1,33 @@
+-- AI Incident Database feed (PLAN AI-vertical) — real-world AI harm/failure
+-- incidents from incidentdatabase.ai, the live "AI threat landscape" signal.
+--
+-- A DEDICATED table, deliberately NOT atlas_case_studies: MITRE ATLAS case
+-- studies are ~30 curated incidents mapped to AML techniques; AID is ~1500
+-- raw incidents with no technique mapping. Mixing them would distort the
+-- ATLAS coverage view. AI incidents are their own domain entity, mirroring
+-- telco (network_elements/fraud_schemes) and on-chain (wallets).
+--
+-- Natural key: incident_id (AID's stable integer id) — upsert target.
+-- Idempotent (IF NOT EXISTS) so it applies cleanly via db:apply or psql.
+
+CREATE TABLE IF NOT EXISTS "ai_incidents" (
+  "id" uuid PRIMARY KEY DEFAULT gen_random_uuid(),
+  "incident_id" integer NOT NULL UNIQUE, -- AID incident_id (stable)
+  "title" text NOT NULL,
+  "description" text,
+  "incident_date" date, -- YYYY-MM-DD from AID; NULL when AID gives no well-formed date
+  "deployers" jsonb NOT NULL DEFAULT '[]'::jsonb, -- alleged deployer display names
+  "developers" jsonb NOT NULL DEFAULT '[]'::jsonb, -- alleged developer display names
+  "harmed_parties" jsonb NOT NULL DEFAULT '[]'::jsonb, -- alleged harmed-party display names
+  "report_ids" jsonb NOT NULL DEFAULT '[]'::jsonb, -- linked AID report numbers
+  "report_count" integer NOT NULL DEFAULT 0, -- # of linked reports (corroboration)
+  "tags" jsonb NOT NULL DEFAULT '[]'::jsonb, -- derived: ai-incident + developer/deployer entity slugs
+  "url" varchar(512), -- https://incidentdatabase.ai/cite/<incident_id>
+  "source" varchar(32) NOT NULL DEFAULT 'aiid',
+  "created_at" timestamptz NOT NULL DEFAULT NOW(),
+  "updated_at" timestamptz NOT NULL DEFAULT NOW()
+);
+
+-- No separate incident_id index: the UNIQUE constraint above already creates one.
+CREATE INDEX IF NOT EXISTS "ai_incidents_incident_date_idx" ON "ai_incidents" ("incident_date");
+CREATE INDEX IF NOT EXISTS "ai_incidents_source_idx" ON "ai_incidents" ("source");
diff --git a/packages/db/src/schema/aiIncidents.ts b/packages/db/src/schema/aiIncidents.ts
new file mode 100644
index 0000000..9e749b7
--- /dev/null
+++ b/packages/db/src/schema/aiIncidents.ts
@@ -0,0 +1,52 @@
+/**
+ * AI Incident model — the AI-threat-landscape vertical.
+ *
+ * Migration: drizzle/0067_ai_incidents.sql
+ *
+ * Real-world AI harm/failure incidents ingested from the AI Incident Database
+ * (incidentdatabase.ai) — the live "what's actually going wrong with deployed
+ * AI" signal that complements the static MITRE ATLAS technique taxonomy.
+ *
+ * DELIBERATELY a dedicated table, NOT atlas_case_studies: ATLAS case studies
+ * are ~30 curated incidents mapped to AML techniques; AID is ~1500 raw
+ * incidents with no technique mapping. Mixing them would distort the ATLAS
+ * coverage heatmap. This mirrors the per-domain entity pattern — telco
+ * (telco.ts) and on-chain (onchain.ts) each own their tables.
+ *
+ * Natural key: `incident_id` (AID's stable integer id) — the upsert target.
+ * `tags` is derived at ingest (always includes `ai-incident` + entity slugs)
+ * so the AI vertical can contribute a trend signal the same way IOC tags do.
+ */
+
+import { pgTable, uuid, varchar, text, integer, jsonb, date, timestamp, index } from 'drizzle-orm/pg-core';
+
+export const aiIncidents = pgTable('ai_incidents', {
+  id: uuid('id').primaryKey().defaultRandom(),
+  // AID's stable integer incident id — required + unique (the upsert key).
+  incidentId: integer('incident_id').notNull().unique(),
+  title: text('title').notNull(),
+  description: text('description'),
+  // Incident date as reported by AID (YYYY-MM-DD). Real date column so the
+  // "incidents over time" trend can ORDER BY / bucket by interval.
+  incidentDate: date('incident_date'),
+  // Alleged parties as display names (e.g. "Uber", "OpenAI"); slugs go in `tags`. Claims, per AID.
+  deployers: jsonb('deployers').$type<string[]>().notNull().default([]),
+  developers: jsonb('developers').$type<string[]>().notNull().default([]),
+  harmedParties: jsonb('harmed_parties').$type<string[]>().notNull().default([]),
+  // Linked AID report numbers + a denormalized count (corroboration weight).
+  reportIds: jsonb('report_ids').$type<number[]>().notNull().default([]),
+  reportCount: integer('report_count').notNull().default(0),
+  // Derived at ingest — always includes `ai-incident` plus entity slugs.
+  tags: jsonb('tags').$type<string[]>().notNull().default([]),
+  url: varchar('url', { length: 512 }), // https://incidentdatabase.ai/cite/<incident_id>
+  source: varchar('source', { length: 32 }).notNull().default('aiid'),
+  createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
+  updatedAt: timestamp('updated_at', { withTimezone: true }).notNull().defaultNow(),
+}, (table) => ({
+  // incident_id needs no explicit index — `.unique()` above is already backed by one.
+  incidentDateIdx: index('ai_incidents_incident_date_idx').on(table.incidentDate),
+  sourceIdx: index('ai_incidents_source_idx').on(table.source),
+}));
+
+export type AiIncident = typeof aiIncidents.$inferSelect;
+export type NewAiIncident = typeof aiIncidents.$inferInsert;
diff --git a/packages/db/src/schema/index.ts b/packages/db/src/schema/index.ts
index a5c9171..2b45576 100644
--- a/packages/db/src/schema/index.ts
+++ b/packages/db/src/schema/index.ts
@@ -103,3 +103,6 @@ export * from './agentRuns';
 
 // On-chain entity model — AA.6.1 / Phase 8 (wallets; follow-the-money)
 export * from './onchain';
+
+// AI-incident model — AI vertical (incidentdatabase.ai feed)
+export * from './aiIncidents';