From be94b7e3ad656e9de84ce8ee55f68d7809540372 Mon Sep 17 00:00:00 2001 From: Nico Date: Sat, 6 Jun 2026 17:21:07 +0200 Subject: [PATCH 01/46] Feat/portable logic foundation (#386) * Add portable expression logic foundation * Format portable expression foundation changes --- generated/contracts/registry.json | 17 ++ packages/core/src/codegen/body-ts.ts | 69 +++++++++ .../src/codegen/portable-logic-primitives.ts | 46 +++++- packages/core/src/index.ts | 4 + .../core/src/ir/semantics/expression-v1.ts | 145 ++++++++++++++++++ .../core/src/ir/semantics/portable-scalar.ts | 26 ++++ .../core/src/ir/semantics/register-all.ts | 2 + packages/core/src/ir/semantics/ts-leg.ts | 7 +- packages/core/src/node-props.ts | 6 + packages/core/src/parser-core.ts | 4 + .../src/parser-validate-body-statements.ts | 3 +- packages/core/src/schema.ts | 23 +++ packages/core/src/spec.ts | 1 + .../tests/ir-semantics-expression-v1.test.ts | 116 ++++++++++++++ packages/core/tests/native-handlers.test.ts | 104 +++++++++++++ .../tests/portable-logic-primitives.test.ts | 20 ++- packages/python/src/codegen-body-python.ts | 94 +++++++++++- packages/python/src/core/expr/helpers.ts | 2 + .../python/src/ir-semantics/python-leg.ts | 7 +- .../tests/ir-semantics-python-leg.test.ts | 29 ++++ .../tests/native-handlers-python.test.ts | 113 ++++++++++++++ .../native-handlers-slice2-python.test.ts | 2 +- scripts/conformance.mjs | 17 +- 23 files changed, 841 insertions(+), 16 deletions(-) create mode 100644 packages/core/src/ir/semantics/expression-v1.ts create mode 100644 packages/core/tests/ir-semantics-expression-v1.test.ts diff --git a/generated/contracts/registry.json b/generated/contracts/registry.json index 77e33c04..67b4095c 100644 --- a/generated/contracts/registry.json +++ b/generated/contracts/registry.json @@ -100,6 +100,23 @@ } ] }, + { + "nodeType": "expression-v1", + "forbiddenRewrites": [], + "fixtureCount": 11, + "fixtureSamples": [ + { + "description": "expression-v1: number scalar", + 
"expectedCompletionKind": "normal", + "expectedEventCount": 1 + }, + { + "description": "expression-v1: truthiness basic", + "expectedCompletionKind": "normal", + "expectedEventCount": 1 + } + ] + }, { "nodeType": "fmt", "forbiddenRewrites": [ diff --git a/packages/core/src/codegen/body-ts.ts b/packages/core/src/codegen/body-ts.ts index 46f985b1..3be84e3f 100644 --- a/packages/core/src/codegen/body-ts.ts +++ b/packages/core/src/codegen/body-ts.ts @@ -51,6 +51,7 @@ import type { ExprObject, IRNode } from '../types.js'; import type { ValueIR } from '../value-ir.js'; import { emitFmtTemplate, emitIdentifier, emitTypeAnnotation } from './emitters.js'; import { emitStringKeyArray, parseKeys } from './ground-layer.js'; +import { emitParamList } from './type-system.js'; /** Slice 3e — caller-provided options, parity with the Python body emitter. * `symbolMap` is currently unused on the TS target; reserved for future @@ -154,6 +155,7 @@ export function emitNativeKernBodyTSWithImports(handlerNode: IRNode, options?: B * try/each) emit multiple lines and never receive the slot. 
*/ const TRAILING_COMMENT_TYPES = new Set([ 'let', + 'expression-v1', 'assign', 'fmt', 'clamp', @@ -190,6 +192,10 @@ function emitChildrenTS( for (const line of emitSetTS(child, ctx)) lines.push(`${indent}${line}`); } else if (child.type === 'let') { for (const line of emitLetTS(child, ctx)) lines.push(`${indent}${line}`); + } else if (child.type === 'expression-v1') { + for (const line of emitExpressionV1TS(child, ctx)) lines.push(`${indent}${line}`); + } else if (child.type === 'fn') { + for (const line of emitFnTS(child, ctx, indent)) lines.push(line); } else if (child.type === 'assign') { for (const line of emitAssignTS(child, ctx)) lines.push(`${indent}${line}`); } else if (child.type === 'destructure') { @@ -1312,3 +1318,66 @@ function emitFmtTS(node: IRNode, ctx: BodyEmitContext): string[] { if (ctx.traceHooks?.letAssign) lines.push(letAssignTraceTS(name)); return lines; } + +function emitExpressionV1TS(node: IRNode, ctx: BodyEmitContext): string[] { + const props = (node.props ?? {}) as Record; + const name = String(props.name ?? ''); + if (!name) throw new Error('body-statement `expression-v1` requires `name=`.'); + const typeAnn = props.type ? `: ${emitTypeAnnotation(String(props.type), 'unknown', node)}` : ''; + const rawExpr = props.expr; + const exprSource = unwrapBodyExpr(rawExpr); + if (exprSource === undefined || exprSource === '') { + throw new Error('body-statement `expression-v1` requires `expr=`.'); + } + const exprIR = parseExpression(exprSource); + declareLocalBinding(ctx, name, 'const'); + const lines = [`const ${name}${typeAnn} = ${emitExpression(exprIR)};`]; + if (ctx.traceHooks?.letAssign) lines.push(letAssignTraceTS(name)); + return lines; +} + +function emitFnTS(node: IRNode, ctx: BodyEmitContext, indent: string): string[] { + const props = (node.props ?? {}) as Record; + const name = String(props.name ?? 
''); + if (!name) throw new Error('body-statement `fn` requires `name=`.'); + declareLocalBinding(ctx, name, 'const'); + + const isAsync = props.async === 'true' || props.async === true; + const asyncKw = isAsync ? 'async ' : ''; + const returns = props.returns ? emitTypeAnnotation(String(props.returns), 'unknown', node) : ''; + const returnType = returns && isAsync && !/^Promise\s*</.test(returns) ? `Promise<${returns}>` : returns; + const retClause = returnType ? `: ${returnType}` : ''; + if (props.params && node.children?.some((c) => c.type === 'param')) { + throw new Error('body-statement `fn` cannot mix legacy `params=` with structured `param` children.'); + } + const paramList = emitParamList(node); + + const lines: string[] = []; + lines.push(`${indent}${asyncKw}function ${name}(${paramList})${retClause} {`); + + const handlerNode = node.children?.find((c) => c.type === 'handler'); + const bodyNodes = handlerNode ? (handlerNode.children ?? []) : (node.children ?? []); + const stmtNodes = bodyNodes.filter((c) => c.type !== 'param' && c.type !== 'decorator'); + + for (const sl of emitChildrenTS(stmtNodes, ctx, indent + INDENT_STEP, paramBindingsFromSignature(paramList))) { + lines.push(sl); + } + lines.push(`${indent}}`); + return lines; +} + +function paramBindingsFromSignature(paramList: string): Array<[string, 'const']> { + if (!paramList.trim()) return []; + return splitBodyExpressionList(paramList, 'fn params=') + .map( + (part) => + part + .split('=')[0] + ?.split(':')[0] + ?.trim() + .replace(/^\.\.\./, '') + .replace(/\?$/, '') ?? 
'', + ) + .filter((name) => /^[A-Za-z_$][\w$]*$/.test(name)) + .map((name) => [name, 'const']); +} diff --git a/packages/core/src/codegen/portable-logic-primitives.ts b/packages/core/src/codegen/portable-logic-primitives.ts index 96890a46..cde1a919 100644 --- a/packages/core/src/codegen/portable-logic-primitives.ts +++ b/packages/core/src/codegen/portable-logic-primitives.ts @@ -30,6 +30,7 @@ export type PortableLogicPrimitiveId = | 'collection.indexBy' | 'collection.countBy' | 'logic.firstTruthy' + | 'logic.coalesce' | 'time.epochMs' | 'logic.not' | 'number.clamp' @@ -42,9 +43,12 @@ export type PortableLogicPrimitiveId = | 'string.trim' | 'string.split' | 'string.replaceFirst' - | 'string.replaceAll'; + | 'string.replaceAll' + | 'logic.firstDefined' + | 'string.coerce'; export type PortableLogicTarget = 'ts' | 'python' | 'go'; export type PortableLogicSupport = 'stable' | 'preview' | 'unsupported'; +export type GoPortableLogicSupport = 'preview' | 'unsupported'; export type PortableLogicPurity = 'pure' | 'reads-time'; export type PortableLogicIntent = 'semantic-gap' | 'host-pattern' | 'language-operator'; @@ -56,7 +60,11 @@ export interface PortableLogicPrimitive { hostPatterns: readonly string[]; portabilityNotes: readonly string[]; operatorRationale?: string; - targets: Record; + targets: { + ts: PortableLogicSupport; + python: PortableLogicSupport; + go: GoPortableLogicSupport; + }; } export const PORTABLE_LOGIC_PRIMITIVES = { @@ -277,6 +285,16 @@ export const PORTABLE_LOGIC_PRIMITIVES = { operatorRationale: 'KERN firstTruthy names this common fallback operator chain as portable intent.', targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, }, + 'logic.coalesce': { + id: 'logic.coalesce', + description: 'Ordered nullish fallback selection that preserves false, zero, and empty string.', + purity: 'pure', + intent: 'language-operator', + hostPatterns: ['a ?? b ?? 
c'], + portabilityNotes: ['Uses null/None-only fallback; undefined is normalized to null only at target boundaries.'], + operatorRationale: 'KERN coalesce names the portable nullish fallback operator chain for body and route lowering.', + targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, + }, 'time.epochMs': { id: 'time.epochMs', description: 'Epoch-milliseconds extraction from a date/time value, e.g. JS new Date(x).getTime().', @@ -399,6 +417,29 @@ export const PORTABLE_LOGIC_PRIMITIVES = { portabilityNotes: ['Replacement callbacks, regex searches, and substitution-token replacements are excluded.'], targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, }, + 'logic.firstDefined': { + id: 'logic.firstDefined', + description: 'First defined (non-null/non-undefined) value selection.', + purity: 'pure', + intent: 'language-operator', + hostPatterns: ['a ?? b'], + portabilityNotes: ['Returns the first value that is not null or undefined.'], + operatorRationale: 'Names the nullish coalescing fallback intent.', + targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, + }, + 'string.coerce': { + id: 'string.coerce', + description: 'Portable scalar-to-string coercion for null, booleans, strings, and numbers.', + purity: 'pure', + intent: 'language-operator', + hostPatterns: ['String(value)', '_kern_fmt(value)'], + portabilityNotes: [ + 'Null becomes "null", booleans use lowercase spelling, strings pass through, and numbers use JS decimal text.', + ], + operatorRationale: + 'String coercion is a host operator in TS/Python; KERN documents the expression-v1 subset explicitly.', + targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, + }, } as const satisfies Record; export function validatePortableLogicPrimitiveRegistry( @@ -410,6 +451,7 @@ export function validatePortableLogicPrimitiveRegistry( } const idSegments = id.split('.').map((segment) => segment.toLowerCase()); if ( + id !== 'logic.coalesce' && idSegments.some((segment) => 
segment === 'nullish' || segment === 'coalesce' || segment === 'nullishcoalesce') ) { throw new Error(`Portable logic primitive '${id}' duplicates existing language nullish/coalesce syntax.`); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 730cf7c3..d6b7b160 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -201,6 +201,10 @@ export { // TS → .kern importer export type { ImportResult } from './importer.js'; export { escapeKernString, importTypeScript } from './importer.js'; +export { + expressionV1Contract, + registerExpressionV1Contract, +} from './ir/semantics/expression-v1.js'; export type { LowerTarget } from './ir/semantics/fixture-lowering.js'; export { lowerFixtureForTarget, serializeValue } from './ir/semantics/fixture-lowering.js'; // IR runtime semantics — executable contracts + differential harness. diff --git a/packages/core/src/ir/semantics/expression-v1.ts b/packages/core/src/ir/semantics/expression-v1.ts new file mode 100644 index 00000000..166439d1 --- /dev/null +++ b/packages/core/src/ir/semantics/expression-v1.ts @@ -0,0 +1,145 @@ +/** + * `expression-v1` runtime semantics. + */ + +import { parseExpression } from '../../parser-expression.js'; +import { type IRNode, isExprObject } from '../../types.js'; +import { type NodeContract, type NodeFixture, registerContract, type SemanticEnv } from './index.js'; +import { evalPortableValue, isPortableBindingName } from './portable-scalar.js'; +import type { Trace } from './trace.js'; + +interface ExpressionV1Props { + name?: string; + expr?: unknown; +} + +function asExpressionV1Props(ir: IRNode): ExpressionV1Props { + return (ir.props ?? 
{}) as ExpressionV1Props; +} + +function expressionSource(expr: unknown): string | undefined { + if (expr === undefined || expr === null) return undefined; + if (isExprObject(expr)) return expr.code; + return String(expr); +} + +function expressionV1Preconditions(ir: IRNode, env: SemanticEnv): boolean { + const props = asExpressionV1Props(ir); + if (!isPortableBindingName(props.name)) return false; + if (env.bindings.has(props.name)) return false; + const expr = expressionSource(props.expr); + if (!Object.hasOwn(ir.props ?? {}, 'expr') || expr === undefined || expr === '') return false; + try { + evalPortableValue(parseExpression(expr), env); + return true; + } catch { + return false; + } +} + +function expressionV1Effects(ir: IRNode, env: SemanticEnv): Trace { + const props = asExpressionV1Props(ir); + const name = props.name as string; + const expr = expressionSource(props.expr); + if (expr === undefined || expr === '') { + throw new Error('expression-v1: missing expr'); + } + const value = evalPortableValue(parseExpression(expr), env); + env.bindings.set(name, value); + return { events: [{ op: 'assign', target: name, value }], completion: { kind: 'normal' } }; +} + +function expressionV1Completion() { + return { kind: 'normal' as const }; +} + +const FIXTURES: readonly NodeFixture[] = Object.freeze([ + { + description: 'expression-v1: number scalar', + ir: { type: 'expression-v1', props: { name: 'n', expr: '42' } }, + expected: { events: [{ op: 'assign', target: 'n', value: 42 }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: string scalar', + ir: { type: 'expression-v1', props: { name: 's', expr: '"hello"' } }, + expected: { events: [{ op: 'assign', target: 's', value: 'hello' }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: boolean scalar', + ir: { type: 'expression-v1', props: { name: 'b', expr: 'true' } }, + expected: { events: [{ op: 'assign', target: 'b', value: true }], completion: { kind: 'normal' 
} }, + }, + { + description: 'expression-v1: null scalar', + ir: { type: 'expression-v1', props: { name: 'nl', expr: 'null' } }, + expected: { events: [{ op: 'assign', target: 'nl', value: null }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: equality', + ir: { type: 'expression-v1', props: { name: 'eq', expr: 'x === y' } }, + env: { + bindings: new Map([ + ['x', 1], + ['y', 1], + ]), + }, + expected: { events: [{ op: 'assign', target: 'eq', value: true }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: truthiness basic', + ir: { type: 'expression-v1', props: { name: 'truth', expr: '!x' } }, + env: { bindings: new Map([['x', '']]) }, + expected: { events: [{ op: 'assign', target: 'truth', value: true }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: template literal string coercion', + ir: { type: 'expression-v1', props: { name: 'res', expr: '`n=${n}`' } }, + env: { bindings: new Map([['n', 100]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: 'n=100' }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: String coercion constructor call', + ir: { type: 'expression-v1', props: { name: 'res', expr: 'String(n)' } }, + env: { bindings: new Map([['n', 100]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: '100' }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: String coercion canonicalizes null', + ir: { type: 'expression-v1', props: { name: 'res', expr: 'String(n)' } }, + env: { bindings: new Map([['n', null]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: 'null' }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: String coercion canonicalizes boolean', + ir: { type: 'expression-v1', props: { name: 'res', expr: 'String(flag)' } }, + env: { bindings: new Map([['flag', false]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: 'false' }], 
completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: ExprObject expression prop', + ir: { type: 'expression-v1', props: { name: 'res', expr: { __expr: true, code: 'n + 1' } } }, + env: { bindings: new Map([['n', 41]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: 42 }], completion: { kind: 'normal' } }, + }, +]); + +export const expressionV1Contract: NodeContract = { + nodeType: 'expression-v1', + preconditions: expressionV1Preconditions, + effects: expressionV1Effects, + completion: expressionV1Completion, + forbiddenRewrites: [], + fixtures: FIXTURES, +}; + +let registered = false; + +export function registerExpressionV1Contract(): void { + if (registered) return; + registerContract(expressionV1Contract); + registered = true; +} + +export function _resetExpressionV1ContractForTest(): void { + registered = false; +} diff --git a/packages/core/src/ir/semantics/portable-scalar.ts b/packages/core/src/ir/semantics/portable-scalar.ts index 7696c14d..83b099dd 100644 --- a/packages/core/src/ir/semantics/portable-scalar.ts +++ b/packages/core/src/ir/semantics/portable-scalar.ts @@ -128,11 +128,37 @@ export function evalPortableValue(node: ValueIR, env: SemanticEnv): PortableScal case 'typeAssert': case 'nonNull': return evalPortableValue(node.expression, env); + case 'tmplLit': { + let result = ''; + for (let i = 0; i < node.quasis.length; i++) { + result += node.quasis[i]; + if (i < node.expressions.length) { + const val = evalPortableValue(node.expressions[i], env); + result += coerceToString(val); + } + } + return result; + } + case 'call': { + if (node.callee.kind === 'ident' && node.callee.name === 'String') { + if (node.args.length !== 1) { + throw new Error('portable: String() expects exactly 1 argument'); + } + const val = evalPortableValue(node.args[0], env); + return coerceToString(val); + } + throw new Error(`portable: unsupported call to "${node.callee.kind === 'ident' ? 
node.callee.name : 'unknown'}"`); + } default: throw new Error(`portable: expression kind "${node.kind}" is outside the portable scalar domain`); } } +export function coerceToString(val: PortableScalar): string { + if (val === null) return 'null'; + return String(val); +} + export function evalPortableBinary(node: Extract, env: SemanticEnv): PortableScalar { if (node.op === '&&') { const left = evalPortableValue(node.left, env); diff --git a/packages/core/src/ir/semantics/register-all.ts b/packages/core/src/ir/semantics/register-all.ts index 657f3a7a..73885f7e 100644 --- a/packages/core/src/ir/semantics/register-all.ts +++ b/packages/core/src/ir/semantics/register-all.ts @@ -16,6 +16,7 @@ import { registerAssignContract } from './assign.js'; import { registerBranchContract } from './branch.js'; import { registerEachContract } from './each.js'; +import { registerExpressionV1Contract } from './expression-v1.js'; import { registerFmtContract } from './fmt.js'; import { registerForContract } from './for.js'; import { registerIfContract } from './if.js'; @@ -37,4 +38,5 @@ export function registerAllContracts(): void { registerFmtContract(); registerWhileContract(); registerTryContract(); + registerExpressionV1Contract(); } diff --git a/packages/core/src/ir/semantics/ts-leg.ts b/packages/core/src/ir/semantics/ts-leg.ts index 3fe3f5fc..c1ae122d 100644 --- a/packages/core/src/ir/semantics/ts-leg.ts +++ b/packages/core/src/ir/semantics/ts-leg.ts @@ -175,17 +175,20 @@ export async function runTsEmitterLeg(fixture: FixtureForLeg, env: SemanticEnv): } function shouldTraceLetAssign(ir: IRNode): boolean { - // `let` (declaration), `assign` (reassignment), and `fmt` (formatted binding) - // observe their binding write through the same `{op:"assign"}` trace hook. + // `let` (declaration), `expression-v1`, `assign` (reassignment), and `fmt` + // (formatted binding) observe their binding write through the same + // `{op:"assign"}` trace hook. 
// `while` fixtures opt in too: their counter setup/advance (let + assign in // body) must emit the same assign events the reference produces. const contract = ir.props?.__semanticContract; const t = ir.type; return ( t === 'let' || + t === 'expression-v1' || t === 'assign' || t === 'fmt' || contract === 'let' || + contract === 'expression-v1' || contract === 'assign' || contract === 'fmt' || contract === 'while' diff --git a/packages/core/src/node-props.ts b/packages/core/src/node-props.ts index ab8f7059..aab6a40a 100644 --- a/packages/core/src/node-props.ts +++ b/packages/core/src/node-props.ts @@ -72,6 +72,11 @@ export interface LetProps extends BaseProps { kind?: string; } +export interface ExpressionV1Props extends BaseProps { + expr?: string | ExprObject; + type?: string; +} + export interface IndexerProps extends BaseProps { keyName?: string; keyType?: string; @@ -850,6 +855,7 @@ export interface NodePropsMap { use: UseProps; from: FromProps; let: LetProps; + 'expression-v1': ExpressionV1Props; indexer: IndexerProps; overload: OverloadProps; service: ServiceProps; diff --git a/packages/core/src/parser-core.ts b/packages/core/src/parser-core.ts index 2ebe93d5..f4bae5a5 100644 --- a/packages/core/src/parser-core.ts +++ b/packages/core/src/parser-core.ts @@ -854,7 +854,9 @@ function isNativeBodyStatementChild(node: IRNode): boolean { case 'cell': case 'set': case 'comment': + case 'fn': case 'let': + case 'expression-v1': case 'assign': case 'destructure': case 'do': @@ -893,7 +895,9 @@ function isNativeBodyStatementChild(node: IRNode): boolean { function isKernHandlerBodySignal(node: IRNode): boolean { switch (node.type) { case 'cell': + case 'fn': case 'let': + case 'expression-v1': case 'assign': case 'destructure': case 'do': diff --git a/packages/core/src/parser-validate-body-statements.ts b/packages/core/src/parser-validate-body-statements.ts index fc115cdb..8286950f 100644 --- a/packages/core/src/parser-validate-body-statements.ts +++ 
b/packages/core/src/parser-validate-body-statements.ts @@ -1,6 +1,6 @@ /** @internal Native KERN body-statement context validator — slice 5b-pre. * - * Body-statement nodes (`assign`, `return`, `throw`, `do`, `continue`, `break`, `while`, `for`, `with`, + * Body-statement nodes (`expression-v1`, `assign`, `return`, `throw`, `do`, `continue`, `break`, `while`, `for`, `with`, * body-form `if`/`else`, body-form `try`) are valid only inside a * `handler lang="kern"` scope (or nested inside another body-statement * under such a handler). Without this rule, the parser silently accepts @@ -213,6 +213,7 @@ function isBodyStatementMisplaced(node: IRNode, ctx: WalkContext): boolean { // lower recursively. Every other non-native context stays rejected. return !ctx.inPortableRoute; case 'cell': + case 'expression-v1': case 'return': case 'throw': case 'continue': diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index c0890594..e10b6ae4 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -601,6 +601,15 @@ export const NODE_SCHEMAS: Record = { effects: { kind: 'string' }, }, }, + 'expression-v1': { + description: 'Expression v1 evaluation node for TS/Python parity', + example: 'expression-v1 name=res expr="a === b"', + props: { + name: { required: true, kind: 'identifier' }, + expr: { required: true, kind: 'rawExpr' }, + type: { kind: 'typeAnnotation' }, + }, + }, fmt: { description: 'Formatted string — declarative template literal. The `template` body is emitted verbatim between backticks, so `${expr}` placeholders interpolate normally. 
Three positional modes: (1) binding form `fmt name=X template=...` emits `const X = \\`...\\`;` at the current scope; (2) return form `fmt return=true template=...` emits `return \\`...\\`;` inside a `fn` body (name must be omitted); (3) inline-JSX form `fmt template=...` (no name, no return=true) appears as a direct child of `render`/`group` and emits `{\\`...\\`}` as a JSX expression — use this to replace handler-wrapped `{\\`${x} files\\`}` text inside composed renders.', @@ -654,7 +663,9 @@ export const NODE_SCHEMAS: Record = { 'catch', 'finally', 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -701,7 +712,9 @@ export const NODE_SCHEMAS: Record = { allowedChildren: [ 'handler', 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -734,7 +747,9 @@ export const NODE_SCHEMAS: Record = { props: {}, allowedChildren: [ 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'do', 'fmt', @@ -1868,7 +1883,9 @@ export const NODE_SCHEMAS: Record = { 'cell', 'set', 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -1968,7 +1985,9 @@ export const NODE_SCHEMAS: Record = { }, allowedChildren: [ 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -2007,7 +2026,9 @@ export const NODE_SCHEMAS: Record = { }, allowedChildren: [ 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -2048,7 +2069,9 @@ export const NODE_SCHEMAS: Record = { }, allowedChildren: [ 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index 46e2c875..fc777c31 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -344,6 +344,7 @@ export const NODE_TYPES = [ 'description', 'sampling', 'elicitation', + 'expression-v1', ] as const; export type IRNodeType = (typeof NODE_TYPES)[number]; diff --git a/packages/core/tests/ir-semantics-expression-v1.test.ts 
b/packages/core/tests/ir-semantics-expression-v1.test.ts new file mode 100644 index 00000000..88a1a485 --- /dev/null +++ b/packages/core/tests/ir-semantics-expression-v1.test.ts @@ -0,0 +1,116 @@ +/** + * Executable semantic contract for body-statement `expression-v1`. + * + * This contract pins the initial portable scalar expression subset used by + * TS/Python parity: null/bool/string/number scalars, scalar equality, + * truthiness, and KERN-canonical string coercion. + */ + +import { + CONTRACT_REGISTRY, + makeEnv, + ReferenceRunnerError, + referenceRun, + runDifferential, + type Verdict, +} from '../src/index.js'; +import { + _resetExpressionV1ContractForTest, + expressionV1Contract, + registerExpressionV1Contract, +} from '../src/ir/semantics/expression-v1.js'; +import { _resetPrimitivesForTest, registerPrimitives } from '../src/ir/semantics/primitives.js'; +import type { IRNode } from '../src/types.js'; + +beforeEach(() => { + CONTRACT_REGISTRY.clear(); + _resetExpressionV1ContractForTest(); + _resetPrimitivesForTest(); + registerPrimitives(); + registerExpressionV1Contract(); +}); + +afterEach(() => { + CONTRACT_REGISTRY.clear(); + _resetExpressionV1ContractForTest(); + _resetPrimitivesForTest(); +}); + +describe('expression-v1 contract — positive fixtures', () => { + it('exposes scalar, equality, truthiness, and string coercion coverage', () => { + expect(expressionV1Contract.fixtures.length).toBeGreaterThanOrEqual(10); + expect(expressionV1Contract.fixtures.map((f) => f.description)).toEqual( + expect.arrayContaining([ + expect.stringContaining('number scalar'), + expect.stringContaining('string scalar'), + expect.stringContaining('boolean scalar'), + expect.stringContaining('null scalar'), + expect.stringContaining('equality'), + expect.stringContaining('truthiness'), + expect.stringContaining('template literal string coercion'), + expect.stringContaining('canonicalizes null'), + expect.stringContaining('canonicalizes boolean'), + 
expect.stringContaining('ExprObject expression prop'), + ]), + ); + }); + + it.each( + expressionV1Contract.fixtures.map((f) => [f.description, f] as const), + )('reference fixture: %s', async (_desc, fixture) => { + const result = await runDifferential(fixture, { skipTs: true, skipPython: true }); + if (result.verdict !== 'pass') { + throw new Error( + `verdict=${result.verdict}\nfixture=${fixture.description}\nreference=${JSON.stringify( + result.reference, + null, + 2, + )}`, + ); + } + expect(result.verdict).toBe('pass'); + }); + + it.each( + expressionV1Contract.fixtures.map((f) => [f.description, f] as const), + )('TS differential fixture: %s', async (_desc, fixture) => { + const result = await runDifferential(fixture, { skipPython: true }); + if (result.verdict !== 'pass') { + throw new Error( + `verdict=${result.verdict}\n` + + `fixture=${fixture.description}\n` + + `reference=${JSON.stringify(result.reference, null, 2)}\n` + + `ts=${JSON.stringify(result.ts, null, 2)}\n` + + `legError=${JSON.stringify(result.legError, null, 2)}`, + ); + } + expect(result.verdict).toBe('pass'); + }); +}); + +describe('expression-v1 contract — preconditions reject out-of-domain IR', () => { + function mustReject(ir: IRNode, label: string, bindings: Map = new Map()): void { + expect(() => referenceRun(ir, makeEnv({ bindings }))).toThrow(ReferenceRunnerError); + expect(label.length).toBeGreaterThan(0); + } + + it('rejects missing expr', () => { + mustReject({ type: 'expression-v1', props: { name: 'x' } }, 'missing expr'); + }); + + it('rejects empty ExprObject expr', () => { + mustReject({ type: 'expression-v1', props: { name: 'x', expr: { __expr: true, code: '' } } }, 'empty expr object'); + }); + + it('rejects non-portable object literals', () => { + mustReject({ type: 'expression-v1', props: { name: 'x', expr: '{ a: 1 }' } }, 'object literal'); + }); + + it('rejects builtin-shadowing names', () => { + mustReject({ type: 'expression-v1', props: { name: 'print', expr: '"x"' } 
}, 'builtin'); + }); + + it('rejects redeclaring a current binding', () => { + mustReject({ type: 'expression-v1', props: { name: 'x', expr: '1' } }, 'redeclaration', new Map([['x', 0]])); + }); +}); diff --git a/packages/core/tests/native-handlers.test.ts b/packages/core/tests/native-handlers.test.ts index 9c7786f6..a66148cc 100644 --- a/packages/core/tests/native-handlers.test.ts +++ b/packages/core/tests/native-handlers.test.ts @@ -376,6 +376,110 @@ describe('emitNativeKernBodyTS — slice 1 statements', () => { }); }); +describe('emitNativeKernBodyTS — expression-v1 and nested fn statements', () => { + test('expression-v1 emits a typed scalar binding', () => { + const handler = makeHandler([ + { type: 'expression-v1', props: { name: 'label', type: 'string', expr: 'String(value)' } }, + { type: 'return', props: { value: 'label' } }, + ]); + expect(emitNativeKernBodyTS(handler)).toBe(['const label: string = String(value);', 'return label;'].join('\n')); + }); + + test('expression-v1 accepts ExprObject expr props', () => { + const handler = makeHandler([ + { type: 'expression-v1', props: { name: 'total', expr: { __expr: true, code: 'amount + 1' } } }, + { type: 'return', props: { value: 'total' } }, + ]); + expect(emitNativeKernBodyTS(handler)).toBe(['const total = amount + 1;', 'return total;'].join('\n')); + }); + + test('infers kern handler language from expression-v1 child', () => { + const doc = parseDocument( + [ + 'fn name=label returns=string', + ' handler', + ' expression-v1 name=label expr="String(value)"', + ' return value=label', + ].join('\n'), + ); + const handler = doc.children?.[0]?.children?.find((child) => child.type === 'handler'); + expect(handler?.props?.lang).toBe('kern'); + }); + + test('nested fn supports legacy params and returns inside body emit', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'add', params: 'a:number,b:number', returns: 'number' }, + children: [ + { type: 'handler', props: { lang: 'kern' }, 
children: [{ type: 'return', props: { value: 'a + b' } }] }, + ], + }, + { type: 'return', props: { value: 'add(2, 3)' } }, + ]); + expect(emitNativeKernBodyTS(handler)).toBe( + ['function add(a: number, b: number): number {', ' return a + b;', '}', 'return add(2, 3);'].join('\n'), + ); + }); + + test('nested fn supports structured param children', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'add', returns: 'number' }, + children: [ + { type: 'param', props: { name: 'a', type: 'number' } }, + { type: 'param', props: { name: 'b', type: 'number' } }, + { type: 'handler', props: { lang: 'kern' }, children: [{ type: 'return', props: { value: 'a + b' } }] }, + ], + }, + { type: 'return', props: { value: 'add(2, 3)' } }, + ]); + expect(emitNativeKernBodyTS(handler)).toContain('function add(a: number, b: number): number {'); + }); + + test('nested async fn preserves await expressions in body emit', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'loadTotal', params: 'amount:number', returns: 'number', async: 'true' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [ + { type: 'let', props: { name: 'loaded', value: 'await load(amount)' } }, + { type: 'return', props: { value: 'loaded + 5' } }, + ], + }, + ], + }, + ]); + expect(emitNativeKernBodyTS(handler)).toBe( + [ + 'async function loadTotal(amount: number): Promise<number> {', + ' const loaded = await load(amount);', + ' return loaded + 5;', + '}', + ].join('\n'), + ); + }); + + test('nested fn rejects mixed legacy and structured params', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'mixed', params: 'a:number' }, + children: [ + { type: 'param', props: { name: 'b', type: 'number' } }, + { type: 'handler', props: { lang: 'kern' }, children: [] }, + ], + }, + ]); + expect(() => emitNativeKernBodyTS(handler)).toThrow(/cannot mix legacy `params=`/); + }); +}); + describe('emitNativeKernBodyTS — destructure body 
statement', () => { test('emits object destructuring inside native body', () => { const handler = makeHandler([ diff --git a/packages/core/tests/portable-logic-primitives.test.ts b/packages/core/tests/portable-logic-primitives.test.ts index 48cc7962..0e102c53 100644 --- a/packages/core/tests/portable-logic-primitives.test.ts +++ b/packages/core/tests/portable-logic-primitives.test.ts @@ -36,6 +36,7 @@ describe('portable logic primitive registry', () => { 'collection.indexBy', 'collection.countBy', 'logic.firstTruthy', + 'logic.coalesce', 'time.epochMs', 'logic.not', 'number.clamp', @@ -49,6 +50,8 @@ describe('portable logic primitive registry', () => { 'string.split', 'string.replaceFirst', 'string.replaceAll', + 'logic.firstDefined', + 'string.coerce', ]); }); @@ -75,6 +78,7 @@ describe('portable logic primitive registry', () => { expect(portableLogicSupportForTarget('collection.indexBy', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('collection.countBy', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('logic.firstTruthy', 'python')).toBe('stable'); + expect(portableLogicSupportForTarget('logic.coalesce', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('time.epochMs', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('logic.not', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('number.clamp', 'python')).toBe('stable'); @@ -358,6 +362,7 @@ describe('portable logic primitive registry', () => { test('string parity slice has matching target support', () => { const stringPrimitives: PortableLogicPrimitiveId[] = [ + 'string.coerce', 'string.trim', 'string.split', 'string.replaceFirst', @@ -386,6 +391,8 @@ describe('portable logic primitive registry', () => { const firstTruthy = lookupPortableLogicPrimitive('logic.firstTruthy'); expect(firstTruthy?.hostPatterns).toContain('a || b || c'); expect(firstTruthy?.portabilityNotes.join(' ')).toContain('empty collections are target-specific'); + 
expect(lookupPortableLogicPrimitive('logic.coalesce')?.hostPatterns).toContain('a ?? b ?? c'); + expect(lookupPortableLogicPrimitive('logic.coalesce')?.portabilityNotes.join(' ')).toContain('null/None-only'); expect(lookupPortableLogicPrimitive('number.clamp')?.hostPatterns).toContain('Math.max(lo, Math.min(hi, value))'); expect(lookupPortableLogicPrimitive('number.clamp')?.intent).toBe('semantic-gap'); expect(lookupPortableLogicPrimitive('object.keys')?.hostPatterns).toContain('Object.keys(obj)'); @@ -401,6 +408,7 @@ describe('portable logic primitive registry', () => { expect(lookupPortableLogicPrimitive('string.replaceAll')?.hostPatterns).toContain( 'value.replaceAll(search, replacement)', ); + expect(lookupPortableLogicPrimitive('string.coerce')?.hostPatterns).toContain('String(value)'); expect(lookupPortableLogicPrimitive('host.randomThing')).toBeNull(); }); @@ -439,17 +447,17 @@ describe('portable logic primitive registry', () => { expect(() => validatePortableLogicPrimitiveRegistry({ - 'string.coalesceAtStart': { + 'string.coalesce': { ...valid!, - id: 'string.coalesceAtStart' as PortableLogicPrimitiveId, + id: 'string.coalesce' as PortableLogicPrimitiveId, }, }), - ).not.toThrow(); + ).toThrow(/duplicates existing language nullish\/coalesce syntax/); }); - test('does not register a named nullish/coalesce primitive', () => { - // The language already has `??`; this guards against adding a duplicate registry API by accident. 
- expect(PORTABLE_LOGIC_PRIMITIVE_IDS.some((id) => id.includes('nullish') || id.includes('coalesce'))).toBe(false); + test('registers exactly the portable coalesce primitive, not stray nullish aliases', () => { + expect(PORTABLE_LOGIC_PRIMITIVE_IDS.filter((id) => id.includes('coalesce'))).toEqual(['logic.coalesce']); expect(lookupPortableLogicPrimitive('logic.nullishCoalesce')).toBeNull(); + expect(lookupPortableLogicPrimitive('logic.coalesce')).not.toBeNull(); }); }); diff --git a/packages/python/src/codegen-body-python.ts b/packages/python/src/codegen-body-python.ts index 05a4d784..1875165c 100644 --- a/packages/python/src/codegen-body-python.ts +++ b/packages/python/src/codegen-body-python.ts @@ -55,12 +55,14 @@ import { parseKeys, suggestStdlibMethod, } from '@kernlang/core'; +import { buildPythonParamList } from './codegen-helpers.js'; import { KERN_FMT_HELPER_PY, KERN_I32_HELPER_PY, KERN_PAIR_HELPERS_PY, KERN_TMOD_HELPER_PY, } from './core/expr/index.js'; +import { mapTsTypeToPython } from './type-map.js'; /** Slice 3e — caller-provided options for the Python body emitter. * Currently only `symbolMap`; future slices may add diagnostics, source-map @@ -158,6 +160,7 @@ interface BodyEmitContext { /** Depth of nested `finally` blocks. Propagation from finally would * override pending control flow, so it gets a finally-specific error. */ finallyDepth: number; + standaloneExpression: boolean; } const INDENT_STEP = ' '; @@ -176,6 +179,7 @@ function freshCtx(options?: BodyEmitOptions): BodyEmitContext { usedPropagation: false, tryDepth: 0, finallyDepth: 0, + standaloneExpression: false, traceHooks: options?.traceHooks, }; } @@ -288,6 +292,7 @@ export function emitNativeKernBodyPythonWithImports(handlerNode: IRNode, options * `trailingComment=` prop. Mirrors the TS emitter's set. 
*/ const TRAILING_COMMENT_TYPES = new Set([ 'let', + 'expression-v1', 'assign', 'fmt', 'clamp', @@ -335,6 +340,10 @@ function emitChildrenPy( for (const line of emitSetPy(child, ctx)) lines.push(`${indent}${line}`); } else if (child.type === 'let') { for (const line of emitLetPy(child, ctx)) lines.push(`${indent}${line}`); + } else if (child.type === 'expression-v1') { + for (const line of emitExpressionV1Py(child, ctx)) lines.push(`${indent}${line}`); + } else if (child.type === 'fn') { + for (const line of emitFnPy(child, ctx, indent)) lines.push(line); } else if (child.type === 'assign') { for (const line of emitAssignPy(child, ctx)) lines.push(`${indent}${line}`); } else if (child.type === 'destructure') { @@ -1646,7 +1655,9 @@ const NON_EXCEPTION_LITERAL_KINDS: ReadonlySet = new Set([ * `emitPyExprCtx` which threads the live ctx (and therefore the live * imports set) end-to-end. */ export function emitPyExpression(node: ValueIR, options?: BodyEmitOptions): string { - return emitPyExprCtx(node, freshCtx(options)); + const ctx = freshCtx(options); + ctx.standaloneExpression = true; + return emitPyExprCtx(node, ctx); } function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { @@ -2089,6 +2100,15 @@ function lowerChain(node: ChainNode, ctx: BodyEmitContext): GuardedExpr { if (regex !== null) return { guard: null, expr: regex }; const stdlib = applyStdlibLoweringPython(node, ctx); if (stdlib !== null) return { guard: null, expr: stdlib }; + if (node.callee.kind === 'ident' && node.callee.name === 'String') { + if (node.args.length !== 1) { + throw new Error('String() portable coercion expects exactly one argument on Python target.'); + } + const arg = emitPyExprCtx(node.args[0], ctx); + if (ctx.standaloneExpression) return { guard: null, expr: inlineKernFmtPy(arg) }; + ctx.helpers.add(KERN_FMT_HELPER_PY); + return { guard: null, expr: `_kern_fmt(${arg})` }; + } const callee = node.callee; const inner: GuardedExpr = callee.kind === 'member' || 
callee.kind === 'call' || callee.kind === 'index' @@ -2501,3 +2521,75 @@ export function registerHelpers(node: ValueIR, ctx: BodyEmitContext) { break; } } + +function emitExpressionV1Py(node: IRNode, ctx: BodyEmitContext): string[] { + const props = (node.props ?? {}) as Record; + const userName = String(props.name ?? ''); + if (!userName) throw new Error('body-statement `expression-v1` requires `name=`.'); + const rawExpr = props.expr; + const exprSource = unwrapBodyExpr(rawExpr); + if (exprSource === undefined || exprSource === '') { + throw new Error('body-statement `expression-v1` requires `expr=`.'); + } + const exprIR = parseExpression(exprSource); + declareLocalBinding(ctx, userName, 'const'); + const name = maybeRenameOnShadow(ctx, userName); + setRegexBinding(ctx, userName, exprIR.kind === 'regexLit' ? exprIR : null); + const lines = [`${name} = ${emitPyExprCtx(exprIR, ctx)}`]; + if (ctx.traceHooks?.letAssign) lines.push(letAssignTracePy(name)); + return lines; +} + +function emitFnPy(node: IRNode, ctx: BodyEmitContext, indent: string): string[] { + const props = (node.props ?? {}) as Record; + const userName = String(props.name ?? ''); + if (!userName) throw new Error('body-statement `fn` requires `name=`.'); + declareLocalBinding(ctx, userName, 'const'); + const name = maybeRenameOnShadow(ctx, userName); + + const isAsync = props.async === 'true' || props.async === true; + const asyncKw = isAsync ? 'async ' : ''; + if (props.params && node.children?.some((c) => c.type === 'param')) { + throw new Error('body-statement `fn` cannot mix legacy `params=` with structured `param` children.'); + } + const paramList = buildPythonParamList(node); + + const returns = props.returns ? String(props.returns) : ''; + const retClause = returns ? 
` -> ${mapTsTypeToPython(returns)}` : ''; + + const lines: string[] = []; + lines.push(`${indent}${asyncKw}def ${name}(${paramList})${retClause}:`); + + const handlerNode = node.children?.find((c) => c.type === 'handler'); + const bodyNodes = handlerNode ? (handlerNode.children ?? []) : (node.children ?? []); + const stmtNodes = bodyNodes.filter((c) => c.type !== 'param' && c.type !== 'decorator'); + + const inner = emitChildrenPy(stmtNodes, ctx, indent + INDENT_STEP, paramBindingsFromPythonSignature(paramList)); + if (inner.length === 0) { + lines.push(`${indent}${INDENT_STEP}pass`); + } else { + for (const sl of inner) { + lines.push(sl); + } + } + return lines; +} + +function paramBindingsFromPythonSignature(paramList: string): Array<[string, 'const']> { + if (!paramList.trim()) return []; + return splitBodyExpressionList(paramList, 'fn params=') + .map((part) => part.split('=')[0]?.split(':')[0]?.trim().replace(/^\*+/, '') ?? '') + .filter((name) => /^[A-Za-z_]\w*$/.test(name)) + .map((name) => [name, 'const']); +} + +function inlineKernFmtPy(expr: string): string { + return [ + '(lambda __k_v: ', + "('true' if __k_v else 'false') if isinstance(__k_v, bool) else ", + "'null' if __k_v is None else ", + 'str(int(__k_v)) if isinstance(__k_v, float) and __k_v.is_integer() else ', + 'str(__k_v))', + `(${expr})`, + ].join(''); +} diff --git a/packages/python/src/core/expr/helpers.ts b/packages/python/src/core/expr/helpers.ts index 069e832a..4f5b4534 100644 --- a/packages/python/src/core/expr/helpers.ts +++ b/packages/python/src/core/expr/helpers.ts @@ -17,6 +17,8 @@ export const KERN_FMT_HELPER_PY = [ " return 'true' if __k_v else 'false'", ' if __k_v is None:', " return 'null'", + ' if isinstance(__k_v, float) and __k_v.is_integer():', + ' return str(int(__k_v))', ' return str(__k_v)', ].join('\n'); diff --git a/packages/python/src/ir-semantics/python-leg.ts b/packages/python/src/ir-semantics/python-leg.ts index c00142db..094506aa 100644 --- 
a/packages/python/src/ir-semantics/python-leg.ts +++ b/packages/python/src/ir-semantics/python-leg.ts @@ -326,17 +326,20 @@ export async function runPythonEmitterLeg(fixture: NodeFixture, env: SemanticEnv } function shouldTraceLetAssign(ir: NodeFixture['ir']): boolean { - // `let` (declaration), `assign` (reassignment), and `fmt` (formatted binding) - // observe their binding write through the same `{op:"assign"}` trace hook. + // `let` (declaration), `expression-v1`, `assign` (reassignment), and `fmt` + // (formatted binding) observe their binding write through the same + // `{op:"assign"}` trace hook. // `while` fixtures opt in too: their counter setup/advance (let + assign in // body) must emit the same assign events the reference produces. const contract = ir.props?.__semanticContract; const t = ir.type; return ( t === 'let' || + t === 'expression-v1' || t === 'assign' || t === 'fmt' || contract === 'let' || + contract === 'expression-v1' || contract === 'assign' || contract === 'fmt' || contract === 'while' diff --git a/packages/python/tests/ir-semantics-python-leg.test.ts b/packages/python/tests/ir-semantics-python-leg.test.ts index a0b3e128..8de85197 100644 --- a/packages/python/tests/ir-semantics-python-leg.test.ts +++ b/packages/python/tests/ir-semantics-python-leg.test.ts @@ -25,6 +25,11 @@ import { registerBranchContract, } from '../../core/src/ir/semantics/branch.js'; import { _resetEachContractForTest, eachContract, registerEachContract } from '../../core/src/ir/semantics/each.js'; +import { + _resetExpressionV1ContractForTest, + expressionV1Contract, + registerExpressionV1Contract, +} from '../../core/src/ir/semantics/expression-v1.js'; import { _resetFmtContractForTest, fmtContract, registerFmtContract } from '../../core/src/ir/semantics/fmt.js'; import { _resetForContractForTest, forContract, registerForContract } from '../../core/src/ir/semantics/for.js'; import { _resetIfContractForTest, ifContract, registerIfContract } from 
'../../core/src/ir/semantics/if.js'; @@ -54,6 +59,7 @@ beforeEach(() => { CONTRACT_REGISTRY.clear(); _resetBranchContractForTest(); _resetEachContractForTest(); + _resetExpressionV1ContractForTest(); _resetIfContractForTest(); _resetForContractForTest(); _resetLambdaContractForTest(); @@ -65,6 +71,7 @@ beforeEach(() => { _resetPrimitivesForTest(); registerPrimitives(); registerEachContract(); + registerExpressionV1Contract(); registerBranchContract(); registerIfContract(); registerForContract(); @@ -80,6 +87,7 @@ afterEach(() => { CONTRACT_REGISTRY.clear(); _resetBranchContractForTest(); _resetEachContractForTest(); + _resetExpressionV1ContractForTest(); _resetIfContractForTest(); _resetForContractForTest(); _resetLambdaContractForTest(); @@ -91,6 +99,27 @@ afterEach(() => { _resetPrimitivesForTest(); }); +describeIfPython('Python emitter leg — expression-v1 fixtures (three-way differential)', () => { + it.each(expressionV1Contract.fixtures.map((f) => [f.description, f] as const))( + 'fixture: %s', + async (_desc, fixture) => { + const result = await runDifferential(fixture, { pythonLeg: runPythonEmitterLeg }); + if (result.verdict !== 'pass') { + throw new Error( + `verdict=${result.verdict}\n` + + `fixture=${fixture.description}\n` + + `reference=${JSON.stringify(result.reference, null, 2)}\n` + + `ts=${JSON.stringify(result.ts, null, 2)}\n` + + `python=${JSON.stringify(result.python, null, 2)}\n` + + `legError=${JSON.stringify(result.legError, null, 2)}`, + ); + } + expect(result.verdict).toBe('pass'); + }, + 15_000, + ); +}); + /** * PR-4 — Python emitter normalises pair-mode iteration via runtime helpers * `_kern_pairs` (sync) and `_kern_async_pairs` (async). 
This closes the diff --git a/packages/python/tests/native-handlers-python.test.ts b/packages/python/tests/native-handlers-python.test.ts index 7cd8695d..21f42353 100644 --- a/packages/python/tests/native-handlers-python.test.ts +++ b/packages/python/tests/native-handlers-python.test.ts @@ -117,6 +117,119 @@ describe('emitPyExpression — slice 1 lowering rules', () => { }); }); +describe('emitNativeKernBodyPython — expression-v1 and nested fn statements', () => { + test('expression-v1 emits a scalar binding through Python expression lowering', () => { + const handler = makeHandler([ + { type: 'expression-v1', props: { name: 'label', expr: 'String(value)' } }, + { type: 'return', props: { value: 'label' } }, + ]); + expect(emitNativeKernBodyPython(handler)).toBe( + [ + 'def _kern_fmt(__k_v):', + ' if isinstance(__k_v, bool):', + " return 'true' if __k_v else 'false'", + ' if __k_v is None:', + " return 'null'", + ' if isinstance(__k_v, float) and __k_v.is_integer():', + ' return str(int(__k_v))', + ' return str(__k_v)', + '', + 'label = _kern_fmt(value)', + 'return label', + ].join('\n'), + ); + }); + + test('nested fn supports legacy params and returns inside body emit', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'add', params: 'a:number,b:number', returns: 'number' }, + children: [ + { type: 'handler', props: { lang: 'kern' }, children: [{ type: 'return', props: { value: 'a + b' } }] }, + ], + }, + { type: 'return', props: { value: 'add(2, 3)' } }, + ]); + expect(emitNativeKernBodyPython(handler)).toBe( + ['def add(a: float, b: float) -> float:', ' return a + b', 'return add(2, 3)'].join('\n'), + ); + }); + + test('nested fn supports structured param children', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'add', returns: 'number' }, + children: [ + { type: 'param', props: { name: 'a', type: 'number' } }, + { type: 'param', props: { name: 'b', type: 'number' } }, + { type: 'handler', props: { lang: 'kern' 
}, children: [{ type: 'return', props: { value: 'a + b' } }] }, + ], + }, + { type: 'return', props: { value: 'add(2, 3)' } }, + ]); + expect(emitNativeKernBodyPython(handler)).toContain('def add(a: float, b: float) -> float:'); + }); + + test('nested async fn preserves await expressions in body emit', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'loadTotal', params: 'amount:number', returns: 'number', async: 'true' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [ + { type: 'let', props: { name: 'loaded', value: 'await load(amount)' } }, + { type: 'return', props: { value: 'loaded + 5' } }, + ], + }, + ], + }, + ]); + expect(emitNativeKernBodyPython(handler)).toBe( + ['async def loadTotal(amount: float) -> float:', ' loaded = await load(amount)', ' return loaded + 5'].join( + '\n', + ), + ); + }); + + test('String() portable coercion requires exactly one arg', () => { + expect(() => emitPyExpression(parseExpression('String()'))).toThrow(/expects exactly one argument/); + expect(() => emitPyExpression(parseExpression('String(a, b)'))).toThrow(/expects exactly one argument/); + }); + + test('standalone String(value) lowering is self-contained', () => { + expect(emitPyExpression(parseExpression('String(value)'))).toBe( + "(lambda __k_v: ('true' if __k_v else 'false') if isinstance(__k_v, bool) else 'null' if __k_v is None else str(int(__k_v)) if isinstance(__k_v, float) and __k_v.is_integer() else str(__k_v))(value)", + ); + }); + + test('expression-v1 accepts ExprObject expr props', () => { + const handler = makeHandler([ + { type: 'expression-v1', props: { name: 'total', expr: { __expr: true, code: 'amount + 1' } } }, + { type: 'return', props: { value: 'total' } }, + ]); + expect(emitNativeKernBodyPython(handler)).toBe(['total = amount + 1', 'return total'].join('\n')); + }); + + test('nested fn rejects mixed legacy and structured params', () => { + const handler = makeHandler([ + { + type: 'fn', + 
props: { name: 'mixed', params: 'a:number' }, + children: [ + { type: 'param', props: { name: 'b', type: 'number' } }, + { type: 'handler', props: { lang: 'kern' }, children: [] }, + ], + }, + ]); + expect(() => emitNativeKernBodyPython(handler)).toThrow(/cannot mix legacy `params=`/); + }); +}); + describe('emitNativeKernBodyPython — slice 1 statements', () => { test('let with simple call', () => { const h = makeHandler([{ type: 'let', props: { name: 'x', value: 'foo()' } }]); diff --git a/packages/python/tests/native-handlers-slice2-python.test.ts b/packages/python/tests/native-handlers-slice2-python.test.ts index bf183a4d..e69cb204 100644 --- a/packages/python/tests/native-handlers-slice2-python.test.ts +++ b/packages/python/tests/native-handlers-slice2-python.test.ts @@ -105,7 +105,7 @@ describe('emitPyExpression — arithmetic + comparison + unary', () => { expect(emitPyExpression(parseExpression('a instanceof B && c'))).toBe('isinstance(a, B) and c'); // The dominant idiom — mirrors the TS-side round-trip in core/expression.test.ts. expect(emitPyExpression(parseExpression('err instanceof Error ? 
err.message : String(err)'))).toBe( - 'err.message if (isinstance(err, Error)) else String(err)', + "err.message if (isinstance(err, Error)) else (lambda __k_v: ('true' if __k_v else 'false') if isinstance(__k_v, bool) else 'null' if __k_v is None else str(int(__k_v)) if isinstance(__k_v, float) and __k_v.is_integer() else str(__k_v))(err)", ); }); diff --git a/scripts/conformance.mjs b/scripts/conformance.mjs index ca5fec85..6b870b71 100644 --- a/scripts/conformance.mjs +++ b/scripts/conformance.mjs @@ -479,6 +479,20 @@ const FIXTURES = [ ], body: `firstDefined name=winner values="missingA, missingB, 'fallback'"\nreturn value="winner"`, expected: 'fallback' }, + { kind: 'stmt', name: 'stmt: expression-v1 string coercion canonicalizes bool and null', + params: [ + { name: 'flag', type: 'boolean', value: false }, + { name: 'missing', type: 'any', value: null }, + ], + body: `expression-v1 name=flagText expr="String(flag)"\nexpression-v1 name=nullText expr="String(missing)"\nreturn value="{ flagText: flagText, nullText: nullText }"`, + expected: { flagText: 'false', nullText: 'null' } }, + { kind: 'stmt', name: 'stmt: nested fn with let and return executes inside body', + params: [ + { name: 'left', type: 'number', value: 2 }, + { name: 'right', type: 'number', value: 3 }, + ], + body: `fn name=add params="a:number,b:number" returns=number\n handler\n let name=sum value="a + b"\n return value="sum"\nreturn value="add(left, right)"`, + expected: 5 }, { kind: 'stmt', name: 'stmt: while loop accumulates (mutable kind=let)', params: [{ name: 'n', type: 'number', value: 5 }, { name: 'min', type: 'number', value: 0 }], body: `let name=total value="0" kind=let\nlet name=i value="0" kind=let\nwhile cond="i < n"\n assign target="total" value="total + i"\n assign target="i" value="i + 1"\nreturn value="{ total: total }"`, @@ -1394,7 +1408,8 @@ for (const fx of FIXTURES) { compilerOptions: { module: tsCompiler.ModuleKind.ESNext, target: tsCompiler.ScriptTarget.ES2022 }, 
}).outputText, ); - writeFileSync(pyFile, `import json\n${[...(pyEmit.imports ?? [])].join('\n')}\ndef __h(${names.join(', ')}):\n${pyEmit.code.split('\n').map((l) => ` ${l}`).join('\n')}\nprint(json.dumps(__h(${fx.params.map((p) => pyVal(p.value)).join(', ')}), default=str, allow_nan=False))`); + const pyHelpers = [...(pyEmit.helpers ?? [])].join('\n\n'); + writeFileSync(pyFile, `import json\n${[...(pyEmit.imports ?? [])].join('\n')}\n${pyHelpers}\ndef __h(${names.join(', ')}):\n${pyEmit.code.split('\n').map((l) => ` ${l}`).join('\n')}\nprint(json.dumps(__h(${fx.params.map((p) => pyVal(p.value)).join(', ')}), default=str, allow_nan=False))`); const stmtOpts = { encoding: 'utf8', timeout: 10_000 }; const tsOut = execFileSync('node', [tsFile], stmtOpts).trim(); const pyOut = execFileSync('python3', [pyFile], stmtOpts).trim(); From 4d00fa5060ed60574a5a0622a58e351fb8f979df Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:15:20 +0200 Subject: [PATCH 02/46] build(deps): bump the minor-and-patch group with 5 updates (#388) Bumps the minor-and-patch group with 5 updates: | Package | From | To | | --- | --- | --- | | [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) | `25.9.1` | `25.9.2` | | [next](https://github.com/vercel/next.js) | `16.2.6` | `16.2.7` | | [react](https://github.com/facebook/react/tree/HEAD/packages/react) | `19.2.6` | `19.2.7` | | [@types/react](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/react) | `19.2.15` | `19.2.17` | | [react-dom](https://github.com/facebook/react/tree/HEAD/packages/react-dom) | `19.2.6` | `19.2.7` | Updates `@types/node` from 25.9.1 to 25.9.2 - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node) Updates `next` from 16.2.6 to 16.2.7 - [Release 
notes](https://github.com/vercel/next.js/releases) - [Changelog](https://github.com/vercel/next.js/blob/canary/release.js) - [Commits](https://github.com/vercel/next.js/compare/v16.2.6...v16.2.7) Updates `react` from 19.2.6 to 19.2.7 - [Release notes](https://github.com/facebook/react/releases) - [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md) - [Commits](https://github.com/facebook/react/commits/v19.2.7/packages/react) Updates `@types/react` from 19.2.15 to 19.2.17 - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/react) Updates `react-dom` from 19.2.6 to 19.2.7 - [Release notes](https://github.com/facebook/react/releases) - [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md) - [Commits](https://github.com/facebook/react/commits/v19.2.7/packages/react-dom) Updates `@types/react` from 19.2.15 to 19.2.17 - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/react) --- updated-dependencies: - dependency-name: "@types/node" dependency-version: 25.9.2 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: minor-and-patch - dependency-name: next dependency-version: 16.2.7 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: minor-and-patch - dependency-name: react dependency-version: 19.2.7 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: minor-and-patch - dependency-name: "@types/react" dependency-version: 19.2.17 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: minor-and-patch - dependency-name: react-dom dependency-version: 19.2.7 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: minor-and-patch - 
dependency-name: "@types/react" dependency-version: 19.2.17 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: minor-and-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- package.json | 2 +- packages/playground/package.json | 10 +- packages/terminal/package.json | 4 +- pnpm-lock.yaml | 216 +++++++++++++++---------------- 4 files changed, 116 insertions(+), 116 deletions(-) diff --git a/package.json b/package.json index efa4fbec..f8f118e4 100644 --- a/package.json +++ b/package.json @@ -54,7 +54,7 @@ }, "devDependencies": { "@biomejs/biome": "^2.4.16", - "@types/node": "^25.9.1", + "@types/node": "^25.9.2", "typescript": "^6.0.3" } } diff --git a/packages/playground/package.json b/packages/playground/package.json index 79223169..8226570c 100644 --- a/packages/playground/package.json +++ b/packages/playground/package.json @@ -20,13 +20,13 @@ "@kernlang/vue": "workspace:*", "@monaco-editor/react": "^4.6.0", "monaco-editor": "^0.55.1", - "next": "^16.2.6", - "react": "^19.2.6", - "react-dom": "^19.2.6" + "next": "^16.2.7", + "react": "^19.2.7", + "react-dom": "^19.2.7" }, "devDependencies": { - "@types/node": "^25.9.1", - "@types/react": "^19.2.15", + "@types/node": "^25.9.2", + "@types/react": "^19.2.17", "@types/react-dom": "^19.0.0", "typescript": "^6.0.3" } diff --git a/packages/terminal/package.json b/packages/terminal/package.json index f6e76229..8e312eaf 100644 --- a/packages/terminal/package.json +++ b/packages/terminal/package.json @@ -46,9 +46,9 @@ } }, "devDependencies": { - "@types/react": "19.2.15", + "@types/react": "19.2.17", "@inkjs/ui": "2.0.0", "ink": "7.0.5", - "react": "19.2.6" + "react": "19.2.7" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 20f98d52..3bbfa045 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -22,8 +22,8 @@ importers: specifier: ^2.4.16 version: 2.4.16 '@types/node': - specifier: ^25.9.1 - version: 
25.9.1 + specifier: ^25.9.2 + version: 25.9.2 typescript: specifier: ^6.0.3 version: 6.0.3 @@ -251,29 +251,29 @@ importers: version: link:../vue '@monaco-editor/react': specifier: ^4.6.0 - version: 4.7.0(monaco-editor@0.55.1)(react-dom@19.2.6(react@19.2.6))(react@19.2.6) + version: 4.7.0(monaco-editor@0.55.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) monaco-editor: specifier: ^0.55.1 version: 0.55.1 next: - specifier: ^16.2.6 - version: 16.2.6(react-dom@19.2.6(react@19.2.6))(react@19.2.6) + specifier: ^16.2.7 + version: 16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7) react: - specifier: ^19.2.6 - version: 19.2.6 + specifier: ^19.2.7 + version: 19.2.7 react-dom: - specifier: ^19.2.6 - version: 19.2.6(react@19.2.6) + specifier: ^19.2.7 + version: 19.2.7(react@19.2.7) devDependencies: '@types/node': - specifier: ^25.9.1 - version: 25.9.1 + specifier: ^25.9.2 + version: 25.9.2 '@types/react': - specifier: ^19.2.15 - version: 19.2.15 + specifier: ^19.2.17 + version: 19.2.17 '@types/react-dom': specifier: ^19.0.0 - version: 19.2.3(@types/react@19.2.15) + version: 19.2.3(@types/react@19.2.17) typescript: specifier: ^6.0.3 version: 6.0.3 @@ -349,16 +349,16 @@ importers: devDependencies: '@inkjs/ui': specifier: 2.0.0 - version: 2.0.0(ink@7.0.5(@types/react@19.2.15)(react@19.2.6)) + version: 2.0.0(ink@7.0.5(@types/react@19.2.17)(react@19.2.7)) '@types/react': - specifier: 19.2.15 - version: 19.2.15 + specifier: 19.2.17 + version: 19.2.17 ink: specifier: 7.0.5 - version: 7.0.5(@types/react@19.2.15)(react@19.2.6) + version: 7.0.5(@types/react@19.2.17)(react@19.2.7) react: - specifier: 19.2.6 - version: 19.2.6 + specifier: 19.2.7 + version: 19.2.7 packages/test: dependencies: @@ -627,57 +627,57 @@ packages: react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 react-dom: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 - '@next/env@16.2.6': - resolution: {integrity: sha512-gd8HoHN4ufj73WmR3JmVolrpJR47ILK6LouP5xElPglaVxir6e1a7VzvTvDWkOoPXT9rkkTzyCxBu4yeZfZwcw==} + '@next/env@16.2.7': 
+ resolution: {integrity: sha512-tMJizPlj6ZYpBMMdK8S0LJufrP4QTdR6pcv9KQ/bVETPAmg0j1mlHE9G2c38UyGHxoBapgwuj7XjbGJ2RcDFOg==} - '@next/swc-darwin-arm64@16.2.6': - resolution: {integrity: sha512-ZJGkkcNfYgrrMkqOdZ7zoLa1TOy0qpcMfk/z4Mh/FKUz40gVO+HNQWqmLxf67Z5WB64DRp0dhEbyHfel+6sJUg==} + '@next/swc-darwin-arm64@16.2.7': + resolution: {integrity: sha512-vm1EDI/pVaBNNiychmxk3fft+OhQPVD9cIM/tReLZIQ3TfQ4kqI9DwKk00dzuS1ulC7icbrzCFrmRRlk9PfNdw==} engines: {node: '>= 10'} cpu: [arm64] os: [darwin] - '@next/swc-darwin-x64@16.2.6': - resolution: {integrity: sha512-v/YLBHIY132Ced3puBJ7YJKw1lqsCrgcNo2aRJlCEyQrrCeRJlvGlnmxhPxNQI3KE3N1DN5r9TPNPvka3nq5RQ==} + '@next/swc-darwin-x64@16.2.7': + resolution: {integrity: sha512-O3IRSv1ZBL1zs0WrIgefTEcTKFVn+ryxBNe54erJ6KsD+2f/Mmt7g2jOYh8PSBdUwPtKQJuCsTMlZ7tIu2AcsQ==} engines: {node: '>= 10'} cpu: [x64] os: [darwin] - '@next/swc-linux-arm64-gnu@16.2.6': - resolution: {integrity: sha512-RPOvqlYBbcQjkz9VQQDZ2T2bARIjXZV1KFlt+V2Mr6SW/e4I9fcKsaA0hdyf2FHoTlsV2xnBd5Y912rP/1Ce6w==} + '@next/swc-linux-arm64-gnu@16.2.7': + resolution: {integrity: sha512-Re6PZtjBDd0aMU+VcZcC/PrIvj4WhrjDYtMhhCVQamWN4L90EVP0pcEOBQD25prSlw7OzNw5QpHLWMilRLsRNw==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] libc: [glibc] - '@next/swc-linux-arm64-musl@16.2.6': - resolution: {integrity: sha512-URUTu1+dMkxJsPFgm+OeEvq9wf5sujw0EvgYy80TDGHTSLTnIHeqb0Eu8A3sC95IRgjejQL+kC4mw+4yPxiAXA==} + '@next/swc-linux-arm64-musl@16.2.7': + resolution: {integrity: sha512-qyogG9QtBzWxgJfeGBvOEHI3851gTfCF3wLZ5RDLTBJGAmE9p1qDwKCOdrBrvBzRvYDT+gUDp72pzlSEfAXgNA==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] libc: [musl] - '@next/swc-linux-x64-gnu@16.2.6': - resolution: {integrity: sha512-DOj182mPV8G3UkrayLoREM5YEYI+Dk5wv7Ox9xl1fFibAELEsFD0lDPfHIeILlutMMfdyhlzYPELG3peuKaurw==} + '@next/swc-linux-x64-gnu@16.2.7': + resolution: {integrity: sha512-Vhe4ZDuBpmMogrGi5D4R2Kq4JAQlj6+wvgaFYy31zfES0zPmt6TLA+cuYpM/OLrPZjo2MYQTHVqNUSCR6+fDZQ==} engines: {node: '>= 10'} cpu: [x64] os: [linux] libc: 
[glibc] - '@next/swc-linux-x64-musl@16.2.6': - resolution: {integrity: sha512-HKQ5SP/V/ub73UvF7n/zeJlxk2kLmtL7Wzrg4WfmkjmNos5onJ2tKu7yZOPdL18A6Svfn3max29ym+ry7NkK4g==} + '@next/swc-linux-x64-musl@16.2.7': + resolution: {integrity: sha512-srvian89JahFLw1YLBEuhvPJ0DO5lpUeJQMXy4xYo7g628ZlNgXdNkqoxSAv9OYrBfByh6vxISMwW/mRbzCY+g==} engines: {node: '>= 10'} cpu: [x64] os: [linux] libc: [musl] - '@next/swc-win32-arm64-msvc@16.2.6': - resolution: {integrity: sha512-LZXpTlPyS5v7HhSmnvsLGP3iIYgYOBnc8r8ArlT55sGHV89bR2HlDdBjWQ+PY6SJMmk8TuVGFuxalnP3k/0Dwg==} + '@next/swc-win32-arm64-msvc@16.2.7': + resolution: {integrity: sha512-GX3wvLpULFuRFJzwHaKfm7QZJ18F4ZSuxlPJ96BoBglCzBmdSjyeBKF+ZhWhvL/ckxNfLnNa7bsObO2ipYpszw==} engines: {node: '>= 10'} cpu: [arm64] os: [win32] - '@next/swc-win32-x64-msvc@16.2.6': - resolution: {integrity: sha512-F0+4i0h9J6C4eE3EAPWsoCk7UW/dbzOjyzxY0qnDUOYFu6FFmdZ6l97/XdV3/Nz3VYyO7UWjyEJUXkGqcoXfMA==} + '@next/swc-win32-x64-msvc@16.2.7': + resolution: {integrity: sha512-J4WlM72NMk076Qsg0jTdK3SNXatlSdnjW7L7oNGLst1tAGjHrJh/FYi+pw9wyIjEtGRKDNzD0zuiY16oWYWVaw==} engines: {node: '>= 10'} cpu: [x64] os: [win32] @@ -703,8 +703,8 @@ packages: '@types/http-errors@2.0.5': resolution: {integrity: sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==} - '@types/node@25.9.1': - resolution: {integrity: sha512-xfrlY7UD5rMJk3ZVJP8BNzS28J36YJg+xp+LPXV1TdWxr8uMH5A860QNxYDGQe/ylDSgjxE52Q9VnO7p75tJxg==} + '@types/node@25.9.2': + resolution: {integrity: sha512-G05zqtJhcDLb8uslf5EjCxXg9G1KQxiV8OS0R26IC//Eoyitzqe8z37I7cqvnZlrlSfgocQRfSn/AHBZJJFyGw==} '@types/qs@6.15.0': resolution: {integrity: sha512-JawvT8iBVWpzTrz3EGw9BTQFg3BQNmwERdKE22vlTxawwtbyUSlMppvZYKLZzB5zgACXdXxbD3m1bXaMqP/9ow==} @@ -717,8 +717,8 @@ packages: peerDependencies: '@types/react': ^19.2.0 - '@types/react@19.2.15': - resolution: {integrity: sha512-eRwcGNHve+E8qtEQSSRl6urh+rFop4v8gm6O8rGv25CodbvFdLjA1vVQ1KkiFE0w0UPOnb8tDiFKL5lp0rtY5Q==} + '@types/react@19.2.17': + 
resolution: {integrity: sha512-MXfmqaVPEVgkBT/aY0aGCkRWWtByiYQXo3xdQ8r5RzuFrPiRn8Gar2tQdXSUQ2GKV3bkXckek89V8wQBY2Q/Aw==} '@types/send@1.2.1': resolution: {integrity: sha512-arsCikDvlU99zl1g69TcAB3mzZPpxgw0UQnaHeC1Nwb015xp8bknZv5rIfri9xTOcMuaVgvabfIRA7PSZVuZIQ==} @@ -764,8 +764,8 @@ packages: resolution: {integrity: sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} engines: {node: 18 || 20 || >=22} - baseline-browser-mapping@2.10.29: - resolution: {integrity: sha512-Asa2krT+XTPZINCS+2QcyS8WTkObE77RwkydwF7h6DmnKqbvlalz93m/dnphUyCa6SWSP51VgtEUf2FN+gelFQ==} + baseline-browser-mapping@2.10.34: + resolution: {integrity: sha512-IMDedajPifLnHNY0X9n8hKxRTQ6/eTHwr5bDo04WnuqxyKw6LYtQywCuuqPZwhl3aBXMvQpJov42GLCwRRdQzw==} engines: {node: '>=6.0.0'} hasBin: true @@ -789,8 +789,8 @@ packages: resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==} engines: {node: '>= 0.4'} - caniuse-lite@1.0.30001792: - resolution: {integrity: sha512-hVLMUZFgR4JJ6ACt1uEESvQN1/dBVqPAKY0hgrV70eN3391K6juAfTjKZLKvOMsx8PxA7gsY1/tLMMTcfFLLpw==} + caniuse-lite@1.0.30001797: + resolution: {integrity: sha512-l8xKG+gwAIExZGl9FrF7KUwuOmk6wbEPC9Xoy/RtnWv1XG0Q4LFlagaLpUv3Kiza3W/wm27zy0yWJEieYKAP6w==} chalk@5.6.2: resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} @@ -1121,8 +1121,8 @@ packages: resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==} engines: {node: '>= 0.6'} - next@16.2.6: - resolution: {integrity: sha512-qOVgKJg1+At15NpeUP+eJgCHvTCgXsogweq87Ri/Ix7PkqQHg4sdaXmSFqKlgaIXE4kW0g25LE68W87UANlHtw==} + next@16.2.7: + resolution: {integrity: sha512-eMJxgjRzBaj3olkP4cBamHDXL79A8FC6u1GcsO1D1Tsx8bw/LLXUJCaoajVxtnhD3A1IJqIT8IcRJjgBIPJq4w==} engines: {node: '>=20.9.0'} hasBin: true peerDependencies: @@ -1218,10 +1218,10 @@ packages: resolution: {integrity: 
sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==} engines: {node: '>= 0.10'} - react-dom@19.2.6: - resolution: {integrity: sha512-0prMI+hvBbPjsWnxDLxlCGyM8PN6UuWjEUCYmZhO67xIV9Xasa/r/vDnq+Xyq4Lo27g8QSbO5YzARu0D1Sps3g==} + react-dom@19.2.7: + resolution: {integrity: sha512-t0BRVXvbiE/o20Hfw669rLbMCDWtYZLvmJigy2f0MxsXF+71pxhR3xOkspmsO8h3ZlNzyibAmtCa3l4lYKk6gQ==} peerDependencies: - react: ^19.2.6 + react: ^19.2.7 react-reconciler@0.33.0: resolution: {integrity: sha512-KetWRytFv1epdpJc3J4G75I4WrplZE5jOL7Yq0p34+OVOKF4Se7WrdIdVC45XsSSmUTlht2FM/fM1FZb1mfQeA==} @@ -1229,8 +1229,8 @@ packages: peerDependencies: react: ^19.2.0 - react@19.2.6: - resolution: {integrity: sha512-sfWGGfavi0xr8Pg0sVsyHMAOziVYKgPLNrS7ig+ivMNb3wbCBw3KxtflsGBAwD3gYQlE/AEZsTLgToRrSCjb0Q==} + react@19.2.7: + resolution: {integrity: sha512-HNe9WslTbXmFK8o8cmwgAeJFSBvt1bPdHCVKtaaV+WlAN36mpT4hcRpwbf3fY56ar2oIXzsBpOAiIRHAdY0OlQ==} engines: {node: '>=0.10.0'} readdirp@5.0.0: @@ -1255,8 +1255,8 @@ packages: scheduler@0.27.0: resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==} - semver@7.8.0: - resolution: {integrity: sha512-AcM7dV/5ul4EekoQ29Agm5vri8JNqRyj39o0qpX6vDF2GZrtutZl5RwgD1XnZjiTAfncsJhMI48QQH3sN87YNA==} + semver@7.8.2: + resolution: {integrity: sha512-c8jsqUZm3omBOI66G90z1Dyw5z622G8oLG+omfsHBJf3CWQTlOcwOjvOG6wtiNfW6anKm/eA39LMwMtMez2TiQ==} engines: {node: '>=10'} hasBin: true @@ -1591,13 +1591,13 @@ snapshots: '@img/sharp-win32-x64@0.34.5': optional: true - '@inkjs/ui@2.0.0(ink@7.0.5(@types/react@19.2.15)(react@19.2.6))': + '@inkjs/ui@2.0.0(ink@7.0.5(@types/react@19.2.17)(react@19.2.7))': dependencies: chalk: 5.6.2 cli-spinners: 3.4.0 deepmerge: 4.3.1 figures: 6.1.0 - ink: 7.0.5(@types/react@19.2.15)(react@19.2.6) + ink: 7.0.5(@types/react@19.2.17)(react@19.2.7) '@modelcontextprotocol/sdk@1.29.0(zod@4.4.3)': dependencies: @@ -1625,37 +1625,37 @@ snapshots: dependencies: state-local: 
1.0.7 - '@monaco-editor/react@4.7.0(monaco-editor@0.55.1)(react-dom@19.2.6(react@19.2.6))(react@19.2.6)': + '@monaco-editor/react@4.7.0(monaco-editor@0.55.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': dependencies: '@monaco-editor/loader': 1.7.0 monaco-editor: 0.55.1 - react: 19.2.6 - react-dom: 19.2.6(react@19.2.6) + react: 19.2.7 + react-dom: 19.2.7(react@19.2.7) - '@next/env@16.2.6': {} + '@next/env@16.2.7': {} - '@next/swc-darwin-arm64@16.2.6': + '@next/swc-darwin-arm64@16.2.7': optional: true - '@next/swc-darwin-x64@16.2.6': + '@next/swc-darwin-x64@16.2.7': optional: true - '@next/swc-linux-arm64-gnu@16.2.6': + '@next/swc-linux-arm64-gnu@16.2.7': optional: true - '@next/swc-linux-arm64-musl@16.2.6': + '@next/swc-linux-arm64-musl@16.2.7': optional: true - '@next/swc-linux-x64-gnu@16.2.6': + '@next/swc-linux-x64-gnu@16.2.7': optional: true - '@next/swc-linux-x64-musl@16.2.6': + '@next/swc-linux-x64-musl@16.2.7': optional: true - '@next/swc-win32-arm64-msvc@16.2.6': + '@next/swc-win32-arm64-msvc@16.2.7': optional: true - '@next/swc-win32-x64-msvc@16.2.6': + '@next/swc-win32-x64-msvc@16.2.7': optional: true '@swc/helpers@0.5.15': @@ -1671,15 +1671,15 @@ snapshots: '@types/body-parser@1.19.6': dependencies: '@types/connect': 3.4.38 - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/connect@3.4.38': dependencies: - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/express-serve-static-core@5.1.1': dependencies: - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/qs': 6.15.0 '@types/range-parser': 1.2.7 '@types/send': 1.2.1 @@ -1692,7 +1692,7 @@ snapshots: '@types/http-errors@2.0.5': {} - '@types/node@25.9.1': + '@types/node@25.9.2': dependencies: undici-types: 7.24.6 @@ -1700,22 +1700,22 @@ snapshots: '@types/range-parser@1.2.7': {} - '@types/react-dom@19.2.3(@types/react@19.2.15)': + '@types/react-dom@19.2.3(@types/react@19.2.17)': dependencies: - '@types/react': 19.2.15 + '@types/react': 19.2.17 - '@types/react@19.2.15': + '@types/react@19.2.17': 
dependencies: csstype: 3.2.3 '@types/send@1.2.1': dependencies: - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/serve-static@2.2.0': dependencies: '@types/http-errors': 2.0.5 - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/trusted-types@2.0.7': optional: true @@ -1748,7 +1748,7 @@ snapshots: balanced-match@4.0.4: {} - baseline-browser-mapping@2.10.29: {} + baseline-browser-mapping@2.10.34: {} body-parser@2.2.2: dependencies: @@ -1780,7 +1780,7 @@ snapshots: call-bind-apply-helpers: 1.0.2 get-intrinsic: 1.3.0 - caniuse-lite@1.0.30001792: {} + caniuse-lite@1.0.30001797: {} chalk@5.6.2: {} @@ -1994,7 +1994,7 @@ snapshots: inherits@2.0.4: {} - ink@7.0.5(@types/react@19.2.15)(react@19.2.6): + ink@7.0.5(@types/react@19.2.17)(react@19.2.7): dependencies: '@alcalzone/ansi-tokenize': 0.3.0 ansi-escapes: 7.3.0 @@ -2009,8 +2009,8 @@ snapshots: indent-string: 5.0.0 is-in-ci: 2.0.0 patch-console: 2.0.0 - react: 19.2.6 - react-reconciler: 0.33.0(react@19.2.6) + react: 19.2.7 + react-reconciler: 0.33.0(react@19.2.7) scheduler: 0.27.0 signal-exit: 3.0.7 slice-ansi: 9.0.0 @@ -2023,7 +2023,7 @@ snapshots: ws: 8.21.0 yoga-layout: 3.2.1 optionalDependencies: - '@types/react': 19.2.15 + '@types/react': 19.2.17 transitivePeerDependencies: - bufferutil - utf-8-validate @@ -2085,25 +2085,25 @@ snapshots: negotiator@1.0.0: {} - next@16.2.6(react-dom@19.2.6(react@19.2.6))(react@19.2.6): + next@16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7): dependencies: - '@next/env': 16.2.6 + '@next/env': 16.2.7 '@swc/helpers': 0.5.15 - baseline-browser-mapping: 2.10.29 - caniuse-lite: 1.0.30001792 + baseline-browser-mapping: 2.10.34 + caniuse-lite: 1.0.30001797 postcss: 8.5.15 - react: 19.2.6 - react-dom: 19.2.6(react@19.2.6) - styled-jsx: 5.1.6(react@19.2.6) + react: 19.2.7 + react-dom: 19.2.7(react@19.2.7) + styled-jsx: 5.1.6(react@19.2.7) optionalDependencies: - '@next/swc-darwin-arm64': 16.2.6 - '@next/swc-darwin-x64': 16.2.6 - '@next/swc-linux-arm64-gnu': 16.2.6 - 
'@next/swc-linux-arm64-musl': 16.2.6 - '@next/swc-linux-x64-gnu': 16.2.6 - '@next/swc-linux-x64-musl': 16.2.6 - '@next/swc-win32-arm64-msvc': 16.2.6 - '@next/swc-win32-x64-msvc': 16.2.6 + '@next/swc-darwin-arm64': 16.2.7 + '@next/swc-darwin-x64': 16.2.7 + '@next/swc-linux-arm64-gnu': 16.2.7 + '@next/swc-linux-arm64-musl': 16.2.7 + '@next/swc-linux-x64-gnu': 16.2.7 + '@next/swc-linux-x64-musl': 16.2.7 + '@next/swc-win32-arm64-msvc': 16.2.7 + '@next/swc-win32-x64-msvc': 16.2.7 sharp: 0.34.5 transitivePeerDependencies: - '@babel/core' @@ -2169,17 +2169,17 @@ snapshots: iconv-lite: 0.7.2 unpipe: 1.0.0 - react-dom@19.2.6(react@19.2.6): + react-dom@19.2.7(react@19.2.7): dependencies: - react: 19.2.6 + react: 19.2.7 scheduler: 0.27.0 - react-reconciler@0.33.0(react@19.2.6): + react-reconciler@0.33.0(react@19.2.7): dependencies: - react: 19.2.6 + react: 19.2.7 scheduler: 0.27.0 - react@19.2.6: {} + react@19.2.7: {} readdirp@5.0.0: {} @@ -2204,7 +2204,7 @@ snapshots: scheduler@0.27.0: {} - semver@7.8.0: + semver@7.8.2: optional: true send@1.2.1: @@ -2238,7 +2238,7 @@ snapshots: dependencies: '@img/colour': 1.1.0 detect-libc: 2.1.2 - semver: 7.8.0 + semver: 7.8.2 optionalDependencies: '@img/sharp-darwin-arm64': 0.34.5 '@img/sharp-darwin-x64': 0.34.5 @@ -2326,10 +2326,10 @@ snapshots: dependencies: ansi-regex: 6.2.2 - styled-jsx@5.1.6(react@19.2.6): + styled-jsx@5.1.6(react@19.2.7): dependencies: client-only: 0.0.1 - react: 19.2.6 + react: 19.2.7 tagged-tag@1.0.0: {} From a5c1bf1e1c715aedac9e2783b37c6c40661883ed Mon Sep 17 00:00:00 2001 From: Nico Date: Sun, 7 Jun 2026 09:03:25 +0200 Subject: [PATCH 03/46] Feat/portable logic foundation (#389) * Add portable expression logic foundation * Format portable expression foundation changes From e5cbb349385e778f3d07dc6ad7acaff26999ba31 Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 09:05:20 +0200 Subject: [PATCH 04/46] Fix guard typecheck findings --- packages/core/src/ir/semantics/expression-v1.ts | 13 +++++++++++-- 
packages/python/src/codegen-body-python.ts | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/core/src/ir/semantics/expression-v1.ts b/packages/core/src/ir/semantics/expression-v1.ts index 166439d1..699facd7 100644 --- a/packages/core/src/ir/semantics/expression-v1.ts +++ b/packages/core/src/ir/semantics/expression-v1.ts @@ -3,7 +3,7 @@ */ import { parseExpression } from '../../parser-expression.js'; -import { type IRNode, isExprObject } from '../../types.js'; +import type { IRNode } from '../../types.js'; import { type NodeContract, type NodeFixture, registerContract, type SemanticEnv } from './index.js'; import { evalPortableValue, isPortableBindingName } from './portable-scalar.js'; import type { Trace } from './trace.js'; @@ -17,9 +17,18 @@ function asExpressionV1Props(ir: IRNode): ExpressionV1Props { return (ir.props ?? {}) as ExpressionV1Props; } +function hasExpressionCode(expr: unknown): expr is { __expr: true; code: string } { + return ( + typeof expr === 'object' && + expr !== null && + (expr as { __expr?: unknown }).__expr === true && + typeof (expr as { code?: unknown }).code === 'string' + ); +} + function expressionSource(expr: unknown): string | undefined { if (expr === undefined || expr === null) return undefined; - if (isExprObject(expr)) return expr.code; + if (hasExpressionCode(expr)) return expr.code; return String(expr); } diff --git a/packages/python/src/codegen-body-python.ts b/packages/python/src/codegen-body-python.ts index 1875165c..4fb213fa 100644 --- a/packages/python/src/codegen-body-python.ts +++ b/packages/python/src/codegen-body-python.ts @@ -1877,6 +1877,7 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { `Mid-expression \`${node.op}\` is rejected — bind the call to a \`let\` first, then use the bound name.`, ); } + throw new Error(`emitPyExpression: unsupported expression kind '${(node as { kind?: string }).kind ?? 
'unknown'}'.`); } function emitPyTypeof(argument: ValueIR, ctx: BodyEmitContext): string { From 270d442e63c406dcbb988b87fa69b42b374cdf5a Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 12:11:13 +0200 Subject: [PATCH 05/46] Add KERN core runtime foundation --- packages/core/src/core-runtime/index.ts | 747 +++++++++++++++++++++++ packages/core/src/index.ts | 25 + packages/core/tests/core-runtime.test.ts | 329 ++++++++++ 3 files changed, 1101 insertions(+) create mode 100644 packages/core/src/core-runtime/index.ts create mode 100644 packages/core/tests/core-runtime.test.ts diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts new file mode 100644 index 00000000..3498569b --- /dev/null +++ b/packages/core/src/core-runtime/index.ts @@ -0,0 +1,747 @@ +import { parseExpression } from '../parser-expression.js'; +import { splitPortableExpressionList } from '../portable-expression-list.js'; +import type { IRNode } from '../types.js'; +import type { ValueIR } from '../value-ir.js'; + +const KERN_VALUE_BRAND: unique symbol = Symbol('KERN core runtime value'); +const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; + +export type KernValue = + | { kind: 'null' } + | { kind: 'undefined' } + | { kind: 'boolean'; value: boolean } + | { kind: 'number'; value: number } + | { kind: 'string'; value: string } + | { kind: 'array'; items: KernValue[] } + | { kind: 'record'; entries: Record } + | KernFunctionValue + | KernBuiltinValue; + +export interface KernFunctionValue { + kind: 'function'; + name?: string; + params: RuntimeParam[]; + body: IRNode[]; + env: CoreRuntimeEnv; +} + +export interface KernBuiltinValue { + kind: 'builtin'; + name: string; + call: (args: KernValue[]) => KernValue; +} + +export interface RuntimeParam { + name: string; + type?: string; + defaultExpr?: string; +} + +export type CoreCompletion = { kind: 'normal'; value: KernValue } | { kind: 'return'; value: KernValue }; + +export interface CoreRuntimeResult { + completion: 
CoreCompletion; + env: CoreRuntimeEnv; +} + +export interface CreateCoreRuntimeEnvOptions { + globals?: Record; + parent?: CoreRuntimeEnv; +} + +export class CoreRuntimeEnv { + private readonly bindings = new Map(); + + constructor(readonly parent?: CoreRuntimeEnv) {} + + define(name: string, value: KernValue): KernValue { + if (this.bindings.has(name)) throw new Error(`KERN core runtime binding already defined: ${name}`); + this.bindings.set(name, value); + return value; + } + + lookup(name: string): KernValue { + if (this.bindings.has(name)) return this.bindings.get(name) ?? kUndefined(); + if (this.parent) return this.parent.lookup(name); + throw new Error(`KERN core runtime binding not found: ${name}`); + } + + has(name: string): boolean { + return this.bindings.has(name) || (this.parent?.has(name) ?? false); + } + + child(): CoreRuntimeEnv { + return new CoreRuntimeEnv(this); + } +} + +export const kNull = (): KernValue => brandValue({ kind: 'null' }); +export const kUndefined = (): KernValue => brandValue({ kind: 'undefined' }); +export const kBoolean = (value: boolean): KernValue => brandValue({ kind: 'boolean', value }); +export const kNumber = (value: number): KernValue => { + if (!Number.isFinite(value)) throw new Error('KERN core runtime number must be finite.'); + return brandValue({ kind: 'number', value }); +}; +export const kString = (value: string): KernValue => brandValue({ kind: 'string', value }); + +export function createCoreRuntimeEnv(options: CreateCoreRuntimeEnvOptions = {}): CoreRuntimeEnv { + const env = new CoreRuntimeEnv(options.parent); + for (const [name, value] of Object.entries(options.globals ?? 
{})) env.define(name, fromHostValue(value)); + installPortableBuiltins(env); + return env; +} + +function installPortableBuiltins(env: CoreRuntimeEnv): void { + for (const builtin of [ + { + kind: 'builtin' as const, + name: 'String', + call: (args: KernValue[]) => { + if (args.length !== 1) throw new Error('KERN core runtime String() expects exactly one argument.'); + return kString(kernStringCoerce(args[0])); + }, + }, + ]) { + if (!env.has(builtin.name)) env.define(builtin.name, brandValue(builtin)); + } +} + +export function fromHostValue(value: unknown): KernValue { + if (isKernValue(value)) return value; + if (value === null) return kNull(); + if (value === undefined) return kUndefined(); + if (typeof value === 'boolean') return kBoolean(value); + if (typeof value === 'number') return kNumber(value); + if (typeof value === 'string') return kString(value); + if (Array.isArray(value)) return brandValue({ kind: 'array', items: Array.from(value, fromHostValue) }); + if (isPlainRecord(value)) { + const entries = createRecordEntries(); + for (const [key, entry] of Object.entries(value)) entries[key] = fromHostValue(entry); + return brandValue({ + kind: 'record', + entries, + }); + } + throw new Error(`Unsupported host value for KERN core runtime: ${typeof value}`); +} + +export function toHostValue(value: KernValue | undefined): unknown { + if (value === undefined) return undefined; + switch (value.kind) { + case 'null': + return null; + case 'undefined': + return undefined; + case 'boolean': + case 'number': + case 'string': + return value.value; + case 'array': + return value.items.map(toHostValue); + case 'record': + return Object.fromEntries(Object.entries(value.entries).map(([key, entry]) => [key, toHostValue(entry)])); + case 'function': + case 'builtin': + return `[KERN ${value.kind}${value.name ? 
` ${value.name}` : ''}]`; + } +} + +export function kernTruthy(value: KernValue): boolean { + switch (value.kind) { + case 'null': + case 'undefined': + return false; + case 'boolean': + return value.value; + case 'number': + return value.value !== 0; + case 'string': + return value.value.length > 0; + case 'array': + case 'record': + case 'function': + case 'builtin': + return true; + } +} + +export function evalCoreExpression(expr: string | ValueIR, env: CoreRuntimeEnv = createCoreRuntimeEnv()): KernValue { + installPortableBuiltins(env); + const valueIR = typeof expr === 'string' ? parseExpression(expr) : expr; + return evalValueIR(valueIR, env); +} + +export function runCoreRuntime( + nodeOrNodes: IRNode | readonly IRNode[], + env = createCoreRuntimeEnv(), +): CoreRuntimeResult { + const nodes: readonly IRNode[] = isIRNodeArray(nodeOrNodes) ? nodeOrNodes : runtimeChildren(nodeOrNodes); + return { completion: executeSequence(nodes, env), env }; +} + +export function callCoreFunction( + fnNode: IRNode, + args: KernValue[], + env = createCoreRuntimeEnv(), +): { value: KernValue; env: CoreRuntimeEnv } { + if (fnNode.type !== 'fn') throw new Error('KERN core runtime callCoreFunction expects an fn node.'); + const fn = makeFunction(fnNode, env); + return callFunctionValue(fn, args); +} + +function executeSequence(nodes: readonly IRNode[], env: CoreRuntimeEnv): CoreCompletion { + for (let i = 0; i < nodes.length; i += 1) { + const node = nodes[i]; + if (node.type === 'else') throw new Error('KERN core runtime `else` must immediately follow an `if`.'); + if (node.type === 'if') { + const completion = executeIf(node, nodes[i + 1], env); + if (nodes[i + 1]?.type === 'else') i += 1; + if (completion.kind !== 'normal') return completion; + continue; + } + const completion = executeNode(node, env); + if (completion.kind !== 'normal') return completion; + } + return { kind: 'normal', value: kUndefined() }; +} + +function executeNode(node: IRNode, env: CoreRuntimeEnv): 
CoreCompletion { + switch (node.type) { + case 'handler': + case '__block': + return executeSequence(node.children ?? [], env); + case 'let': + case 'expression-v1': { + const name = requiredString(node.props?.name, `${node.type} name=`); + const rawExpr = node.type === 'let' ? node.props?.value : node.props?.expr; + env.define(name, evalCoreExpression(unwrapExpr(rawExpr, `${node.type} expression`), env)); + return { kind: 'normal', value: kUndefined() }; + } + case 'return': { + if (node.props && Object.hasOwn(node.props, 'value')) { + return { kind: 'return', value: evalCoreExpression(unwrapExpr(node.props.value, 'return value='), env) }; + } + return { kind: 'return', value: kUndefined() }; + } + case 'fn': { + const fn = makeFunction(node, env); + env.define(requiredString(node.props?.name, 'fn name='), fn); + return { kind: 'normal', value: kUndefined() }; + } + case 'coalesce': + case 'firstDefined': + return executeCoalesce(node, env); + case 'firstTruthy': + return executeFirstTruthy(node, env); + default: + throw new Error(`KERN core runtime unsupported node type: ${node.type}`); + } +} + +function executeIf(node: IRNode, maybeElse: IRNode | undefined, env: CoreRuntimeEnv): CoreCompletion { + const cond = evalCoreExpression(unwrapExpr(node.props?.cond, 'if cond='), env); + if (kernTruthy(cond)) return executeSequence(node.children ?? [], env.child()); + if (maybeElse?.type === 'else') return executeSequence(maybeElse.children ?? 
[], env.child()); + return { kind: 'normal', value: kUndefined() }; +} + +function executeCoalesce(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { + const name = requiredString(node.props?.name, `${node.type} name=`); + const values = splitPortableExpressionList( + requiredString(node.props?.values, `${node.type} values=`), + `${node.type} values=`, + ); + if (values.length < 2) throw new Error(`KERN core runtime ${node.type} requires at least two values.`); + let winner = kUndefined(); + for (const value of values) { + const candidate = evalCoreExpression(value, env); + if (!isNullish(candidate)) { + winner = candidate; + break; + } + } + env.define(name, winner); + return { kind: 'normal', value: kUndefined() }; +} + +function executeFirstTruthy(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { + const name = requiredString(node.props?.name, 'firstTruthy name='); + const values = splitPortableExpressionList( + requiredString(node.props?.values, 'firstTruthy values='), + 'firstTruthy values=', + ); + if (values.length < 2) throw new Error('KERN core runtime firstTruthy requires at least two values.'); + let winner = kUndefined(); + for (const value of values) { + const candidate = evalCoreExpression(value, env); + if (kernTruthy(candidate)) { + winner = candidate; + break; + } + } + env.define(name, winner); + return { kind: 'normal', value: kUndefined() }; +} + +function evalValueIR(node: ValueIR, env: CoreRuntimeEnv): KernValue { + switch (node.kind) { + case 'numLit': + if (node.bigint) throw new Error('KERN core runtime bigint literals are not supported yet.'); + return kNumber(node.value); + case 'strLit': + return kString(node.value); + case 'boolLit': + return kBoolean(node.value); + case 'nullLit': + return kNull(); + case 'undefLit': + return kUndefined(); + case 'ident': + return env.lookup(node.name); + case 'tmplLit': + return kString( + node.quasis.reduce((out, quasi, index) => { + const expr = + index < node.expressions.length ? 
kernStringCoerce(evalValueIR(node.expressions[index], env)) : ''; + return out + quasi + expr; + }, ''), + ); + case 'arrayLit': + return brandValue({ kind: 'array', items: node.items.map((item) => evalValueIR(item, env)) }); + case 'objectLit': + return evalObjectLiteral(node, env); + case 'unary': + return evalUnary(node, env); + case 'binary': + return evalBinary(node, env); + case 'conditional': + return kernTruthy(evalValueIR(node.test, env)) + ? evalValueIR(node.consequent, env) + : evalValueIR(node.alternate, env); + case 'typeAssert': + case 'nonNull': + return evalValueIR(node.expression, env); + case 'member': + return evalMember(node, env); + case 'index': + return evalIndex(node, env); + case 'call': + return evalCall(node, env); + case 'lambda': + throw new Error('KERN core runtime lambda expressions are not supported in the first runtime slice.'); + default: + throw new Error(`KERN core runtime unsupported expression kind: ${node.kind}`); + } +} + +function evalObjectLiteral(node: Extract, env: CoreRuntimeEnv): KernValue { + const entries = createRecordEntries(); + for (const entry of node.entries) { + if (isObjectSpreadEntry(entry)) { + const spread = evalValueIR(entry.argument, env); + if (spread.kind !== 'record') throw new Error('KERN core runtime object spread requires a record.'); + for (const [key, value] of Object.entries(spread.entries)) entries[key] = value; + } else { + entries[entry.key] = evalValueIR(entry.value, env); + } + } + return brandValue({ kind: 'record', entries }); +} + +function evalUnary(node: Extract, env: CoreRuntimeEnv): KernValue { + const arg = evalValueIR(node.argument, env); + if (node.op === '!') return kBoolean(!kernTruthy(arg)); + if (node.op === '-' || node.op === '+') { + if (arg.kind !== 'number') throw new Error(`KERN core runtime unary ${node.op} requires a number.`); + return kNumber(node.op === '-' ? 
-arg.value : arg.value); + } + throw new Error(`KERN core runtime unsupported unary operator: ${node.op}`); +} + +function evalBinary(node: Extract, env: CoreRuntimeEnv): KernValue { + if (node.op === '&&') { + const left = evalValueIR(node.left, env); + return kernTruthy(left) ? evalValueIR(node.right, env) : left; + } + if (node.op === '||') { + const left = evalValueIR(node.left, env); + return kernTruthy(left) ? left : evalValueIR(node.right, env); + } + if (node.op === '??') { + const left = evalValueIR(node.left, env); + return isNullish(left) ? evalValueIR(node.right, env) : left; + } + + const left = evalValueIR(node.left, env); + const right = evalValueIR(node.right, env); + switch (node.op) { + case '+': + if (left.kind === 'number' && right.kind === 'number') return kNumber(left.value + right.value); + if (left.kind === 'string' && right.kind === 'string') return kString(left.value + right.value); + throw new Error('KERN core runtime + requires two numbers or two strings.'); + case '-': + case '*': + case '/': + case '%': + return evalNumberBinary(node.op, left, right); + case '===': + case '==': + return kBoolean(kernEquals(left, right)); + case '!==': + case '!=': + return kBoolean(!kernEquals(left, right)); + case '<': + case '<=': + case '>': + case '>=': + return evalOrderedComparison(node.op, left, right); + default: + throw new Error(`KERN core runtime unsupported binary operator: ${node.op}`); + } +} + +function evalNumberBinary(op: string, left: KernValue, right: KernValue): KernValue { + if (left.kind !== 'number' || right.kind !== 'number') { + throw new Error(`KERN core runtime ${op} requires two numbers.`); + } + if (op === '-') return kNumber(left.value - right.value); + if (op === '*') return kNumber(left.value * right.value); + if (right.value === 0 && (op === '/' || op === '%')) throw new Error(`KERN core runtime ${op} division by zero.`); + if (op === '/') return kNumber(left.value / right.value); + return kNumber(left.value % 
right.value); +} + +function evalOrderedComparison(op: string, left: KernValue, right: KernValue): KernValue { + if (!((left.kind === 'number' && right.kind === 'number') || (left.kind === 'string' && right.kind === 'string'))) { + throw new Error(`KERN core runtime ${op} requires same-kind number or string operands.`); + } + if (op === '<') return kBoolean(left.value < right.value); + if (op === '<=') return kBoolean(left.value <= right.value); + if (op === '>') return kBoolean(left.value > right.value); + return kBoolean(left.value >= right.value); +} + +function evalMember(node: Extract, env: CoreRuntimeEnv): KernValue { + const object = evalValueIR(node.object, env); + if (isNullish(object)) { + if (node.optional) return kUndefined(); + throw new Error(`KERN core runtime cannot read .${node.property} from ${object.kind}.`); + } + if (object.kind === 'record') { + return Object.hasOwn(object.entries, node.property) ? object.entries[node.property] : kUndefined(); + } + if (object.kind === 'array' && node.property === 'length') return kNumber(object.items.length); + if (object.kind === 'string' && node.property === 'length') return kNumber(object.value.length); + return kUndefined(); +} + +function evalIndex(node: Extract, env: CoreRuntimeEnv): KernValue { + const object = evalValueIR(node.object, env); + if (isNullish(object)) { + if (node.optional) return kUndefined(); + throw new Error(`KERN core runtime cannot index ${object.kind}.`); + } + const index = evalValueIR(node.index, env); + if (object.kind === 'array') { + if (index.kind !== 'number') throw new Error('KERN core runtime array index must be a number.'); + return object.items[index.value] ?? 
kUndefined(); + } + if (object.kind === 'record' || object.kind === 'string') { + if (index.kind !== 'string' && index.kind !== 'number') { + throw new Error('KERN core runtime record/string index must be a string or number.'); + } + const key = String(index.value); + if (object.kind === 'record') return Object.hasOwn(object.entries, key) ? object.entries[key] : kUndefined(); + const charIndex = + index.kind === 'number' ? index.value : INTEGER_INDEX_RE.test(index.value) ? Number(index.value) : NaN; + return Number.isInteger(charIndex) && charIndex >= 0 && charIndex < object.value.length + ? kString(object.value[charIndex] ?? '') + : kUndefined(); + } + return kUndefined(); +} + +function evalCall(node: Extract, env: CoreRuntimeEnv): KernValue { + const callee = evalValueIR(node.callee, env); + if (isNullish(callee)) { + if (node.optional) return kUndefined(); + throw new Error(`KERN core runtime cannot call ${callee.kind}.`); + } + const args = node.args.map((arg) => evalValueIR(arg, env)); + if (callee.kind === 'builtin') return callee.call(args); + if (callee.kind === 'function') return callFunctionValue(callee, args).value; + throw new Error(`KERN core runtime cannot call ${callee.kind}.`); +} + +function makeFunction(node: IRNode, env: CoreRuntimeEnv): KernFunctionValue { + return brandValue({ + kind: 'function', + name: requiredString(node.props?.name, 'fn name='), + params: runtimeParams(node), + body: runtimeFunctionBody(node), + env, + }); +} + +function callFunctionValue( + fn: KernFunctionValue, + args: readonly KernValue[], +): { value: KernValue; env: CoreRuntimeEnv } { + const callEnv = fn.env.child(); + fn.params.forEach((param, index) => { + const provided = args[index]; + const value = + provided === undefined || (provided.kind === 'undefined' && param.defaultExpr) + ? param.defaultExpr + ? 
evalCoreExpression(param.defaultExpr, callEnv) + : kUndefined() + : provided; + callEnv.define(param.name, value); + }); + const completion = executeSequence(fn.body, callEnv); + return { value: completion.value, env: callEnv }; +} + +function runtimeFunctionBody(node: IRNode): IRNode[] { + const handler = node.children?.find((child) => child.type === 'handler'); + const body = handler ? (handler.children ?? []) : (node.children ?? []); + return body.filter((child) => child.type !== 'param' && child.type !== 'decorator'); +} + +function runtimeChildren(node: IRNode): IRNode[] { + if (node.type === 'handler' || node.type === '__block') return node.children ?? []; + return [node]; +} + +function runtimeParams(node: IRNode): RuntimeParam[] { + const childParams = + node.children + ?.filter((child) => child.type === 'param') + .map((child) => ({ + name: requiredString(child.props?.name, 'param name='), + type: typeof child.props?.type === 'string' ? child.props.type : undefined, + defaultExpr: runtimeParamDefaultExpr(child), + })) ?? []; + if (childParams.length > 0) return childParams; + + const raw = typeof node.props?.params === 'string' ? node.props.params : ''; + if (!raw.trim()) return []; + return splitPortableExpressionList(raw, 'fn params=').map((part) => { + const defaultIndex = findRuntimeDefaultSeparator(part); + const beforeDefault = defaultIndex >= 0 ? part.slice(0, defaultIndex) : part; + const defaultExpr = defaultIndex >= 0 ? part.slice(defaultIndex + 1).trim() : undefined; + const typeIndex = beforeDefault.indexOf(':'); + const name = typeIndex >= 0 ? beforeDefault.slice(0, typeIndex) : beforeDefault; + const type = typeIndex >= 0 ? beforeDefault.slice(typeIndex + 1) : ''; + return { + name: requiredString(name.trim(), 'param name='), + type: type.trim() || undefined, + defaultExpr: defaultExpr || undefined, + }; + }); +} + +function runtimeParamDefaultExpr(node: IRNode): string | undefined { + const propName = Object.hasOwn(node.props ?? 
{}, 'value') ? 'value' : 'default'; + const rawValue = propName === 'value' ? node.props?.value : node.props?.default; + if (rawValue === undefined || rawValue === null) return undefined; + if (typeof rawValue === 'string' && (node.__quotedProps ?? []).includes(propName)) return JSON.stringify(rawValue); + return unwrapExpr(rawValue, 'param value='); +} + +function unwrapExpr(value: unknown, label: string): string { + if (typeof value === 'string') return value; + if (isExprObject(value)) return value.code; + if (value === undefined || value === null) throw new Error(`KERN core runtime missing ${label}.`); + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + throw new Error(`KERN core runtime ${label} must be a string expression.`); +} + +function requiredString(value: unknown, label: string): string { + if (typeof value !== 'string' || value.length === 0) throw new Error(`KERN core runtime requires ${label}.`); + return value; +} + +function kernStringCoerce(value: KernValue): string { + if (value.kind === 'null') return 'null'; + if (value.kind === 'undefined') return 'undefined'; + if (value.kind === 'boolean') return value.value ? 
'true' : 'false'; + if (value.kind === 'number') return String(value.value); + if (value.kind === 'string') return value.value; + return String(toHostValue(value)); +} + +function kernEquals(left: KernValue, right: KernValue): boolean { + if (left.kind !== right.kind) return false; + switch (left.kind) { + case 'null': + case 'undefined': + return true; + case 'boolean': + return left.value === (right as Extract).value; + case 'number': + return left.value === (right as Extract).value; + case 'string': + return left.value === (right as Extract).value; + case 'array': { + const rightArray = right as Extract; + return ( + left.items.length === rightArray.items.length && + left.items.every((item, i) => kernEquals(item, rightArray.items[i])) + ); + } + case 'record': { + const rightRecord = right as Extract; + const leftKeys = Object.keys(left.entries); + const rightKeys = Object.keys(rightRecord.entries); + return ( + leftKeys.length === rightKeys.length && + leftKeys.every( + (key) => Object.hasOwn(rightRecord.entries, key) && kernEquals(left.entries[key], rightRecord.entries[key]), + ) + ); + } + case 'function': + case 'builtin': + return left === right; + } +} + +function isNullish(value: KernValue): boolean { + return value.kind === 'null' || value.kind === 'undefined'; +} + +function isKernValue(value: unknown): value is KernValue { + if ( + !isPlainRecord(value) || + (value as { [KERN_VALUE_BRAND]?: true })[KERN_VALUE_BRAND] !== true || + typeof value.kind !== 'string' + ) { + return false; + } + switch (value.kind) { + case 'null': + case 'undefined': + return hasOnlyKeys(value, ['kind']); + case 'boolean': + return hasOnlyKeys(value, ['kind', 'value']) && typeof value.value === 'boolean'; + case 'number': + return hasOnlyKeys(value, ['kind', 'value']) && typeof value.value === 'number' && Number.isFinite(value.value); + case 'string': + return hasOnlyKeys(value, ['kind', 'value']) && typeof value.value === 'string'; + case 'array': + return ( + 
hasOnlyKeys(value, ['kind', 'items']) && + Array.isArray(value.items) && + !hasArrayHoles(value.items) && + value.items.every(isKernValue) + ); + case 'record': + return ( + hasOnlyKeys(value, ['kind', 'entries']) && + isPlainRecord(value.entries) && + Object.values(value.entries).every(isKernValue) + ); + case 'function': + return ( + hasOnlyKeys(value, ['kind', 'params', 'body', 'env'], ['name']) && + (value.name === undefined || typeof value.name === 'string') && + Array.isArray(value.params) && + Array.isArray(value.body) && + value.env instanceof CoreRuntimeEnv + ); + case 'builtin': + return ( + hasOnlyKeys(value, ['kind', 'name', 'call']) && + typeof value.name === 'string' && + typeof value.call === 'function' + ); + default: + return false; + } +} + +function brandValue(value: T): T { + Object.defineProperty(value, KERN_VALUE_BRAND, { value: true }); + return value; +} + +function hasArrayHoles(value: readonly unknown[]): boolean { + for (let i = 0; i < value.length; i += 1) { + if (!Object.hasOwn(value, i)) return true; + } + return false; +} + +function hasOnlyKeys( + value: Record, + required: readonly string[], + optional: readonly string[] = [], +): boolean { + const allowed = new Set([...required, ...optional]); + const keys = Object.keys(value); + return required.every((key) => Object.hasOwn(value, key)) && keys.every((key) => allowed.has(key)); +} + +function createRecordEntries(): Record { + return Object.create(null) as Record; +} + +function isPlainRecord(value: unknown): value is Record { + if (typeof value !== 'object' || value === null) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function findRuntimeDefaultSeparator(value: string): number { + let depth = 0; + let quote: '"' | "'" | '`' | '' = ''; + let escaped = false; + for (let i = 0; i < value.length; i += 1) { + const ch = value[i]; + if (quote) { + if (escaped) { + escaped = false; + } else if (ch === '\\') { + 
escaped = true; + } else if (ch === quote) { + quote = ''; + } + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + continue; + } + if (ch === '<' || ch === '(' || ch === '{' || ch === '[') depth += 1; + else if ((ch === '>' || ch === ')' || ch === '}' || ch === ']') && depth > 0) depth -= 1; + else if (ch === '=' && depth === 0) { + if ( + value[i + 1] === '>' || + value[i + 1] === '=' || + value[i - 1] === '=' || + value[i - 1] === '<' || + value[i - 1] === '>' || + value[i - 1] === '!' + ) { + continue; + } + return i; + } + } + return -1; +} + +function isExprObject(value: unknown): value is { __expr: true; code: string } { + return isPlainRecord(value) && value.__expr === true && typeof value.code === 'string'; +} + +function isIRNodeArray(value: IRNode | readonly IRNode[]): value is readonly IRNode[] { + return Array.isArray(value); +} + +function isObjectSpreadEntry( + entry: Extract['entries'][number], +): entry is { kind: 'spread'; argument: ValueIR } { + return 'kind' in entry && entry.kind === 'spread'; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index d6b7b160..6cf5b14c 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -140,6 +140,31 @@ export { VALID_STRUCTURES, VALID_TARGETS, } from './config.js'; +export type { + CoreCompletion, + CoreRuntimeResult, + CreateCoreRuntimeEnvOptions, + KernBuiltinValue, + KernFunctionValue, + KernValue, + RuntimeParam, +} from './core-runtime/index.js'; +// KERN Core Runtime +export { + CoreRuntimeEnv, + callCoreFunction, + createCoreRuntimeEnv, + evalCoreExpression, + fromHostValue, + kBoolean, + kernTruthy, + kNull, + kNumber, + kString, + kUndefined, + runCoreRuntime, + toHostValue, +} from './core-runtime/index.js'; export type { CoverageGap } from './coverage-gap.js'; // Coverage gap emitter (v3) export { collectCoverageGaps, readCoverageGaps, writeCoverageGaps } from './coverage-gap.js'; diff --git 
a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts new file mode 100644 index 00000000..6ded098a --- /dev/null +++ b/packages/core/tests/core-runtime.test.ts @@ -0,0 +1,329 @@ +import { + CoreRuntimeEnv, + callCoreFunction, + createCoreRuntimeEnv, + evalCoreExpression, + fromHostValue, + kBoolean, + kernTruthy, + kNull, + kNumber, + kString, + kUndefined, + runCoreRuntime, + toHostValue, +} from '../src/index.js'; +import type { IRNode } from '../src/types.js'; + +function handler(children: IRNode[]): IRNode { + return { type: 'handler', props: { lang: 'kern' }, children }; +} + +describe('KERN core runtime values and expressions', () => { + test('truthiness is owned by KERN values', () => { + expect(kernTruthy(kNull())).toBe(false); + expect(kernTruthy(kUndefined())).toBe(false); + expect(kernTruthy(kBoolean(false))).toBe(false); + expect(kernTruthy(kNumber(0))).toBe(false); + expect(kernTruthy(kString(''))).toBe(false); + expect(kernTruthy(kString('x'))).toBe(true); + }); + + test('String(value) uses KERN coercion, not host spelling', () => { + const env = createCoreRuntimeEnv({ + globals: { + n: 12, + none: null, + yes: true, + no: false, + }, + }); + expect(toHostValue(evalCoreExpression('String(n)', env))).toBe('12'); + expect(toHostValue(evalCoreExpression('String(none)', env))).toBe('null'); + expect(toHostValue(evalCoreExpression('String(yes)', env))).toBe('true'); + expect(toHostValue(evalCoreExpression('String(no)', env))).toBe('false'); + }); + + test('null and undefined are distinct but both nullish', () => { + expect(toHostValue(kNull())).toBeNull(); + expect(toHostValue(kUndefined())).toBeUndefined(); + const env = createCoreRuntimeEnv({ globals: { a: undefined, b: null, c: 5 } }); + expect(toHostValue(evalCoreExpression('a ?? c', env))).toBe(5); + expect(toHostValue(evalCoreExpression('b ?? 
c', env))).toBe(5); + }); + + test('plain host records with kind fields are not mistaken for KERN values', () => { + const value = fromHostValue({ kind: 'trap', label: 'Trap' }); + expect(toHostValue(value)).toEqual({ kind: 'trap', label: 'Trap' }); + expect(toHostValue(fromHostValue({ kind: 'null', label: 'Trap' }))).toEqual({ kind: 'null', label: 'Trap' }); + expect(toHostValue(fromHostValue({ kind: 'string', value: 'x', label: 'Trap' }))).toEqual({ + kind: 'string', + label: 'Trap', + value: 'x', + }); + expect(toHostValue(fromHostValue({ kind: 'string', value: 'door' }))).toEqual({ kind: 'string', value: 'door' }); + }); + + test('record maps use own properties only', () => { + const value = fromHostValue({ a: 1 }); + if (value.kind !== 'record') throw new Error('expected record value'); + expect(Object.getPrototypeOf(value.entries)).toBeNull(); + const env = createCoreRuntimeEnv({ globals: { record: value } }); + expect(toHostValue(evalCoreExpression('record.a', env))).toBe(1); + expect(toHostValue(evalCoreExpression('record.toString', env))).toBeUndefined(); + }); + + test('sparse host arrays become dense KERN arrays with undefined entries', () => { + const host = [] as unknown[]; + host[1] = 'set'; + expect(toHostValue(fromHostValue(host))).toEqual([undefined, 'set']); + }); + + test('caller-created envs still get portable builtins for expression evaluation', () => { + const env = new CoreRuntimeEnv(); + env.define('flag', kBoolean(false)); + expect(toHostValue(evalCoreExpression('String(flag)', env))).toBe('false'); + }); + + test('structural equality preserves undefined/null distinctions in arrays and records', () => { + const env = createCoreRuntimeEnv({ + globals: { + xs: [undefined], + ys: [null], + a: { value: undefined }, + b: {}, + }, + }); + expect(toHostValue(evalCoreExpression('xs === ys', env))).toBe(false); + expect(toHostValue(evalCoreExpression('a === b', env))).toBe(false); + }); + + test('string index misses return KERN undefined', () => { + 
const env = createCoreRuntimeEnv({ globals: { label: 'ab' } }); + expect(toHostValue(evalCoreExpression('label[1]', env))).toBe('b'); + expect(toHostValue(evalCoreExpression('label["1"]', env))).toBe('b'); + expect(toHostValue(evalCoreExpression('label[4]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('label[""]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('label["1.0"]', env))).toBeUndefined(); + }); + + test('optional index skips unresolved index expressions for nullish objects', () => { + const env = createCoreRuntimeEnv({ globals: { maybe: null } }); + expect(toHostValue(evalCoreExpression('maybe?.[missingName]', env))).toBeUndefined(); + }); + + test('optional calls skip unresolved argument expressions for nullish callees', () => { + const env = createCoreRuntimeEnv({ globals: { maybeFn: null } }); + expect(toHostValue(evalCoreExpression('maybeFn?.(missingName)', env))).toBeUndefined(); + }); + + test('division by zero fails with a KERN runtime diagnostic', () => { + const env = createCoreRuntimeEnv(); + expect(() => evalCoreExpression('4 / 0', env)).toThrow(/division by zero/); + expect(() => evalCoreExpression('4 % 0', env)).toThrow(/division by zero/); + }); +}); + +describe('KERN core runtime statements', () => { + test('runs let, expression-v1, and return', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'count', value: '41' } }, + { type: 'expression-v1', props: { name: 'label', expr: '`n=${count + 1}`' } }, + { type: 'return', props: { value: 'label' } }, + ]), + ); + expect(result.completion.kind).toBe('return'); + expect(toHostValue(result.completion.value)).toBe('n=42'); + }); + + test('if/else executes only the selected branch and block-local lets do not leak', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'x', value: '1' } }, + { type: 'if', props: { cond: 'false' }, children: [{ type: 'let', props: { name: 'x', value: '2' } }] }, + { 
type: 'else', children: [{ type: 'let', props: { name: 'y', value: '3' } }] }, + { type: 'return', props: { value: 'x' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toBe(1); + expect(() => result.env.lookup('y')).toThrow(/not found/); + }); + + test('coalesce and firstDefined preserve falsy defined values', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'missing', value: 'undefined' } }, + { type: 'let', props: { name: 'zero', value: '0' } }, + { type: 'let', props: { name: 'flag', value: 'false' } }, + { type: 'let', props: { name: 'empty', value: '""' } }, + { type: 'coalesce', props: { name: 'a', values: "missing, zero, 'fallback'" } }, + { type: 'firstDefined', props: { name: 'b', values: "missing, flag, 'fallback'" } }, + { type: 'coalesce', props: { name: 'c', values: "missing, empty, 'fallback'" } }, + { type: 'return', props: { value: '{ a: a, b: b, c: c }' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toEqual({ a: 0, b: false, c: '' }); + }); + + test('coalesce and firstTruthy short-circuit later expressions', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'present', value: '"ok"' } }, + { type: 'coalesce', props: { name: 'a', values: 'present, missingName' } }, + { type: 'firstTruthy', props: { name: 'b', values: 'present, alsoMissing' } }, + { type: 'return', props: { value: '{ a: a, b: b }' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toEqual({ a: 'ok', b: 'ok' }); + }); +}); + +describe('KERN core runtime functions', () => { + test('nested fn captures the lexical environment and returns through its own frame', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'base', value: '10' } }, + { + type: 'fn', + props: { name: 'addBase', params: 'amount:number', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'amount + 
base' } }], + }, + ], + }, + { type: 'let', props: { name: 'total', value: 'addBase(5)' } }, + { type: 'return', props: { value: 'total' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toBe(15); + }); + + test('function params shadow outer bindings without mutating them', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'x', value: '1' } }, + { + type: 'fn', + props: { name: 'echo', params: 'x:number', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'x' } }], + }, + ], + }, + { type: 'let', props: { name: 'inner', value: 'echo(7)' } }, + { type: 'return', props: { value: '{ outer: x, inner: inner }' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toEqual({ outer: 1, inner: 7 }); + }); + + test('function parameter defaults evaluate in the call frame', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'base', value: '5' } }, + { + type: 'fn', + props: { name: 'fill', params: 'x:number=base + 2,y:number=x + 3', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'y' } }], + }, + ], + }, + { type: 'return', props: { value: 'fill()' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toBe(10); + }); + + test('explicit KERN undefined triggers function parameter defaults', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'fallback', params: 'value:number=3', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'value' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, [kUndefined()]); + expect(toHostValue(result.value)).toBe(3); + }); + + test('legacy parameter parsing preserves colons inside type text', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'readA', params: 
'obj:{a:number,b:string}={ a: 1, b: "x" }', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'obj.a' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, []); + expect(toHostValue(result.value)).toBe(1); + }); + + test('structured param child defaults are supported', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'greet', returns: 'string' }, + children: [ + { type: 'param', props: { name: 'name', type: 'string', value: 'world' }, __quotedProps: ['value'] }, + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: '`hi ${name}`' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, []); + expect(toHostValue(result.value)).toBe('hi world'); + }); + + test('structured default prop quoting is supported', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'greet', returns: 'string' }, + children: [ + { type: 'param', props: { name: 'name', type: 'string', default: 'world' }, __quotedProps: ['default'] }, + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: '`hi ${name}`' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, []); + expect(toHostValue(result.value)).toBe('hi world'); + }); + + test('callCoreFunction executes a top-level fn with host args', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'label', params: 'value:number', returns: 'string' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: '`v=${value}`' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, [fromHostValue(9)]); + expect(toHostValue(result.value)).toBe('v=9'); + }); +}); From 4a8107aa0e8534cad53f06fdf008315a13e3541d Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 18:30:04 +0200 Subject: [PATCH 06/46] feat(core): add kern object runtime foundation --- 
packages/core/package.json | 4 + packages/core/src/core-contracts/boolean.ts | 111 +++ packages/core/src/core-contracts/function.ts | 8 + packages/core/src/core-contracts/index.ts | 57 ++ packages/core/src/core-contracts/list.ts | 54 ++ packages/core/src/core-contracts/nullish.ts | 15 + packages/core/src/core-contracts/number.ts | 215 ++++++ packages/core/src/core-contracts/record.ts | 32 + packages/core/src/core-contracts/schema.ts | 191 +++++ packages/core/src/core-contracts/semantics.ts | 251 +++++++ packages/core/src/core-contracts/string.ts | 337 +++++++++ .../core/src/core-runtime/contract-adapter.ts | 93 +++ packages/core/src/core-runtime/index.ts | 659 +++++++++++++++++- packages/core/src/core-runtime/value-brand.ts | 6 + packages/core/src/index.ts | 40 ++ packages/core/tests/core-contracts.test.ts | 341 +++++++++ packages/core/tests/core-runtime.test.ts | 306 ++++++++ 17 files changed, 2691 insertions(+), 29 deletions(-) create mode 100644 packages/core/src/core-contracts/boolean.ts create mode 100644 packages/core/src/core-contracts/function.ts create mode 100644 packages/core/src/core-contracts/index.ts create mode 100644 packages/core/src/core-contracts/list.ts create mode 100644 packages/core/src/core-contracts/nullish.ts create mode 100644 packages/core/src/core-contracts/number.ts create mode 100644 packages/core/src/core-contracts/record.ts create mode 100644 packages/core/src/core-contracts/schema.ts create mode 100644 packages/core/src/core-contracts/semantics.ts create mode 100644 packages/core/src/core-contracts/string.ts create mode 100644 packages/core/src/core-runtime/contract-adapter.ts create mode 100644 packages/core/src/core-runtime/value-brand.ts create mode 100644 packages/core/tests/core-contracts.test.ts diff --git a/packages/core/package.json b/packages/core/package.json index 9d166f08..ea8eec4f 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -30,6 +30,10 @@ "types": "./dist/config.d.ts", "default": 
"./dist/config.js" }, + "./core-contracts": { + "types": "./dist/core-contracts/index.d.ts", + "default": "./dist/core-contracts/index.js" + }, "./parser": { "types": "./dist/parser.d.ts", "default": "./dist/parser.js" diff --git a/packages/core/src/core-contracts/boolean.ts b/packages/core/src/core-contracts/boolean.ts new file mode 100644 index 00000000..25c8b8ff --- /dev/null +++ b/packages/core/src/core-contracts/boolean.ts @@ -0,0 +1,111 @@ +import type { CoreTypeContract } from './schema.js'; + +export const BOOLEAN_CONTRACT = { + name: 'Boolean', + kind: 'primitive', + strict: true, + operations: [ + { + id: 'Boolean.not', + kind: 'method', + args: ['Boolean'], + returns: 'Boolean', + lowers: { + kern: 'Boolean.not($0)', + ts: '__kernBooleanNot($0)', + python: '__kern_boolean_not($0)', + }, + fixtures: [ + { args: [true], returns: false }, + { args: [false], returns: true }, + ], + review: { + summary: 'Strict boolean negation.', + graph: ['Boolean', 'portable'], + }, + }, + { + id: 'Boolean.and', + kind: 'method', + args: ['Boolean', 'Boolean'], + returns: 'Boolean', + lowers: { + kern: 'Boolean.and($0, $1)', + ts: '__kernBooleanAnd($0, $1)', + python: '__kern_boolean_and($0, $1)', + }, + fixtures: [ + { args: [true, true], returns: true }, + { args: [true, false], returns: false }, + { args: [false, true], returns: false }, + { args: [true, 'true'], throws: { code: 'strict-type', message: 'Boolean.and expects Boolean, Boolean.' } }, + { args: [true, 1], throws: { code: 'strict-type', message: 'Boolean.and expects Boolean, Boolean.' 
} }, + ], + review: { + summary: 'Strict boolean conjunction; both operands must be Boolean.', + graph: ['Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Boolean.or', + kind: 'method', + args: ['Boolean', 'Boolean'], + returns: 'Boolean', + lowers: { + kern: 'Boolean.or($0, $1)', + ts: '__kernBooleanOr($0, $1)', + python: '__kern_boolean_or($0, $1)', + }, + fixtures: [ + { args: [false, false], returns: false }, + { args: [true, false], returns: true }, + { args: [false, true], returns: true }, + { args: [false, 'false'], throws: { code: 'strict-type', message: 'Boolean.or expects Boolean, Boolean.' } }, + ], + review: { + summary: 'Strict boolean disjunction; both operands must be Boolean.', + graph: ['Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Boolean.equals', + kind: 'method', + args: ['Boolean', 'Boolean'], + returns: 'Boolean', + lowers: { + kern: 'Boolean.equals($0, $1)', + ts: '__kernBooleanEquals($0, $1)', + python: '__kern_boolean_equals($0, $1)', + }, + fixtures: [ + { args: [true, true], returns: true }, + { args: [true, false], returns: false }, + { args: [true, 'true'], throws: { code: 'strict-type', message: 'Boolean.equals expects Boolean, Boolean.' } }, + { args: [true, 1], throws: { code: 'strict-type', message: 'Boolean.equals expects Boolean, Boolean.' 
} }, + ], + review: { + summary: 'Strict boolean equality; cross-type equality is a type error in schema v1.', + graph: ['Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Boolean.toString', + kind: 'coercion', + args: ['Boolean'], + returns: 'String', + lowers: { + kern: 'Boolean.toString($0)', + ts: '__kernBooleanToString($0)', + python: '__kern_boolean_to_string($0)', + }, + fixtures: [ + { args: [true], returns: 'true' }, + { args: [false], returns: 'false' }, + ], + review: { + summary: 'Portable Boolean to String coercion using KERN lowercase boolean spelling.', + graph: ['Boolean', 'String', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/function.ts b/packages/core/src/core-contracts/function.ts new file mode 100644 index 00000000..d289ad28 --- /dev/null +++ b/packages/core/src/core-contracts/function.ts @@ -0,0 +1,8 @@ +import type { CoreTypeContract } from './schema.js'; + +export const FUNCTION_CONTRACT = { + name: 'Function', + kind: 'callable', + strict: true, + operations: [], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/index.ts b/packages/core/src/core-contracts/index.ts new file mode 100644 index 00000000..24de1296 --- /dev/null +++ b/packages/core/src/core-contracts/index.ts @@ -0,0 +1,57 @@ +import { BOOLEAN_CONTRACT } from './boolean.js'; +import { FUNCTION_CONTRACT } from './function.js'; +import { LIST_CONTRACT } from './list.js'; +import { NULL_CONTRACT, UNDEFINED_CONTRACT } from './nullish.js'; +import { NUMBER_CONTRACT } from './number.js'; +import { RECORD_CONTRACT } from './record.js'; +import type { CoreTypeContractRegistry } from './schema.js'; +import { STRING_CONTRACT } from './string.js'; + +export { BOOLEAN_CONTRACT } from './boolean.js'; +export { FUNCTION_CONTRACT } from './function.js'; +export { LIST_CONTRACT } from './list.js'; +export { NULL_CONTRACT, UNDEFINED_CONTRACT } from './nullish.js'; +export { NUMBER_CONTRACT 
} from './number.js'; +export { RECORD_CONTRACT } from './record.js'; +export type { + CoreFixture, + CoreFixtureError, + CoreFixtureValue, + CoreGraphEdge, + CoreLowerings, + CoreOperation, + CoreOperationKind, + CoreOperationReturns, + CoreTypeContract, + CoreTypeContractRegistry, + CoreTypeKind, + CoreTypeName, +} from './schema.js'; +export { + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + CORE_TYPE_NAMES, + contractToGraphEdges, + isCoreFixtureFunction, + isCoreFixtureUndefined, +} from './schema.js'; +export { + CoreContractEvaluationError, + coreFixtureValueType, + evaluateCoreContractOperation, +} from './semantics.js'; +export { STRING_CONTRACT } from './string.js'; + +export const CORE_TYPE_CONTRACTS = { + schemaVersion: 1, + types: { + String: STRING_CONTRACT, + Boolean: BOOLEAN_CONTRACT, + Number: NUMBER_CONTRACT, + List: LIST_CONTRACT, + Record: RECORD_CONTRACT, + Function: FUNCTION_CONTRACT, + Null: NULL_CONTRACT, + Undefined: UNDEFINED_CONTRACT, + }, +} as const satisfies CoreTypeContractRegistry; diff --git a/packages/core/src/core-contracts/list.ts b/packages/core/src/core-contracts/list.ts new file mode 100644 index 00000000..4ea5e38d --- /dev/null +++ b/packages/core/src/core-contracts/list.ts @@ -0,0 +1,54 @@ +import { CORE_FIXTURE_UNDEFINED, type CoreTypeContract } from './schema.js'; + +export const LIST_CONTRACT = { + name: 'List', + kind: 'collection', + strict: true, + operations: [ + { + id: 'List.length', + kind: 'property', + args: ['List'], + returns: 'Number', + lowers: { + kern: 'List.length($0)', + ts: '__kernListLength($0)', + python: '__kern_list_length($0)', + }, + fixtures: [ + { args: [[]], returns: 0 }, + { args: [[1, 2, 3]], returns: 3 }, + { args: ['not-list'], throws: { code: 'strict-type', message: 'List.length expects List.' 
} }, + ], + review: { + summary: 'Strict list cardinality.', + graph: ['List', 'Number', 'strict', 'portable'], + }, + }, + { + id: 'List.index', + kind: 'method', + args: ['List', 'Number'], + returns: ['String', 'Boolean', 'Number', 'List', 'Record', 'Function', 'Null', 'Undefined'], + lowers: { + kern: 'List.index($0, $1)', + ts: '__kernListIndex($0, $1)', + python: '__kern_list_index($0, $1)', + }, + fixtures: [ + { args: [[10, 20, 30], 0], returns: 10 }, + { args: [[10, 20, 30], 2], returns: 30 }, + { args: [[10, 20, 30], 3], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[10, 20, 30], -1], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[10, 20, 30], 1.5], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[10, CORE_FIXTURE_UNDEFINED, 30], 1], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[], 0], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[10], '0'], throws: { code: 'strict-type', message: 'List.index expects List, Number.' } }, + ], + review: { + summary: 'Strict list index by numeric offset; misses return Undefined.', + graph: ['List', 'Number', 'Undefined', 'strict', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/nullish.ts b/packages/core/src/core-contracts/nullish.ts new file mode 100644 index 00000000..8b59b36b --- /dev/null +++ b/packages/core/src/core-contracts/nullish.ts @@ -0,0 +1,15 @@ +import type { CoreTypeContract } from './schema.js'; + +export const NULL_CONTRACT = { + name: 'Null', + kind: 'nullish', + strict: true, + operations: [], +} as const satisfies CoreTypeContract; + +export const UNDEFINED_CONTRACT = { + name: 'Undefined', + kind: 'nullish', + strict: true, + operations: [], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/number.ts b/packages/core/src/core-contracts/number.ts new file mode 100644 index 00000000..33db742a --- /dev/null +++ b/packages/core/src/core-contracts/number.ts @@ -0,0 +1,215 @@ +import type { CoreTypeContract } 
from './schema.js'; + +export const NUMBER_CONTRACT = { + name: 'Number', + kind: 'primitive', + strict: true, + operations: [ + { + id: 'Number.negate', + kind: 'method', + args: ['Number'], + returns: 'Number', + lowers: { + kern: 'Number.negate($0)', + ts: '__kernNumberNegate($0)', + python: '__kern_number_negate($0)', + }, + fixtures: [ + { args: [3], returns: -3 }, + { args: [-3], returns: 3 }, + { args: ['3'], throws: { code: 'strict-type', message: 'Number.negate expects Number.' } }, + ], + review: { + summary: 'Strict numeric negation over finite KERN Numbers.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.add', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.add($0, $1)', + ts: '__kernNumberAdd($0, $1)', + python: '__kern_number_add($0, $1)', + }, + fixtures: [ + { args: [2, 3], returns: 5 }, + { args: [-2, 3], returns: 1 }, + { args: [2, '3'], throws: { code: 'strict-type', message: 'Number.add expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric addition over finite KERN Numbers.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.subtract', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.subtract($0, $1)', + ts: '__kernNumberSubtract($0, $1)', + python: '__kern_number_subtract($0, $1)', + }, + fixtures: [ + { args: [5, 3], returns: 2 }, + { args: [3, 5], returns: -2 }, + { args: [5, false], throws: { code: 'strict-type', message: 'Number.subtract expects Number, Number.' 
} }, + ], + review: { + summary: 'Strict numeric subtraction over finite KERN Numbers.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.multiply', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.multiply($0, $1)', + ts: '__kernNumberMultiply($0, $1)', + python: '__kern_number_multiply($0, $1)', + }, + fixtures: [ + { args: [3, 4], returns: 12 }, + { args: [-3, 4], returns: -12 }, + { args: [3, null], throws: { code: 'strict-type', message: 'Number.multiply expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric multiplication over finite KERN Numbers.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.divide', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.divide($0, $1)', + ts: '__kernNumberDivide($0, $1)', + python: '__kern_number_divide($0, $1)', + }, + fixtures: [ + { args: [6, 2], returns: 3 }, + { args: [5, 2], returns: 2.5 }, + { args: [1, 0], throws: { code: 'division-by-zero', message: 'Number.divide division by zero.' } }, + { args: [6, '2'], throws: { code: 'strict-type', message: 'Number.divide expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric division over finite KERN Numbers; zero divisor is a contract error.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.remainder', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.remainder($0, $1)', + ts: '__kernNumberRemainder($0, $1)', + python: '__kern_number_remainder($0, $1)', + }, + fixtures: [ + { args: [5, 2], returns: 1 }, + { args: [-5, 2], returns: -1 }, + { args: [5, -2], returns: 1 }, + { args: [1, 0], throws: { code: 'division-by-zero', message: 'Number.remainder division by zero.' } }, + { args: [5, '2'], throws: { code: 'strict-type', message: 'Number.remainder expects Number, Number.' 
} }, + ], + review: { + summary: 'Strict numeric remainder using KERN dividend-sign semantics; zero divisor is a contract error.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.lessThan', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Boolean', + lowers: { + kern: 'Number.lessThan($0, $1)', + ts: '__kernNumberLessThan($0, $1)', + python: '__kern_number_less_than($0, $1)', + }, + fixtures: [ + { args: [2, 3], returns: true }, + { args: [3, 2], returns: false }, + { args: [2, '3'], throws: { code: 'strict-type', message: 'Number.lessThan expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric less-than comparison.', + graph: ['Number', 'Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Number.lessThanOrEqual', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Boolean', + lowers: { + kern: 'Number.lessThanOrEqual($0, $1)', + ts: '__kernNumberLessThanOrEqual($0, $1)', + python: '__kern_number_less_than_or_equal($0, $1)', + }, + fixtures: [ + { args: [2, 2], returns: true }, + { args: [3, 2], returns: false }, + { args: [2, null], throws: { code: 'strict-type', message: 'Number.lessThanOrEqual expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric less-than-or-equal comparison.', + graph: ['Number', 'Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Number.greaterThan', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Boolean', + lowers: { + kern: 'Number.greaterThan($0, $1)', + ts: '__kernNumberGreaterThan($0, $1)', + python: '__kern_number_greater_than($0, $1)', + }, + fixtures: [ + { args: [3, 2], returns: true }, + { args: [2, 3], returns: false }, + { args: [3, true], throws: { code: 'strict-type', message: 'Number.greaterThan expects Number, Number.' 
} }, + ], + review: { + summary: 'Strict numeric greater-than comparison.', + graph: ['Number', 'Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Number.greaterThanOrEqual', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Boolean', + lowers: { + kern: 'Number.greaterThanOrEqual($0, $1)', + ts: '__kernNumberGreaterThanOrEqual($0, $1)', + python: '__kern_number_greater_than_or_equal($0, $1)', + }, + fixtures: [ + { args: [3, 3], returns: true }, + { args: [2, 3], returns: false }, + { + args: [3, '3'], + throws: { code: 'strict-type', message: 'Number.greaterThanOrEqual expects Number, Number.' }, + }, + ], + review: { + summary: 'Strict numeric greater-than-or-equal comparison.', + graph: ['Number', 'Boolean', 'strict', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/record.ts b/packages/core/src/core-contracts/record.ts new file mode 100644 index 00000000..d250f23f --- /dev/null +++ b/packages/core/src/core-contracts/record.ts @@ -0,0 +1,32 @@ +import { CORE_FIXTURE_UNDEFINED, type CoreTypeContract } from './schema.js'; + +export const RECORD_CONTRACT = { + name: 'Record', + kind: 'record', + strict: true, + operations: [ + { + id: 'Record.get', + kind: 'method', + args: ['Record', 'String'], + returns: ['String', 'Boolean', 'Number', 'List', 'Record', 'Function', 'Null', 'Undefined'], + lowers: { + kern: 'Record.get($0, $1)', + ts: '__kernRecordGet($0, $1)', + python: '__kern_record_get($0, $1)', + }, + fixtures: [ + { args: [{ x: 1 }, 'x'], returns: 1 }, + { args: [{ x: 1 }, 'y'], returns: CORE_FIXTURE_UNDEFINED }, + { args: [{ x: CORE_FIXTURE_UNDEFINED }, 'x'], returns: CORE_FIXTURE_UNDEFINED }, + { args: [{}, 'toString'], returns: CORE_FIXTURE_UNDEFINED }, + { args: [{}, ''], returns: CORE_FIXTURE_UNDEFINED }, + { args: [{ x: 1 }, 0], throws: { code: 'strict-type', message: 'Record.get expects Record, String.' 
} }, + ], + review: { + summary: 'Strict own-key record lookup; missing keys return Undefined.', + graph: ['Record', 'String', 'Undefined', 'strict', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/schema.ts b/packages/core/src/core-contracts/schema.ts new file mode 100644 index 00000000..9436c7d8 --- /dev/null +++ b/packages/core/src/core-contracts/schema.ts @@ -0,0 +1,191 @@ +export const CORE_TYPE_NAMES = [ + 'String', + 'Boolean', + 'Number', + 'List', + 'Record', + 'Function', + 'Null', + 'Undefined', +] as const; + +export type CoreTypeName = (typeof CORE_TYPE_NAMES)[number]; + +export type CoreTypeKind = 'primitive' | 'collection' | 'callable' | 'record' | 'nullish'; + +export type CoreOperationKind = 'method' | 'property' | 'operator' | 'constructor' | 'coercion'; + +export type CoreLowerings = { + /** + * Target lowerings are semantic helper calls, not raw host snippets. + * Each helper must enforce the same strict argument and return contract as + * evaluateCoreContractOperation before using host operations internally. + */ + readonly kern?: string; + readonly ts?: string; + readonly python?: string; +}; + +// Fixture data reserves this exact record shape for Undefined so JSON fixtures +// can distinguish null from undefined without overloading result records. 
+export const CORE_FIXTURE_UNDEFINED = { __kernFixture: 'Undefined' } as const; +export const CORE_FIXTURE_FUNCTION = { __kernFixture: 'Function' } as const; + +export function isCoreFixtureUndefined(value: unknown): value is typeof CORE_FIXTURE_UNDEFINED { + return ( + typeof value === 'object' && + value !== null && + !Array.isArray(value) && + Object.keys(value).length === 1 && + (value as { readonly __kernFixture?: unknown }).__kernFixture === 'Undefined' + ); +} + +export function isCoreFixtureFunction(value: unknown): value is typeof CORE_FIXTURE_FUNCTION { + return ( + typeof value === 'object' && + value !== null && + !Array.isArray(value) && + Object.keys(value).length === 1 && + (value as { readonly __kernFixture?: unknown }).__kernFixture === 'Function' + ); +} + +export type CoreFixtureValue = + | string + | number + | boolean + | null + | typeof CORE_FIXTURE_UNDEFINED + | typeof CORE_FIXTURE_FUNCTION + | readonly CoreFixtureValue[] + | { readonly [key: string]: CoreFixtureValue }; + +export type CoreFixtureError = { + readonly code: 'strict-type' | 'division-by-zero'; + readonly message: string; +}; + +export type CoreFixture = + | { + readonly args: readonly CoreFixtureValue[]; + readonly returns: CoreFixtureValue; + } + | { + readonly args: readonly CoreFixtureValue[]; + readonly throws: CoreFixtureError; + }; + +export type CoreOperationReturns = CoreTypeName | readonly CoreTypeName[]; + +export type CoreOperation = { + readonly id: string; + readonly kind: CoreOperationKind; + readonly args: readonly CoreTypeName[]; + readonly returns: CoreOperationReturns; + readonly lowers?: CoreLowerings; + readonly fixtures: readonly CoreFixture[]; + readonly review: { + readonly summary: string; + readonly graph: readonly string[]; + }; +}; + +export type CoreTypeContract = { + readonly name: CoreTypeName; + readonly kind: CoreTypeKind; + readonly strict: true; + readonly operations: readonly CoreOperation[]; +}; + +export type CoreTypeContractRegistry = { + 
readonly schemaVersion: 1; + readonly types: { readonly [Name in CoreTypeName]: CoreTypeContract & { readonly name: Name } }; +}; + +export type CoreGraphEdge = { + readonly from: string; + readonly relation: string; + readonly to: string; + readonly operation?: string; + readonly index?: number; +}; + +export function contractToGraphEdges(contract: CoreTypeContract): CoreGraphEdge[] { + const edges: CoreGraphEdge[] = []; + + for (const operation of contract.operations) { + if (!operation.id.startsWith(`${contract.name}.`)) { + throw new Error(`Core operation id ${operation.id} must be prefixed with ${contract.name}.`); + } + const methodName = operation.id.slice(contract.name.length + 1); + const explicitArgs = operation.args.slice(1).join(', '); + const operationRelation = `${methodName}(${explicitArgs})`; + + edges.push({ + from: contract.name, + relation: operationRelation, + to: formatReturnTypes(operation.returns), + operation: operation.id, + }); + + operation.args.forEach((arg, index) => { + edges.push({ + from: operation.id, + relation: 'accepts', + to: arg, + operation: operation.id, + index, + }); + }); + + for (const returnType of returnTypeNames(operation.returns)) { + edges.push({ + from: operation.id, + relation: 'returns', + to: returnType, + operation: operation.id, + }); + } + + for (const target of ['kern', 'ts', 'python'] as const) { + const lowering = operation.lowers?.[target]; + if (!lowering) continue; + edges.push({ + from: operation.id, + relation: `lowers.${target}`, + to: lowering, + operation: operation.id, + }); + } + + operation.fixtures.forEach((_, index) => { + edges.push({ + from: operation.id, + relation: 'fixture', + to: `${operation.id}.fixture.${index}`, + operation: operation.id, + index, + }); + }); + + for (const tag of operation.review.graph) { + edges.push({ + from: operation.id, + relation: 'tagged', + to: tag, + operation: operation.id, + }); + } + } + + return edges; +} + +function returnTypeNames(returns: 
CoreOperationReturns): readonly CoreTypeName[] { + return typeof returns === 'string' ? [returns] : returns; +} + +function formatReturnTypes(returns: CoreOperationReturns): string { + return returnTypeNames(returns).join(' | '); +} diff --git a/packages/core/src/core-contracts/semantics.ts b/packages/core/src/core-contracts/semantics.ts new file mode 100644 index 00000000..4041189b --- /dev/null +++ b/packages/core/src/core-contracts/semantics.ts @@ -0,0 +1,251 @@ +import { + CORE_FIXTURE_UNDEFINED, + type CoreFixtureValue, + type CoreTypeName, + isCoreFixtureFunction, + isCoreFixtureUndefined, +} from './schema.js'; + +export class CoreContractEvaluationError extends Error { + constructor( + readonly code: 'strict-type' | 'division-by-zero' | 'unsupported-operation', + message: string, + ) { + super(message); + this.name = 'CoreContractEvaluationError'; + } +} + +export function coreFixtureValueType(value: CoreFixtureValue): CoreTypeName { + if (typeof value === 'string') return 'String'; + if (typeof value === 'boolean') return 'Boolean'; + if (typeof value === 'number') return 'Number'; + if (value === null) return 'Null'; + if (isCoreFixtureUndefined(value)) return 'Undefined'; + if (isCoreFixtureFunction(value)) return 'Function'; + if (Array.isArray(value)) return 'List'; + return 'Record'; +} + +export function evaluateCoreContractOperation( + operationId: string, + args: readonly CoreFixtureValue[], +): CoreFixtureValue { + switch (operationId) { + case 'Boolean.not': { + const [value] = expectCoreTypes(operationId, args, ['Boolean']); + return !value; + } + case 'Boolean.and': { + const [left, right] = expectCoreTypes(operationId, args, ['Boolean', 'Boolean']); + return left && right; + } + case 'Boolean.or': { + const [left, right] = expectCoreTypes(operationId, args, ['Boolean', 'Boolean']); + return left || right; + } + case 'Boolean.equals': { + const [left, right] = expectCoreTypes(operationId, args, ['Boolean', 'Boolean']); + return left === right; 
+ } + case 'Boolean.toString': { + const [value] = expectCoreTypes(operationId, args, ['Boolean']); + return value ? 'true' : 'false'; + } + case 'String.length': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return stringCodePoints(value).length; + } + case 'String.index': { + const [value, index] = expectCoreTypes(operationId, args, ['String', 'Number']); + if (!Number.isInteger(index) || index < 0) return CORE_FIXTURE_UNDEFINED; + const chars = stringCodePoints(value); + return index < chars.length ? (chars[index] ?? '') : CORE_FIXTURE_UNDEFINED; + } + case 'String.includes': { + const [value, search] = expectCoreTypes(operationId, args, ['String', 'String']); + return value.includes(search); + } + case 'String.startsWith': { + const [value, search] = expectCoreTypes(operationId, args, ['String', 'String']); + return value.startsWith(search); + } + case 'String.endsWith': { + const [value, search] = expectCoreTypes(operationId, args, ['String', 'String']); + return value.endsWith(search); + } + case 'String.slice': { + const [value, start, end] = expectCoreTypes(operationId, args, ['String', 'Number', 'Number']); + return stringCodePoints(value).slice(truncateOffset(start), truncateOffset(end)).join(''); + } + case 'String.trim': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return value.trim(); + } + case 'String.lower': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return value.toLowerCase(); + } + case 'String.upper': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return value.toUpperCase(); + } + case 'String.concat': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return left + right; + } + case 'String.equals': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return left === right; + } + case 'String.lessThan': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 
'String']); + return compareStrings(left, right) < 0; + } + case 'String.lessThanOrEqual': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return compareStrings(left, right) <= 0; + } + case 'String.greaterThan': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return compareStrings(left, right) > 0; + } + case 'String.greaterThanOrEqual': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return compareStrings(left, right) >= 0; + } + case 'String.toString': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return value; + } + case 'Number.negate': { + const [value] = expectCoreTypes(operationId, args, ['Number']); + return finiteNumberResult(operationId, -value); + } + case 'Number.add': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return finiteNumberResult(operationId, left + right); + } + case 'Number.subtract': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return finiteNumberResult(operationId, left - right); + } + case 'Number.multiply': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return finiteNumberResult(operationId, left * right); + } + case 'Number.divide': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + if (right === 0) throw new CoreContractEvaluationError('division-by-zero', 'Number.divide division by zero.'); + return finiteNumberResult(operationId, left / right); + } + case 'Number.remainder': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + if (right === 0) throw new CoreContractEvaluationError('division-by-zero', 'Number.remainder division by zero.'); + return finiteNumberResult(operationId, left % right); + } + case 'Number.lessThan': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return 
left < right; + } + case 'Number.lessThanOrEqual': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return left <= right; + } + case 'Number.greaterThan': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return left > right; + } + case 'Number.greaterThanOrEqual': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return left >= right; + } + case 'List.length': { + const [value] = expectCoreTypes(operationId, args, ['List']); + return value.length; + } + case 'List.index': { + const [value, index] = expectCoreTypes(operationId, args, ['List', 'Number']); + if (!Number.isInteger(index) || index < 0) return CORE_FIXTURE_UNDEFINED; + return index < value.length && Object.hasOwn(value, index) + ? (value[index] as CoreFixtureValue) + : CORE_FIXTURE_UNDEFINED; + } + case 'Record.get': { + const [value, key] = expectCoreTypes(operationId, args, ['Record', 'String']); + return Object.hasOwn(value, key) ? 
(value[key] as CoreFixtureValue) : CORE_FIXTURE_UNDEFINED; + } + default: + throw new CoreContractEvaluationError( + 'unsupported-operation', + `Unsupported core contract operation: ${operationId}`, + ); + } +} + +function expectCoreTypes( + operationId: string, + args: readonly CoreFixtureValue[], + types: T, +): CoreTypeTuple { + if (args.length !== types.length) throw strictTypeError(operationId, types); + for (let index = 0; index < types.length; index += 1) { + if (!Object.hasOwn(args, index)) throw strictTypeError(operationId, types); + const arg = args[index] as CoreFixtureValue; + if (coreFixtureValueType(arg) !== types[index]) throw strictTypeError(operationId, types); + if (types[index] === 'Number' && (typeof arg !== 'number' || !Number.isFinite(arg))) { + throw strictTypeError(operationId, types); + } + } + return args as CoreTypeTuple; +} + +function strictTypeError(operationId: string, types: readonly CoreTypeName[]): CoreContractEvaluationError { + return new CoreContractEvaluationError('strict-type', `${operationId} expects ${types.join(', ')}.`); +} + +function stringCodePoints(value: string): string[] { + return Array.from(value); +} + +function compareStrings(left: string, right: string): number { + const leftPoints = Array.from(left, (char) => char.codePointAt(0) ?? 0); + const rightPoints = Array.from(right, (char) => char.codePointAt(0) ?? 0); + const length = Math.min(leftPoints.length, rightPoints.length); + for (let index = 0; index < length; index += 1) { + const delta = (leftPoints[index] ?? 0) - (rightPoints[index] ?? 
0); + if (delta !== 0) return delta; + } + return leftPoints.length - rightPoints.length; +} + +function truncateOffset(value: number): number { + return Math.trunc(value); +} + +function finiteNumberResult(operationId: string, value: number): number { + if (!Number.isFinite(value)) { + throw new CoreContractEvaluationError('strict-type', `${operationId} result must be finite.`); + } + return value; +} + +type CoreTypeTuple = { + readonly [Index in keyof T]: T[Index] extends 'Boolean' + ? boolean + : T[Index] extends 'Number' + ? number + : T[Index] extends 'String' + ? string + : T[Index] extends 'List' + ? readonly CoreFixtureValue[] + : T[Index] extends 'Record' + ? { readonly [key: string]: CoreFixtureValue } + : T[Index] extends 'Null' + ? null + : T[Index] extends 'Undefined' + ? typeof CORE_FIXTURE_UNDEFINED + : T[Index] extends 'Function' + ? never + : CoreFixtureValue; +}; diff --git a/packages/core/src/core-contracts/string.ts b/packages/core/src/core-contracts/string.ts new file mode 100644 index 00000000..1a67f6ee --- /dev/null +++ b/packages/core/src/core-contracts/string.ts @@ -0,0 +1,337 @@ +import { CORE_FIXTURE_UNDEFINED, type CoreTypeContract } from './schema.js'; + +export const STRING_CONTRACT = { + name: 'String', + kind: 'primitive', + strict: true, + operations: [ + { + id: 'String.length', + kind: 'property', + args: ['String'], + returns: 'Number', + lowers: { + kern: 'String.length($0)', + ts: '__kernStringLength($0)', + python: '__kern_string_length($0)', + }, + fixtures: [ + { args: [''], returns: 0 }, + { args: ['kern'], returns: 4 }, + { args: ['𐐷'], returns: 1 }, + { args: ['e\u0301'], returns: 2 }, + ], + review: { + summary: 'KERN string length counts Unicode code points with no normalization.', + graph: ['String', 'Number', 'portable', 'unicode-code-point'], + }, + }, + { + id: 'String.index', + kind: 'method', + args: ['String', 'Number'], + returns: ['String', 'Undefined'], + lowers: { + kern: 'String.index($0, $1)', + ts: 
'__kernStringIndex($0, $1)', + python: '__kern_string_index($0, $1)', + }, + fixtures: [ + { args: ['abc', 1], returns: 'b' }, + { args: ['a𐐷b', 1], returns: '𐐷' }, + { args: ['abc', 4], returns: CORE_FIXTURE_UNDEFINED }, + { args: ['abc', -1], returns: CORE_FIXTURE_UNDEFINED }, + { args: ['abc', 1.2], returns: CORE_FIXTURE_UNDEFINED }, + { args: ['abc', '1'], throws: { code: 'strict-type', message: 'String.index expects String, Number.' } }, + ], + review: { + summary: + 'Strict string indexing over Unicode code-point offsets; negative, fractional, and out-of-range misses return Undefined.', + graph: ['String', 'Undefined', 'strict', 'portable', 'unicode-code-point'], + }, + }, + { + id: 'String.includes', + kind: 'method', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.includes($0, $1)', + ts: '__kernStringIncludes($0, $1)', + python: '__kern_string_includes($0, $1)', + }, + fixtures: [ + { args: ['abc', 'b'], returns: true }, + { args: ['abc', 'x'], returns: false }, + ], + review: { + summary: 'Strict string containment.', + graph: ['String', 'Boolean', 'portable'], + }, + }, + { + id: 'String.startsWith', + kind: 'method', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.startsWith($0, $1)', + ts: '__kernStringStartsWith($0, $1)', + python: '__kern_string_starts_with($0, $1)', + }, + fixtures: [ + { args: ['kern', 'ke'], returns: true }, + { args: ['kern', 'rn'], returns: false }, + ], + review: { + summary: 'Strict string prefix test.', + graph: ['String', 'Boolean', 'portable'], + }, + }, + { + id: 'String.endsWith', + kind: 'method', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.endsWith($0, $1)', + ts: '__kernStringEndsWith($0, $1)', + python: '__kern_string_ends_with($0, $1)', + }, + fixtures: [ + { args: ['kern', 'rn'], returns: true }, + { args: ['kern', 'ke'], returns: false }, + ], + review: { + summary: 'Strict string suffix test.', + graph: ['String', 
'Boolean', 'portable'], + }, + }, + { + id: 'String.slice', + kind: 'method', + args: ['String', 'Number', 'Number'], + returns: 'String', + lowers: { + kern: 'String.slice($0, $1, $2)', + ts: '__kernStringSlice($0, $1, $2)', + python: '__kern_string_slice($0, $1, $2)', + }, + fixtures: [ + { args: ['abcdef', 1, 4], returns: 'bcd' }, + { args: ['abcdef', 1.9, 4.2], returns: 'bcd' }, + { args: ['abc', 0, 2], returns: 'ab' }, + { args: ['abcdef', -3, -1], returns: 'de' }, + { args: ['abcdef', -20, 2], returns: 'ab' }, + { args: ['abcdef', 4, 2], returns: '' }, + { args: ['a𐐷b', 1, 2], returns: '𐐷' }, + { args: ['e\u0301x', 0, 2], returns: 'e\u0301' }, + { + args: ['abc', '0', 2], + throws: { code: 'strict-type', message: 'String.slice expects String, Number, Number.' }, + }, + { + args: ['abc', 1], + throws: { code: 'strict-type', message: 'String.slice expects String, Number, Number.' }, + }, + ], + review: { + summary: + 'Strict string slice over Unicode code-point offsets with explicit start and end offsets; negative offsets count from the end.', + graph: ['String', 'Number', 'strict', 'portable', 'unicode-code-point'], + }, + }, + { + id: 'String.trim', + kind: 'method', + args: ['String'], + returns: 'String', + lowers: { + kern: 'String.trim($0)', + ts: '__kernStringTrim($0)', + python: '__kern_string_trim($0)', + }, + fixtures: [ + { args: [' kern '], returns: 'kern' }, + { args: ['\ncore\t'], returns: 'core' }, + ], + review: { + summary: 'Portable surrounding whitespace trim.', + graph: ['String', 'portable'], + }, + }, + { + id: 'String.lower', + kind: 'method', + args: ['String'], + returns: 'String', + lowers: { + kern: 'String.lower($0)', + ts: '__kernStringLower($0)', + python: '__kern_string_lower($0)', + }, + fixtures: [ + { args: ['KERN'], returns: 'kern' }, + { args: ['Core'], returns: 'core' }, + ], + review: { + summary: 'Portable lowercase conversion.', + graph: ['String', 'portable'], + }, + }, + { + id: 'String.upper', + kind: 'method', + args: 
['String'], + returns: 'String', + lowers: { + kern: 'String.upper($0)', + ts: '__kernStringUpper($0)', + python: '__kern_string_upper($0)', + }, + fixtures: [ + { args: ['kern'], returns: 'KERN' }, + { args: ['Core'], returns: 'CORE' }, + ], + review: { + summary: 'Portable uppercase conversion.', + graph: ['String', 'portable'], + }, + }, + { + id: 'String.concat', + kind: 'method', + args: ['String', 'String'], + returns: 'String', + lowers: { + kern: 'String.concat($0, $1)', + ts: '__kernStringConcat($0, $1)', + python: '__kern_string_concat($0, $1)', + }, + fixtures: [ + { args: ['kern', 'lang'], returns: 'kernlang' }, + { args: ['count:', 2], throws: { code: 'strict-type', message: 'String.concat expects String, String.' } }, + ], + review: { + summary: 'Strict string concatenation; both operands must be String.', + graph: ['String', 'strict', 'portable'], + }, + }, + { + id: 'String.equals', + kind: 'method', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.equals($0, $1)', + ts: '__kernStringEquals($0, $1)', + python: '__kern_string_equals($0, $1)', + }, + fixtures: [ + { args: ['kern', 'kern'], returns: true }, + { args: ['kern', 'core'], returns: false }, + { args: ['kern', true], throws: { code: 'strict-type', message: 'String.equals expects String, String.' } }, + ], + review: { + summary: 'Strict string equality; cross-type equality is a type error in schema v1.', + graph: ['String', 'Boolean', 'strict', 'portable'], + }, + }, + { + id: 'String.lessThan', + kind: 'operator', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.lessThan($0, $1)', + ts: '__kernStringLessThan($0, $1)', + python: '__kern_string_less_than($0, $1)', + }, + fixtures: [ + { args: ['abc', 'abd'], returns: true }, + { args: ['abc', 'abc'], returns: false }, + { args: ['abc', true], throws: { code: 'strict-type', message: 'String.lessThan expects String, String.' 
} }, + ], + review: { + summary: 'Strict string less-than comparison using KERN portable code-point ordering.', + graph: ['String', 'Boolean', 'strict', 'portable', 'unicode-code-point-order'], + }, + }, + { + id: 'String.lessThanOrEqual', + kind: 'operator', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.lessThanOrEqual($0, $1)', + ts: '__kernStringLessThanOrEqual($0, $1)', + python: '__kern_string_less_than_or_equal($0, $1)', + }, + fixtures: [ + { args: ['abc', 'abc'], returns: true }, + { args: ['abd', 'abc'], returns: false }, + ], + review: { + summary: 'Strict string less-than-or-equal comparison using KERN portable code-point ordering.', + graph: ['String', 'Boolean', 'strict', 'portable', 'unicode-code-point-order'], + }, + }, + { + id: 'String.greaterThan', + kind: 'operator', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.greaterThan($0, $1)', + ts: '__kernStringGreaterThan($0, $1)', + python: '__kern_string_greater_than($0, $1)', + }, + fixtures: [ + { args: ['abd', 'abc'], returns: true }, + { args: ['abc', 'abc'], returns: false }, + ], + review: { + summary: 'Strict string greater-than comparison using KERN portable code-point ordering.', + graph: ['String', 'Boolean', 'strict', 'portable', 'unicode-code-point-order'], + }, + }, + { + id: 'String.greaterThanOrEqual', + kind: 'operator', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.greaterThanOrEqual($0, $1)', + ts: '__kernStringGreaterThanOrEqual($0, $1)', + python: '__kern_string_greater_than_or_equal($0, $1)', + }, + fixtures: [ + { args: ['abc', 'abc'], returns: true }, + { args: ['abc', 'abd'], returns: false }, + ], + review: { + summary: 'Strict string greater-than-or-equal comparison using KERN portable code-point ordering.', + graph: ['String', 'Boolean', 'strict', 'portable', 'unicode-code-point-order'], + }, + }, + { + id: 'String.toString', + kind: 'coercion', + args: ['String'], + returns: 
'String', + lowers: { + kern: 'String.toString($0)', + ts: '__kernStringToString($0)', + python: '__kern_string_to_string($0)', + }, + fixtures: [ + { args: ['kern'], returns: 'kern' }, + { args: [''], returns: '' }, + ], + review: { + summary: 'String identity coercion.', + graph: ['String', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-runtime/contract-adapter.ts b/packages/core/src/core-runtime/contract-adapter.ts new file mode 100644 index 00000000..226c6343 --- /dev/null +++ b/packages/core/src/core-runtime/contract-adapter.ts @@ -0,0 +1,93 @@ +import { + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + type CoreFixtureValue, + isCoreFixtureFunction, + isCoreFixtureUndefined, +} from '../core-contracts/index.js'; +import type { KernValue } from './index.js'; +import { brandValue } from './value-brand.js'; + +export class CoreRuntimeContractAdapterError extends Error { + constructor(message: string) { + super(message); + this.name = 'CoreRuntimeContractAdapterError'; + } +} + +export function kernValueToCoreFixtureValue(value: KernValue): CoreFixtureValue { + switch (value.kind) { + case 'null': + return null; + case 'undefined': + return CORE_FIXTURE_UNDEFINED; + case 'boolean': + case 'number': + case 'string': + return value.value; + case 'array': + return value.items.map(kernValueToCoreFixtureValue); + case 'record': + if (isReservedFixtureSentinelRecord(value.entries)) { + throw new CoreRuntimeContractAdapterError( + 'KERN record value uses reserved core fixture sentinel shape: __kernFixture.', + ); + } + return Object.fromEntries( + Object.entries(value.entries).map(([key, entry]) => [key, kernValueToCoreFixtureValue(entry)]), + ); + case 'function': + case 'builtin': + case 'class': + case 'bound-method': + case 'super': + return CORE_FIXTURE_FUNCTION; + case 'instance': + if (isReservedFixtureSentinelRecord(value.fields)) { + throw new CoreRuntimeContractAdapterError( + 'KERN instance value 
uses reserved core fixture sentinel shape: __kernFixture.', + ); + } + return Object.fromEntries( + Object.entries(value.fields).map(([key, entry]) => [key, kernValueToCoreFixtureValue(entry)]), + ); + } +} + +export function coreFixtureValueToKernValue(value: CoreFixtureValue): KernValue { + if (value === null) return brandValue({ kind: 'null' }); + if (isCoreFixtureUndefined(value)) return brandValue({ kind: 'undefined' }); + if (isCoreFixtureFunction(value)) { + throw new CoreRuntimeContractAdapterError( + 'Core Function fixture references cannot be materialized as runtime code.', + ); + } + switch (typeof value) { + case 'boolean': + return brandValue({ kind: 'boolean', value }); + case 'number': + return brandValue({ kind: 'number', value }); + case 'string': + return brandValue({ kind: 'string', value }); + case 'object': { + if (Array.isArray(value)) { + return brandValue({ kind: 'array', items: value.map(coreFixtureValueToKernValue) }); + } + const entries = Object.create(null) as Record; + for (const [key, entry] of Object.entries(value)) entries[key] = coreFixtureValueToKernValue(entry); + return brandValue({ kind: 'record', entries }); + } + } +} + +export function roundTripKernContractDataValue(value: KernValue): KernValue { + return coreFixtureValueToKernValue(kernValueToCoreFixtureValue(value)); +} + +function isReservedFixtureSentinelRecord(entries: Record): boolean { + return ( + Object.keys(entries).length === 1 && + entries.__kernFixture?.kind === 'string' && + (entries.__kernFixture.value === 'Undefined' || entries.__kernFixture.value === 'Function') + ); +} diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 3498569b..afc7f74d 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -1,9 +1,20 @@ +import { + CORE_TYPE_CONTRACTS, + CoreContractEvaluationError, + type CoreFixtureValue, + evaluateCoreContractOperation, +} from '../core-contracts/index.js'; 
import { parseExpression } from '../parser-expression.js'; import { splitPortableExpressionList } from '../portable-expression-list.js'; import type { IRNode } from '../types.js'; import type { ValueIR } from '../value-ir.js'; +import { + CoreRuntimeContractAdapterError, + coreFixtureValueToKernValue, + kernValueToCoreFixtureValue, +} from './contract-adapter.js'; +import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; -const KERN_VALUE_BRAND: unique symbol = Symbol('KERN core runtime value'); const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; export type KernValue = @@ -15,7 +26,11 @@ export type KernValue = | { kind: 'array'; items: KernValue[] } | { kind: 'record'; entries: Record } | KernFunctionValue - | KernBuiltinValue; + | KernBuiltinValue + | KernClassValue + | KernInstanceValue + | KernBoundMethodValue + | KernSuperValue; export interface KernFunctionValue { kind: 'function'; @@ -31,6 +46,35 @@ export interface KernBuiltinValue { call: (args: KernValue[]) => KernValue; } +export interface KernClassValue { + kind: 'class'; + name: string; + node: IRNode; + env: CoreRuntimeEnv; +} + +export interface KernInstanceValue { + kind: 'instance'; + classValue: KernClassValue; + fields: Record; + initializedClasses: Set; +} + +export interface KernBoundMethodValue { + kind: 'bound-method'; + name: string; + receiver: KernInstanceValue; + methodNode: IRNode; + ownerClass: KernClassValue; +} + +export interface KernSuperValue { + kind: 'super'; + receiver: KernInstanceValue; + ownerClass: KernClassValue; + mode: 'constructor' | 'method'; +} + export interface RuntimeParam { name: string; type?: string; @@ -60,6 +104,15 @@ export class CoreRuntimeEnv { return value; } + assign(name: string, value: KernValue): KernValue { + if (this.bindings.has(name)) { + this.bindings.set(name, value); + return value; + } + if (this.parent) return this.parent.assign(name, value); + throw new Error(`KERN core runtime binding not found: ${name}`); + } + lookup(name: string): 
KernValue { if (this.bindings.has(name)) return this.bindings.get(name) ?? kUndefined(); if (this.parent) return this.parent.lookup(name); @@ -140,9 +193,15 @@ export function toHostValue(value: KernValue | undefined): unknown { return value.items.map(toHostValue); case 'record': return Object.fromEntries(Object.entries(value.entries).map(([key, entry]) => [key, toHostValue(entry)])); + case 'instance': + return Object.fromEntries(Object.entries(value.fields).map(([key, entry]) => [key, toHostValue(entry)])); case 'function': case 'builtin': + case 'class': + case 'bound-method': return `[KERN ${value.kind}${value.name ? ` ${value.name}` : ''}]`; + case 'super': + return `[KERN super ${value.ownerClass.name}]`; } } @@ -161,6 +220,10 @@ export function kernTruthy(value: KernValue): boolean { case 'record': case 'function': case 'builtin': + case 'class': + case 'instance': + case 'bound-method': + case 'super': return true; } } @@ -228,6 +291,18 @@ function executeNode(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { env.define(requiredString(node.props?.name, 'fn name='), fn); return { kind: 'normal', value: kUndefined() }; } + case 'class': { + const klass = makeClass(node, env); + env.define(klass.name, klass); + return { kind: 'normal', value: kUndefined() }; + } + case 'assign': + executeAssign(node, env); + return { kind: 'normal', value: kUndefined() }; + case 'do': { + evalCoreExpression(unwrapExpr(node.props?.value, 'do value='), env); + return { kind: 'normal', value: kUndefined() }; + } case 'coalesce': case 'firstDefined': return executeCoalesce(node, env); @@ -329,6 +404,8 @@ function evalValueIR(node: ValueIR, env: CoreRuntimeEnv): KernValue { return evalCall(node, env); case 'lambda': throw new Error('KERN core runtime lambda expressions are not supported in the first runtime slice.'); + case 'new': + return evalNew(node, env); default: throw new Error(`KERN core runtime unsupported expression kind: ${node.kind}`); } @@ -350,10 +427,13 @@ function 
evalObjectLiteral(node: Extract, env: C function evalUnary(node: Extract, env: CoreRuntimeEnv): KernValue { const arg = evalValueIR(node.argument, env); - if (node.op === '!') return kBoolean(!kernTruthy(arg)); + if (node.op === '!') { + if (arg.kind !== 'boolean') throw new Error('KERN core runtime unary ! requires a boolean.'); + return dispatchCoreContractOperation('Boolean.not', [arg.value]); + } if (node.op === '-' || node.op === '+') { if (arg.kind !== 'number') throw new Error(`KERN core runtime unary ${node.op} requires a number.`); - return kNumber(node.op === '-' ? -arg.value : arg.value); + return node.op === '-' ? dispatchCoreContractOperation('Number.negate', [arg.value]) : arg; } throw new Error(`KERN core runtime unsupported unary operator: ${node.op}`); } @@ -376,8 +456,12 @@ function evalBinary(node: Extract, env: CoreRuntime const right = evalValueIR(node.right, env); switch (node.op) { case '+': - if (left.kind === 'number' && right.kind === 'number') return kNumber(left.value + right.value); - if (left.kind === 'string' && right.kind === 'string') return kString(left.value + right.value); + if (left.kind === 'number' && right.kind === 'number') { + return dispatchCoreContractOperation('Number.add', [left.value, right.value]); + } + if (left.kind === 'string' && right.kind === 'string') { + return dispatchCoreContractOperation('String.concat', [left.value, right.value]); + } throw new Error('KERN core runtime + requires two numbers or two strings.'); case '-': case '*': @@ -404,21 +488,34 @@ function evalNumberBinary(op: string, left: KernValue, right: KernValue): KernVa if (left.kind !== 'number' || right.kind !== 'number') { throw new Error(`KERN core runtime ${op} requires two numbers.`); } - if (op === '-') return kNumber(left.value - right.value); - if (op === '*') return kNumber(left.value * right.value); - if (right.value === 0 && (op === '/' || op === '%')) throw new Error(`KERN core runtime ${op} division by zero.`); - if (op === '/') 
return kNumber(left.value / right.value); - return kNumber(left.value % right.value); + switch (op) { + case '-': + return dispatchCoreContractOperation('Number.subtract', [left.value, right.value]); + case '*': + return dispatchCoreContractOperation('Number.multiply', [left.value, right.value]); + case '/': + return dispatchCoreContractOperation('Number.divide', [left.value, right.value]); + case '%': + return dispatchCoreContractOperation('Number.remainder', [left.value, right.value]); + default: + throw new Error(`KERN core runtime unsupported numeric operator: ${op}`); + } } function evalOrderedComparison(op: string, left: KernValue, right: KernValue): KernValue { if (!((left.kind === 'number' && right.kind === 'number') || (left.kind === 'string' && right.kind === 'string'))) { throw new Error(`KERN core runtime ${op} requires same-kind number or string operands.`); } - if (op === '<') return kBoolean(left.value < right.value); - if (op === '<=') return kBoolean(left.value <= right.value); - if (op === '>') return kBoolean(left.value > right.value); - return kBoolean(left.value >= right.value); + if (left.kind === 'number' && right.kind === 'number') { + if (op === '<') return dispatchCoreContractOperation('Number.lessThan', [left.value, right.value]); + if (op === '<=') return dispatchCoreContractOperation('Number.lessThanOrEqual', [left.value, right.value]); + if (op === '>') return dispatchCoreContractOperation('Number.greaterThan', [left.value, right.value]); + return dispatchCoreContractOperation('Number.greaterThanOrEqual', [left.value, right.value]); + } + if (op === '<') return dispatchCoreContractOperation('String.lessThan', [left.value, right.value]); + if (op === '<=') return dispatchCoreContractOperation('String.lessThanOrEqual', [left.value, right.value]); + if (op === '>') return dispatchCoreContractOperation('String.greaterThan', [left.value, right.value]); + return dispatchCoreContractOperation('String.greaterThanOrEqual', [left.value, 
right.value]); } function evalMember(node: Extract, env: CoreRuntimeEnv): KernValue { @@ -428,10 +525,16 @@ function evalMember(node: Extract, env: CoreRuntime throw new Error(`KERN core runtime cannot read .${node.property} from ${object.kind}.`); } if (object.kind === 'record') { - return Object.hasOwn(object.entries, node.property) ? object.entries[node.property] : kUndefined(); + return evalRecordGet(object, node.property); } - if (object.kind === 'array' && node.property === 'length') return kNumber(object.items.length); - if (object.kind === 'string' && node.property === 'length') return kNumber(object.value.length); + if (object.kind === 'instance') return evalInstanceMember(object, node.property); + if (object.kind === 'super') return evalSuperMember(object, node.property); + if (object.kind === 'class') return evalClassMember(object, node.property); + if (object.kind === 'array' && node.property === 'length') { + return kNumber(object.items.length); + } + if (object.kind === 'string') return evalStringMember(object, node.property); + if (object.kind === 'boolean') return evalBooleanMember(object, node.property); return kUndefined(); } @@ -444,23 +547,115 @@ function evalIndex(node: Extract, env: CoreRuntimeEn const index = evalValueIR(node.index, env); if (object.kind === 'array') { if (index.kind !== 'number') throw new Error('KERN core runtime array index must be a number.'); - return object.items[index.value] ?? kUndefined(); + return evalListIndex(object, index.value); } if (object.kind === 'record' || object.kind === 'string') { if (index.kind !== 'string' && index.kind !== 'number') { throw new Error('KERN core runtime record/string index must be a string or number.'); } const key = String(index.value); - if (object.kind === 'record') return Object.hasOwn(object.entries, key) ? object.entries[key] : kUndefined(); + if (object.kind === 'record') return evalRecordGet(object, key); const charIndex = index.kind === 'number' ? 
index.value : INTEGER_INDEX_RE.test(index.value) ? Number(index.value) : NaN; - return Number.isInteger(charIndex) && charIndex >= 0 && charIndex < object.value.length - ? kString(object.value[charIndex] ?? '') - : kUndefined(); + if (!Number.isFinite(charIndex) && index.kind !== 'number') return kUndefined(); + return dispatchCoreContractOperation('String.index', [object.value, charIndex]); } return kUndefined(); } +function evalStringMember(object: Extract, property: string): KernValue { + if (property === 'length') return dispatchCoreContractOperation('String.length', [object.value]); + const operation = stringMemberOperation(property); + if (!operation) return kUndefined(); + return boundCoreContractOperation(`String.${operation}`, [object.value]); +} + +function evalBooleanMember(object: Extract, property: string): KernValue { + const operation = booleanMemberOperation(property); + if (!operation) return kUndefined(); + return boundCoreContractOperation(`Boolean.${operation}`, [object.value]); +} + +function stringMemberOperation(property: string): string | undefined { + switch (property) { + case 'includes': + case 'index': + case 'startsWith': + case 'endsWith': + case 'slice': + case 'trim': + case 'lower': + case 'upper': + case 'concat': + case 'equals': + case 'toString': + return property; + default: + return undefined; + } +} + +function booleanMemberOperation(property: string): string | undefined { + switch (property) { + case 'not': + case 'and': + case 'or': + case 'equals': + case 'toString': + return property; + default: + return undefined; + } +} + +function boundCoreContractOperation(operationId: string, receiverArgs: readonly CoreFixtureValue[]): KernValue { + return brandValue({ + kind: 'builtin', + name: operationId, + call: (args: KernValue[]) => { + try { + return dispatchCoreContractOperation(operationId, [...receiverArgs, ...args.map(kernValueToCoreFixtureValue)]); + } catch (error) { + if (error instanceof 
CoreRuntimeContractAdapterError) { + throw new CoreContractEvaluationError('strict-type', coreOperationStrictTypeMessage(operationId)); + } + throw error; + } + }, + }); +} + +function dispatchCoreContractOperation(operationId: string, args: readonly CoreFixtureValue[]): KernValue { + return coreFixtureValueToKernValue(evaluateCoreContractOperation(operationId, args)); +} + +function evalListIndex(object: Extract, index: number): KernValue { + if (!Number.isFinite(index) || !Number.isInteger(index) || index < 0 || index >= object.items.length) { + return kUndefined(); + } + return object.items[index] ?? kUndefined(); +} + +function evalRecordGet(object: Extract, key: string): KernValue { + if (!Object.hasOwn(object.entries, key)) + return dispatchCoreContractOperation('Record.get', [recordShapeFixture(object), key]); + return object.entries[key] ?? kUndefined(); +} + +function recordShapeFixture(object: Extract): Record { + const shape = Object.create(null) as Record; + for (const key of Object.keys(object.entries)) shape[key] = null; + return shape; +} + +function coreOperationStrictTypeMessage(operationId: string): string { + for (const contract of Object.values(CORE_TYPE_CONTRACTS.types)) { + const operation = contract.operations.find((operation) => operation.id === operationId); + if (operation) return `${operationId} expects ${operation.args.join(', ')}.`; + } + return `${operationId} received an unsupported runtime value.`; +} + function evalCall(node: Extract, env: CoreRuntimeEnv): KernValue { const callee = evalValueIR(node.callee, env); if (isNullish(callee)) { @@ -470,9 +665,250 @@ function evalCall(node: Extract, env: CoreRuntimeEnv) const args = node.args.map((arg) => evalValueIR(arg, env)); if (callee.kind === 'builtin') return callee.call(args); if (callee.kind === 'function') return callFunctionValue(callee, args).value; + if (callee.kind === 'class') return constructClassValue(callee, args); + if (callee.kind === 'bound-method') return 
callBoundMethodValue(callee, args).value; + if (callee.kind === 'super') return callSuperConstructor(callee, args); throw new Error(`KERN core runtime cannot call ${callee.kind}.`); } +function evalNew(node: Extract, env: CoreRuntimeEnv): KernValue { + if (node.argument.kind === 'member') { + return evalValueIR({ ...node.argument, object: { kind: 'new', argument: node.argument.object } as ValueIR }, env); + } + if (node.argument.kind === 'index') { + return evalValueIR({ ...node.argument, object: { kind: 'new', argument: node.argument.object } as ValueIR }, env); + } + if ( + node.argument.kind === 'call' && + (node.argument.callee.kind === 'member' || node.argument.callee.kind === 'index') + ) { + return evalValueIR( + { + ...node.argument, + callee: { + ...node.argument.callee, + object: { kind: 'new', argument: node.argument.callee.object } as ValueIR, + }, + }, + env, + ); + } + if (node.argument.kind !== 'call') throw new Error('KERN core runtime new expects a constructor call.'); + const callee = evalValueIR(node.argument.callee, env); + if (callee.kind !== 'class') throw new Error('KERN core runtime new expects a class value.'); + return constructClassValue( + callee, + node.argument.args.map((arg) => evalValueIR(arg, env)), + ); +} + +function makeClass(node: IRNode, env: CoreRuntimeEnv): KernClassValue { + if (node.type !== 'class') throw new Error('KERN core runtime makeClass expects a class node.'); + return brandValue({ + kind: 'class', + name: requiredString(node.props?.name, 'class name='), + node, + env, + }); +} + +function constructClassValue(klass: KernClassValue, args: readonly KernValue[]): KernInstanceValue { + const instance = brandValue({ + kind: 'instance' as const, + classValue: klass, + fields: createRecordEntries(), + initializedClasses: new Set(), + }); + initializeClassLayer(instance, klass, args, true); + return instance; +} + +function initializeClassLayer( + instance: KernInstanceValue, + klass: KernClassValue, + args: readonly 
KernValue[], + receivesConstructorArgs: boolean, +): void { + if (instance.initializedClasses.has(klass.name)) { + throw new Error(`KERN core runtime class already initialized: ${klass.name}`); + } + const base = resolveBaseClass(klass); + const ctor = firstRuntimeChild(klass.node, 'constructor'); + const ctorCallsSuper = Boolean(base && ctor && constructorCallsSuper(ctor)); + if (base && !ctorCallsSuper) initializeClassLayer(instance, base, [], false); + if (!ctorCallsSuper) initializeClassFields(instance, klass); + if (!ctor) { + if (receivesConstructorArgs && args.length > 0) { + throw new Error(`KERN core runtime class ${klass.name} has no constructor.`); + } + instance.initializedClasses.add(klass.name); + return; + } + callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; + if (base && ctorCallsSuper && !instance.initializedClasses.has(base.name)) { + throw new Error(`KERN core runtime constructor ${klass.name} must call super(...).`); + } + instance.initializedClasses.add(klass.name); +} + +function initializeClassFields(instance: KernInstanceValue, klass: KernClassValue): void { + for (const field of runtimeChildNodes(klass.node, 'field')) { + const name = requiredString(field.props?.name, 'field name='); + if (field.props?.static === true || field.props?.static === 'true') continue; + const value = + Object.hasOwn(field.props ?? {}, 'value') || Object.hasOwn(field.props ?? {}, 'default') + ? evalCoreExpression(runtimeFieldInitializerExpr(field), classThisEnv(klass, instance)) + : kUndefined(); + instance.fields[name] = value; + } +} + +function runtimeFieldInitializerExpr(node: IRNode): string { + const propName = Object.hasOwn(node.props ?? {}, 'value') ? 'value' : 'default'; + const rawValue = propName === 'value' ? node.props?.value : node.props?.default; + if (typeof rawValue === 'string' && (node.__quotedProps ?? 
[]).includes(propName)) return JSON.stringify(rawValue); + return unwrapExpr(rawValue, 'field value='); +} + +function evalInstanceMember(object: KernInstanceValue, property: string): KernValue { + if (Object.hasOwn(object.fields, property)) return object.fields[property] ?? kUndefined(); + const getter = findClassMember(object.classValue, 'getter', property); + if (getter) return callClassMemberBody(getter.node, getter.owner, object, []).value; + const method = findClassMember(object.classValue, 'method', property); + if (method) { + return brandValue({ + kind: 'bound-method', + name: `${object.classValue.name}.${property}`, + receiver: object, + methodNode: method.node, + ownerClass: method.owner, + }); + } + return kUndefined(); +} + +function evalSuperMember(object: KernSuperValue, property: string): KernValue { + const base = resolveBaseClass(object.ownerClass); + if (!base) return kUndefined(); + const getter = findClassMember(base, 'getter', property); + if (getter) return callClassMemberBody(getter.node, getter.owner, object.receiver, []).value; + const method = findClassMember(base, 'method', property); + if (method) { + return brandValue({ + kind: 'bound-method', + name: `${base.name}.${property}`, + receiver: object.receiver, + methodNode: method.node, + ownerClass: method.owner, + }); + } + if (Object.hasOwn(object.receiver.fields, property)) return object.receiver.fields[property] ?? 
kUndefined(); + return kUndefined(); +} + +function evalClassMember(object: KernClassValue, property: string): KernValue { + const method = findClassMember(object, 'method', property, true); + if (method) { + return brandValue({ + kind: 'builtin', + name: `${object.name}.${property}`, + call: (args) => callClassMemberBody(method.node, method.owner, undefined, args).value, + }); + } + return kUndefined(); +} + +function callBoundMethodValue( + method: KernBoundMethodValue, + args: readonly KernValue[], +): { value: KernValue; env: CoreRuntimeEnv } { + return callClassMemberBody(method.methodNode, method.ownerClass, method.receiver, args); +} + +function callSuperConstructor(value: KernSuperValue, args: readonly KernValue[]): KernValue { + if (value.mode !== 'constructor') { + throw new Error('KERN core runtime super(...) is only valid inside a constructor.'); + } + const base = resolveBaseClass(value.ownerClass); + if (!base) throw new Error(`KERN core runtime class ${value.ownerClass.name} has no base class.`); + initializeClassLayer(value.receiver, base, args, true); + initializeClassFields(value.receiver, value.ownerClass); + return value.receiver; +} + +function callClassMemberBody( + memberNode: IRNode, + ownerClass: KernClassValue, + receiver: KernInstanceValue | undefined, + args: readonly KernValue[], +): { value: KernValue; env: CoreRuntimeEnv } { + const callEnv = ownerClass.env.child(); + if (receiver) { + callEnv.define('this', receiver); + if (resolveBaseClass(ownerClass)) { + callEnv.define( + 'super', + brandValue({ + kind: 'super', + receiver, + ownerClass, + mode: memberNode.type === 'constructor' ? 'constructor' : 'method', + }), + ); + } + } + const params = runtimeParams(memberNode); + validateRuntimeArgs(`${ownerClass.name}.${memberNode.type}`, params, args); + params.forEach((param, index) => { + const provided = args[index]; + const value = + provided === undefined || (provided.kind === 'undefined' && param.defaultExpr) + ? 
param.defaultExpr + ? evalCoreExpression(param.defaultExpr, callEnv) + : kUndefined() + : provided; + callEnv.define(param.name, value); + }); + const completion = executeSequence(runtimeFunctionBody(memberNode), callEnv); + return { value: completion.value, env: callEnv }; +} + +function findClassMember( + klass: KernClassValue, + type: 'method' | 'getter', + name: string, + staticOnly = false, +): { node: IRNode; owner: KernClassValue } | undefined { + for (const child of klass.node.children ?? []) { + if (child.type !== type || child.props?.name !== name) continue; + const isStatic = child.props?.static === true || child.props?.static === 'true'; + if (staticOnly !== isStatic) continue; + return { node: child, owner: klass }; + } + const base = resolveBaseClass(klass); + return base ? findClassMember(base, type, name, staticOnly) : undefined; +} + +function resolveBaseClass(klass: KernClassValue): KernClassValue | undefined { + const baseName = classBaseName(klass.node.props?.extends); + if (!baseName) return undefined; + const base = klass.env.lookup(baseName); + if (base.kind !== 'class') throw new Error(`KERN core runtime base class is not a class: ${baseName}`); + return base; +} + +function classBaseName(value: unknown): string | undefined { + if (typeof value !== 'string' || !value.trim()) return undefined; + const match = /^([A-Za-z_$][\w$]*)/.exec(value.trim()); + return match?.[1]; +} + +function classThisEnv(klass: KernClassValue, receiver: KernInstanceValue): CoreRuntimeEnv { + const env = klass.env.child(); + env.define('this', receiver); + return env; +} + function makeFunction(node: IRNode, env: CoreRuntimeEnv): KernFunctionValue { return brandValue({ kind: 'function', @@ -488,6 +924,7 @@ function callFunctionValue( args: readonly KernValue[], ): { value: KernValue; env: CoreRuntimeEnv } { const callEnv = fn.env.child(); + validateRuntimeArgs(fn.name ?? 
'anonymous function', fn.params, args); fn.params.forEach((param, index) => { const provided = args[index]; const value = @@ -502,14 +939,144 @@ function callFunctionValue( return { value: completion.value, env: callEnv }; } +function validateRuntimeArgs(label: string, params: readonly RuntimeParam[], args: readonly KernValue[]): void { + if (args.length > params.length) { + throw new Error(`KERN core runtime ${label} received too many arguments.`); + } + params.forEach((param, index) => { + if (index >= args.length && !param.defaultExpr) { + throw new Error(`KERN core runtime ${label} missing required argument: ${param.name}.`); + } + }); +} + +function executeAssign(node: IRNode, env: CoreRuntimeEnv): void { + const target = requiredString(node.props?.target, 'assign target='); + if (Object.hasOwn(node.props ?? {}, 'op') && node.props?.op !== '=') { + throw new Error('KERN core runtime assign supports only direct assignment in this slice.'); + } + const value = evalCoreExpression(unwrapExpr(node.props?.value, 'assign value='), env); + assignRuntimeTarget(target, value, env); +} + +function assignRuntimeTarget(target: string, value: KernValue, env: CoreRuntimeEnv): void { + const parsed = parseExpression(target); + if (parsed.kind === 'ident') { + env.assign(parsed.name, value); + return; + } + if (parsed.kind === 'member') { + const object = evalValueIR(parsed.object, env); + if (object.kind === 'instance') { + object.fields[parsed.property] = value; + return; + } + if (object.kind === 'record') { + object.entries[parsed.property] = value; + return; + } + throw new Error(`KERN core runtime cannot assign member on ${object.kind}.`); + } + if (parsed.kind === 'index') { + const object = evalValueIR(parsed.object, env); + const index = evalValueIR(parsed.index, env); + if (object.kind === 'array') { + if (index.kind !== 'number' || !Number.isInteger(index.value) || index.value < 0) { + throw new Error('KERN core runtime array assignment index must be a non-negative 
integer.'); + } + object.items[index.value] = value; + return; + } + if (object.kind === 'record') { + if (index.kind !== 'string') throw new Error('KERN core runtime record assignment key must be a string.'); + object.entries[index.value] = value; + return; + } + throw new Error(`KERN core runtime cannot assign index on ${object.kind}.`); + } + throw new Error('KERN core runtime assign target must be an identifier, member, or index expression.'); +} + function runtimeFunctionBody(node: IRNode): IRNode[] { const handler = node.children?.find((child) => child.type === 'handler'); const body = handler ? (handler.children ?? []) : (node.children ?? []); return body.filter((child) => child.type !== 'param' && child.type !== 'decorator'); } +function firstRuntimeChild(node: IRNode, type: string): IRNode | undefined { + return node.children?.find((child) => child.type === type); +} + +function runtimeChildNodes(node: IRNode, type: string): IRNode[] { + return node.children?.filter((child) => child.type === type) ?? []; +} + +function constructorCallsSuper(node: IRNode): boolean { + return runtimeFunctionBody(node).some(statementCallsSuper); +} + +function statementCallsSuper(node: IRNode): boolean { + const rawValue = node.type === 'do' ? node.props?.value : undefined; + if (rawValue !== undefined && expressionCallsSuper(rawValue)) return true; + return (node.children ?? 
[]).some(statementCallsSuper); +} + +function expressionCallsSuper(value: unknown): boolean { + try { + return valueIRCallsSuper(parseExpression(unwrapExpr(value, 'super expression'))); + } catch { + return false; + } +} + +function valueIRCallsSuper(value: ValueIR): boolean { + switch (value.kind) { + case 'call': + return ( + (value.callee.kind === 'ident' && value.callee.name === 'super') || + valueIRCallsSuper(value.callee) || + value.args.some(valueIRCallsSuper) + ); + case 'member': + return valueIRCallsSuper(value.object); + case 'index': + return valueIRCallsSuper(value.object) || valueIRCallsSuper(value.index); + case 'tmplLit': + return value.expressions.some(valueIRCallsSuper); + case 'arrayLit': + return value.items.some(valueIRCallsSuper); + case 'objectLit': + return value.entries.some((entry) => + 'kind' in entry ? valueIRCallsSuper(entry.argument) : valueIRCallsSuper(entry.value), + ); + case 'unary': + case 'await': + case 'new': + case 'spread': + case 'propagate': + return valueIRCallsSuper(value.argument); + case 'typeAssert': + case 'nonNull': + return valueIRCallsSuper(value.expression); + case 'binary': + return valueIRCallsSuper(value.left) || valueIRCallsSuper(value.right); + case 'conditional': + return valueIRCallsSuper(value.test) || valueIRCallsSuper(value.consequent) || valueIRCallsSuper(value.alternate); + case 'lambda': + return valueIRCallsSuper(value.body); + case 'numLit': + case 'strLit': + case 'boolLit': + case 'nullLit': + case 'undefLit': + case 'regexLit': + case 'ident': + return false; + } +} + function runtimeChildren(node: IRNode): IRNode[] { - if (node.type === 'handler' || node.type === '__block') return node.children ?? []; + if (node.type === 'document' || node.type === 'handler' || node.type === '__block') return node.children ?? 
[]; return [node]; } @@ -603,6 +1170,10 @@ function kernEquals(left: KernValue, right: KernValue): boolean { } case 'function': case 'builtin': + case 'class': + case 'instance': + case 'bound-method': + case 'super': return left === right; } } @@ -656,16 +1227,46 @@ function isKernValue(value: unknown): value is KernValue { typeof value.name === 'string' && typeof value.call === 'function' ); + case 'class': + return ( + hasOnlyKeys(value, ['kind', 'name', 'node', 'env']) && + typeof value.name === 'string' && + isPlainRecord(value.node) && + value.env instanceof CoreRuntimeEnv + ); + case 'instance': + return ( + hasOnlyKeys(value, ['kind', 'classValue', 'fields', 'initializedClasses']) && + isKernValue(value.classValue) && + value.classValue.kind === 'class' && + isPlainRecord(value.fields) && + Object.values(value.fields).every(isKernValue) && + value.initializedClasses instanceof Set + ); + case 'bound-method': + return ( + hasOnlyKeys(value, ['kind', 'name', 'receiver', 'methodNode', 'ownerClass']) && + typeof value.name === 'string' && + isKernValue(value.receiver) && + value.receiver.kind === 'instance' && + isPlainRecord(value.methodNode) && + isKernValue(value.ownerClass) && + value.ownerClass.kind === 'class' + ); + case 'super': + return ( + hasOnlyKeys(value, ['kind', 'receiver', 'ownerClass', 'mode']) && + isKernValue(value.receiver) && + value.receiver.kind === 'instance' && + isKernValue(value.ownerClass) && + value.ownerClass.kind === 'class' && + (value.mode === 'constructor' || value.mode === 'method') + ); default: return false; } } -function brandValue(value: T): T { - Object.defineProperty(value, KERN_VALUE_BRAND, { value: true }); - return value; -} - function hasArrayHoles(value: readonly unknown[]): boolean { for (let i = 0; i < value.length; i += 1) { if (!Object.hasOwn(value, i)) return true; diff --git a/packages/core/src/core-runtime/value-brand.ts b/packages/core/src/core-runtime/value-brand.ts new file mode 100644 index 
00000000..cc0f1a76 --- /dev/null +++ b/packages/core/src/core-runtime/value-brand.ts @@ -0,0 +1,6 @@ +export const KERN_VALUE_BRAND: unique symbol = Symbol('KERN core runtime value'); + +export function brandValue(value: T): T { + Object.defineProperty(value, KERN_VALUE_BRAND, { value: true }); + return value; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 6cf5b14c..6ff3e81d 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -140,6 +140,46 @@ export { VALID_STRUCTURES, VALID_TARGETS, } from './config.js'; +export type { + CoreFixture, + CoreFixtureError, + CoreFixtureValue, + CoreGraphEdge, + CoreLowerings, + CoreOperation, + CoreOperationKind, + CoreOperationReturns, + CoreTypeContract, + CoreTypeContractRegistry, + CoreTypeKind, + CoreTypeName, +} from './core-contracts/index.js'; +export { + BOOLEAN_CONTRACT, + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + CORE_TYPE_CONTRACTS, + CORE_TYPE_NAMES, + CoreContractEvaluationError, + contractToGraphEdges, + coreFixtureValueType, + evaluateCoreContractOperation, + FUNCTION_CONTRACT, + isCoreFixtureFunction, + isCoreFixtureUndefined, + LIST_CONTRACT, + NULL_CONTRACT, + NUMBER_CONTRACT, + RECORD_CONTRACT, + STRING_CONTRACT, + UNDEFINED_CONTRACT, +} from './core-contracts/index.js'; +export { + CoreRuntimeContractAdapterError, + coreFixtureValueToKernValue, + kernValueToCoreFixtureValue, + roundTripKernContractDataValue, +} from './core-runtime/contract-adapter.js'; export type { CoreCompletion, CoreRuntimeResult, diff --git a/packages/core/tests/core-contracts.test.ts b/packages/core/tests/core-contracts.test.ts new file mode 100644 index 00000000..c3455cbd --- /dev/null +++ b/packages/core/tests/core-contracts.test.ts @@ -0,0 +1,341 @@ +import type { CoreFixtureValue, CoreOperation } from '../src/core-contracts/index.js'; +import { + BOOLEAN_CONTRACT, + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + CORE_TYPE_CONTRACTS, + CORE_TYPE_NAMES, + 
CoreContractEvaluationError, + contractToGraphEdges, + coreFixtureValueType, + evaluateCoreContractOperation, + LIST_CONTRACT, + NUMBER_CONTRACT, + RECORD_CONTRACT, + STRING_CONTRACT, +} from '../src/core-contracts/index.js'; + +describe('core type contracts registry', () => { + it('exposes schemaVersion 1 and all builtin core types', () => { + expect(CORE_TYPE_CONTRACTS.schemaVersion).toBe(1); + expect(Object.keys(CORE_TYPE_CONTRACTS.types)).toEqual([...CORE_TYPE_NAMES]); + + for (const name of CORE_TYPE_NAMES) { + const contract = CORE_TYPE_CONTRACTS.types[name]; + expect(contract.name).toBe(name); + expect(contract.strict).toBe(true); + expect(Array.isArray(contract.operations)).toBe(true); + } + }); + + it('exposes behavior contracts for VM-backed core values', () => { + expect(BOOLEAN_CONTRACT.operations.map((operation) => operation.id)).toEqual([ + 'Boolean.not', + 'Boolean.and', + 'Boolean.or', + 'Boolean.equals', + 'Boolean.toString', + ]); + expect(STRING_CONTRACT.operations.map((operation) => operation.id)).toEqual([ + 'String.length', + 'String.index', + 'String.includes', + 'String.startsWith', + 'String.endsWith', + 'String.slice', + 'String.trim', + 'String.lower', + 'String.upper', + 'String.concat', + 'String.equals', + 'String.lessThan', + 'String.lessThanOrEqual', + 'String.greaterThan', + 'String.greaterThanOrEqual', + 'String.toString', + ]); + expect(NUMBER_CONTRACT.operations.map((operation) => operation.id)).toEqual([ + 'Number.negate', + 'Number.add', + 'Number.subtract', + 'Number.multiply', + 'Number.divide', + 'Number.remainder', + 'Number.lessThan', + 'Number.lessThanOrEqual', + 'Number.greaterThan', + 'Number.greaterThanOrEqual', + ]); + expect(LIST_CONTRACT.operations.map((operation) => operation.id)).toEqual(['List.length', 'List.index']); + expect(RECORD_CONTRACT.operations.map((operation) => operation.id)).toEqual(['Record.get']); + + expect(CORE_TYPE_CONTRACTS.types.Function.operations).toHaveLength(0); + 
expect(CORE_TYPE_CONTRACTS.types.Null.operations).toHaveLength(0); + expect(CORE_TYPE_CONTRACTS.types.Undefined.operations).toHaveLength(0); + }); + + it('uses registry-level schemaVersion instead of operation version suffixes', () => { + for (const contract of Object.values(CORE_TYPE_CONTRACTS.types)) { + for (const operation of contract.operations) { + expect(operation.id).not.toMatch(/@v\d+/); + } + } + }); + + it('keeps every operation graphable and fixture-backed', () => { + for (const contract of [BOOLEAN_CONTRACT, STRING_CONTRACT, NUMBER_CONTRACT, LIST_CONTRACT, RECORD_CONTRACT]) { + for (const operation of contract.operations) { + expect(operation.args[0]).toBe(contract.name); + expect(operation.fixtures.length).toBeGreaterThan(0); + expect(operation.review.summary.length).toBeGreaterThan(0); + expect(operation.review.graph).toContain(contract.name); + expect(operation.lowers?.kern).toBeTruthy(); + expect(operation.lowers?.ts).toBeTruthy(); + expect(operation.lowers?.python).toBeTruthy(); + } + } + }); +}); + +describe('core type contract fixtures', () => { + it('evaluates Boolean operation fixtures including strict type errors', () => { + for (const operation of BOOLEAN_CONTRACT.operations) { + expectOperationFixtures(operation); + } + }); + + it('evaluates String operation fixtures including strict type errors', () => { + for (const operation of STRING_CONTRACT.operations) { + expectOperationFixtures(operation); + } + }); + + it('evaluates Number, List, and Record operation fixtures including strict errors', () => { + for (const operation of [ + ...NUMBER_CONTRACT.operations, + ...LIST_CONTRACT.operations, + ...RECORD_CONTRACT.operations, + ]) { + expectOperationFixtures(operation); + } + }); + + it('pins explicitly rejected strict signatures', () => { + expectErrorFixture('Boolean.and', [true, 'true']); + expectErrorFixture('String.concat', ['count:', 2]); + expectErrorFixture('String.equals', ['kern', true]); + expectErrorFixture('Number.add', [2, 
'3']); + expectErrorFixture('List.index', [[10], '0']); + expectErrorFixture('Record.get', [{ x: 1 }, 0]); + }); + + it('classifies all schema-level fixture value kinds for future contracts', () => { + expect(coreFixtureValueType(null)).toBe('Null'); + expect(coreFixtureValueType(CORE_FIXTURE_UNDEFINED)).toBe('Undefined'); + expect(coreFixtureValueType(CORE_FIXTURE_FUNCTION)).toBe('Function'); + expect(coreFixtureValueType(JSON.parse(JSON.stringify(CORE_FIXTURE_UNDEFINED)))).toBe('Undefined'); + expect(coreFixtureValueType(JSON.parse(JSON.stringify(CORE_FIXTURE_FUNCTION)))).toBe('Function'); + expect(coreFixtureValueType(['x'])).toBe('List'); + expect(coreFixtureValueType({ key: 'value' })).toBe('Record'); + expect(coreFixtureValueType({ kind: 'Undefined' })).toBe('Record'); + expect(coreFixtureValueType({ error: 'strict-type', message: 'valid record value' })).toBe('Record'); + }); + + it('uses unambiguous fixture result keys instead of overloading record-shaped values', () => { + for (const operation of [...BOOLEAN_CONTRACT.operations, ...STRING_CONTRACT.operations]) { + for (const fixture of operation.fixtures) { + expect(Array.isArray(fixture)).toBe(false); + expect('args' in fixture).toBe(true); + expect('returns' in fixture !== 'throws' in fixture).toBe(true); + } + } + }); + + it('pins KERN-owned Unicode code-point string semantics', () => { + expect(evaluateCoreContractOperation('String.length', ['𐐷'])).toBe(1); + expect(evaluateCoreContractOperation('String.length', ['e\u0301'])).toBe(2); + expect(evaluateCoreContractOperation('String.index', ['a𐐷b', 1])).toBe('𐐷'); + expect(evaluateCoreContractOperation('String.index', ['a𐐷b', 3])).toEqual(CORE_FIXTURE_UNDEFINED); + expect(evaluateCoreContractOperation('String.slice', ['a𐐷b', 1, 2])).toBe('𐐷'); + expect(evaluateCoreContractOperation('String.slice', ['e\u0301x', 0, 2])).toBe('e\u0301'); + expect(evaluateCoreContractOperation('String.lessThan', ['a', '𐐷'])).toBe(true); + 
expect(evaluateCoreContractOperation('String.greaterThan', ['𐐷', 'z'])).toBe(true); + }); + + it('rejects non-finite Number values without storing them in exported fixture data', () => { + expect(() => evaluateCoreContractOperation('String.slice', ['abc', Number.POSITIVE_INFINITY, 2])).toThrow( + 'String.slice expects String, Number, Number.', + ); + expect(() => evaluateCoreContractOperation('Number.add', [Number.NaN, 1])).toThrow( + 'Number.add expects Number, Number.', + ); + expect(() => evaluateCoreContractOperation('Number.add', [1e308, 1e308])).toThrow( + 'Number.add result must be finite.', + ); + }); + + it('pins KERN Number and collection semantics', () => { + expect(evaluateCoreContractOperation('Number.divide', [5, 2])).toBe(2.5); + expect(() => evaluateCoreContractOperation('Number.divide', [1, 0])).toThrow('Number.divide division by zero.'); + expect(() => evaluateCoreContractOperation('Number.remainder', [1, 0])).toThrow( + 'Number.remainder division by zero.', + ); + expect(evaluateCoreContractOperation('Number.remainder', [-5, 2])).toBe(-1); + expect(evaluateCoreContractOperation('Number.remainder', [5, -2])).toBe(1); + expect(evaluateCoreContractOperation('List.length', [[1, 2, 3]])).toBe(3); + expect(evaluateCoreContractOperation('List.index', [[null], 0])).toBeNull(); + expect(evaluateCoreContractOperation('List.index', [[10, 20], 2])).toEqual(CORE_FIXTURE_UNDEFINED); + expect(evaluateCoreContractOperation('List.index', [[10, 20], -1])).toEqual(CORE_FIXTURE_UNDEFINED); + expect(evaluateCoreContractOperation('Record.get', [{ x: 1 }, 'x'])).toBe(1); + expect(evaluateCoreContractOperation('Record.get', [{ x: null }, 'x'])).toBeNull(); + expect(evaluateCoreContractOperation('Record.get', [{}, 'toString'])).toEqual(CORE_FIXTURE_UNDEFINED); + }); +}); + +describe('core type contract graph extraction', () => { + it('derives type, lowering, fixture, and tag edges for String.includes', () => { + const edges = contractToGraphEdges(STRING_CONTRACT); + + 
expect( + hasEdge(edges, { + from: 'String', + relation: 'includes(String)', + to: 'Boolean', + operation: 'String.includes', + }), + ).toBe(true); + expect( + hasEdge(edges, { + from: 'String.includes', + relation: 'lowers.ts', + to: '__kernStringIncludes($0, $1)', + operation: 'String.includes', + }), + ).toBe(true); + expect( + hasEdge(edges, { + from: 'String.includes', + relation: 'lowers.python', + to: '__kern_string_includes($0, $1)', + operation: 'String.includes', + }), + ).toBe(true); + expect( + hasEdge(edges, { + from: 'String.includes', + relation: 'fixture', + to: 'String.includes.fixture.0', + operation: 'String.includes', + index: 0, + }), + ).toBe(true); + }); + + it('derives a Boolean.not operation edge', () => { + expect( + hasEdge(contractToGraphEdges(BOOLEAN_CONTRACT), { + from: 'Boolean', + relation: 'not()', + to: 'Boolean', + operation: 'Boolean.not', + }), + ).toBe(true); + }); + + it('rejects operation ids that do not match the owning contract name', () => { + expect(() => + contractToGraphEdges({ + ...STRING_CONTRACT, + operations: [{ ...STRING_CONTRACT.operations[0], id: 'Boolean.length' }], + }), + ).toThrow('must be prefixed with String'); + }); +}); + +function expectOperationFixtures(operation: CoreOperation): void { + for (const fixture of operation.fixtures) { + if ('throws' in fixture) { + expect(() => evaluateCoreContractOperation(operation.id, fixture.args)).toThrow(fixture.throws.message); + try { + evaluateCoreContractOperation(operation.id, fixture.args); + } catch (error) { + expect(error).toBeInstanceOf(CoreContractEvaluationError); + expect((error as CoreContractEvaluationError).code).toBe(fixture.throws.code); + } + } else { + expect(evaluateCoreContractOperation(operation.id, fixture.args)).toEqual(fixture.returns); + } + } +} + +function expectErrorFixture(operationId: string, expectedArgs: readonly CoreFixtureValue[]): void { + const operation = [ + ...BOOLEAN_CONTRACT.operations, + ...STRING_CONTRACT.operations, + 
...NUMBER_CONTRACT.operations, + ...LIST_CONTRACT.operations, + ...RECORD_CONTRACT.operations, + ].find((operation) => operation.id === operationId); + if (!operation) throw new Error(`Missing operation ${operationId}`); + expect( + operation.fixtures.some( + (fixture) => + sameFixtureValueList(fixture.args, expectedArgs) && + 'throws' in fixture && + fixture.throws.code === 'strict-type', + ), + ).toBe(true); +} + +function sameFixtureValue(left: CoreFixtureValue, right: CoreFixtureValue): boolean { + const leftKind = coreFixtureValueType(left); + if (leftKind !== coreFixtureValueType(right)) return false; + if (leftKind === 'Null' || leftKind === 'Undefined') return true; + if (leftKind === 'List') { + const leftArray = left as readonly CoreFixtureValue[]; + const rightArray = right as readonly CoreFixtureValue[]; + return ( + leftArray.length === rightArray.length && + leftArray.every((item, index) => sameFixtureValue(item, rightArray[index])) + ); + } + if (leftKind === 'Record') { + const leftRecord = left as { readonly [key: string]: CoreFixtureValue }; + const rightRecord = right as { readonly [key: string]: CoreFixtureValue }; + const leftKeys = Object.keys(leftRecord).sort(); + const rightKeys = Object.keys(rightRecord).sort(); + return ( + sameStringList(leftKeys, rightKeys) && + leftKeys.every((key) => sameFixtureValue(leftRecord[key], rightRecord[key])) + ); + } + return left === right; +} + +function sameFixtureValueList(left: readonly CoreFixtureValue[], right: readonly CoreFixtureValue[]): boolean { + return left.length === right.length && left.every((item, index) => sameFixtureValue(item, right[index])); +} + +function sameStringList(left: readonly string[], right: readonly string[]): boolean { + return left.length === right.length && left.every((item, index) => item === right[index]); +} + +function hasEdge( + edges: ReturnType, + expected: { + readonly from: string; + readonly relation: string; + readonly to: string; + readonly operation?: string; 
+ readonly index?: number; + }, +): boolean { + return edges.some( + (edge) => + edge.from === expected.from && + edge.relation === expected.relation && + edge.to === expected.to && + edge.operation === expected.operation && + edge.index === expected.index, + ); +} diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 6ded098a..92bf9739 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -1,18 +1,25 @@ import { + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + CoreRuntimeContractAdapterError, CoreRuntimeEnv, callCoreFunction, + coreFixtureValueToKernValue, createCoreRuntimeEnv, evalCoreExpression, fromHostValue, kBoolean, kernTruthy, + kernValueToCoreFixtureValue, kNull, kNumber, kString, kUndefined, + roundTripKernContractDataValue, runCoreRuntime, toHostValue, } from '../src/index.js'; +import { parse } from '../src/parser.js'; import type { IRNode } from '../src/types.js'; function handler(children: IRNode[]): IRNode { @@ -107,6 +114,92 @@ describe('KERN core runtime values and expressions', () => { expect(toHostValue(evalCoreExpression('label["1.0"]', env))).toBeUndefined(); }); + test('string length and index use KERN code-point semantics in the VM', () => { + const env = createCoreRuntimeEnv({ globals: { label: 'a𐐷b', combo: 'e\u0301x' } }); + expect(toHostValue(evalCoreExpression('label.length', env))).toBe(3); + expect(toHostValue(evalCoreExpression('label[1]', env))).toBe('𐐷'); + expect(toHostValue(evalCoreExpression('combo.length', env))).toBe(3); + expect(toHostValue(evalCoreExpression('combo[1]', env))).toBe('\u0301'); + }); + + test('string methods dispatch through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv({ globals: { label: 'a𐐷b', word: ' KERN ' } }); + expect(toHostValue(evalCoreExpression('label.slice(1, 2)', env))).toBe('𐐷'); + expect(toHostValue(evalCoreExpression('label.index(1)', env))).toBe('𐐷'); + 
expect(toHostValue(evalCoreExpression('label.index(3)', env))).toBeUndefined(); + expect(() => evalCoreExpression('label.slice(1)', env)).toThrow('String.slice expects String, Number, Number.'); + expect(toHostValue(evalCoreExpression('label.includes("𐐷")', env))).toBe(true); + expect(toHostValue(evalCoreExpression('label.startsWith("a")', env))).toBe(true); + expect(toHostValue(evalCoreExpression('label.endsWith("b")', env))).toBe(true); + expect(toHostValue(evalCoreExpression('word.trim().lower()', env))).toBe('kern'); + expect(toHostValue(evalCoreExpression('word.trim().upper()', env))).toBe('KERN'); + expect(toHostValue(evalCoreExpression('label.concat("!")', env))).toBe('a𐐷b!'); + expect(toHostValue(evalCoreExpression('label.equals("a𐐷b")', env))).toBe(true); + }); + + test('string and boolean contract methods reject cross-type operands in the VM', () => { + const env = createCoreRuntimeEnv({ globals: { label: 'count:', flag: true } }); + expect(() => evalCoreExpression('label.concat(2)', env)).toThrow('String.concat expects String, String.'); + expect(() => evalCoreExpression('label.concat(String)', env)).toThrow('String.concat expects String, String.'); + expect(() => evalCoreExpression('label.equals(true)', env)).toThrow('String.equals expects String, String.'); + expect(() => evalCoreExpression('flag.and("true")', env)).toThrow('Boolean.and expects Boolean, Boolean.'); + expect(() => evalCoreExpression('flag.equals(1)', env)).toThrow('Boolean.equals expects Boolean, Boolean.'); + expect(toHostValue(evalCoreExpression('flag.not()', env))).toBe(false); + expect(toHostValue(evalCoreExpression('flag.and(false)', env))).toBe(false); + expect(toHostValue(evalCoreExpression('flag.or(false)', env))).toBe(true); + expect(toHostValue(evalCoreExpression('flag.toString()', env))).toBe('true'); + }); + + test('number operators dispatch through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv(); + expect(toHostValue(evalCoreExpression('2 + 3', 
env))).toBe(5); + expect(toHostValue(evalCoreExpression('5 - 3', env))).toBe(2); + expect(toHostValue(evalCoreExpression('3 * 4', env))).toBe(12); + expect(toHostValue(evalCoreExpression('5 / 2', env))).toBe(2.5); + expect(toHostValue(evalCoreExpression('-3', env))).toBe(-3); + expect(toHostValue(evalCoreExpression('-5 % 2', env))).toBe(-1); + expect(toHostValue(evalCoreExpression('5 % -2', env))).toBe(1); + expect(toHostValue(evalCoreExpression('2 < 3', env))).toBe(true); + expect(toHostValue(evalCoreExpression('3 <= 2', env))).toBe(false); + expect(toHostValue(evalCoreExpression('3 > 2', env))).toBe(true); + expect(toHostValue(evalCoreExpression('2 >= 3', env))).toBe(false); + expect(() => evalCoreExpression('1 / 0', env)).toThrow('Number.divide division by zero.'); + expect(() => evalCoreExpression('1 % 0', env)).toThrow('Number.remainder division by zero.'); + }); + + test('string ordered comparisons dispatch through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv(); + expect(toHostValue(evalCoreExpression('"abc" < "abd"', env))).toBe(true); + expect(toHostValue(evalCoreExpression('"abc" <= "abc"', env))).toBe(true); + expect(toHostValue(evalCoreExpression('"abd" > "abc"', env))).toBe(true); + expect(toHostValue(evalCoreExpression('"abc" >= "abd"', env))).toBe(false); + expect(toHostValue(evalCoreExpression('"𐐷" > "z"', env))).toBe(true); + }); + + test('unary boolean not dispatches through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv(); + expect(toHostValue(evalCoreExpression('!true', env))).toBe(false); + expect(toHostValue(evalCoreExpression('!false', env))).toBe(true); + expect(() => evalCoreExpression('!5', env)).toThrow('KERN core runtime unary ! 
requires a boolean.'); + }); + + test('list and record reads dispatch through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv({ + globals: { xs: [10, undefined, 30], user: { name: 'Ada' }, sentinel: { __kernFixture: 'Undefined' } }, + }); + expect(toHostValue(evalCoreExpression('xs.length', env))).toBe(3); + expect(toHostValue(evalCoreExpression('xs[0]', env))).toBe(10); + expect(toHostValue(evalCoreExpression('xs[1]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('xs[-1]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('xs[1.5]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('user.name', env))).toBe('Ada'); + expect(toHostValue(evalCoreExpression('user["missing"]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('user.toString', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('sentinel.__kernFixture', env))).toBe('Undefined'); + expect(toHostValue(evalCoreExpression('[String].length', env))).toBe(1); + expect(toHostValue(evalCoreExpression('[String][0]', env))).toBe('[KERN builtin String]'); + }); + test('optional index skips unresolved index expressions for nullish objects', () => { const env = createCoreRuntimeEnv({ globals: { maybe: null } }); expect(toHostValue(evalCoreExpression('maybe?.[missingName]', env))).toBeUndefined(); @@ -124,6 +217,61 @@ describe('KERN core runtime values and expressions', () => { }); }); +describe('KERN core runtime contract adapter', () => { + test('round-trips supported KERN values through core contract fixture values', () => { + const value = fromHostValue({ + text: 'a𐐷b', + flag: true, + count: 3, + none: null, + missing: undefined, + list: [false, 'x'], + sentinelLikeRecord: { kind: 'Undefined' }, + }); + + const roundTripped = toHostValue(roundTripKernContractDataValue(value)) as Record; + const { missing: roundTrippedMissing, ...roundTrippedWithoutMissing } = roundTripped; + 
expect(roundTrippedWithoutMissing).toEqual({ + text: 'a𐐷b', + flag: true, + count: 3, + none: null, + list: [false, 'x'], + sentinelLikeRecord: { kind: 'Undefined' }, + }); + expect(Object.hasOwn(roundTripped, 'missing')).toBe(true); + expect(roundTrippedMissing).toBeUndefined(); + }); + + test('keeps Undefined fixture encoding stable across JSON round trips', () => { + const encoded = kernValueToCoreFixtureValue(kUndefined()); + expect(encoded).toEqual(CORE_FIXTURE_UNDEFINED); + expect(toHostValue(coreFixtureValueToKernValue(JSON.parse(JSON.stringify(encoded))))).toBeUndefined(); + }); + + test('rejects runtime records that use the reserved Undefined fixture sentinel shape', () => { + expect(() => kernValueToCoreFixtureValue(fromHostValue({ __kernFixture: 'Undefined' }))).toThrow( + 'reserved core fixture sentinel shape', + ); + }); + + test('rejects runtime instances that use reserved fixture sentinel field shape', () => { + const root = parse(['class name=Trap', ' field name=__kernFixture type=string value="Function"'].join('\n')); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => kernValueToCoreFixtureValue(evalCoreExpression('new Trap()', env))).toThrow( + 'reserved core fixture sentinel shape', + ); + }); + + test('represents runtime-only callable values as opaque Function fixture references', () => { + const env = createCoreRuntimeEnv(); + expect(kernValueToCoreFixtureValue(env.lookup('String'))).toEqual(CORE_FIXTURE_FUNCTION); + expect(() => coreFixtureValueToKernValue(CORE_FIXTURE_FUNCTION)).toThrow(CoreRuntimeContractAdapterError); + }); +}); + describe('KERN core runtime statements', () => { test('runs let, expression-v1, and return', () => { const result = runCoreRuntime( @@ -177,6 +325,164 @@ describe('KERN core runtime statements', () => { ); expect(toHostValue(result.completion.value)).toEqual({ a: 'ok', b: 'ok' }); }); + + test('executes user-defined classes with fields constructors methods and getters', () => { + 
const root = parse( + [ + 'class name=Counter', + ' field name=count type=number value={{ 0 }}', + ' constructor', + ' param name=initial type=number value={{ 0 }}', + ' handler', + ' assign target="this.count" value="initial"', + ' method name=inc returns=number', + ' param name=step type=number value={{ 1 }}', + ' handler', + ' assign target="this.count" value="this.count + step"', + ' return value="this.count"', + ' getter name=label returns=string', + ' handler', + ' return value="`count=${this.count}`"', + 'fn name=make returns=number', + ' handler', + ' let name=c value="new Counter(4)"', + ' do value="c.inc(2)"', + ' return value="c.count"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Counter(3).count', env))).toBe(3); + expect(toHostValue(evalCoreExpression('new Counter(3).inc()', env))).toBe(4); + expect(toHostValue(evalCoreExpression('new Counter(3).label', env))).toBe('count=3'); + expect(toHostValue(evalCoreExpression('make()', env))).toBe(6); + }); + + test('executes inherited fields getters methods and overrides', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="base"', + ' method name=kind returns=string', + ' handler', + ' return value="\'entity\'"', + ' getter name=summary returns=string', + ' handler', + ' return value="`${this.kind()}:${this.id}`"', + 'class name=User extends=Entity', + ' field name=name type=string value="Ada"', + ' method name=kind returns=string', + ' handler', + ' return value="`user/${super.kind()}`"', + ' method name=label returns=string', + ' handler', + ' return value="`${this.summary}:${this.name}`"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('base'); + expect(toHostValue(evalCoreExpression('new User().kind()', env))).toBe('user/entity'); + 
expect(toHostValue(evalCoreExpression('new User().summary', env))).toBe('user/entity:base'); + expect(toHostValue(evalCoreExpression('new User().label()', env))).toBe('user/entity:base:Ada'); + }); + + test('executes derived constructors with super constructor arguments', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="unset"', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' field name=name type=string value="unset"', + ' constructor', + ' param name=id type=string', + ' param name=name type=string', + ' handler', + ' do value="super(id)"', + ' assign target="this.name" value="name"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User("u1", "Ada").id', env))).toBe('u1'); + expect(toHostValue(evalCoreExpression('new User("u1", "Ada").name', env))).toBe('Ada'); + }); + + test('initializes derived fields after super constructor state', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="unset"', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' field name=copy type=string value={{ this.id }}', + ' constructor', + ' param name=id type=string', + ' handler', + ' do value="super(id)"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User("u1").copy', env))).toBe('u1'); + }); + + test('rejects missing and extra runtime arguments strictly', () => { + const root = parse( + [ + 'class name=Box', + ' constructor', + ' param name=value type=number', + ' handler', + ' assign target="this.value" value="value"', + 'fn name=need returns=number', + ' param name=value type=number', + ' handler', + ' return value="value"', + 
].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('need()', env)).toThrow('missing required argument: value'); + expect(() => evalCoreExpression('need(1, 2)', env)).toThrow('received too many arguments'); + expect(() => evalCoreExpression('new Box()', env)).toThrow('missing required argument: value'); + expect(() => evalCoreExpression('new Box(1, 2)', env)).toThrow('received too many arguments'); + }); + + test('detects nested constructor super calls structurally', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="unset"', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' if cond=true', + ' do value="super(id)"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User("u1").id', env))).toBe('u1'); + }); }); describe('KERN core runtime functions', () => { From 2011262bb827670b956f30697d0e8f375a86f46f Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 22:14:12 +0200 Subject: [PATCH 07/46] feat(core): add semantic substrate for review --- packages/core/src/core-runtime/index.ts | 1 + packages/core/src/index.ts | 17 ++ packages/core/src/semantic-substrate.ts | 231 ++++++++++++++++++ .../core/tests/semantic-substrate.test.ts | 111 +++++++++ .../src/rules/suggest-kern-primitive.ts | 26 +- .../rules-suggest-kern-primitive.test.ts | 1 + 6 files changed, 368 insertions(+), 19 deletions(-) create mode 100644 packages/core/src/semantic-substrate.ts create mode 100644 packages/core/tests/semantic-substrate.test.ts diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index afc7f74d..b50efa91 100644 --- a/packages/core/src/core-runtime/index.ts +++ 
b/packages/core/src/core-runtime/index.ts @@ -1073,6 +1073,7 @@ function valueIRCallsSuper(value: ValueIR): boolean { case 'ident': return false; } + return false; } function runtimeChildren(node: IRNode): IRNode[] { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 6ff3e81d..5602ae6c 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -436,6 +436,23 @@ export { formatScanSummary, generateConfigSource, scanProject } from './scanner. export type { KernSchemaJSON, NodeSchema, PropKind, PropSchema, SchemaViolation } from './schema.js'; // Schema validation + export export { exportSchemaJSON, NODE_SCHEMAS, validateSchema } from './schema.js'; +export type { + BuildKernSemanticSubstrateOptions, + KernSemanticCoreOperation, + KernSemanticCoreType, + KernSemanticIrContract, + KernSemanticPrimitive, + KernSemanticStdlibOperation, + KernSemanticSubstrate, + KernSemanticSubstrateSource, + KernSemanticSubstrateTarget, + KernSemanticSupport, +} from './semantic-substrate.js'; +export { + buildKernSemanticSubstrate, + lookupSemanticPrimitive, + semanticPrimitiveSupportSummary, +} from './semantic-substrate.js'; // Semantic validation export type { SemanticViolation } from './semantic-validator.js'; export { validateSemantics } from './semantic-validator.js'; diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts new file mode 100644 index 00000000..a310e9da --- /dev/null +++ b/packages/core/src/semantic-substrate.ts @@ -0,0 +1,231 @@ +import { KERN_STDLIB } from './codegen/kern-stdlib.js'; +import { + PORTABLE_LOGIC_PRIMITIVE_IDS, + PORTABLE_LOGIC_PRIMITIVES, + type PortableLogicPrimitiveId, + type PortableLogicSupport, + type PortableLogicTarget, +} from './codegen/portable-logic-primitives.js'; +import { CORE_TYPE_CONTRACTS, type CoreOperationReturns, contractToGraphEdges } from './core-contracts/index.js'; +import type { NodeContract } from './ir/semantics/index.js'; +import { 
snapshotRegistry } from './ir/semantics/index.js'; + +export type KernSemanticSubstrateSource = 'codegen-from-ts' | 'native-kern'; +export type KernSemanticSubstrateTarget = PortableLogicTarget; + +export interface KernSemanticSupport { + readonly ts: PortableLogicSupport; + readonly python: PortableLogicSupport; + readonly go: PortableLogicSupport; +} + +export interface KernSemanticCoreOperation { + readonly id: string; + readonly kind: string; + readonly args: readonly string[]; + readonly returns: readonly string[]; + readonly lowerings: Readonly>; + readonly fixtureCount: number; + readonly reviewSummary: string; + readonly reviewTags: readonly string[]; +} + +export interface KernSemanticCoreType { + readonly id: string; + readonly name: string; + readonly kind: string; + readonly strict: true; + readonly operations: readonly KernSemanticCoreOperation[]; +} + +export interface KernSemanticPrimitive { + readonly id: PortableLogicPrimitiveId; + readonly kernName: string; + readonly domain: string; + readonly description: string; + readonly intent: string; + readonly purity: string; + readonly hostPatterns: readonly string[]; + readonly portabilityNotes: readonly string[]; + readonly support: KernSemanticSupport; +} + +export interface KernSemanticStdlibOperation { + readonly id: string; + readonly module: string; + readonly method: string; + readonly arity: number; + readonly support: KernSemanticSupport; +} + +export interface KernSemanticIrContract { + readonly nodeType: string; + readonly forbiddenRewrites: readonly string[]; + readonly fixtureCount: number; +} + +export interface KernSemanticSubstrate { + readonly schemaVersion: 1; + readonly generatedBy: 'kern-semantic-substrate'; + readonly source: KernSemanticSubstrateSource; + readonly coreTypes: readonly KernSemanticCoreType[]; + readonly coreGraphEdges: readonly { + readonly from: string; + readonly relation: string; + readonly to: string; + readonly operation?: string; + readonly index?: number; + 
}[]; + readonly portablePrimitives: readonly KernSemanticPrimitive[]; + readonly stdlibOperations: readonly KernSemanticStdlibOperation[]; + readonly irContracts: readonly KernSemanticIrContract[]; +} + +export interface BuildKernSemanticSubstrateOptions { + readonly source?: KernSemanticSubstrateSource; + readonly irContracts?: ReadonlyMap; +} + +export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { + const coreTypes = Object.values(CORE_TYPE_CONTRACTS.types).map((contract) => ({ + id: `core.type.${contract.name}`, + name: contract.name, + kind: contract.kind, + strict: contract.strict, + operations: contract.operations.map((operation) => ({ + id: operation.id, + kind: operation.kind, + args: [...operation.args], + returns: normalizeReturns(operation.returns), + lowerings: operation.lowers ? { ...operation.lowers } : {}, + fixtureCount: operation.fixtures.length, + reviewSummary: operation.review.summary, + reviewTags: [...operation.review.graph], + })), + })); + + return { + schemaVersion: 1, + generatedBy: 'kern-semantic-substrate', + source: options.source ?? 'codegen-from-ts', + coreTypes, + coreGraphEdges: Object.values(CORE_TYPE_CONTRACTS.types).flatMap((contract) => contractToGraphEdges(contract)), + portablePrimitives: PORTABLE_LOGIC_PRIMITIVE_IDS.map((id) => { + const primitive = PORTABLE_LOGIC_PRIMITIVES[id]; + return { + id, + kernName: kernPrimitiveName(id), + domain: id.split('.')[0], + description: primitive.description, + intent: primitive.intent, + purity: primitive.purity, + hostPatterns: [...primitive.hostPatterns], + portabilityNotes: [...primitive.portabilityNotes], + support: { ...primitive.targets }, + }; + }), + stdlibOperations: Object.entries(KERN_STDLIB).flatMap(([module, entries]) => + Object.entries(entries).map(([method, entry]) => ({ + id: `stdlib.${module}.${method}`, + module, + method, + arity: entry.arity, + support: { + ts: entry.ts ? 
'stable' : 'unsupported', + python: entry.py ? 'stable' : 'unsupported', + go: 'unsupported', + }, + })), + ), + irContracts: options.irContracts + ? snapshotRegistry(options.irContracts).contracts.map((contract) => ({ + nodeType: contract.nodeType, + forbiddenRewrites: [...contract.forbiddenRewrites], + fixtureCount: contract.fixtureCount, + })) + : [], + }; +} + +export function lookupSemanticPrimitive( + substrate: KernSemanticSubstrate, + id: PortableLogicPrimitiveId, +): KernSemanticPrimitive { + const primitive = substrate.portablePrimitives.find((candidate) => candidate.id === id); + if (!primitive) { + throw new Error(`KERN semantic substrate missing portable primitive '${id}'.`); + } + return primitive; +} + +export function semanticPrimitiveSupportSummary( + primitive: KernSemanticPrimitive, + targets: readonly KernSemanticSubstrateTarget[], +): string { + const bySupport: Record = { + preview: [], + stable: [], + unsupported: [], + }; + for (const target of targets) { + bySupport[primitive.support[target] ?? 'unsupported'].push(target); + } + + const parts: string[] = []; + for (const support of ['stable', 'preview', 'unsupported'] satisfies PortableLogicSupport[]) { + const targetNames = bySupport[support]; + if (targetNames.length > 0) parts.push(`${support}: ${targetNames.join(', ')}`); + } + return parts.join('; '); +} + +function normalizeReturns(returns: CoreOperationReturns): readonly string[] { + return typeof returns === 'string' ? 
[returns] : [...returns]; +} + +const KERN_PRIMITIVE_NAMES = { + 'collection.has': 'includes', + 'collection.count': 'count', + 'collection.filter': 'filter', + 'collection.compact': 'compact', + 'collection.pluck': 'pluck', + 'collection.take': 'take', + 'collection.drop': 'drop', + 'collection.slice': 'slice', + 'collection.reverse': 'reverse', + 'collection.at': 'at', + 'collection.join': 'join', + 'collection.concat': 'concat', + 'collection.includes': 'includes', + 'collection.indexOf': 'indexOf', + 'collection.lastIndexOf': 'lastIndexOf', + 'collection.sort': 'sort', + 'collection.uniqueBy': 'uniqueBy', + 'collection.groupBy': 'groupBy', + 'collection.partition': 'partition', + 'collection.indexBy': 'indexBy', + 'collection.countBy': 'countBy', + 'logic.firstTruthy': 'firstTruthy', + 'logic.coalesce': 'coalesce', + 'time.epochMs': 'epochMs', + 'logic.not': 'not', + 'number.clamp': 'clamp', + 'object.keys': 'objectKeys', + 'object.values': 'objectValues', + 'object.entries': 'objectEntries', + 'object.merge': 'objectMerge', + 'object.omit': 'objectOmit', + 'object.pick': 'objectPick', + 'string.trim': 'trim', + 'string.split': 'split', + 'string.replaceFirst': 'replaceFirst', + 'string.replaceAll': 'replaceAll', + 'logic.firstDefined': 'firstDefined', + 'string.coerce': 'string', +} as const satisfies Record; + +function kernPrimitiveName(id: PortableLogicPrimitiveId): string { + const name = KERN_PRIMITIVE_NAMES[id]; + if (!name) throw new Error(`KERN semantic substrate missing KERN primitive name for '${id}'.`); + return name; +} diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts new file mode 100644 index 00000000..a7e44065 --- /dev/null +++ b/packages/core/tests/semantic-substrate.test.ts @@ -0,0 +1,111 @@ +import { + buildKernSemanticSubstrate, + lookupSemanticPrimitive, + makeEnv, + type NodeContract, + semanticPrimitiveSupportSummary, +} from '../src/index.js'; + +describe('KERN semantic 
substrate', () => { + test('exports core runtime contracts as reviewable semantic operations', () => { + const substrate = buildKernSemanticSubstrate(); + + expect(substrate.schemaVersion).toBe(1); + expect(substrate.generatedBy).toBe('kern-semantic-substrate'); + expect(substrate.source).toBe('codegen-from-ts'); + + const numberType = substrate.coreTypes.find((type) => type.name === 'Number'); + expect(numberType?.strict).toBe(true); + expect(numberType?.operations.map((operation) => operation.id)).toContain('Number.divide'); + + const divide = numberType?.operations.find((operation) => operation.id === 'Number.divide'); + expect(divide?.args).toEqual(['Number', 'Number']); + expect(divide?.returns).toEqual(['Number']); + expect(divide?.fixtureCount).toBeGreaterThan(0); + expect(divide?.reviewTags).toContain('strict'); + + expect( + substrate.coreGraphEdges.find( + (edge) => + edge.from === 'Number.divide' && + edge.relation === 'returns' && + edge.to === 'Number' && + edge.operation === 'Number.divide', + ), + ).toEqual( + expect.objectContaining({ + from: 'Number.divide', + relation: 'returns', + to: 'Number', + operation: 'Number.divide', + }), + ); + }); + + test('exports portable review primitives as stable query objects', () => { + const substrate = buildKernSemanticSubstrate(); + const clamp = lookupSemanticPrimitive(substrate, 'number.clamp'); + + expect(clamp.kernName).toBe('clamp'); + expect(clamp.domain).toBe('number'); + expect(clamp.support.ts).toBe('stable'); + expect(clamp.support.python).toBe('stable'); + expect(semanticPrimitiveSupportSummary(clamp, ['ts', 'python', 'go'])).toBe('stable: ts, python; unsupported: go'); + }); + + test('throws when a review consumer asks for an unknown semantic primitive', () => { + const substrate = buildKernSemanticSubstrate(); + + expect(() => lookupSemanticPrimitive(substrate, 'number.missing' as never)).toThrow( + "KERN semantic substrate missing portable primitive 'number.missing'.", + ); + }); + + 
test('exports stdlib operation summaries for downstream review/doc consumers', () => { + const substrate = buildKernSemanticSubstrate(); + + expect(substrate.stdlibOperations.find((operation) => operation.id === 'stdlib.Text.trim')).toEqual( + expect.objectContaining({ + module: 'Text', + method: 'trim', + arity: 1, + }), + ); + expect(substrate.stdlibOperations.find((operation) => operation.id === 'stdlib.Json.stringify')).toEqual( + expect.objectContaining({ + module: 'Json', + method: 'stringify', + }), + ); + }); + + test('can include IR semantic contract summaries without touching the global registry', () => { + const fakeContract: NodeContract = { + nodeType: 'fixtureNode', + preconditions: () => true, + effects: () => ({ events: [], completion: { kind: 'normal' } }), + completion: () => ({ kind: 'normal' }), + forbiddenRewrites: ['erase fixture node'], + fixtures: [ + { + description: 'fixture node completes normally', + ir: { type: 'fixtureNode', props: {} }, + env: makeEnv(), + expected: { events: [], completion: { kind: 'normal' } }, + }, + ], + }; + + const substrate = buildKernSemanticSubstrate({ + irContracts: new Map([[fakeContract.nodeType, fakeContract]]), + }); + + expect(substrate.irContracts).toEqual([ + { + nodeType: 'fixtureNode', + forbiddenRewrites: ['erase fixture node'], + fixtureCount: 1, + }, + ]); + }); +}); diff --git a/packages/review/src/rules/suggest-kern-primitive.ts b/packages/review/src/rules/suggest-kern-primitive.ts index fb67e391..48c37d6b 100644 --- a/packages/review/src/rules/suggest-kern-primitive.ts +++ b/packages/review/src/rules/suggest-kern-primitive.ts @@ -28,10 +28,11 @@ */ import { + buildKernSemanticSubstrate, + lookupSemanticPrimitive, type PortableLogicPrimitiveId, - type PortableLogicSupport, type PortableLogicTarget, - portableLogicSupportForTarget, + semanticPrimitiveSupportSummary, } from '@kernlang/core'; import type { ArrowFunction, @@ -88,6 +89,7 @@ const ARRAY_METHODS: Record = { }; const 
PORTABLE_LOGIC_TARGETS: readonly PortableLogicTarget[] = ['ts', 'python', 'go']; +const KERN_SEMANTIC_SUBSTRATE = buildKernSemanticSubstrate(); // Node kinds whose descendants should be skipped — don't flag opportunities // inside test files, type-only files, or generated code paths by path hint. @@ -235,22 +237,7 @@ function nodeColumn(node: TsNode): number { } function portableLogicSupportSummary(id: PortableLogicPrimitiveId): string { - const bySupport: Record = { - preview: [], - stable: [], - unsupported: [], - }; - for (const target of PORTABLE_LOGIC_TARGETS) { - const support = portableLogicSupportForTarget(id, target); - bySupport[support].push(target); - } - - const parts: string[] = []; - for (const support of ['stable', 'preview', 'unsupported'] satisfies PortableLogicSupport[]) { - const targets = bySupport[support]; - if (targets.length > 0) parts.push(`${support}: ${targets.join(', ')}`); - } - return parts.join('; '); + return semanticPrimitiveSupportSummary(lookupSemanticPrimitive(KERN_SEMANTIC_SUBSTRATE, id), PORTABLE_LOGIC_TARGETS); } function portableLogicFinding( @@ -259,11 +246,12 @@ function portableLogicFinding( id: PortableLogicPrimitiveId, label: string, ): ReviewFinding { + const primitive = lookupSemanticPrimitive(KERN_SEMANTIC_SUBSTRATE, id); return finding( 'suggest-kern-primitive', 'info', 'pattern', - `JS ${label} is covered by KERN portable logic primitive \`${id}\` (${portableLogicSupportSummary(id)})`, + `JS ${label} is covered by KERN portable logic primitive \`${id}\` / \`${primitive.kernName}\` (${portableLogicSupportSummary(id)})`, ctx.filePath, node.getStartLineNumber(), nodeColumn(node), diff --git a/packages/review/tests/rules-suggest-kern-primitive.test.ts b/packages/review/tests/rules-suggest-kern-primitive.test.ts index a34dc5af..d2446e29 100644 --- a/packages/review/tests/rules-suggest-kern-primitive.test.ts +++ b/packages/review/tests/rules-suggest-kern-primitive.test.ts @@ -213,6 +213,7 @@ 
describe('suggest-kern-primitive rule', () => { 'clamp name=inverted value={{ score }} min={{ config.min }} max={{ config.max }}', ); expect(portable[0].message).toContain('number.clamp'); + expect(portable[0].message).toContain('`clamp`'); expect(portable[0].message).toContain('stable: ts, python'); }); From 7f75f40bf36b6ee880f64c0bb8063c542df38d26 Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 22:29:26 +0200 Subject: [PATCH 08/46] fix(core): harden semantic substrate typing --- packages/core/src/semantic-substrate.ts | 54 +++++++++++++++++-------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index a310e9da..64381971 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -124,19 +124,7 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp support: { ...primitive.targets }, }; }), - stdlibOperations: Object.entries(KERN_STDLIB).flatMap(([module, entries]) => - Object.entries(entries).map(([method, entry]) => ({ - id: `stdlib.${module}.${method}`, - module, - method, - arity: entry.arity, - support: { - ts: entry.ts ? 'stable' : 'unsupported', - python: entry.py ? 'stable' : 'unsupported', - go: 'unsupported', - }, - })), - ), + stdlibOperations: stdlibOperationSummaries(), irContracts: options.irContracts ? snapshotRegistry(options.irContracts).contracts.map((contract) => ({ nodeType: contract.nodeType, @@ -168,7 +156,7 @@ export function semanticPrimitiveSupportSummary( unsupported: [], }; for (const target of targets) { - bySupport[primitive.support[target] ?? 
'unsupported'].push(target); + bySupport[semanticSupportForTarget(primitive.support, target)].push(target); } const parts: string[] = []; @@ -179,11 +167,45 @@ export function semanticPrimitiveSupportSummary( return parts.join('; '); } +function semanticSupportForTarget( + support: KernSemanticSupport, + target: KernSemanticSubstrateTarget, +): PortableLogicSupport { + switch (target) { + case 'ts': + return support.ts; + case 'python': + return support.python; + case 'go': + return support.go; + } +} + +function stdlibOperationSummaries(): KernSemanticStdlibOperation[] { + return typedEntries(KERN_STDLIB).flatMap(([module, entries]) => + typedEntries(entries).map(([method, entry]) => ({ + id: `stdlib.${module}.${method}`, + module, + method, + arity: entry.arity, + support: { + ts: entry.ts ? 'stable' : 'unsupported', + python: entry.py ? 'stable' : 'unsupported', + go: 'unsupported', + }, + })), + ); +} + +function typedEntries(record: Record): Array<[string, T]> { + return Object.entries(record) as Array<[string, T]>; +} + function normalizeReturns(returns: CoreOperationReturns): readonly string[] { return typeof returns === 'string' ? 
[returns] : [...returns]; } -const KERN_PRIMITIVE_NAMES = { +const KERN_PRIMITIVE_NAMES: Record = { 'collection.has': 'includes', 'collection.count': 'count', 'collection.filter': 'filter', @@ -222,7 +244,7 @@ const KERN_PRIMITIVE_NAMES = { 'string.replaceAll': 'replaceAll', 'logic.firstDefined': 'firstDefined', 'string.coerce': 'string', -} as const satisfies Record; +}; function kernPrimitiveName(id: PortableLogicPrimitiveId): string { const name = KERN_PRIMITIVE_NAMES[id]; From b1d7cdcdb8edfb8207c3fb161f142ea8d1766730 Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 22:35:39 +0200 Subject: [PATCH 09/46] fix(core): avoid unknown stdlib entries --- packages/core/src/semantic-substrate.ts | 41 +++++++++++++++---------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index 64381971..98c12293 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -1,4 +1,4 @@ -import { KERN_STDLIB } from './codegen/kern-stdlib.js'; +import { KERN_STDLIB, type StdlibEntry } from './codegen/kern-stdlib.js'; import { PORTABLE_LOGIC_PRIMITIVE_IDS, PORTABLE_LOGIC_PRIMITIVES, @@ -182,23 +182,30 @@ function semanticSupportForTarget( } function stdlibOperationSummaries(): KernSemanticStdlibOperation[] { - return typedEntries(KERN_STDLIB).flatMap(([module, entries]) => - typedEntries(entries).map(([method, entry]) => ({ - id: `stdlib.${module}.${method}`, - module, - method, - arity: entry.arity, - support: { - ts: entry.ts ? 'stable' : 'unsupported', - python: entry.py ? 
'stable' : 'unsupported', - go: 'unsupported', - }, - })), - ); -} + const stdlib: Record> = KERN_STDLIB; + const operations: KernSemanticStdlibOperation[] = []; + + for (const module of Object.keys(stdlib)) { + const entries = stdlib[module]; + if (!entries) continue; + for (const method of Object.keys(entries)) { + const entry = entries[method]; + if (!entry) continue; + operations.push({ + id: `stdlib.${module}.${method}`, + module, + method, + arity: entry.arity, + support: { + ts: entry.ts ? 'stable' : 'unsupported', + python: entry.py ? 'stable' : 'unsupported', + go: 'unsupported', + }, + }); + } + } -function typedEntries(record: Record): Array<[string, T]> { - return Object.entries(record) as Array<[string, T]>; + return operations; } function normalizeReturns(returns: CoreOperationReturns): readonly string[] { From 2307eacf2aa67fe68daac315df4485a075aec16a Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 07:19:39 +0200 Subject: [PATCH 10/46] feat(core): add class object semantic validation --- packages/core/src/semantic-validator.ts | 468 ++++++++++++++++++++ packages/core/tests/class-semantics.test.ts | 262 +++++++++++ 2 files changed, 730 insertions(+) create mode 100644 packages/core/tests/class-semantics.test.ts diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 0651b21e..f1a59767 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -17,7 +17,10 @@ import { collectExternalImportSymbols, type ExternalImportSymbolTable } from './external-symbols.js'; import { importRegistryOf } from './import-metadata.js'; +import { parseExpression } from './parser-expression.js'; +import { splitPortableExpressionList } from './portable-expression-list.js'; import type { IRNode } from './types.js'; +import type { ValueIR } from './value-ir.js'; export interface SemanticViolation { rule: string; @@ -33,6 +36,7 @@ export interface SemanticViolation { */ export 
function validateSemantics(root: IRNode): SemanticViolation[] { const violations: SemanticViolation[] = []; + validateClassGraph(root, violations); validateNode(root, violations, [], []); return violations; } @@ -438,6 +442,470 @@ function validateNode( } } +type ClassMemberKind = 'field' | 'method' | 'getter' | 'setter'; + +interface ClassInfo { + node: IRNode; + name: string; + baseName?: string; + members: ClassMemberInfo[]; + constructors: IRNode[]; +} + +interface ClassMemberInfo { + node: IRNode; + name: string; + kind: ClassMemberKind; + static: boolean; + arity: number; +} + +const BUILTIN_CLASS_BASES = new Set(['Error']); +const BODY_EXPRESSION_PROPS = [ + 'value', + 'expr', + 'target', + 'cond', + 'on', + 'in', + 'from', + 'to', + 'initial', + 'source', + 'sources', + 'cleanup', + 'min', + 'max', +] as const; + +function validateClassGraph(root: IRNode, violations: SemanticViolation[]): void { + const classes = collectClassInfos(root); + if (classes.length === 0) return; + + const classByName = new Map(); + const visibleNames = collectVisibleClassBaseNames(root); + for (const info of classes) { + const prev = classByName.get(info.name); + if (!prev) { + classByName.set(info.name, info); + } + visibleNames.add(info.name); + } + + for (const info of classes) { + validateClassBaseReference(info, visibleNames, violations); + validateClassConstructors(info, violations); + validateClassMemberConflicts(info, violations); + validateClassSuperUsage(info, violations); + } + + validateClassInheritanceCycles(classes, classByName, violations); + validateClassOverrides(classes, classByName, violations); +} + +function collectClassInfos(root: IRNode): ClassInfo[] { + const out: ClassInfo[] = []; + walkSemanticTree(root, (node) => { + if (node.type !== 'class') return; + const name = stringProp(node, 'name'); + if (!name) return; + out.push({ + node, + name, + baseName: classBaseName(node.props?.extends), + members: collectClassMembers(node), + constructors: 
(node.children ?? []).filter((child) => child.type === 'constructor'), + }); + }); + return out; +} + +function collectClassMembers(node: IRNode): ClassMemberInfo[] { + const members: ClassMemberInfo[] = []; + for (const child of node.children ?? []) { + if (!isClassMemberNode(child)) continue; + const name = stringProp(child, 'name'); + if (!name) continue; + members.push({ + node: child, + name, + kind: child.type, + static: isTrueFlag(child.props?.static), + arity: memberArity(child), + }); + } + return members; +} + +function isClassMemberNode(node: IRNode): node is IRNode & { type: ClassMemberKind } { + return node.type === 'field' || node.type === 'method' || node.type === 'getter' || node.type === 'setter'; +} + +function validateClassBaseReference( + info: ClassInfo, + visibleNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!info.baseName) return; + if (visibleNames.has(info.baseName) || BUILTIN_CLASS_BASES.has(info.baseName)) return; + violations.push({ + rule: 'class-extends-unknown', + nodeType: 'class', + message: `Class '${info.name}' extends unknown base '${info.baseName}'. Declare or import the base class before extending it.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); +} + +function validateClassConstructors(info: ClassInfo, violations: SemanticViolation[]): void { + if (info.constructors.length <= 1) return; + for (const extra of info.constructors.slice(1)) { + violations.push({ + rule: 'class-single-constructor-only', + nodeType: 'constructor', + message: `Class '${info.name}' declares more than one constructor. KERN classes have exactly one construction path.`, + line: extra.loc?.line, + col: extra.loc?.col, + }); + } +} + +function validateClassMemberConflicts(info: ClassInfo, violations: SemanticViolation[]): void { + const seen = new Map(); + for (const member of info.members) { + const key = `${member.static ? 'static' : 'instance'}:${member.name}`; + const prev = seen.get(key) ?? 
[]; + const next = [...prev, member]; + if (isAllowedMemberGroup(next)) { + seen.set(key, next); + continue; + } + const first = prev[0] ?? member; + violations.push({ + rule: 'class-member-conflict', + nodeType: member.node.type, + message: `Class '${info.name}' has conflicting ${member.static ? 'static' : 'instance'} member '${member.name}' (${first.kind} and ${member.kind}). Use one field/method/accessor surface per name.`, + line: member.node.loc?.line, + col: member.node.loc?.col, + }); + seen.set(key, next); + } +} + +function validateClassSuperUsage(info: ClassInfo, violations: SemanticViolation[]): void { + const hasBase = Boolean(info.baseName); + for (const ctor of info.constructors) { + const callsSuper = nodeBodyCallsSuperConstructor(ctor); + if (hasBase && !callsSuper) { + violations.push({ + rule: 'class-constructor-missing-super', + nodeType: 'constructor', + message: `Class '${info.name}' extends '${info.baseName}' but its constructor does not call \`super(...)\`. Derived constructors must initialize the base class explicitly.`, + line: ctor.loc?.line, + col: ctor.loc?.col, + }); + } + if (!hasBase && nodeBodyUsesSuper(ctor)) { + violations.push({ + rule: 'class-super-without-base', + nodeType: 'constructor', + message: `Class '${info.name}' uses \`super\` but does not extend a base class.`, + line: ctor.loc?.line, + col: ctor.loc?.col, + }); + } + } + + if (!hasBase) { + for (const member of info.members) { + if (!nodeBodyUsesSuper(member.node)) continue; + violations.push({ + rule: 'class-super-without-base', + nodeType: member.node.type, + message: `Class '${info.name}' member '${member.name}' uses \`super\` but the class does not extend a base class.`, + line: member.node.loc?.line, + col: member.node.loc?.col, + }); + } + } +} + +function validateClassInheritanceCycles( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + const emitted = new Set(); + for (const info of classes) { + const 
path: string[] = []; + const seen = new Set(); + let current: ClassInfo | undefined = info; + while (current) { + if (seen.has(current.name)) { + const cycleStart = path.indexOf(current.name); + const cycleNames = path.slice(cycleStart); + const cycleKey = normalizedCycleKey(cycleNames); + const cycle = [...cycleNames, current.name].join(' -> '); + if (!emitted.has(cycleKey)) { + emitted.add(cycleKey); + violations.push({ + rule: 'class-inheritance-cycle', + nodeType: 'class', + message: `Class inheritance cycle detected: ${cycle}.`, + line: current.node.loc?.line, + col: current.node.loc?.col, + }); + } + break; + } + seen.add(current.name); + path.push(current.name); + current = current.baseName ? classByName.get(current.baseName) : undefined; + } + } +} + +function validateClassOverrides( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + for (const info of classes) { + for (const member of info.members) { + const baseMember = findBaseMember(info, member, classByName); + if (!baseMember) continue; + if (!sameOverrideKind(member, baseMember)) { + violations.push({ + rule: 'class-override-kind-mismatch', + nodeType: member.node.type, + message: `Class '${info.name}' member '${member.name}' overrides base ${baseMember.kind} with ${member.kind}. 
Overrides must preserve field/method/accessor kind.`, + line: member.node.loc?.line, + col: member.node.loc?.col, + }); + continue; + } + if (member.kind === 'method' && baseMember.kind === 'method' && member.arity !== baseMember.arity) { + violations.push({ + rule: 'class-override-arity-mismatch', + nodeType: member.node.type, + message: `Class '${info.name}' method '${member.name}' overrides a base method with ${baseMember.arity} parameter(s), but declares ${member.arity}.`, + line: member.node.loc?.line, + col: member.node.loc?.col, + }); + } + } + } +} + +function normalizedCycleKey(cycleNames: readonly string[]): string { + if (cycleNames.length === 0) return ''; + let best = cycleNames.join('\0'); + for (let index = 1; index < cycleNames.length; index++) { + const rotated = [...cycleNames.slice(index), ...cycleNames.slice(0, index)].join('\0'); + if (rotated < best) best = rotated; + } + return best; +} + +function findBaseMember( + info: ClassInfo, + member: ClassMemberInfo, + classByName: ReadonlyMap, +): ClassMemberInfo | undefined { + let current = info.baseName ? classByName.get(info.baseName) : undefined; + const visited = new Set(); + while (current) { + if (visited.has(current.name)) return undefined; + visited.add(current.name); + const found = current.members.find( + (candidate) => candidate.name === member.name && candidate.static === member.static, + ); + if (found) return found; + current = current.baseName ? 
classByName.get(current.baseName) : undefined; + } + return undefined; +} + +function sameOverrideKind(member: ClassMemberInfo, baseMember: ClassMemberInfo): boolean { + if (isAccessorPair(member, baseMember)) return true; + return member.kind === baseMember.kind; +} + +function isAccessorPair(a: ClassMemberInfo, b: ClassMemberInfo): boolean { + return (a.kind === 'getter' && b.kind === 'setter') || (a.kind === 'setter' && b.kind === 'getter'); +} + +function isAllowedMemberGroup(members: readonly ClassMemberInfo[]): boolean { + if (members.length <= 1) return true; + if (members.length > 2) return false; + if (!members.every((member) => member.kind === 'getter' || member.kind === 'setter')) return false; + return isAccessorPair(members[0], members[1]); +} + +function collectVisibleClassBaseNames(root: IRNode): Set { + const names = new Set(BUILTIN_CLASS_BASES); + walkSemanticTree(root, (node) => { + const name = stringProp(node, 'name'); + if (name && isVisibleClassBaseDeclaration(node.type)) names.add(name); + if (node.type === 'import') { + for (const binding of importLocalBindings(node)) names.add(binding.name); + } + if (node.type === 'use') { + for (const child of node.children ?? []) { + if (child.type !== 'from') continue; + if (!isUseClassBaseBinding(child)) continue; + const localName = stringProp(child, 'as') ?? stringProp(child, 'name'); + if (localName) names.add(localName); + } + } + }); + return names; +} + +function isVisibleClassBaseDeclaration(nodeType: string): boolean { + return nodeType === 'class' || nodeType === 'error'; +} + +function isUseClassBaseBinding(node: IRNode): boolean { + const kind = stringProp(node, 'kind'); + return !kind || kind === 'class' || kind === 'error'; +} + +function memberArity(node: IRNode): number { + const childParams = node.children?.filter((child) => child.type === 'param').length ?? 
0; + if (childParams > 0) return childParams; + const params = node.props?.params; + if (typeof params !== 'string' || !params.trim()) return 0; + try { + return splitPortableExpressionList(params, `${node.type} params=`).length; + } catch { + return 0; + } +} + +function nodeBodyCallsSuperConstructor(node: IRNode): boolean { + return nodeBodyExpressions(node).some((expr) => { + try { + return valueIRCallsSuperConstructor(parseExpression(expr)); + } catch { + return false; + } + }); +} + +function nodeBodyUsesSuper(node: IRNode): boolean { + return nodeBodyExpressions(node).some((expr) => { + try { + return valueIRUsesSuper(parseExpression(expr)); + } catch { + return false; + } + }); +} + +function nodeBodyExpressions(node: IRNode): string[] { + const out: string[] = []; + walkSemanticTreeUntil(node, (candidate) => { + for (const prop of BODY_EXPRESSION_PROPS) { + const text = expressionPropText(candidate.props?.[prop]); + if (text) out.push(text); + } + return candidate !== node && candidate.type === 'class' ? 
'stop' : 'continue'; + }); + return out; +} + +function expressionPropText(value: unknown): string | undefined { + if (typeof value === 'string') return value; + if (isExpressionObject(value)) return value.code; + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + return undefined; +} + +function valueIRCallsSuperConstructor(value: ValueIR): boolean { + if (value.kind === 'call' && value.callee.kind === 'ident' && value.callee.name === 'super') return true; + if (value.kind === 'lambda') return false; + return valueIRChildren(value).some(valueIRCallsSuperConstructor); +} + +function valueIRUsesSuper(value: ValueIR): boolean { + if (value.kind === 'ident' && value.name === 'super') return true; + return valueIRChildren(value).some(valueIRUsesSuper); +} + +function valueIRChildren(value: ValueIR): ValueIR[] { + switch (value.kind) { + case 'call': + return [value.callee, ...value.args]; + case 'member': + return [value.object]; + case 'index': + return [value.object, value.index]; + case 'tmplLit': + return [...value.expressions]; + case 'arrayLit': + return [...value.items]; + case 'objectLit': + return value.entries.map((entry) => ('kind' in entry ? 
entry.argument : entry.value)); + case 'unary': + case 'await': + case 'new': + case 'spread': + case 'propagate': + return [value.argument]; + case 'typeAssert': + case 'nonNull': + return [value.expression]; + case 'binary': + return [value.left, value.right]; + case 'conditional': + return [value.test, value.consequent, value.alternate]; + case 'lambda': + return [value.body]; + case 'numLit': + case 'strLit': + case 'boolLit': + case 'nullLit': + case 'undefLit': + case 'regexLit': + case 'ident': + return []; + } +} + +function classBaseName(value: unknown): string | undefined { + if (typeof value !== 'string' || !value.trim()) return undefined; + const match = /^([A-Za-z_$][\w$]*)/.exec(value.trim()); + return match?.[1]; +} + +function stringProp(node: IRNode, prop: string): string | undefined; +function stringProp(props: IRNode['props'] | undefined, prop: string): string | undefined; +function stringProp(nodeOrProps: IRNode | IRNode['props'] | undefined, prop: string): string | undefined { + const props = nodeOrProps && 'type' in nodeOrProps ? nodeOrProps.props : nodeOrProps; + const value = props ? (props as Record)[prop] : undefined; + return typeof value === 'string' && value.length > 0 ? value : undefined; +} + +function walkSemanticTree(node: IRNode, visit: (node: IRNode) => void): void { + visit(node); + for (const child of node.children ?? []) walkSemanticTree(child, visit); +} + +function walkSemanticTreeUntil(node: IRNode, visit: (node: IRNode) => 'continue' | 'stop'): void { + if (visit(node) === 'stop') return; + for (const child of node.children ?? 
[]) walkSemanticTreeUntil(child, visit); +} + +function isExpressionObject(value: unknown): value is { code: string } { + return ( + typeof value === 'object' && + value !== null && + (value as { readonly __expr?: unknown }).__expr === true && + typeof (value as { readonly code?: unknown }).code === 'string' + ); +} + interface ExportBinding { source: string; alias?: string; diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts new file mode 100644 index 00000000..0ad7c116 --- /dev/null +++ b/packages/core/tests/class-semantics.test.ts @@ -0,0 +1,262 @@ +import { parseDocumentWithDiagnostics } from '../src/parser.js'; +import { validateSemantics } from '../src/semantic-validator.js'; + +function violationsFor(source: string) { + return validateSemantics(parseDocumentWithDiagnostics(source).root); +} + +function rulesFor(source: string): string[] { + return violationsFor(source).map((violation) => violation.rule); +} + +describe('semantic-validator — class object model', () => { + test('accepts valid inheritance with explicit constructor super and method override', () => { + const source = [ + 'class name=Entity', + ' field name=id type=string', + ' constructor', + ' param name=id type=string', + ' handler lang=kern', + ' assign target="this.id" value="id"', + ' method name=kind returns=string', + ' handler lang=kern', + ' return value="\'entity\'"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler lang=kern', + ' do value="super(id)"', + ' method name=kind returns=string', + ' handler lang=kern', + ' return value="`user/${super.kind()}`"', + ].join('\n'); + + expect(rulesFor(source)).toEqual([]); + }); + + test('accepts imported base class names as visible extension targets', () => { + const source = [ + 'import from="./base" names=BaseEntity', + 'class name=User extends=BaseEntity', + ' field name=id type=string', + ].join('\n'); + + 
expect(rulesFor(source)).not.toContain('class-extends-unknown'); + }); + + test('accepts external package imports as visible extension targets', () => { + const source = [ + 'import from="@kern/base" registry=npm names=ExternalBase', + 'class name=User extends=ExternalBase', + ' field name=id type=string', + ].join('\n'); + + expect(rulesFor(source)).not.toContain('class-extends-unknown'); + }); + + test('reports unknown base class names', () => { + const violations = violationsFor('class name=User extends=MissingBase'); + + expect(violations).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + rule: 'class-extends-unknown', + message: expect.stringContaining("extends unknown base 'MissingBase'"), + }), + ]), + ); + }); + + test('reports non-class declarations used as superclass targets', () => { + const violations = violationsFor(['interface name=Shape', 'class name=Circle extends=Shape'].join('\n')); + + expect(violations.map((violation) => violation.rule)).toContain('class-extends-unknown'); + }); + + test('reports inheritance cycles across known local classes', () => { + const violations = violationsFor( + ['class name=A extends=B', 'class name=B extends=C', 'class name=C extends=A'].join('\n'), + ); + + expect(violations).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + rule: 'class-inheritance-cycle', + message: expect.stringContaining('A -> B -> C -> A'), + }), + ]), + ); + expect(violations.filter((violation) => violation.rule === 'class-inheritance-cycle')).toHaveLength(1); + }); + + test('reports duplicate constructors', () => { + const violations = violationsFor( + [ + 'class name=User', + ' constructor', + ' handler lang=kern', + ' do value="1"', + ' constructor', + ' handler lang=kern', + ' do value="2"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-single-constructor-only'); + }); + + test('reports class member conflicts while allowing getter/setter pairs', () => { + 
const conflict = violationsFor( + [ + 'class name=Bad', + ' field name=value type=number', + ' method name=value returns=number', + ' handler lang=kern', + ' return value=1', + ].join('\n'), + ); + expect(conflict.map((violation) => violation.rule)).toContain('class-member-conflict'); + + const accessorPair = rulesFor( + [ + 'class name=Good', + ' getter name=value returns=number', + ' handler lang=kern', + ' return value="this._value"', + ' setter name=value', + ' param name=next type=number', + ' handler lang=kern', + ' assign target="this._value" value="next"', + ].join('\n'), + ); + expect(accessorPair).not.toContain('class-member-conflict'); + }); + + test('reports duplicate accessors for the same member name', () => { + const violations = violationsFor( + [ + 'class name=Bad', + ' getter name=value returns=number', + ' handler lang=kern', + ' return value=1', + ' setter name=value', + ' param name=next type=number', + ' handler lang=kern', + ' assign target="this._value" value="next"', + ' setter name=value', + ' param name=other type=number', + ' handler lang=kern', + ' assign target="this._value" value="other"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-member-conflict'); + }); + + test('reports derived constructors that omit super', () => { + const violations = violationsFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' assign target="this.name" value="\'Ada\'"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-constructor-missing-super'); + }); + + test('does not accept delayed super calls inside constructor lambdas', () => { + const violations = violationsFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="(() => super())"', + ].join('\n'), + ); + + expect(violations.map((violation) => 
violation.rule)).toContain('class-constructor-missing-super'); + }); + + test('reports super usage in classes without a base', () => { + const violations = violationsFor( + [ + 'class name=User', + ' method name=kind returns=string', + ' handler lang=kern', + ' return value="super.kind()"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-super-without-base'); + }); + + test('finds super usage in control-flow expression props', () => { + const violations = violationsFor( + [ + 'class name=User', + ' method name=check returns=void', + ' handler lang=kern', + ' if cond="super.ready()"', + ' do value="1"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-super-without-base'); + }); + + test('does not attribute nested class super usage to the outer class', () => { + const source = [ + 'class name=Base', + 'class name=Outer', + ' method name=install returns=void', + ' handler lang=kern', + ' class name=Inner extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ].join('\n'); + + expect(rulesFor(source)).not.toContain('class-super-without-base'); + }); + + test('reports override kind and arity mismatches', () => { + const source = [ + 'class name=Base', + ' method name=load returns=string', + ' param name=id type=string', + ' handler lang=kern', + ' return value=id', + ' field name=status type=string', + 'class name=Derived extends=Base', + ' method name=load returns=string', + ' handler lang=kern', + ' return value="\'missing id\'"', + ' method name=status returns=string', + ' handler lang=kern', + ' return value="\'ok\'"', + ].join('\n'); + const rules = rulesFor(source); + + expect(rules).toContain('class-override-arity-mismatch'); + expect(rules).toContain('class-override-kind-mismatch'); + }); + + test('override validation terminates when an inheritance cycle has no matching member', () => { + const rules = rulesFor( + [ + 'class name=A 
extends=B', + ' method name=onlyA returns=number', + ' handler lang=kern', + ' return value=1', + 'class name=B extends=C', + 'class name=C extends=B', + ].join('\n'), + ); + + expect(rules).toContain('class-inheritance-cycle'); + }); +}); From 2d6a94e2ac553fd481a77f6cd2366572b6544cb9 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 08:01:24 +0200 Subject: [PATCH 11/46] feat(core): harden class runtime setters --- packages/core/src/core-runtime/index.ts | 59 ++++++++- packages/core/src/semantic-validator.ts | 2 + packages/core/tests/core-runtime.test.ts | 159 +++++++++++++++++++++++ 3 files changed, 217 insertions(+), 3 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index b50efa91..920414c2 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -16,6 +16,7 @@ import { import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; +const ACTIVE_INSTANCE_SETTERS = new WeakMap>(); export type KernValue = | { kind: 'null' } @@ -818,6 +819,54 @@ function evalClassMember(object: KernClassValue, property: string): KernValue { return kUndefined(); } +function assignInstanceMember(object: KernInstanceValue, property: string, value: KernValue): void { + const setter = findClassMember(object.classValue, 'setter', property); + if (setter) { + callSetterBody(object, setter.node, setter.owner, property, value); + return; + } + if (findClassMember(object.classValue, 'getter', property)) { + throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + } + object.fields[property] = value; +} + +function assignSuperMember(object: KernSuperValue, property: string, value: KernValue): void { + const base = resolveBaseClass(object.ownerClass); + if (!base) throw new Error(`KERN core runtime class ${object.ownerClass.name} has no base class.`); + const setter = findClassMember(base, 'setter', 
property); + if (setter) { + callSetterBody(object.receiver, setter.node, setter.owner, property, value); + return; + } + if (findClassMember(base, 'getter', property)) { + throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + } + object.receiver.fields[property] = value; +} + +function callSetterBody( + receiver: KernInstanceValue, + setterNode: IRNode, + ownerClass: KernClassValue, + property: string, + value: KernValue, +): void { + const key = `${ownerClass.name}.${property}`; + const activeSetters = ACTIVE_INSTANCE_SETTERS.get(receiver) ?? new Set(); + if (activeSetters.has(key)) { + throw new Error(`KERN core runtime recursive setter assignment: ${key}.`); + } + activeSetters.add(key); + ACTIVE_INSTANCE_SETTERS.set(receiver, activeSetters); + try { + callClassMemberBody(setterNode, ownerClass, receiver, [value]); + } finally { + activeSetters.delete(key); + if (activeSetters.size === 0) ACTIVE_INSTANCE_SETTERS.delete(receiver); + } +} + function callBoundMethodValue( method: KernBoundMethodValue, args: readonly KernValue[], @@ -875,7 +924,7 @@ function callClassMemberBody( function findClassMember( klass: KernClassValue, - type: 'method' | 'getter', + type: 'method' | 'getter' | 'setter', name: string, staticOnly = false, ): { node: IRNode; owner: KernClassValue } | undefined { @@ -968,7 +1017,11 @@ function assignRuntimeTarget(target: string, value: KernValue, env: CoreRuntimeE if (parsed.kind === 'member') { const object = evalValueIR(parsed.object, env); if (object.kind === 'instance') { - object.fields[parsed.property] = value; + assignInstanceMember(object, parsed.property, value); + return; + } + if (object.kind === 'super') { + assignSuperMember(object, parsed.property, value); return; } if (object.kind === 'record') { @@ -1063,7 +1116,7 @@ function valueIRCallsSuper(value: ValueIR): boolean { case 'conditional': return valueIRCallsSuper(value.test) || valueIRCallsSuper(value.consequent) || 
valueIRCallsSuper(value.alternate); case 'lambda': - return valueIRCallsSuper(value.body); + return false; case 'numLit': case 'strLit': case 'boolLit': diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index f1a59767..3745d163 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -871,6 +871,8 @@ function valueIRChildren(value: ValueIR): ValueIR[] { case 'ident': return []; } + const exhaustive: never = value; + return exhaustive; } function classBaseName(value: unknown): string | undefined { diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 92bf9739..7977b290 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -483,6 +483,165 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User("u1").id', env))).toBe('u1'); }); + + test('dispatches instance assignment through setters', () => { + const root = parse( + [ + 'class name=Gauge', + ' field name=_value type=number value={{ 0 }}', + ' setter name=value', + ' param name=next type=number', + ' handler', + ' assign target="this._value" value="next * 2"', + ' getter name=value returns=number', + ' handler', + ' return value="this._value"', + 'fn name=setGauge returns=number', + ' handler', + ' let name=g value="new Gauge()"', + ' assign target="g.value" value="7"', + ' return value="g.value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setGauge()', env))).toBe(14); + }); + + test('dispatches inherited and super assignment through setters', () => { + const root = parse( + [ + 'class name=Base', + ' field name=_value type=number value={{ 0 }}', + ' setter name=value', + ' param name=next type=number', + ' handler', + ' assign target="this._value" value="next + 1"', + ' getter name=value 
returns=number', + ' handler', + ' return value="this._value"', + 'class name=Derived extends=Base', + ' method name=setViaSuper returns=number', + ' param name=next type=number', + ' handler', + ' assign target="super.value" value="next"', + ' return value="this.value"', + 'fn name=setDerived returns=number', + ' handler', + ' let name=d value="new Derived()"', + ' assign target="d.value" value="4"', + ' return value="d.setViaSuper(9) + d.value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setDerived()', env))).toBe(20); + }); + + test('supports setter-only properties and rejects getter-only assignment', () => { + const root = parse( + [ + 'class name=WriteOnly', + ' field name=stored type=number value={{ 0 }}', + ' setter name=value', + ' param name=next type=number', + ' handler', + ' assign target="this.stored" value="next"', + 'class name=ReadOnly', + ' getter name=value returns=number', + ' handler', + ' return value="1"', + 'fn name=setWriteOnly returns=number', + ' handler', + ' let name=w value="new WriteOnly()"', + ' assign target="w.value" value="5"', + ' return value="w.stored"', + 'fn name=setReadOnly returns=number', + ' handler', + ' let name=r value="new ReadOnly()"', + ' assign target="r.value" value="5"', + ' return value="r.value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setWriteOnly()', env))).toBe(5); + expect(() => evalCoreExpression('setReadOnly()', env)).toThrow('cannot assign getter-only property: value'); + }); + + test('rejects recursive setter assignment', () => { + const root = parse( + [ + 'class name=Loop', + ' setter name=value', + ' param name=next type=number', + ' handler', + ' assign target="this.value" value="next"', + 'fn name=setLoop returns=number', + ' handler', + ' let name=loop value="new Loop()"', + ' assign target="loop.value" value="5"', + ' 
return value="0"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('setLoop()', env)).toThrow('recursive setter assignment: Loop.value'); + }); + + test('allows chained setters for different properties', () => { + const root = parse( + [ + 'class name=Chain', + ' field name=_b type=number value={{ 0 }}', + ' setter name=a', + ' param name=next type=number', + ' handler', + ' assign target="this.b" value="next + 1"', + ' setter name=b', + ' param name=next type=number', + ' handler', + ' assign target="this._b" value="next * 2"', + ' getter name=b returns=number', + ' handler', + ' return value="this._b"', + 'fn name=setChain returns=number', + ' handler', + ' let name=chain value="new Chain()"', + ' assign target="chain.a" value="4"', + ' return value="chain.b"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setChain()', env))).toBe(10); + }); + + test('does not count delayed lambda super calls as constructor initialization', () => { + const root = parse( + [ + 'class name=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' do value="(() => super(id))"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User("u1")', env)).toThrow('missing required argument: id'); + }); }); describe('KERN core runtime functions', () => { From cdcb60a9dd0bdac04fc6ae4b4cca3c2276388ec9 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 13:55:18 +0200 Subject: [PATCH 12/46] feat(core): add static class member runtime --- packages/core/src/core-runtime/index.ts | 201 ++++++++++++++++++++--- packages/core/tests/core-runtime.test.ts | 123 ++++++++++++++ 2 files changed, 305 
insertions(+), 19 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 920414c2..8fc48a60 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -17,6 +17,7 @@ import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; const ACTIVE_INSTANCE_SETTERS = new WeakMap>(); +const ACTIVE_CLASS_SETTERS = new WeakMap>(); export type KernValue = | { kind: 'null' } @@ -52,6 +53,7 @@ export interface KernClassValue { name: string; node: IRNode; env: CoreRuntimeEnv; + staticFields: Record; } export interface KernInstanceValue { @@ -71,9 +73,9 @@ export interface KernBoundMethodValue { export interface KernSuperValue { kind: 'super'; - receiver: KernInstanceValue; + receiver: KernInstanceValue | KernClassValue; ownerClass: KernClassValue; - mode: 'constructor' | 'method'; + mode: 'constructor' | 'method' | 'static'; } export interface RuntimeParam { @@ -295,6 +297,7 @@ function executeNode(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { case 'class': { const klass = makeClass(node, env); env.define(klass.name, klass); + initializeClassStaticFields(klass); return { kind: 'normal', value: kUndefined() }; } case 'assign': @@ -710,9 +713,22 @@ function makeClass(node: IRNode, env: CoreRuntimeEnv): KernClassValue { name: requiredString(node.props?.name, 'class name='), node, env, + staticFields: createRecordEntries(), }); } +function initializeClassStaticFields(klass: KernClassValue): void { + for (const field of runtimeChildNodes(klass.node, 'field')) { + if (field.props?.static !== true && field.props?.static !== 'true') continue; + const name = requiredString(field.props?.name, 'field name='); + const value = + Object.hasOwn(field.props ?? {}, 'value') || Object.hasOwn(field.props ?? {}, 'default') + ? 
evalCoreExpression(runtimeFieldInitializerExpr(field), classStaticEnv(klass)) + : kUndefined(); + klass.staticFields[name] = value; + } +} + function constructClassValue(klass: KernClassValue, args: readonly KernValue[]): KernInstanceValue { const instance = brandValue({ kind: 'instance' as const, @@ -791,6 +807,7 @@ function evalInstanceMember(object: KernInstanceValue, property: string): KernVa function evalSuperMember(object: KernSuperValue, property: string): KernValue { const base = resolveBaseClass(object.ownerClass); if (!base) return kUndefined(); + if (object.receiver.kind === 'class') return evalClassMemberFrom(base, property, object.receiver); const getter = findClassMember(base, 'getter', property); if (getter) return callClassMemberBody(getter.node, getter.owner, object.receiver, []).value; const method = findClassMember(base, 'method', property); @@ -808,15 +825,23 @@ function evalSuperMember(object: KernSuperValue, property: string): KernValue { } function evalClassMember(object: KernClassValue, property: string): KernValue { - const method = findClassMember(object, 'method', property, true); + return evalClassMemberFrom(object, property, object); +} + +function evalClassMemberFrom(owner: KernClassValue, property: string, receiver: KernClassValue): KernValue { + if (Object.hasOwn(owner.staticFields, property)) return owner.staticFields[property] ?? 
kUndefined(); + const getter = findOwnClassMember(owner, 'getter', property, true); + if (getter) return callStaticClassMemberBody(getter.node, getter.owner, receiver, []).value; + const method = findOwnClassMember(owner, 'method', property, true); if (method) { return brandValue({ - kind: 'builtin', - name: `${object.name}.${property}`, - call: (args) => callClassMemberBody(method.node, method.owner, undefined, args).value, + kind: 'builtin' as const, + name: `${receiver.name}.${property}`, + call: (args) => callStaticClassMemberBody(method.node, method.owner, receiver, args).value, }); } - return kUndefined(); + const base = resolveBaseClass(owner); + return base ? evalClassMemberFrom(base, property, receiver) : kUndefined(); } function assignInstanceMember(object: KernInstanceValue, property: string, value: KernValue): void { @@ -834,6 +859,10 @@ function assignInstanceMember(object: KernInstanceValue, property: string, value function assignSuperMember(object: KernSuperValue, property: string, value: KernValue): void { const base = resolveBaseClass(object.ownerClass); if (!base) throw new Error(`KERN core runtime class ${object.ownerClass.name} has no base class.`); + if (object.receiver.kind === 'class') { + assignClassMemberFrom(base, object.receiver, property, value); + return; + } const setter = findClassMember(base, 'setter', property); if (setter) { callSetterBody(object.receiver, setter.node, setter.owner, property, value); @@ -845,6 +874,36 @@ function assignSuperMember(object: KernSuperValue, property: string, value: Kern object.receiver.fields[property] = value; } +function assignClassMember(object: KernClassValue, property: string, value: KernValue): void { + assignClassMemberFrom(object, object, property, value); +} + +function assignClassMemberFrom( + owner: KernClassValue, + receiver: KernClassValue, + property: string, + value: KernValue, +): void { + if (Object.hasOwn(owner.staticFields, property)) { + receiver.staticFields[property] = value; + 
return; + } + const setter = findOwnClassMember(owner, 'setter', property, true); + if (setter) { + callStaticSetterBody(receiver, setter.node, setter.owner, property, value); + return; + } + if (findOwnClassMember(owner, 'getter', property, true)) { + throw new Error(`KERN core runtime cannot assign getter-only static property: ${property}.`); + } + const base = resolveBaseClass(owner); + if (base) { + assignClassMemberFrom(base, receiver, property, value); + return; + } + receiver.staticFields[property] = value; +} + function callSetterBody( receiver: KernInstanceValue, setterNode: IRNode, @@ -867,6 +926,28 @@ function callSetterBody( } } +function callStaticSetterBody( + receiver: KernClassValue, + setterNode: IRNode, + ownerClass: KernClassValue, + property: string, + value: KernValue, +): void { + const key = `${ownerClass.name}.${property}`; + const activeSetters = ACTIVE_CLASS_SETTERS.get(receiver) ?? new Set(); + if (activeSetters.has(key)) { + throw new Error(`KERN core runtime recursive static setter assignment: ${key}.`); + } + activeSetters.add(key); + ACTIVE_CLASS_SETTERS.set(receiver, activeSetters); + try { + callStaticClassMemberBody(setterNode, ownerClass, receiver, [value]); + } finally { + activeSetters.delete(key); + if (activeSetters.size === 0) ACTIVE_CLASS_SETTERS.delete(receiver); + } +} + function callBoundMethodValue( method: KernBoundMethodValue, args: readonly KernValue[], @@ -878,6 +959,9 @@ function callSuperConstructor(value: KernSuperValue, args: readonly KernValue[]) if (value.mode !== 'constructor') { throw new Error('KERN core runtime super(...) is only valid inside a constructor.'); } + if (value.receiver.kind !== 'instance') { + throw new Error('KERN core runtime super(...) 
requires an instance receiver.'); + } const base = resolveBaseClass(value.ownerClass); if (!base) throw new Error(`KERN core runtime class ${value.ownerClass.name} has no base class.`); initializeClassLayer(value.receiver, base, args, true); @@ -922,6 +1006,56 @@ function callClassMemberBody( return { value: completion.value, env: callEnv }; } +function callStaticClassMemberBody( + memberNode: IRNode, + ownerClass: KernClassValue, + receiver: KernClassValue, + args: readonly KernValue[], +): { value: KernValue; env: CoreRuntimeEnv } { + const callEnv = ownerClass.env.child(); + callEnv.define('this', receiver); + if (resolveBaseClass(ownerClass)) { + callEnv.define( + 'super', + brandValue({ + kind: 'super', + receiver, + ownerClass, + mode: 'static', + }), + ); + } + const params = runtimeParams(memberNode); + validateRuntimeArgs(`${ownerClass.name}.${memberNode.type}`, params, args); + params.forEach((param, index) => { + const provided = args[index]; + const value = + provided === undefined || (provided.kind === 'undefined' && param.defaultExpr) + ? param.defaultExpr + ? evalCoreExpression(param.defaultExpr, callEnv) + : kUndefined() + : provided; + callEnv.define(param.name, value); + }); + const completion = executeSequence(runtimeFunctionBody(memberNode), callEnv); + return { value: completion.value, env: callEnv }; +} + +function findOwnClassMember( + klass: KernClassValue, + type: 'method' | 'getter' | 'setter', + name: string, + staticOnly = false, +): { node: IRNode; owner: KernClassValue } | undefined { + for (const child of klass.node.children ?? 
[]) { + if (child.type !== type || child.props?.name !== name) continue; + const isStatic = child.props?.static === true || child.props?.static === 'true'; + if (staticOnly !== isStatic) continue; + return { node: child, owner: klass }; + } + return undefined; +} + function findClassMember( klass: KernClassValue, type: 'method' | 'getter' | 'setter', @@ -958,6 +1092,23 @@ function classThisEnv(klass: KernClassValue, receiver: KernInstanceValue): CoreR return env; } +function classStaticEnv(klass: KernClassValue): CoreRuntimeEnv { + const env = klass.env.child(); + env.define('this', klass); + if (resolveBaseClass(klass)) { + env.define( + 'super', + brandValue({ + kind: 'super', + receiver: klass, + ownerClass: klass, + mode: 'static', + }), + ); + } + return env; +} + function makeFunction(node: IRNode, env: CoreRuntimeEnv): KernFunctionValue { return brandValue({ kind: 'function', @@ -1028,6 +1179,10 @@ function assignRuntimeTarget(target: string, value: KernValue, env: CoreRuntimeE object.entries[parsed.property] = value; return; } + if (object.kind === 'class') { + assignClassMember(object, parsed.property, value); + return; + } throw new Error(`KERN core runtime cannot assign member on ${object.kind}.`); } if (parsed.kind === 'index') { @@ -1237,6 +1392,10 @@ function isNullish(value: KernValue): boolean { } function isKernValue(value: unknown): value is KernValue { + return isKernValueShape(value, new WeakSet()); +} + +function isKernValueShape(value: unknown, seen: WeakSet): value is KernValue { if ( !isPlainRecord(value) || (value as { [KERN_VALUE_BRAND]?: true })[KERN_VALUE_BRAND] !== true || @@ -1244,6 +1403,8 @@ function isKernValue(value: unknown): value is KernValue { ) { return false; } + if (seen.has(value)) return true; + seen.add(value); switch (value.kind) { case 'null': case 'undefined': @@ -1259,13 +1420,13 @@ function isKernValue(value: unknown): value is KernValue { hasOnlyKeys(value, ['kind', 'items']) && Array.isArray(value.items) && 
!hasArrayHoles(value.items) && - value.items.every(isKernValue) + value.items.every((item) => isKernValueShape(item, seen)) ); case 'record': return ( hasOnlyKeys(value, ['kind', 'entries']) && isPlainRecord(value.entries) && - Object.values(value.entries).every(isKernValue) + Object.values(value.entries).every((entry) => isKernValueShape(entry, seen)) ); case 'function': return ( @@ -1283,38 +1444,40 @@ function isKernValue(value: unknown): value is KernValue { ); case 'class': return ( - hasOnlyKeys(value, ['kind', 'name', 'node', 'env']) && + hasOnlyKeys(value, ['kind', 'name', 'node', 'env', 'staticFields']) && typeof value.name === 'string' && isPlainRecord(value.node) && - value.env instanceof CoreRuntimeEnv + value.env instanceof CoreRuntimeEnv && + isPlainRecord(value.staticFields) && + Object.values(value.staticFields).every((entry) => isKernValueShape(entry, seen)) ); case 'instance': return ( hasOnlyKeys(value, ['kind', 'classValue', 'fields', 'initializedClasses']) && - isKernValue(value.classValue) && + isKernValueShape(value.classValue, seen) && value.classValue.kind === 'class' && isPlainRecord(value.fields) && - Object.values(value.fields).every(isKernValue) && + Object.values(value.fields).every((entry) => isKernValueShape(entry, seen)) && value.initializedClasses instanceof Set ); case 'bound-method': return ( hasOnlyKeys(value, ['kind', 'name', 'receiver', 'methodNode', 'ownerClass']) && typeof value.name === 'string' && - isKernValue(value.receiver) && + isKernValueShape(value.receiver, seen) && value.receiver.kind === 'instance' && isPlainRecord(value.methodNode) && - isKernValue(value.ownerClass) && + isKernValueShape(value.ownerClass, seen) && value.ownerClass.kind === 'class' ); case 'super': return ( hasOnlyKeys(value, ['kind', 'receiver', 'ownerClass', 'mode']) && - isKernValue(value.receiver) && - value.receiver.kind === 'instance' && - isKernValue(value.ownerClass) && + isKernValueShape(value.receiver, seen) && + (value.receiver.kind === 
'instance' || value.receiver.kind === 'class') && + isKernValueShape(value.ownerClass, seen) && value.ownerClass.kind === 'class' && - (value.mode === 'constructor' || value.mode === 'method') + (value.mode === 'constructor' || value.mode === 'method' || value.mode === 'static') ); default: return false; diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 7977b290..4d262e66 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -389,6 +389,129 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User().label()', env))).toBe('user/entity:base:Ada'); }); + test('executes static fields getters methods and inherited static receiver dispatch', () => { + const root = parse( + [ + 'class name=Base', + ' field name=count type=number static=true value={{ 1 }}', + ' field name=seed type=number static=true value={{ 2 }}', + ' getter name=label static=true returns=string', + ' handler', + ' return value="`count=${this.count}`"', + ' method name=bump static=true returns=number', + ' param name=step type=number value={{ 1 }}', + ' handler', + ' assign target="this.count" value="this.count + step"', + ' return value="this.count"', + ' method name=tag static=true returns=string', + ' handler', + ' return value="\'base\'"', + 'class name=Derived extends=Base', + ' field name=own type=number static=true value={{ this.count + 9 }}', + ' field name=fromBase type=number static=true value={{ super.seed + this.count }}', + ' method name=tag static=true returns=string', + ' handler', + ' return value="`derived/${super.tag()}/${this.own}`"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('Base.count', env))).toBe(1); + expect(toHostValue(evalCoreExpression('Derived.count', env))).toBe(1); + expect(toHostValue(evalCoreExpression('Derived.own', env))).toBe(10); + 
expect(toHostValue(evalCoreExpression('Derived.fromBase', env))).toBe(3); + expect(toHostValue(evalCoreExpression('Derived.label', env))).toBe('count=1'); + expect(toHostValue(evalCoreExpression('Derived.tag()', env))).toBe('derived/base/10'); + expect(toHostValue(evalCoreExpression('Derived.bump(4)', env))).toBe(5); + expect(toHostValue(evalCoreExpression('Derived.count', env))).toBe(5); + expect(toHostValue(evalCoreExpression('Base.count', env))).toBe(1); + }); + + test('dispatches static assignment through setters and rejects getter-only static assignment', () => { + const root = parse( + [ + 'class name=Gauge', + ' field name=_value type=number static=true value={{ 0 }}', + ' setter name=value static=true', + ' param name=next type=number', + ' handler', + ' assign target="this._value" value="next * 3"', + ' getter name=value static=true returns=number', + ' handler', + ' return value="this._value"', + 'class name=ReadOnly', + ' getter name=value static=true returns=number', + ' handler', + ' return value="1"', + 'class name=Dual', + ' field name=value type=number value={{ 2 }}', + ' field name=value type=number static=true value={{ 1 }}', + 'class name=ParentReadOnly', + ' getter name=value static=true returns=number', + ' handler', + ' return value="1"', + 'class name=ChildShadow extends=ParentReadOnly', + ' field name=value type=number static=true value={{ 2 }}', + 'fn name=setGaugeStatic returns=number', + ' handler', + ' assign target="Gauge.value" value="7"', + ' return value="Gauge.value"', + 'fn name=setReadOnlyStatic returns=number', + ' handler', + ' assign target="ReadOnly.value" value="7"', + ' return value="ReadOnly.value"', + 'fn name=setDualStatic returns=number', + ' handler', + ' assign target="Dual.value" value="8"', + ' return value="new Dual().value"', + 'fn name=setChildShadowStatic returns=number', + ' handler', + ' assign target="ChildShadow.value" value="3"', + ' return value="ChildShadow.value"', + ].join('\n'), + ); + const env = 
createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setGaugeStatic()', env))).toBe(21); + expect(toHostValue(evalCoreExpression('setDualStatic()', env))).toBe(2); + expect(toHostValue(evalCoreExpression('Dual.value', env))).toBe(8); + expect(toHostValue(evalCoreExpression('setChildShadowStatic()', env))).toBe(3); + expect(toHostValue(evalCoreExpression('ParentReadOnly.value', env))).toBe(1); + expect(() => evalCoreExpression('setReadOnlyStatic()', env)).toThrow( + 'cannot assign getter-only static property: value', + ); + }); + + test('rejects recursive static setter assignment', () => { + const root = parse( + [ + 'class name=Loop', + ' setter name=value static=true', + ' param name=next type=number', + ' handler', + ' assign target="this.value" value="next"', + 'fn name=setLoopStatic returns=number', + ' handler', + ' assign target="Loop.value" value="5"', + ' return value="0"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('setLoopStatic()', env)).toThrow('recursive static setter assignment: Loop.value'); + }); + + test('accepts self-referential static fields as branded KERN values', () => { + const root = parse(['class name=SelfRef', ' field name=self static=true value={{ this }}'].join('\n')); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(fromHostValue(env.lookup('SelfRef'))).toBe(env.lookup('SelfRef')); + }); + test('executes derived constructors with super constructor arguments', () => { const root = parse( [ From 82c4581901ef35e085560ce0c5c3d8db01542d05 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 14:24:06 +0200 Subject: [PATCH 13/46] feat(core): enforce constructor super discipline --- packages/core/src/core-runtime/index.ts | 137 ++++++------ packages/core/src/semantic-validator.ts | 232 ++++++++++++++++++-- packages/core/tests/class-semantics.test.ts | 138 ++++++++++++ 
packages/core/tests/core-runtime.test.ts | 85 ++++++- 4 files changed, 497 insertions(+), 95 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 8fc48a60..3df3285c 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -18,6 +18,7 @@ import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; const ACTIVE_INSTANCE_SETTERS = new WeakMap>(); const ACTIVE_CLASS_SETTERS = new WeakMap>(); +const ACTIVE_CONSTRUCTORS = new WeakMap(); export type KernValue = | { kind: 'null' } @@ -84,6 +85,11 @@ export interface RuntimeParam { defaultExpr?: string; } +interface RuntimeConstructionFrame { + ownerClass: KernClassValue; + superCalled: boolean; +} + export type CoreCompletion = { kind: 'normal'; value: KernValue } | { kind: 'return'; value: KernValue }; export interface CoreRuntimeResult { @@ -375,8 +381,11 @@ function evalValueIR(node: ValueIR, env: CoreRuntimeEnv): KernValue { return kNull(); case 'undefLit': return kUndefined(); - case 'ident': - return env.lookup(node.name); + case 'ident': { + const value = env.lookup(node.name); + if (node.name === 'this' && value.kind === 'instance') guardConstructedInstanceAccess(value); + return value; + } case 'tmplLit': return kString( node.quasis.reduce((out, quasi, index) => { @@ -751,9 +760,8 @@ function initializeClassLayer( } const base = resolveBaseClass(klass); const ctor = firstRuntimeChild(klass.node, 'constructor'); - const ctorCallsSuper = Boolean(base && ctor && constructorCallsSuper(ctor)); - if (base && !ctorCallsSuper) initializeClassLayer(instance, base, [], false); - if (!ctorCallsSuper) initializeClassFields(instance, klass); + if (base && !ctor) initializeClassLayer(instance, base, [], false); + if (!base || !ctor) initializeClassFields(instance, klass); if (!ctor) { if (receivesConstructorArgs && args.length > 0) { throw new Error(`KERN core runtime class 
${klass.name} has no constructor.`); @@ -761,8 +769,14 @@ function initializeClassLayer( instance.initializedClasses.add(klass.name); return; } - callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; - if (base && ctorCallsSuper && !instance.initializedClasses.has(base.name)) { + if (base) { + withConstructionFrame(instance, klass, () => { + callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; + }); + } else { + callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; + } + if (base && !instance.initializedClasses.has(base.name)) { throw new Error(`KERN core runtime constructor ${klass.name} must call super(...).`); } instance.initializedClasses.add(klass.name); @@ -788,6 +802,7 @@ function runtimeFieldInitializerExpr(node: IRNode): string { } function evalInstanceMember(object: KernInstanceValue, property: string): KernValue { + guardConstructedInstanceAccess(object); if (Object.hasOwn(object.fields, property)) return object.fields[property] ?? 
kUndefined(); const getter = findClassMember(object.classValue, 'getter', property); if (getter) return callClassMemberBody(getter.node, getter.owner, object, []).value; @@ -808,6 +823,7 @@ function evalSuperMember(object: KernSuperValue, property: string): KernValue { const base = resolveBaseClass(object.ownerClass); if (!base) return kUndefined(); if (object.receiver.kind === 'class') return evalClassMemberFrom(base, property, object.receiver); + guardConstructedSuperAccess(object.receiver); const getter = findClassMember(base, 'getter', property); if (getter) return callClassMemberBody(getter.node, getter.owner, object.receiver, []).value; const method = findClassMember(base, 'method', property); @@ -845,6 +861,7 @@ function evalClassMemberFrom(owner: KernClassValue, property: string, receiver: } function assignInstanceMember(object: KernInstanceValue, property: string, value: KernValue): void { + guardConstructedInstanceAccess(object); const setter = findClassMember(object.classValue, 'setter', property); if (setter) { callSetterBody(object, setter.node, setter.owner, property, value); @@ -863,6 +880,7 @@ function assignSuperMember(object: KernSuperValue, property: string, value: Kern assignClassMemberFrom(base, object.receiver, property, value); return; } + guardConstructedSuperAccess(object.receiver); const setter = findClassMember(base, 'setter', property); if (setter) { callSetterBody(object.receiver, setter.node, setter.owner, property, value); @@ -964,11 +982,51 @@ function callSuperConstructor(value: KernSuperValue, args: readonly KernValue[]) } const base = resolveBaseClass(value.ownerClass); if (!base) throw new Error(`KERN core runtime class ${value.ownerClass.name} has no base class.`); + const frame = activeConstructionFrame(value.receiver); + if (!frame || frame.ownerClass !== value.ownerClass) { + throw new Error(`KERN core runtime super(...) 
is not active for constructor ${value.ownerClass.name}.`); + } + if (frame.superCalled || value.receiver.initializedClasses.has(base.name)) { + throw new Error(`KERN core runtime constructor ${value.ownerClass.name} called super(...) more than once.`); + } + frame.superCalled = true; initializeClassLayer(value.receiver, base, args, true); initializeClassFields(value.receiver, value.ownerClass); return value.receiver; } +function withConstructionFrame(instance: KernInstanceValue, ownerClass: KernClassValue, run: () => void): void { + const stack = ACTIVE_CONSTRUCTORS.get(instance) ?? []; + const frame: RuntimeConstructionFrame = { ownerClass, superCalled: false }; + stack.push(frame); + ACTIVE_CONSTRUCTORS.set(instance, stack); + try { + run(); + } finally { + stack.pop(); + if (stack.length === 0) ACTIVE_CONSTRUCTORS.delete(instance); + } +} + +function activeConstructionFrame(instance: KernInstanceValue): RuntimeConstructionFrame | undefined { + const stack = ACTIVE_CONSTRUCTORS.get(instance); + return stack?.[stack.length - 1]; +} + +function guardConstructedInstanceAccess(instance: KernInstanceValue): void { + const frame = activeConstructionFrame(instance); + if (!frame || frame.superCalled) return; + if (!resolveBaseClass(frame.ownerClass)) return; + throw new Error(`KERN core runtime cannot access this before super(...) in ${frame.ownerClass.name}.`); +} + +function guardConstructedSuperAccess(instance: KernInstanceValue): void { + const frame = activeConstructionFrame(instance); + if (!frame || frame.superCalled) return; + if (!resolveBaseClass(frame.ownerClass)) return; + throw new Error(`KERN core runtime cannot access super members before super(...) in ${frame.ownerClass.name}.`); +} + function callClassMemberBody( memberNode: IRNode, ownerClass: KernClassValue, @@ -1219,71 +1277,6 @@ function runtimeChildNodes(node: IRNode, type: string): IRNode[] { return node.children?.filter((child) => child.type === type) ?? 
[]; } -function constructorCallsSuper(node: IRNode): boolean { - return runtimeFunctionBody(node).some(statementCallsSuper); -} - -function statementCallsSuper(node: IRNode): boolean { - const rawValue = node.type === 'do' ? node.props?.value : undefined; - if (rawValue !== undefined && expressionCallsSuper(rawValue)) return true; - return (node.children ?? []).some(statementCallsSuper); -} - -function expressionCallsSuper(value: unknown): boolean { - try { - return valueIRCallsSuper(parseExpression(unwrapExpr(value, 'super expression'))); - } catch { - return false; - } -} - -function valueIRCallsSuper(value: ValueIR): boolean { - switch (value.kind) { - case 'call': - return ( - (value.callee.kind === 'ident' && value.callee.name === 'super') || - valueIRCallsSuper(value.callee) || - value.args.some(valueIRCallsSuper) - ); - case 'member': - return valueIRCallsSuper(value.object); - case 'index': - return valueIRCallsSuper(value.object) || valueIRCallsSuper(value.index); - case 'tmplLit': - return value.expressions.some(valueIRCallsSuper); - case 'arrayLit': - return value.items.some(valueIRCallsSuper); - case 'objectLit': - return value.entries.some((entry) => - 'kind' in entry ? 
valueIRCallsSuper(entry.argument) : valueIRCallsSuper(entry.value), - ); - case 'unary': - case 'await': - case 'new': - case 'spread': - case 'propagate': - return valueIRCallsSuper(value.argument); - case 'typeAssert': - case 'nonNull': - return valueIRCallsSuper(value.expression); - case 'binary': - return valueIRCallsSuper(value.left) || valueIRCallsSuper(value.right); - case 'conditional': - return valueIRCallsSuper(value.test) || valueIRCallsSuper(value.consequent) || valueIRCallsSuper(value.alternate); - case 'lambda': - return false; - case 'numLit': - case 'strLit': - case 'boolLit': - case 'nullLit': - case 'undefLit': - case 'regexLit': - case 'ident': - return false; - } - return false; -} - function runtimeChildren(node: IRNode): IRNode[] { if (node.type === 'document' || node.type === 'handler' || node.type === '__block') return node.children ?? []; return [node]; diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index f9ec1e39..da6d4e31 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -595,15 +595,8 @@ function validateClassMemberConflicts(info: ClassInfo, violations: SemanticViola function validateClassSuperUsage(info: ClassInfo, violations: SemanticViolation[]): void { const hasBase = Boolean(info.baseName); for (const ctor of info.constructors) { - const callsSuper = nodeBodyCallsSuperConstructor(ctor); - if (hasBase && !callsSuper) { - violations.push({ - rule: 'class-constructor-missing-super', - nodeType: 'constructor', - message: `Class '${info.name}' extends '${info.baseName}' but its constructor does not call \`super(...)\`. 
Derived constructors must initialize the base class explicitly.`, - line: ctor.loc?.line, - col: ctor.loc?.col, - }); + if (hasBase) { + validateDerivedConstructorDiscipline(info, ctor, violations); } if (!hasBase && nodeBodyUsesSuper(ctor)) { violations.push({ @@ -630,6 +623,204 @@ function validateClassSuperUsage(info: ClassInfo, violations: SemanticViolation[ } } +type ConstructorSuperState = 'uninit' | 'init' | 'maybe'; + +interface ConstructorDisciplineContext { + info: ClassInfo; + violations: SemanticViolation[]; + sawSuper: boolean; + emittedConditionalSuper: boolean; +} + +interface ConstructorAnalysis { + state: ConstructorSuperState; + sawSuper: boolean; +} + +function validateDerivedConstructorDiscipline(info: ClassInfo, ctor: IRNode, violations: SemanticViolation[]): void { + const ctx: ConstructorDisciplineContext = { + info, + violations, + sawSuper: false, + emittedConditionalSuper: false, + }; + const analysis = analyzeConstructorStatements(constructorBodyStatements(ctor), 'uninit', ctx); + if (analysis.state !== 'init') { + if (ctx.sawSuper) { + emitConstructorConditionalSuper(ctx, ctor); + } else { + violations.push({ + rule: 'class-constructor-missing-super', + nodeType: 'constructor', + message: `Class '${info.name}' extends '${info.baseName}' but its constructor does not call \`super(...)\`. Derived constructors must initialize the base class explicitly.`, + line: ctor.loc?.line, + col: ctor.loc?.col, + }); + } + } +} + +function analyzeConstructorStatements( + statements: readonly IRNode[], + initialState: ConstructorSuperState, + ctx: ConstructorDisciplineContext, +): ConstructorAnalysis { + let state = initialState; + let sawSuper = false; + for (let index = 0; index < statements.length; index += 1) { + const statement = statements[index]; + if (statement.type === 'else') continue; + const maybeElse = + statement.type === 'if' && statements[index + 1]?.type === 'else' ? 
statements[index + 1] : undefined; + const result = analyzeConstructorStatement(statement, maybeElse, state, ctx); + state = result.state; + sawSuper = sawSuper || result.sawSuper; + if (maybeElse) index += 1; + } + return { state, sawSuper }; +} + +function analyzeConstructorStatement( + statement: IRNode, + maybeElse: IRNode | undefined, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, +): ConstructorAnalysis { + if (statement.type === 'class') return { state, sawSuper: false }; + const directSuper = directSuperConstructorCall(statement); + if (directSuper) { + scanValueIRForPreSuperAccess(directSuper, state, ctx, statement); + ctx.sawSuper = true; + if (state === 'init' || state === 'maybe' || directSuper.args.some(valueIRCallsSuperConstructor)) { + emitConstructorDoubleSuper(ctx, statement); + } + if (state === 'maybe') emitConstructorConditionalSuper(ctx, statement); + return { state: 'init', sawSuper: true }; + } + if (statement.type === 'if') return analyzeConstructorIf(statement, maybeElse, state, ctx); + + const sawSuper = scanConstructorStatementExpressions(statement, state, ctx); + if (sawSuper && state === 'init') emitConstructorDoubleSuper(ctx, statement); + if (sawSuper && state !== 'init') emitConstructorConditionalSuper(ctx, statement); + return { state, sawSuper }; +} + +function analyzeConstructorIf( + statement: IRNode, + maybeElse: IRNode | undefined, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, +): ConstructorAnalysis { + const cond = expressionPropText(statement.props?.cond); + if (cond) scanExpressionForConstructorEffects(cond, state, ctx, statement); + const thenResult = analyzeConstructorStatements(statement.children ?? [], state, ctx); + const elseResult = maybeElse + ? analyzeConstructorStatements(maybeElse.children ?? 
[], state, ctx) + : { state, sawSuper: false }; + const merged = mergeConstructorStates(thenResult.state, elseResult.state); + const sawSuper = thenResult.sawSuper || elseResult.sawSuper; + if (sawSuper && merged !== 'init') emitConstructorConditionalSuper(ctx, statement); + return { state: merged, sawSuper }; +} + +function mergeConstructorStates(left: ConstructorSuperState, right: ConstructorSuperState): ConstructorSuperState { + if (left === 'init' && right === 'init') return 'init'; + if (left === 'uninit' && right === 'uninit') return 'uninit'; + return 'maybe'; +} + +function constructorBodyStatements(node: IRNode): IRNode[] { + const handler = node.children?.find((child) => child.type === 'handler'); + const body = handler ? (handler.children ?? []) : (node.children ?? []); + return body.filter((child) => child.type !== 'param' && child.type !== 'decorator'); +} + +function directSuperConstructorCall(node: IRNode): Extract | undefined { + if (node.type !== 'do') return undefined; + const text = expressionPropText(node.props?.value); + if (!text) return undefined; + try { + const value = parseExpression(text); + return value.kind === 'call' && value.callee.kind === 'ident' && value.callee.name === 'super' ? 
value : undefined; + } catch { + return undefined; + } +} + +function scanConstructorStatementExpressions( + node: IRNode, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, +): boolean { + let sawSuper = false; + walkSemanticTreeUntil(node, (candidate) => { + if (candidate !== node && candidate.type === 'class') return 'stop'; + for (const prop of BODY_EXPRESSION_PROPS) { + const text = expressionPropText(candidate.props?.[prop]); + if (!text) continue; + sawSuper = scanExpressionForConstructorEffects(text, state, ctx, candidate) || sawSuper; + } + return 'continue'; + }); + return sawSuper; +} + +function scanExpressionForConstructorEffects( + text: string, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, + node: IRNode, +): boolean { + try { + const value = parseExpression(text); + scanValueIRForPreSuperAccess(value, state, ctx, node); + const sawSuper = valueIRCallsSuperConstructor(value); + if (sawSuper) ctx.sawSuper = true; + return sawSuper; + } catch { + return false; + } +} + +function scanValueIRForPreSuperAccess( + value: ValueIR, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, + node: IRNode, +): void { + if (state === 'init') return; + if (!valueIRUsesThisOrSuperMember(value)) return; + ctx.violations.push({ + rule: 'class-constructor-this-before-super', + nodeType: node.type, + message: `Class '${ctx.info.name}' constructor uses \`this\` or \`super\` member access before \`super(...)\`. Derived constructors must initialize the base class first.`, + line: node.loc?.line, + col: node.loc?.col, + }); +} + +function emitConstructorDoubleSuper(ctx: ConstructorDisciplineContext, node: IRNode): void { + ctx.violations.push({ + rule: 'class-constructor-double-super', + nodeType: node.type, + message: `Class '${ctx.info.name}' constructor calls \`super(...)\` more than once. 
Derived constructors may initialize the base class once.`, + line: node.loc?.line, + col: node.loc?.col, + }); +} + +function emitConstructorConditionalSuper(ctx: ConstructorDisciplineContext, node: IRNode): void { + if (ctx.emittedConditionalSuper) return; + ctx.emittedConditionalSuper = true; + ctx.violations.push({ + rule: 'class-constructor-conditional-super', + nodeType: node.type, + message: `Class '${ctx.info.name}' constructor must call \`super(...)\` definitely on every path before using derived state. Move \`super(...)\` to a straight-line statement or cover every branch.`, + line: node.loc?.line, + col: node.loc?.col, + }); +} + function validateClassInheritanceCycles( classes: readonly ClassInfo[], classByName: ReadonlyMap, @@ -783,16 +974,6 @@ function memberArity(node: IRNode): number { } } -function nodeBodyCallsSuperConstructor(node: IRNode): boolean { - return nodeBodyExpressions(node).some((expr) => { - try { - return valueIRCallsSuperConstructor(parseExpression(expr)); - } catch { - return false; - } - }); -} - function nodeBodyUsesSuper(node: IRNode): boolean { return nodeBodyExpressions(node).some((expr) => { try { @@ -833,6 +1014,19 @@ function valueIRUsesSuper(value: ValueIR): boolean { return valueIRChildren(value).some(valueIRUsesSuper); } +function valueIRUsesThisOrSuperMember(value: ValueIR): boolean { + if (value.kind === 'ident' && value.name === 'this') return true; + if ( + (value.kind === 'member' || value.kind === 'index') && + value.object.kind === 'ident' && + value.object.name === 'super' + ) { + return true; + } + if (value.kind === 'lambda') return false; + return valueIRChildren(value).some(valueIRUsesThisOrSuperMember); +} + function valueIRChildren(value: ValueIR): ValueIR[] { switch (value.kind) { case 'call': diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 0ad7c116..71b54eeb 100644 --- a/packages/core/tests/class-semantics.test.ts +++ 
b/packages/core/tests/class-semantics.test.ts @@ -181,6 +181,144 @@ describe('semantic-validator — class object model', () => { expect(violations.map((violation) => violation.rule)).toContain('class-constructor-missing-super'); }); + test('reports this and super member access before constructor super', () => { + const rules = rulesFor( + [ + 'class name=Entity', + ' method name=kind returns=string', + ' handler lang=kern', + ' return value="\'entity\'"', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' assign target="this.name" value="\'Ada\'"', + ' do value="super()"', + 'class name=Admin extends=Entity', + ' constructor', + ' handler lang=kern', + ' return value="super.kind()"', + ].join('\n'), + ); + + expect(rules.filter((rule) => rule === 'class-constructor-this-before-super')).toHaveLength(2); + }); + + test('reports double constructor super calls', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' do value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-double-super'); + }); + + test('reports direct constructor super after maybe-initialized state', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + ' do value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-double-super'); + expect(rules).toContain('class-constructor-conditional-super'); + }); + + test('reports nested constructor super inside super arguments as double super', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="super(super())"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-double-super'); + 
}); + + test('reports non-direct constructor super after initialization as double super', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' return value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-double-super'); + }); + + test('reports conditional constructor super when not every path initializes', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-conditional-super'); + }); + + test('accepts branch-complete constructor super before derived this usage', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + ' else', + ' do value="super()"', + ' assign target="this.name" value="\'Ada\'"', + ].join('\n'), + ); + + expect(rules).not.toContain('class-constructor-conditional-super'); + expect(rules).not.toContain('class-constructor-this-before-super'); + expect(rules).not.toContain('class-constructor-missing-super'); + }); + + test('reports constructor this usage in conditions before super', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' if cond="this.ready"', + ' do value="super()"', + ' else', + ' do value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-this-before-super'); + }); + test('reports super usage in classes without a base', () => { const violations = violationsFor( [ diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 
4d262e66..c072aff0 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -538,6 +538,22 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User("u1", "Ada").name', env))).toBe('Ada'); }); + test('initializes fields before running a base-less constructor body', () => { + const root = parse( + [ + 'class name=Plain', + ' field name=count type=number value={{ 2 }}', + ' constructor', + ' handler', + ' assign target="this.count" value="this.count + 3"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Plain().count', env))).toBe(5); + }); + test('initializes derived fields after super constructor state', () => { const root = parse( [ @@ -584,7 +600,66 @@ describe('KERN core runtime statements', () => { expect(() => evalCoreExpression('new Box(1, 2)', env)).toThrow('received too many arguments'); }); - test('detects nested constructor super calls structurally', () => { + test('requires explicit super before this access in derived constructors', () => { + const root = parse( + [ + 'class name=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + ' do value="super(id)"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User("u1")', env)).toThrow('cannot access this before super(...)'); + }); + + test('allows reading a separate initialized instance before constructor super', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="base"', + 'class name=User extends=Entity', + ' constructor', + ' param name=other type=Entity', + ' handler', + ' let name=otherId value="other.id"', + ' 
do value="super()"', + ' assign target="this.id" value="otherId"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User(new Entity()).id', env))).toBe('base'); + }); + + test('rejects double super calls in derived constructors', () => { + const root = parse( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler', + ' do value="super()"', + ' do value="super()"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('called super(...) more than once'); + }); + + test('missing runtime super path fails instead of auto-initializing the base', () => { const root = parse( [ 'class name=Entity', @@ -596,15 +671,17 @@ describe('KERN core runtime statements', () => { 'class name=User extends=Entity', ' constructor', ' param name=id type=string', + ' param name=ready type=boolean', ' handler', - ' if cond=true', + ' if cond=ready', ' do value="super(id)"', ].join('\n'), ); const env = createCoreRuntimeEnv(); runCoreRuntime(root, env); - expect(toHostValue(evalCoreExpression('new User("u1").id', env))).toBe('u1'); + expect(toHostValue(evalCoreExpression('new User("u1", true).id', env))).toBe('u1'); + expect(() => evalCoreExpression('new User("u2", false)', env)).toThrow('must call super(...)'); }); test('dispatches instance assignment through setters', () => { @@ -763,7 +840,7 @@ describe('KERN core runtime statements', () => { const env = createCoreRuntimeEnv(); runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User("u1")', env)).toThrow('missing required argument: id'); + expect(() => evalCoreExpression('new User("u1")', env)).toThrow('lambda expressions are not supported'); }); }); From 274396baf9ed4abedeae22098243329685bd3bf2 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 15:00:33 +0200 Subject: [PATCH 14/46] feat(core): 
enforce declared class shapes --- packages/core/src/core-runtime/index.ts | 231 +++++++++++++------- packages/core/src/semantic-validator.ts | 173 +++++++++++++++ packages/core/tests/class-semantics.test.ts | 63 ++++++ packages/core/tests/core-runtime.test.ts | 63 ++++++ 4 files changed, 451 insertions(+), 79 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 3df3285c..0f68395f 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -803,20 +803,24 @@ function runtimeFieldInitializerExpr(node: IRNode): string { function evalInstanceMember(object: KernInstanceValue, property: string): KernValue { guardConstructedInstanceAccess(object); - if (Object.hasOwn(object.fields, property)) return object.fields[property] ?? kUndefined(); - const getter = findClassMember(object.classValue, 'getter', property); - if (getter) return callClassMemberBody(getter.node, getter.owner, object, []).value; - const method = findClassMember(object.classValue, 'method', property); - if (method) { - return brandValue({ - kind: 'bound-method', - name: `${object.classValue.name}.${property}`, - receiver: object, - methodNode: method.node, - ownerClass: method.owner, - }); + const member = findReadableClassShapeMember(object.classValue, property, false); + if (!member) throw new Error(`KERN core runtime unknown instance property: ${object.classValue.name}.${property}.`); + switch (member.kind) { + case 'field': + return object.fields[property] ?? 
kUndefined(); + case 'getter': + return callClassMemberBody(member.node, member.owner, object, []).value; + case 'method': + return brandValue({ + kind: 'bound-method', + name: `${object.classValue.name}.${property}`, + receiver: object, + methodNode: member.node, + ownerClass: member.owner, + }); + case 'setter': + throw new Error(`KERN core runtime cannot read setter-only property: ${property}.`); } - return kUndefined(); } function evalSuperMember(object: KernSuperValue, property: string): KernValue { @@ -824,20 +828,24 @@ function evalSuperMember(object: KernSuperValue, property: string): KernValue { if (!base) return kUndefined(); if (object.receiver.kind === 'class') return evalClassMemberFrom(base, property, object.receiver); guardConstructedSuperAccess(object.receiver); - const getter = findClassMember(base, 'getter', property); - if (getter) return callClassMemberBody(getter.node, getter.owner, object.receiver, []).value; - const method = findClassMember(base, 'method', property); - if (method) { - return brandValue({ - kind: 'bound-method', - name: `${base.name}.${property}`, - receiver: object.receiver, - methodNode: method.node, - ownerClass: method.owner, - }); + const member = findReadableClassShapeMember(base, property, false); + if (!member) throw new Error(`KERN core runtime unknown super property: ${object.ownerClass.name}.${property}.`); + switch (member.kind) { + case 'field': + return object.receiver.fields[property] ?? kUndefined(); + case 'getter': + return callClassMemberBody(member.node, member.owner, object.receiver, []).value; + case 'method': + return brandValue({ + kind: 'bound-method', + name: `${base.name}.${property}`, + receiver: object.receiver, + methodNode: member.node, + ownerClass: member.owner, + }); + case 'setter': + throw new Error(`KERN core runtime cannot read setter-only super property: ${property}.`); } - if (Object.hasOwn(object.receiver.fields, property)) return object.receiver.fields[property] ?? 
kUndefined(); - return kUndefined(); } function evalClassMember(object: KernClassValue, property: string): KernValue { @@ -845,32 +853,42 @@ function evalClassMember(object: KernClassValue, property: string): KernValue { } function evalClassMemberFrom(owner: KernClassValue, property: string, receiver: KernClassValue): KernValue { - if (Object.hasOwn(owner.staticFields, property)) return owner.staticFields[property] ?? kUndefined(); - const getter = findOwnClassMember(owner, 'getter', property, true); - if (getter) return callStaticClassMemberBody(getter.node, getter.owner, receiver, []).value; - const method = findOwnClassMember(owner, 'method', property, true); - if (method) { - return brandValue({ - kind: 'builtin' as const, - name: `${receiver.name}.${property}`, - call: (args) => callStaticClassMemberBody(method.node, method.owner, receiver, args).value, - }); + const member = findReadableClassShapeMember(owner, property, true); + if (!member) throw new Error(`KERN core runtime unknown static property: ${receiver.name}.${property}.`); + switch (member.kind) { + case 'field': + return member.owner === receiver + ? (receiver.staticFields[property] ?? kUndefined()) + : evalClassStaticField(member.owner, receiver, property); + case 'getter': + return callStaticClassMemberBody(member.node, member.owner, receiver, []).value; + case 'method': + return brandValue({ + kind: 'builtin' as const, + name: `${receiver.name}.${property}`, + call: (args) => callStaticClassMemberBody(member.node, member.owner, receiver, args).value, + }); + case 'setter': + throw new Error(`KERN core runtime cannot read setter-only static property: ${property}.`); } - const base = resolveBaseClass(owner); - return base ? 
evalClassMemberFrom(base, property, receiver) : kUndefined(); } function assignInstanceMember(object: KernInstanceValue, property: string, value: KernValue): void { guardConstructedInstanceAccess(object); - const setter = findClassMember(object.classValue, 'setter', property); - if (setter) { - callSetterBody(object, setter.node, setter.owner, property, value); - return; - } - if (findClassMember(object.classValue, 'getter', property)) { - throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + const member = findWritableClassShapeMember(object.classValue, property, false); + if (!member) throw new Error(`KERN core runtime cannot assign undeclared instance property: ${property}.`); + switch (member.kind) { + case 'field': + object.fields[property] = value; + return; + case 'setter': + callSetterBody(object, member.node, member.owner, property, value); + return; + case 'getter': + throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + case 'method': + throw new Error(`KERN core runtime cannot assign method property: ${property}.`); } - object.fields[property] = value; } function assignSuperMember(object: KernSuperValue, property: string, value: KernValue): void { @@ -881,15 +899,20 @@ function assignSuperMember(object: KernSuperValue, property: string, value: Kern return; } guardConstructedSuperAccess(object.receiver); - const setter = findClassMember(base, 'setter', property); - if (setter) { - callSetterBody(object.receiver, setter.node, setter.owner, property, value); - return; - } - if (findClassMember(base, 'getter', property)) { - throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + const member = findWritableClassShapeMember(base, property, false); + if (!member) throw new Error(`KERN core runtime cannot assign undeclared super property: ${property}.`); + switch (member.kind) { + case 'field': + object.receiver.fields[property] = value; + return; + case 
'setter': + callSetterBody(object.receiver, member.node, member.owner, property, value); + return; + case 'getter': + throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + case 'method': + throw new Error(`KERN core runtime cannot assign method property: ${property}.`); } - object.receiver.fields[property] = value; } function assignClassMember(object: KernClassValue, property: string, value: KernValue): void { @@ -902,24 +925,20 @@ function assignClassMemberFrom( property: string, value: KernValue, ): void { - if (Object.hasOwn(owner.staticFields, property)) { - receiver.staticFields[property] = value; - return; - } - const setter = findOwnClassMember(owner, 'setter', property, true); - if (setter) { - callStaticSetterBody(receiver, setter.node, setter.owner, property, value); - return; - } - if (findOwnClassMember(owner, 'getter', property, true)) { - throw new Error(`KERN core runtime cannot assign getter-only static property: ${property}.`); - } - const base = resolveBaseClass(owner); - if (base) { - assignClassMemberFrom(base, receiver, property, value); - return; + const member = findWritableClassShapeMember(owner, property, true); + if (!member) throw new Error(`KERN core runtime cannot assign undeclared static property: ${property}.`); + switch (member.kind) { + case 'field': + receiver.staticFields[property] = value; + return; + case 'setter': + callStaticSetterBody(receiver, member.node, member.owner, property, value); + return; + case 'getter': + throw new Error(`KERN core runtime cannot assign getter-only static property: ${property}.`); + case 'method': + throw new Error(`KERN core runtime cannot assign static method property: ${property}.`); } - receiver.staticFields[property] = value; } function callSetterBody( @@ -1114,20 +1133,74 @@ function findOwnClassMember( return undefined; } -function findClassMember( +type RuntimeClassShapeKind = 'field' | 'getter' | 'setter' | 'method'; + +interface RuntimeClassShapeMember 
{ + kind: RuntimeClassShapeKind; + node: IRNode; + owner: KernClassValue; +} + +function findReadableClassShapeMember( klass: KernClassValue, - type: 'method' | 'getter' | 'setter', name: string, - staticOnly = false, -): { node: IRNode; owner: KernClassValue } | undefined { + staticOnly: boolean, +): RuntimeClassShapeMember | undefined { + return findClassShapeMember(klass, name, staticOnly, ['field', 'getter', 'method', 'setter']); +} + +function findWritableClassShapeMember( + klass: KernClassValue, + name: string, + staticOnly: boolean, +): RuntimeClassShapeMember | undefined { + return findClassShapeMember(klass, name, staticOnly, ['field', 'setter', 'getter', 'method']); +} + +function findClassShapeMember( + klass: KernClassValue, + name: string, + staticOnly: boolean, + precedence: readonly RuntimeClassShapeKind[], +): RuntimeClassShapeMember | undefined { + for (const kind of precedence) { + const member = + kind === 'field' + ? findOwnClassField(klass, name, staticOnly) + : findOwnClassMethodShapeMember(klass, kind, name, staticOnly); + if (member) return member; + } + const base = resolveBaseClass(klass); + return base ? findClassShapeMember(base, name, staticOnly, precedence) : undefined; +} + +function findOwnClassMethodShapeMember( + klass: KernClassValue, + kind: 'getter' | 'setter' | 'method', + name: string, + staticOnly: boolean, +): RuntimeClassShapeMember | undefined { + const member = findOwnClassMember(klass, kind, name, staticOnly); + return member ? { kind, node: member.node, owner: member.owner } : undefined; +} + +function findOwnClassField( + klass: KernClassValue, + name: string, + staticOnly: boolean, +): RuntimeClassShapeMember | undefined { for (const child of klass.node.children ?? 
[]) { - if (child.type !== type || child.props?.name !== name) continue; + if (child.type !== 'field' || child.props?.name !== name) continue; const isStatic = child.props?.static === true || child.props?.static === 'true'; if (staticOnly !== isStatic) continue; - return { node: child, owner: klass }; + return { kind: 'field', node: child, owner: klass }; } - const base = resolveBaseClass(klass); - return base ? findClassMember(base, type, name, staticOnly) : undefined; + return undefined; +} + +function evalClassStaticField(owner: KernClassValue, receiver: KernClassValue, property: string): KernValue { + if (Object.hasOwn(receiver.staticFields, property)) return receiver.staticFields[property] ?? kUndefined(); + return owner.staticFields[property] ?? kUndefined(); } function resolveBaseClass(klass: KernClassValue): KernClassValue | undefined { diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index da6d4e31..b07e6e8f 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -501,6 +501,7 @@ function validateClassGraph(root: IRNode, violations: SemanticViolation[]): void validateClassInheritanceCycles(classes, classByName, violations); validateClassOverrides(classes, classByName, violations); + validateClassShapeUsage(classes, classByName, violations); } function collectClassInfos(root: IRNode): ClassInfo[] { @@ -888,6 +889,178 @@ function validateClassOverrides( } } +type ClassShapeAccessKind = 'read' | 'write'; + +function validateClassShapeUsage( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + for (const info of classes) { + for (const ctor of info.constructors) validateClassShapeNode(info, ctor, false, classByName, violations); + for (const member of info.members) + validateClassShapeNode(info, member.node, member.static, classByName, violations); + } +} + +function validateClassShapeNode( + info: ClassInfo, + 
node: IRNode, + staticContext: boolean, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + walkSemanticTreeUntil(node, (candidate) => { + if (candidate !== node && candidate.type === 'class') return 'stop'; + if (candidate.type === 'assign') { + const target = expressionPropText(candidate.props?.target); + if (target && validateClassShapeTarget(info, candidate, target, staticContext, classByName, violations)) { + const value = expressionPropText(candidate.props?.value); + if (value) validateClassShapeExpression(info, candidate, value, staticContext, classByName, violations); + return 'continue'; + } + } + for (const prop of BODY_EXPRESSION_PROPS) { + const text = expressionPropText(candidate.props?.[prop]); + if (!text) continue; + validateClassShapeExpression(info, candidate, text, staticContext, classByName, violations); + } + return 'continue'; + }); +} + +function validateClassShapeTarget( + info: ClassInfo, + node: IRNode, + text: string, + staticContext: boolean, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): boolean { + try { + const value = parseExpression(text); + if (value.kind !== 'member') return false; + if (value.object.kind !== 'ident' || (value.object.name !== 'this' && value.object.name !== 'super')) return false; + validateClassShapeAccess( + info, + node, + value.object.name, + value.property, + 'write', + staticContext, + classByName, + violations, + ); + return true; + } catch { + return false; + } +} + +function validateClassShapeExpression( + info: ClassInfo, + node: IRNode, + text: string, + staticContext: boolean, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + try { + validateClassShapeValueIR(info, node, parseExpression(text), staticContext, classByName, violations); + } catch { + return; + } +} + +function validateClassShapeValueIR( + info: ClassInfo, + node: IRNode, + value: ValueIR, + staticContext: boolean, + classByName: ReadonlyMap, + violations: 
SemanticViolation[], +): void { + if (value.kind === 'member' && value.object.kind === 'ident') { + if (value.object.name === 'this' || value.object.name === 'super') { + validateClassShapeAccess( + info, + node, + value.object.name, + value.property, + 'read', + staticContext, + classByName, + violations, + ); + } + } + for (const child of valueIRChildren(value)) { + validateClassShapeValueIR(info, node, child, staticContext, classByName, violations); + } +} + +function validateClassShapeAccess( + info: ClassInfo, + node: IRNode, + receiver: 'this' | 'super', + property: string, + accessKind: ClassShapeAccessKind, + staticContext: boolean, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + const start = receiver === 'super' ? (info.baseName ? classByName.get(info.baseName) : undefined) : info; + if (!start) return; + const member = findClassShapeMember(start, property, staticContext, classByName, accessKind); + if (!member) { + violations.push({ + rule: 'class-member-undeclared', + nodeType: node.type, + message: `Class '${info.name}' ${receiver}.${property} is not declared on the ${staticContext ? 'static' : 'instance'} class shape.`, + line: node.loc?.line, + col: node.loc?.col, + }); + return; + } + if (accessKind === 'read' && member.kind === 'setter') { + violations.push({ + rule: 'class-member-read-not-readable', + nodeType: node.type, + message: `Class '${info.name}' reads setter-only ${receiver}.${property}. Add a getter or read a declared field.`, + line: node.loc?.line, + col: node.loc?.col, + }); + } + if (accessKind === 'write' && (member.kind === 'getter' || member.kind === 'method')) { + violations.push({ + rule: 'class-member-write-not-writable', + nodeType: node.type, + message: `Class '${info.name}' writes non-writable ${receiver}.${property}. 
Declare a field or setter for writes.`, + line: node.loc?.line, + col: node.loc?.col, + }); + } +} + +function findClassShapeMember( + info: ClassInfo, + property: string, + staticContext: boolean, + classByName: ReadonlyMap, + accessKind: ClassShapeAccessKind, +): ClassMemberInfo | undefined { + const precedence: readonly ClassMemberKind[] = + accessKind === 'read' ? ['field', 'getter', 'method', 'setter'] : ['field', 'setter', 'getter', 'method']; + for (const kind of precedence) { + const found = info.members.find( + (member) => member.name === property && member.static === staticContext && member.kind === kind, + ); + if (found) return found; + } + const base = info.baseName ? classByName.get(info.baseName) : undefined; + return base ? findClassShapeMember(base, property, staticContext, classByName, accessKind) : undefined; +} + function normalizedCycleKey(cycleNames: readonly string[]): string { if (cycleNames.length === 0) return ''; let best = cycleNames.join('\0'); diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 71b54eeb..50cb7d0b 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -223,6 +223,7 @@ describe('semantic-validator — class object model', () => { [ 'class name=Entity', 'class name=User extends=Entity', + ' field name=name type=string', ' constructor', ' param name=ready type=boolean', ' handler lang=kern', @@ -302,6 +303,68 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-constructor-missing-super'); }); + test('reports undeclared this and super class-shape member access', () => { + const rules = rulesFor( + [ + 'class name=Base', + ' field name=known type=number', + 'class name=User extends=Base', + ' field name=own type=number', + ' method name=readMissing returns=number', + ' handler lang=kern', + ' return value="this.missing"', + ' method name=writeMissing returns=void', + ' 
handler lang=kern', + ' assign target="this.missing" value=1', + ' method name=readMissingSuper returns=number', + ' handler lang=kern', + ' return value="super.missing"', + ].join('\n'), + ); + + expect(rules.filter((rule) => rule === 'class-member-undeclared')).toHaveLength(3); + }); + + test('reports static and instance shape mismatches for this access', () => { + const rules = rulesFor( + [ + 'class name=Shape', + ' field name=instanceOnly type=number', + ' field name=staticOnly type=number static=true', + ' method name=badInstance returns=number', + ' handler lang=kern', + ' return value="this.staticOnly"', + ' method name=badStatic static=true returns=number', + ' handler lang=kern', + ' return value="this.instanceOnly"', + ].join('\n'), + ); + + expect(rules.filter((rule) => rule === 'class-member-undeclared')).toHaveLength(2); + }); + + test('reports non-readable and non-writable class-shape members', () => { + const rules = rulesFor( + [ + 'class name=Access', + ' setter name=writeOnly', + ' param name=value type=number', + ' handler lang=kern', + ' do value=value', + ' getter name=readOnly returns=number', + ' handler lang=kern', + ' return value=1', + ' method name=run returns=number', + ' handler lang=kern', + ' assign target="this.readOnly" value=2', + ' return value="this.writeOnly"', + ].join('\n'), + ); + + expect(rules).toContain('class-member-read-not-readable'); + expect(rules).toContain('class-member-write-not-writable'); + }); + test('reports constructor this usage in conditions before super', () => { const rules = rulesFor( [ diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index c072aff0..8c9cbd17 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -772,6 +772,69 @@ describe('KERN core runtime statements', () => { expect(() => evalCoreExpression('setReadOnly()', env)).toThrow('cannot assign getter-only property: value'); }); + test('rejects 
undeclared instance and super property reads and writes', () => { + const root = parse( + [ + 'class name=Base', + ' field name=known type=number value={{ 1 }}', + 'class name=Derived extends=Base', + ' method name=readMissingSuper returns=number', + ' handler', + ' return value="super.missing"', + ' method name=writeMissingSuper returns=number', + ' handler', + ' assign target="super.missing" value="2"', + ' return value="this.known"', + 'fn name=readMissing returns=number', + ' handler', + ' let name=d value="new Derived()"', + ' return value="d.missing"', + 'fn name=writeMissing returns=number', + ' handler', + ' let name=d value="new Derived()"', + ' assign target="d.missing" value="2"', + ' return value="d.known"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('readMissing()', env)).toThrow('unknown instance property'); + expect(() => evalCoreExpression('writeMissing()', env)).toThrow('undeclared instance property'); + expect(() => evalCoreExpression('new Derived().readMissingSuper()', env)).toThrow('unknown super property'); + expect(() => evalCoreExpression('new Derived().writeMissingSuper()', env)).toThrow('undeclared super property'); + }); + + test('rejects undeclared static property reads and writes', () => { + const root = parse( + [ + 'class name=Closed', + ' field name=known type=number static=true value={{ 1 }}', + 'fn name=writeMissingStatic returns=number', + ' handler', + ' assign target="Closed.missing" value="2"', + ' return value="Closed.known"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('Closed.missing', env)).toThrow('unknown static property'); + expect(() => evalCoreExpression('writeMissingStatic()', env)).toThrow('undeclared static property'); + }); + + test('keeps records open while class instances are shape-checked', () => { + const result = runCoreRuntime( + handler([ + { type: 
'let', props: { name: 'r', value: '{ a: 1 }' } }, + { type: 'assign', props: { target: 'r.b', value: '2' } }, + { type: 'return', props: { value: 'r.b' } }, + ]), + ); + + expect(toHostValue(result.completion.value)).toBe(2); + }); + test('rejects recursive setter assignment', () => { const root = parse( [ From 4decdf4c3a2788f00fbaa19e3d89d8fc7f35a5e9 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 15:50:07 +0200 Subject: [PATCH 15/46] feat(core): expose class semantic facts --- packages/core/src/index.ts | 15 +- packages/core/src/semantic-substrate.ts | 32 +++ packages/core/src/semantic-validator.ts | 220 +++++++++++++++++- .../core/tests/semantic-substrate.test.ts | 192 +++++++++++++++ 4 files changed, 450 insertions(+), 9 deletions(-) diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 5602ae6c..309fb323 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -447,6 +447,7 @@ export type { KernSemanticSubstrateSource, KernSemanticSubstrateTarget, KernSemanticSupport, + KernSemanticValidationSummary, } from './semantic-substrate.js'; export { buildKernSemanticSubstrate, @@ -454,8 +455,18 @@ export { semanticPrimitiveSupportSummary, } from './semantic-substrate.js'; // Semantic validation -export type { SemanticViolation } from './semantic-validator.js'; -export { validateSemantics } from './semantic-validator.js'; +export type { + ClassSemanticClassFact, + ClassSemanticFacts, + ClassSemanticInheritanceEdge, + ClassSemanticLocation, + ClassSemanticMemberFact, + ClassSemanticMemberKind, + ClassSemanticOverrideFact, + ClassSemanticOverrideStatus, + SemanticViolation, +} from './semantic-validator.js'; +export { collectClassSemanticFacts, validateClassSemantics, validateSemantics } from './semantic-validator.js'; export type { ShadowAnalyzeOptions, ShadowDiagnostic } from './shadow-analyzer.js'; export { analyzeShadow } from './shadow-analyzer.js'; export type { SourceMapV3 } from './source-map.js'; diff --git 
a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index 98c12293..cccc594e 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -9,6 +9,13 @@ import { import { CORE_TYPE_CONTRACTS, type CoreOperationReturns, contractToGraphEdges } from './core-contracts/index.js'; import type { NodeContract } from './ir/semantics/index.js'; import { snapshotRegistry } from './ir/semantics/index.js'; +import { + type ClassSemanticFacts, + collectClassSemanticFacts, + type SemanticViolation, + validateClassSemantics, +} from './semantic-validator.js'; +import type { IRNode } from './types.js'; export type KernSemanticSubstrateSource = 'codegen-from-ts' | 'native-kern'; export type KernSemanticSubstrateTarget = PortableLogicTarget; @@ -64,6 +71,11 @@ export interface KernSemanticIrContract { readonly fixtureCount: number; } +export interface KernSemanticValidationSummary { + readonly total: number; + readonly byRule: Readonly>; +} + export interface KernSemanticSubstrate { readonly schemaVersion: 1; readonly generatedBy: 'kern-semantic-substrate'; @@ -79,11 +91,15 @@ export interface KernSemanticSubstrate { readonly portablePrimitives: readonly KernSemanticPrimitive[]; readonly stdlibOperations: readonly KernSemanticStdlibOperation[]; readonly irContracts: readonly KernSemanticIrContract[]; + readonly classFacts?: ClassSemanticFacts; + readonly classValidationSummary?: KernSemanticValidationSummary; } export interface BuildKernSemanticSubstrateOptions { readonly source?: KernSemanticSubstrateSource; readonly irContracts?: ReadonlyMap; + readonly documentClasses?: IRNode | readonly IRNode[]; + readonly includeClassValidationSummary?: boolean; } export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { @@ -132,6 +148,10 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp fixtureCount: contract.fixtureCount, })) : 
[], + ...(options.documentClasses ? { classFacts: collectClassSemanticFacts(options.documentClasses) } : {}), + ...(options.documentClasses && options.includeClassValidationSummary + ? { classValidationSummary: semanticValidationSummary(options.documentClasses) } + : {}), }; } @@ -212,6 +232,18 @@ function normalizeReturns(returns: CoreOperationReturns): readonly string[] { return typeof returns === 'string' ? [returns] : [...returns]; } +function semanticValidationSummary(root: IRNode | readonly IRNode[]): KernSemanticValidationSummary { + return summarizeSemanticViolations(validateClassSemantics(root)); +} + +function summarizeSemanticViolations(violations: readonly SemanticViolation[]): KernSemanticValidationSummary { + const byRule: Record = {}; + for (const violation of violations) { + byRule[violation.rule] = (byRule[violation.rule] ?? 0) + 1; + } + return { total: violations.length, byRule }; +} + const KERN_PRIMITIVE_NAMES: Record = { 'collection.has': 'includes', 'collection.count': 'count', diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index b07e6e8f..397309e3 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -30,6 +30,65 @@ export interface SemanticViolation { col?: number; } +export type ClassSemanticMemberKind = 'field' | 'method' | 'getter' | 'setter'; + +export type ClassSemanticOverrideStatus = 'compatible' | 'kind-mismatch' | 'arity-mismatch'; + +export interface ClassSemanticLocation { + readonly line: number; + readonly col: number; +} + +export interface ClassSemanticMemberFact { + readonly className: string; + readonly owner: string; + readonly name: string; + readonly kind: ClassSemanticMemberKind; + readonly static: boolean; + readonly arity: number; + readonly readable: boolean; + readonly writable: boolean; + readonly loc?: ClassSemanticLocation; +} + +export interface ClassSemanticClassFact { + readonly name: string; + readonly baseName?: 
string; + readonly hasConstructor: boolean; + readonly constructorCount: number; + readonly members: readonly ClassSemanticMemberFact[]; + readonly loc?: ClassSemanticLocation; +} + +export interface ClassSemanticInheritanceEdge { + readonly from: string; + readonly to: string; + readonly relation: 'extends'; + readonly resolved: boolean; + readonly builtin: boolean; +} + +export interface ClassSemanticOverrideFact { + readonly className: string; + readonly memberName: string; + readonly static: boolean; + readonly kind: ClassSemanticMemberKind; + readonly arity: number; + readonly baseClassName: string; + readonly baseKind: ClassSemanticMemberKind; + readonly baseArity: number; + readonly status: ClassSemanticOverrideStatus; + readonly loc?: ClassSemanticLocation; +} + +export interface ClassSemanticFacts { + readonly classes: readonly ClassSemanticClassFact[]; + readonly inheritanceEdges: readonly ClassSemanticInheritanceEdge[]; + readonly overrides: readonly ClassSemanticOverrideFact[]; + readonly unresolvedBases: readonly string[]; + readonly cycles: readonly (readonly string[])[]; +} + /** * Run semantic validation on an IR tree. * Returns an empty array when the tree is valid. @@ -41,6 +100,12 @@ export function validateSemantics(root: IRNode): SemanticViolation[] { return violations; } +export function validateClassSemantics(root: IRNode | readonly IRNode[]): SemanticViolation[] { + const violations: SemanticViolation[] = []; + validateClassGraphRoots(Array.isArray(root) ? root : [root], violations); + return violations; +} + // True when the *innermost* handler ancestor is opted into native body- // statement mode (`lang="kern"`). 
Body statements like `let`/`assign`/`do`/ // `if`/`try` nest freely inside that scope, so the let-parent rule has to @@ -446,6 +511,7 @@ type ClassMemberKind = 'field' | 'method' | 'getter' | 'setter'; interface ClassInfo { node: IRNode; + rootIndex: number; name: string; baseName?: string; members: ClassMemberInfo[]; @@ -454,6 +520,7 @@ interface ClassInfo { interface ClassMemberInfo { node: IRNode; + owner: string; name: string; kind: ClassMemberKind; static: boolean; @@ -479,21 +546,31 @@ const BODY_EXPRESSION_PROPS = [ ] as const; function validateClassGraph(root: IRNode, violations: SemanticViolation[]): void { - const classes = collectClassInfos(root); + validateClassGraphRoots([root], violations); +} + +function validateClassGraphRoots(roots: readonly IRNode[], violations: SemanticViolation[]): void { + const classesByRoot = roots.map((root, rootIndex) => collectClassInfos(root, rootIndex)); + const classes = classesByRoot.flat(); if (classes.length === 0) return; const classByName = new Map(); - const visibleNames = collectVisibleClassBaseNames(root); + const declaredClassNames = new Set(); for (const info of classes) { const prev = classByName.get(info.name); if (!prev) { classByName.set(info.name, info); } - visibleNames.add(info.name); + declaredClassNames.add(info.name); } + const visibleNamesByRoot = roots.map((root) => { + const visibleNames = collectVisibleClassBaseNames(root); + for (const className of declaredClassNames) visibleNames.add(className); + return visibleNames; + }); for (const info of classes) { - validateClassBaseReference(info, visibleNames, violations); + validateClassBaseReference(info, visibleNamesByRoot[info.rootIndex] ?? 
declaredClassNames, violations); validateClassConstructors(info, violations); validateClassMemberConflicts(info, violations); validateClassSuperUsage(info, violations); @@ -504,7 +581,7 @@ function validateClassGraph(root: IRNode, violations: SemanticViolation[]): void validateClassShapeUsage(classes, classByName, violations); } -function collectClassInfos(root: IRNode): ClassInfo[] { +function collectClassInfos(root: IRNode, rootIndex = 0): ClassInfo[] { const out: ClassInfo[] = []; walkSemanticTree(root, (node) => { if (node.type !== 'class') return; @@ -512,16 +589,17 @@ function collectClassInfos(root: IRNode): ClassInfo[] { if (!name) return; out.push({ node, + rootIndex, name, baseName: classBaseName(node.props?.extends), - members: collectClassMembers(node), + members: collectClassMembers(node, name), constructors: (node.children ?? []).filter((child) => child.type === 'constructor'), }); }); return out; } -function collectClassMembers(node: IRNode): ClassMemberInfo[] { +function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { const members: ClassMemberInfo[] = []; for (const child of node.children ?? []) { if (!isClassMemberNode(child)) continue; @@ -529,6 +607,7 @@ function collectClassMembers(node: IRNode): ClassMemberInfo[] { if (!name) continue; members.push({ node: child, + owner, name, kind: child.type, static: isTrueFlag(child.props?.static), @@ -538,6 +617,133 @@ function collectClassMembers(node: IRNode): ClassMemberInfo[] { return members; } +export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): ClassSemanticFacts { + const roots = Array.isArray(root) ? 
root : [root]; + const classes = roots.flatMap((candidate, rootIndex) => collectClassInfos(candidate, rootIndex)); + const classByName = new Map(); + for (const info of classes) { + if (!classByName.has(info.name)) classByName.set(info.name, info); + } + const visibleNamesByRoot = roots.map((candidate) => collectVisibleClassBaseNames(candidate)); + + const inheritanceEdges: ClassSemanticInheritanceEdge[] = []; + const unresolvedBases = new Set(); + for (const info of classes) { + if (!info.baseName) continue; + const resolved = + classByName.has(info.baseName) || (visibleNamesByRoot[info.rootIndex] ?? BUILTIN_CLASS_BASES).has(info.baseName); + const builtin = BUILTIN_CLASS_BASES.has(info.baseName); + inheritanceEdges.push({ + from: info.name, + to: info.baseName, + relation: 'extends', + resolved, + builtin, + }); + if (!resolved) unresolvedBases.add(info.baseName); + } + + return { + classes: classes.map(classSemanticFact), + inheritanceEdges, + overrides: collectClassOverrideFacts(classes, classByName), + unresolvedBases: [...unresolvedBases].sort(), + cycles: collectClassCycleFacts(classes, classByName), + }; +} + +function classSemanticFact(info: ClassInfo): ClassSemanticClassFact { + return { + name: info.name, + ...(info.baseName ? { baseName: info.baseName } : {}), + hasConstructor: info.constructors.length > 0, + constructorCount: info.constructors.length, + members: info.members.map(classMemberSemanticFact), + ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), + }; +} + +function classMemberSemanticFact(member: ClassMemberInfo): ClassSemanticMemberFact { + return { + className: member.owner, + owner: member.owner, + name: member.name, + kind: member.kind, + static: member.static, + arity: member.arity, + readable: member.kind === 'field' || member.kind === 'getter' || member.kind === 'method', + writable: member.kind === 'field' || member.kind === 'setter', + ...(member.node.loc ? 
{ loc: semanticLocation(member.node) } : {}), + }; +} + +function collectClassOverrideFacts( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, +): ClassSemanticOverrideFact[] { + const overrides: ClassSemanticOverrideFact[] = []; + for (const info of classes) { + for (const member of info.members) { + const baseMember = findBaseMember(info, member, classByName); + if (!baseMember) continue; + overrides.push({ + className: info.name, + memberName: member.name, + static: member.static, + kind: member.kind, + arity: member.arity, + baseClassName: baseMember.owner, + baseKind: baseMember.kind, + baseArity: baseMember.arity, + status: classOverrideStatus(member, baseMember), + ...(member.node.loc ? { loc: semanticLocation(member.node) } : {}), + }); + } + } + return overrides; +} + +function classOverrideStatus(member: ClassMemberInfo, baseMember: ClassMemberInfo): ClassSemanticOverrideStatus { + if (!sameOverrideKind(member, baseMember)) return 'kind-mismatch'; + if (member.kind === 'method' && baseMember.kind === 'method' && member.arity !== baseMember.arity) { + return 'arity-mismatch'; + } + return 'compatible'; +} + +function collectClassCycleFacts( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, +): readonly (readonly string[])[] { + const cycles: string[][] = []; + const emitted = new Set(); + for (const info of classes) { + const path: string[] = []; + const seen = new Set(); + let current: ClassInfo | undefined = info; + while (current) { + if (seen.has(current.name)) { + const cycleStart = path.indexOf(current.name); + const cycleNames = path.slice(cycleStart); + const cycleKey = normalizedCycleKey(cycleNames); + if (!emitted.has(cycleKey)) { + emitted.add(cycleKey); + cycles.push([...cycleNames, current.name]); + } + break; + } + seen.add(current.name); + path.push(current.name); + current = current.baseName ? 
classByName.get(current.baseName) : undefined; + } + } + return cycles; +} + +function semanticLocation(node: IRNode): ClassSemanticLocation | undefined { + return node.loc ? { line: node.loc.line, col: node.loc.col } : undefined; +} + function isClassMemberNode(node: IRNode): node is IRNode & { type: ClassMemberKind } { return node.type === 'field' || node.type === 'method' || node.type === 'getter' || node.type === 'setter'; } diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index a7e44065..a51d2ccd 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -1,10 +1,16 @@ import { buildKernSemanticSubstrate, + collectClassSemanticFacts, lookupSemanticPrimitive, makeEnv, type NodeContract, semanticPrimitiveSupportSummary, } from '../src/index.js'; +import { parseDocumentWithDiagnostics } from '../src/parser.js'; + +function parseRoot(source: string) { + return parseDocumentWithDiagnostics(source).root; +} describe('KERN semantic substrate', () => { test('exports core runtime contracts as reviewable semantic operations', () => { @@ -42,6 +48,192 @@ describe('KERN semantic substrate', () => { ); }); + test('keeps document class facts opt-in for existing review consumers', () => { + const substrate = buildKernSemanticSubstrate(); + + expect(Object.keys(substrate)).toEqual([ + 'schemaVersion', + 'generatedBy', + 'source', + 'coreTypes', + 'coreGraphEdges', + 'portablePrimitives', + 'stdlibOperations', + 'irContracts', + ]); + expect(Object.hasOwn(substrate, 'classFacts')).toBe(false); + expect(Object.hasOwn(substrate, 'classValidationSummary')).toBe(false); + }); + + test('exports document class member inheritance and override facts when requested', () => { + const root = parseRoot( + [ + 'class name=Base', + ' field name=id type=string', + ' method name=load returns=string', + ' param name=id type=string', + ' getter name=label returns=string', + 
'class name=Derived extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' method name=load returns=string', + ' param name=id type=string', + ' param name=extra type=string', + ' field name=count type=number static=true', + ' setter name=label', + ' param name=value type=string', + ].join('\n'), + ); + + const substrate = buildKernSemanticSubstrate({ documentClasses: root }); + + expect(substrate.classFacts?.inheritanceEdges).toEqual([ + { from: 'Derived', to: 'Base', relation: 'extends', resolved: true, builtin: false }, + ]); + expect(substrate.classFacts?.unresolvedBases).toEqual([]); + + const derived = substrate.classFacts?.classes.find((candidate) => candidate.name === 'Derived'); + expect(derived).toEqual( + expect.objectContaining({ + name: 'Derived', + baseName: 'Base', + hasConstructor: true, + constructorCount: 1, + }), + ); + expect(derived?.members).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + owner: 'Derived', + name: 'count', + kind: 'field', + static: true, + arity: 0, + readable: true, + writable: true, + }), + expect.objectContaining({ + owner: 'Derived', + name: 'label', + kind: 'setter', + static: false, + arity: 1, + readable: false, + writable: true, + }), + ]), + ); + + expect(substrate.classFacts?.overrides).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'Derived', + memberName: 'load', + baseClassName: 'Base', + baseKind: 'method', + kind: 'method', + arity: 2, + baseArity: 1, + status: 'arity-mismatch', + }), + expect.objectContaining({ + className: 'Derived', + memberName: 'label', + baseClassName: 'Base', + baseKind: 'getter', + kind: 'setter', + status: 'compatible', + }), + ]), + ); + }); + + test('reports unresolved bases and inheritance cycles as class facts', () => { + const facts = collectClassSemanticFacts( + parseRoot( + ['class name=UsesExternal extends=ExternalBase', 'class name=A extends=B', 'class name=B extends=A'].join('\n'), + ), + ); + + 
expect(facts.unresolvedBases).toEqual(['ExternalBase']); + expect(facts.inheritanceEdges).toEqual( + expect.arrayContaining([ + { from: 'UsesExternal', to: 'ExternalBase', relation: 'extends', resolved: false, builtin: false }, + { from: 'A', to: 'B', relation: 'extends', resolved: true, builtin: false }, + ]), + ); + expect(facts.cycles).toEqual([['A', 'B', 'A']]); + }); + + test('resolves imported and cross-root class bases consistently with validation', () => { + const importedFacts = collectClassSemanticFacts( + parseRoot(['import from="./base" names=ExternalBase', 'class name=UsesExternal extends=ExternalBase'].join('\n')), + ); + expect(importedFacts.unresolvedBases).toEqual([]); + expect(importedFacts.inheritanceEdges).toEqual([ + { from: 'UsesExternal', to: 'ExternalBase', relation: 'extends', resolved: true, builtin: false }, + ]); + + const importedElsewhere = collectClassSemanticFacts([ + parseRoot('import from="./base" names=ExternalBase'), + parseRoot('class name=Leaky extends=ExternalBase'), + ]); + expect(importedElsewhere.unresolvedBases).toEqual(['ExternalBase']); + expect(importedElsewhere.inheritanceEdges).toEqual([ + { from: 'Leaky', to: 'ExternalBase', relation: 'extends', resolved: false, builtin: false }, + ]); + + const baseRoot = parseRoot('class name=Base'); + const childRoot = parseRoot('class name=Child extends=Base'); + const substrate = buildKernSemanticSubstrate({ + documentClasses: [baseRoot, childRoot], + includeClassValidationSummary: true, + }); + + expect(substrate.classFacts?.inheritanceEdges).toEqual([ + { from: 'Child', to: 'Base', relation: 'extends', resolved: true, builtin: false }, + ]); + expect(substrate.classValidationSummary?.byRule['class-extends-unknown']).toBeUndefined(); + + const invalidSubstrate = buildKernSemanticSubstrate({ + documentClasses: [baseRoot, parseRoot('class name=Broken extends=Missing')], + includeClassValidationSummary: true, + }); + 
expect(invalidSubstrate.classValidationSummary?.byRule['class-extends-unknown']).toBe(1); + }); + + test('can summarize class validation rules alongside class facts', () => { + const root = parseRoot( + [ + 'class name=Base', + 'class name=Bad extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + 'machine name=Flow', + ' transition name=go from=Missing to=Missing', + ].join('\n'), + ); + + const substrate = buildKernSemanticSubstrate({ + documentClasses: root, + includeClassValidationSummary: true, + }); + + expect(substrate.classFacts?.classes.find((candidate) => candidate.name === 'Bad')?.constructorCount).toBe(2); + expect(substrate.classValidationSummary?.total).toBeGreaterThan(0); + expect(substrate.classValidationSummary?.byRule).toEqual( + expect.objectContaining({ + 'class-single-constructor-only': 1, + }), + ); + expect(substrate.classValidationSummary?.byRule['machine-transition-from']).toBeUndefined(); + }); + test('exports portable review primitives as stable query objects', () => { const substrate = buildKernSemanticSubstrate(); const clamp = lookupSemanticPrimitive(substrate, 'number.clamp'); From 6ea95d18fefc0a34f8855ca53a92b4c6b04be184 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 16:24:51 +0200 Subject: [PATCH 16/46] test(core): expand class object conformance --- examples/native-test/conformance-classes.kern | 63 +++++++++++++++++++ .../native-test/conformance-classes.test.kern | 19 ++++++ packages/test/src/index.ts | 24 ++++--- 3 files changed, 96 insertions(+), 10 deletions(-) diff --git a/examples/native-test/conformance-classes.kern b/examples/native-test/conformance-classes.kern index a1e84ff1..3a5dffad 100644 --- a/examples/native-test/conformance-classes.kern +++ b/examples/native-test/conformance-classes.kern @@ -1,9 +1,16 @@ class name=UserDirectory export=true field name=items type="object[]" private=true value={{ [] }} + field 
name=kind type=string static=true value={{ "directory" }} + field name=labelStore type=string static=true value={{ "UserDirectory" }} + field name=empty type=object static=true value={{ new UserDirectory([]) }} constructor param name=initial type="object[]" value={{ [] }} handler assign target="this.items" value="initial" + method name=make static=true returns=object + param name=users type="object[]" + handler + return value="new UserDirectory(users)" method name=list returns="object[]" handler return value="this.items" @@ -16,6 +23,41 @@ class name=UserDirectory export=true getter name=count returns=number handler return value="this.items.length" + getter name=label static=true returns=string + handler + return value="this.labelStore" + setter name=label static=true + param name=next type=string + handler + assign target="this.labelStore" value="next" + setter name=users + param name=next type="object[]" + handler + assign target="this.items" value="next" + +class name=AuditedDirectory extends=UserDirectory export=true + field name=audit type=string value={{ "" }} + field name=kind type=string static=true value={{ "audited" }} + constructor + param name=initial type="object[]" value={{ [] }} + param name=audit type=string value={{ "sync" }} + handler + do value="super(initial)" + assign target="this.audit" value="audit" + method name=describe returns=string + handler + return value="`${this.audit}:${super.active().length}:${this.count}`" + getter name=summary returns=object + handler + return value="{ total: this.count, active: super.active().length, audit: this.audit }" + getter name=label static=true returns=string + handler + return value="`audited:${super.label}`" + method name=makeAudited static=true returns=object + param name=users type="object[]" + param name=audit type=string value={{ "sync" }} + handler + return value="new AuditedDirectory(users, audit)" fn name=makeDirectory returns=object param name=users type="object[]" @@ -28,4 +70,25 @@ fn 
name=activeNames returns="string[]" let name=directory value="makeDirectory(users)" return value="directory.names()" +fn name=setDirectoryLabel returns=string + param name=label type=string + handler + assign target="UserDirectory.label" value="label" + return value="UserDirectory.label" + +fn name=auditedDescription returns=string + param name=users type="object[]" + param name=audit type=string + handler + return value="AuditedDirectory.makeAudited(users, audit).describe()" + +fn name=resetAuditedCount returns=number + param name=users type="object[]" + param name=next type="object[]" + handler + let name=directory value="new AuditedDirectory(users, 'reset')" + assign target="directory.users" value="next" + return value="directory.count" + derive name=emptyDirectoryCount expr={{new UserDirectory([]).count}} +derive name=emptyAuditedSummary expr={{new AuditedDirectory([], "empty").summary}} diff --git a/examples/native-test/conformance-classes.test.kern b/examples/native-test/conformance-classes.test.kern index 29fc73f8..3e03b870 100644 --- a/examples/native-test/conformance-classes.test.kern +++ b/examples/native-test/conformance-classes.test.kern @@ -1,5 +1,6 @@ test name="Class and function conformance" target="./conformance-classes.kern" fixture name=sampleUsers value={{[{ id: "u1", name: "Ada", active: true }, { id: "u2", name: "Grace", active: false }, { id: "u3", name: "Lin", active: true }]}} + fixture name=nextUsers value={{[{ id: "u4", name: "Katherine", active: true }]}} it name="class target stays schema and semantic valid" expect no=schemaViolations @@ -10,17 +11,35 @@ test name="Class and function conformance" target="./conformance-classes.kern" expect node=class name=UserDirectory child=constructor expect node=class name=UserDirectory child=method childName=active expect node=class name=UserDirectory child=getter childName=count + expect node=class name=UserDirectory child=field childName=kind + expect node=class name=AuditedDirectory child=method 
childName=describe + expect node=class name=AuditedDirectory child=getter childName=summary expect node=fn name=activeNames child=param count=1 it name="classes execute before codegen" expect expr={{new UserDirectory(sampleUsers).count}} equals=3 expect expr={{new UserDirectory(sampleUsers).active().map((user) => user.id)}} equals={{["u1", "u3"]}} expect expr={{new UserDirectory(sampleUsers).names()}} equals={{["Ada", "Lin"]}} + expect expr={{new AuditedDirectory(sampleUsers, "sync").summary}} equals={{({ total: 3, active: 2, audit: "sync" })}} + expect expr={{new AuditedDirectory(sampleUsers, "sync").describe()}} equals={{"sync:2:3"}} + + it name="static members and inherited dispatch execute before codegen" + expect expr={{UserDirectory.kind}} equals={{"directory"}} + expect expr={{UserDirectory.label}} equals={{"UserDirectory"}} + expect expr={{UserDirectory.empty.count}} equals=0 + expect expr={{UserDirectory.make(sampleUsers).count}} equals=3 + expect expr={{AuditedDirectory.kind}} equals={{"audited"}} + expect expr={{AuditedDirectory.label}} equals={{"audited:UserDirectory"}} + expect expr={{AuditedDirectory.makeAudited(sampleUsers, "sync").describe()}} equals={{"sync:2:3"}} it name="functions compose class behavior" expect fn=makeDirectory with=sampleUsers equals={{new UserDirectory(sampleUsers)}} expect fn=activeNames with=sampleUsers equals={{["Ada", "Lin"]}} + expect fn=setDirectoryLabel with={{"Directory"}} equals={{"Directory"}} + expect fn=auditedDescription args={{[sampleUsers, "sync"]}} equals={{"sync:2:3"}} + expect fn=resetAuditedCount args={{[sampleUsers, nextUsers]}} equals=1 expect derive=emptyDirectoryCount equals=0 + expect derive=emptyAuditedSummary equals={{({ total: 0, active: 0, audit: "empty" })}} it name="classes and functions reach core codegen" expect no=codegenErrors diff --git a/packages/test/src/index.ts b/packages/test/src/index.ts index 9dbfa0ad..d521e382 100644 --- a/packages/test/src/index.ts +++ b/packages/test/src/index.ts @@ 
-2977,15 +2977,16 @@ function runtimeHandlerLines(node: IRNode, spaces = 4): string[] { return code.split('\n').map((line) => `${prefix}${line}`); } -function runtimeClassFieldInitializers(node: IRNode): string[] { +function runtimeClassFieldLines(node: IRNode): string[] | undefined { const lines: string[] = []; for (const field of getChildren(node, 'field')) { const props = getProps(field); - if (isTruthy(props.static)) continue; const name = str(props.name); - if (!isRuntimeBindingName(name)) return []; + if (!isRuntimeBindingName(name)) return undefined; const value = exprPropToRuntimeSource(field, 'value') || rawPropToRuntimeSource(field, 'default'); - if (value) lines.push(` this.${name} = (${value});`); + if (!value) continue; + const staticKw = isTruthy(props.static) ? 'static ' : ''; + lines.push(` ${staticKw}${name} = (${value});`); } return lines; } @@ -3034,17 +3035,20 @@ function runtimeClassSetterLines(node: IRNode): string[] | undefined { function runtimeClassExpr(node: IRNode): string { const name = str(getProps(node).name); if (!isRuntimeBindingName(name)) return ''; + const baseName = str(getProps(node).extends); + if (baseName && !isRuntimeBindingName(baseName)) return ''; const ctorNode = getChildren(node, 'constructor')[0]; const ctorParams = ctorNode ? runtimeParamNames(ctorNode) : []; if (!ctorParams.every(isRuntimeBindingName)) return ''; - const fieldInitializers = runtimeClassFieldInitializers(node); - const lines = ['(class {']; - if (ctorNode || fieldInitializers.length > 0) { + const fieldLines = runtimeClassFieldLines(node); + if (!fieldLines) return ''; + const lines = [`(class ${name}${baseName ? 
` extends ${baseName}` : ''} {`]; + lines.push(...fieldLines); + if (ctorNode) { lines.push(` constructor(${ctorParams.join(', ')}) {`); - lines.push(...fieldInitializers); - if (ctorNode) lines.push(...runtimeHandlerLines(ctorNode)); + lines.push(...runtimeHandlerLines(ctorNode)); lines.push(' }'); } @@ -4429,7 +4433,7 @@ function orderRuntimeBindings(bindings: RuntimeBinding[], entryExpr: string): Ru visiting.add(name); stack.push(name); for (const dep of depsIn(binding.expr)) { - if (dep === name && binding.kind === 'fn') continue; + if (dep === name && (binding.kind === 'fn' || binding.kind === 'class')) continue; const error = visit(dep); if (error) return error; } From eebf2942154509bf790e1e1b01ad25cec02835bb Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 17:02:39 +0200 Subject: [PATCH 17/46] test(core): add object protocol negative conformance --- .../native-test/conformance-bad-cases.kern | 105 ++++++++++++++++++ .../conformance-bad-cases.test.kern | 17 +++ packages/test/src/index.ts | 73 ++++++------ packages/test/tests/native-test.test.ts | 62 +++++++++++ 4 files changed, 224 insertions(+), 33 deletions(-) diff --git a/examples/native-test/conformance-bad-cases.kern b/examples/native-test/conformance-bad-cases.kern index 412b3d20..fa25261d 100644 --- a/examples/native-test/conformance-bad-cases.kern +++ b/examples/native-test/conformance-bad-cases.kern @@ -61,3 +61,108 @@ fn name=loadRemote handler <<< return fetch(url); >>> + +class name=UnknownBase extends=MissingProtocolBase + +class name=CycleA extends=CycleB +class name=CycleB extends=CycleA + +class name=MultiCtor + constructor + handler + do value="1" + constructor + handler + do value="2" + +class name=MemberConflict + field name=value type=number + method name=value returns=number + handler + return value=1 + +class name=PlainSuper + method name=kind returns=string + handler + return value="super.kind()" + +class name=ProtocolBase + field name=id type=string + method name=load 
returns=string + param name=id type=string + handler + return value=id + field name=status type=string + +class name=MissingSuper extends=ProtocolBase + constructor + handler + do value=1 + +class name=ThisBeforeSuper extends=ProtocolBase + field name=label type=string + constructor + handler + assign target="this.label" value="'early'" + do value="super('u1')" + +class name=DoubleSuper extends=ProtocolBase + constructor + handler + do value="super('u1')" + do value="super('u2')" + +class name=ConditionalSuper extends=ProtocolBase + constructor + param name=ready type=boolean + handler + if cond=ready + do value="super('u1')" + +class name=ClosedShape + field name=known type=number + method name=readMissing returns=number + handler + return value="this.missing" + method name=writeMissing returns=void + handler + assign target="this.missing" value=2 + +class name=ShapeBase + field name=known type=number + +class name=ShapeChild extends=ShapeBase + method name=readMissingSuper returns=number + handler + return value="super.missing" + +class name=StaticShape + field name=instanceOnly type=number + field name=staticOnly type=number static=true + method name=badInstance returns=number + handler + return value="this.staticOnly" + method name=badStatic returns=number static=true + handler + return value="this.instanceOnly" + +class name=AccessShape + setter name=writeOnly + param name=next type=number + handler + do value=next + getter name=readOnly returns=number + handler + return value=1 + method name=misuseAccessors returns=number + handler + assign target="this.readOnly" value=2 + return value="this.writeOnly" + +class name=OverrideShape extends=ProtocolBase + method name=load returns=string + handler + return value="'missing id'" + method name=status returns=string + handler + return value="'ok'" diff --git a/examples/native-test/conformance-bad-cases.test.kern b/examples/native-test/conformance-bad-cases.test.kern index 482f73dc..ab8b8c51 100644 --- 
a/examples/native-test/conformance-bad-cases.test.kern +++ b/examples/native-test/conformance-bad-cases.test.kern @@ -17,3 +17,20 @@ test name="Bad KERN conformance" target="./conformance-bad-cases.kern" coverage= expect has=routePathParams count=1 matches="id" expect has=effectWithoutCleanup count=1 matches="cleanup" expect has=unrecoveredAsync count=1 matches="recover" + expect has=semanticViolations matches="extends unknown base 'MissingProtocolBase'" + expect has=semanticViolations matches="Class inheritance cycle detected: CycleA -> CycleB -> CycleA" + expect has=semanticViolations matches="declares more than one constructor" + expect has=semanticViolations matches="conflicting instance member 'value'" + expect has=semanticViolations matches="uses .*super.* does not extend a base class" + expect has=semanticViolations matches="constructor does not call .*super" + expect has=semanticViolations matches="member access before .*super" + expect has=semanticViolations matches="calls .*super.* more than once" + expect has=semanticViolations matches="must call .*super.* definitely on every path" + expect has=semanticViolations matches="this.missing is not declared on the instance class shape" + expect has=semanticViolations matches="super.missing is not declared on the instance class shape" + expect has=semanticViolations matches="this.staticOnly is not declared on the instance class shape" + expect has=semanticViolations matches="this.instanceOnly is not declared on the static class shape" + expect has=semanticViolations matches="reads setter-only this.writeOnly" + expect has=semanticViolations matches="writes non-writable this.readOnly" + expect has=semanticViolations matches="overrides base field with method" + expect has=semanticViolations matches="overrides a base method with 1 parameter" diff --git a/packages/test/src/index.ts b/packages/test/src/index.ts index d521e382..21d2849b 100644 --- a/packages/test/src/index.ts +++ b/packages/test/src/index.ts @@ -6111,6 
+6111,28 @@ function nativeInvariantFindings( return { message: `Unsupported native invariant: ${propName}=${str(props.has) || str(props.no)}` }; } +function evaluateFindingsMatch( + invariant: string, + pattern: string, + findings: readonly string[], +): { passed: boolean; message?: string } { + const message = findings.join('; '); + try { + const regex = new RegExp(pattern); + return findings.some((finding) => regex.test(finding)) + ? { passed: true } + : { + passed: false, + message: `Expected ${invariant || ''} findings to match /${pattern}/, got: ${message || ''}`, + }; + } catch (error) { + return { + passed: false, + message: `Native has assertion has invalid matches regex: ${error instanceof Error ? error.message : String(error)}`, + }; + } +} + function evaluateHasInvariant( node: IRNode, target: LoadedKernDocument, @@ -6143,21 +6165,7 @@ function evaluateHasInvariant( if ('matches' in props) { const pattern = runtimePatternValue(node, 'matches') || ''; - const message = findings.join('; '); - try { - const regex = new RegExp(pattern); - return regex.test(message) - ? { passed: true } - : { - passed: false, - message: `Expected ${invariant || ''} findings to match /${pattern}/, got: ${message || ''}`, - }; - } catch (error) { - return { - passed: false, - message: `Native has assertion has invalid matches regex: ${error instanceof Error ? 
error.message : String(error)}`, - }; - } + return evaluateFindingsMatch(invariant, pattern, findings); } return { passed: true }; @@ -6167,6 +6175,23 @@ function evaluateHasInvariant( const blocking = targetBlockingMessage(target); if (blocking) return { passed: false, message: blocking }; } + + if ('matches' in props) { + const collected = nativeInvariantFindings(node, target, context); + if (collected.message) return { passed: false, message: collected.message }; + + const findings = collected.findings || []; + if (findings.length === 0) { + return { + passed: false, + message: `Expected target to have ${invariant || ''}, but none was found`, + }; + } + + const pattern = runtimePatternValue(node, 'matches') || ''; + return evaluateFindingsMatch(invariant, pattern, findings); + } + const evaluated = evaluateNoInvariant(nodeWithProps(node, { ...props, no: invariant }), target, context); if (isAssertionConfigurationFailure(evaluated.message)) { @@ -6180,24 +6205,6 @@ function evaluateHasInvariant( }; } - if ('matches' in props) { - const pattern = runtimePatternValue(node, 'matches') || ''; - try { - const regex = new RegExp(pattern); - return regex.test(evaluated.message || '') - ? { passed: true } - : { - passed: false, - message: `Expected ${invariant || ''} message to match /${pattern}/, got: ${evaluated.message || ''}`, - }; - } catch (error) { - return { - passed: false, - message: `Native has assertion has invalid matches regex: ${error instanceof Error ? 
error.message : String(error)}`, - }; - } - } - return { passed: true }; } diff --git a/packages/test/tests/native-test.test.ts b/packages/test/tests/native-test.test.ts index 40ce986b..45f79f2f 100644 --- a/packages/test/tests/native-test.test.ts +++ b/packages/test/tests/native-test.test.ts @@ -148,6 +148,68 @@ describe('native kern test runner', () => { expect(summary.results.map((result) => result.ruleId)).toEqual(['has:duplicatenames', 'has:derivecycles']); }); + test('matches positive semantic invariant assertions against all findings', () => { + writeFileSync( + join(tmpDir, 'bad-semantics.kern'), + [ + 'class name=UnknownBase extends=MissingBase', + 'class name=MultiCtor', + ' constructor', + ' handler', + ' do value=1', + ' constructor', + ' handler', + ' do value=2', + ].join('\n'), + ); + const testFile = join(tmpDir, 'bad-semantics.test.kern'); + writeFileSync( + testFile, + [ + 'test name="Bad semantic target" target="./bad-semantics.kern"', + ' it name="matches non-first semantic violation"', + ' expect has=semanticViolations matches="declares more than one constructor"', + ].join('\n'), + ); + + const summary = runNativeKernTests(testFile); + + expect(summary.failed).toBe(0); + expect(summary.passed).toBe(1); + expect(summary.results[0].ruleId).toBe('has:semanticviolations'); + }); + + test('does not match positive invariant regexes across unrelated findings', () => { + writeFileSync( + join(tmpDir, 'bad-semantics-span.kern'), + [ + 'class name=UnknownBase extends=MissingBase', + 'class name=MultiCtor', + ' constructor', + ' handler', + ' do value=1', + ' constructor', + ' handler', + ' do value=2', + ].join('\n'), + ); + const testFile = join(tmpDir, 'bad-semantics-span.test.kern'); + writeFileSync( + testFile, + [ + 'test name="Bad semantic target" target="./bad-semantics-span.kern"', + ' it name="does not span diagnostics"', + ' expect has=semanticViolations matches="MissingBase.*more than one constructor"', + ].join('\n'), + ); + + const summary 
= runNativeKernTests(testFile); + + expect(summary.failed).toBe(1); + expect(summary.results[0].ruleId).toBe('has:semanticviolations'); + expect(summary.results[0].message).toContain('findings to match'); + }); + test('fails positive invariant assertions with incorrect expected counts', () => { writeFileSync( join(tmpDir, 'bad-count.kern'), From 732a8f96ad3ac615f91ed012020357b3768b9f71 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 18:21:17 +0200 Subject: [PATCH 18/46] feat(core): add rag language contracts --- .../conformance-rag-bad-cases.kern | 16 + .../conformance-rag-bad-cases.test.kern | 17 + examples/native-test/conformance-rag.kern | 11 + .../native-test/conformance-rag.test.kern | 12 + packages/core/src/codegen-core.ts | 21 + packages/core/src/index.ts | 18 +- packages/core/src/schema.ts | 108 +++ packages/core/src/semantic-substrate.ts | 15 + packages/core/src/semantic-validator.ts | 796 ++++++++++++++++++ packages/core/src/spec.ts | 9 + packages/core/tests/rag-semantics.test.ts | 241 ++++++ packages/core/tests/schema-validation.test.ts | 43 + .../core/tests/semantic-substrate.test.ts | 55 ++ 13 files changed, 1361 insertions(+), 1 deletion(-) create mode 100644 examples/native-test/conformance-rag-bad-cases.kern create mode 100644 examples/native-test/conformance-rag-bad-cases.test.kern create mode 100644 examples/native-test/conformance-rag.kern create mode 100644 examples/native-test/conformance-rag.test.kern create mode 100644 packages/core/tests/rag-semantics.test.ts diff --git a/examples/native-test/conformance-rag-bad-cases.kern b/examples/native-test/conformance-rag-bad-cases.kern new file mode 100644 index 00000000..e989e2e8 --- /dev/null +++ b/examples/native-test/conformance-rag-bad-cases.kern @@ -0,0 +1,16 @@ +corpus name=Docs + source name=manuals uri="./docs/**/*.md" + chunking source=missing strategy=semantic maxTokens=64 overlap=64 + +embed name=BadEmbedding corpus=Missing dims=0 +embed name=OtherEmbedding corpus=Docs + +corpus 
name=OtherDocs + +retriever name=BadRetriever corpus=Missing embed=MissingEmbed topK=0 minScore=1.1 +retriever name=MismatchRetriever corpus=OtherDocs embed=OtherEmbedding + +rag name=BadRag retriever=MissingRetriever citations=true + +grounding rag=MissingRag maxContext=0 +ragEval rag=MissingRag threshold=1.1 diff --git a/examples/native-test/conformance-rag-bad-cases.test.kern b/examples/native-test/conformance-rag-bad-cases.test.kern new file mode 100644 index 00000000..c6fb8223 --- /dev/null +++ b/examples/native-test/conformance-rag-bad-cases.test.kern @@ -0,0 +1,17 @@ +test name="Bad RAG conformance" target="./conformance-rag-bad-cases.kern" coverage=false + it name="rag semantic assertions prove detector coverage" + expect has=semanticViolations matches="RAG chunking references unknown source 'missing'" + expect has=semanticViolations matches="RAG chunking overlap must be smaller than maxTokens" + expect has=semanticViolations matches="RAG embed 'BadEmbedding' references unknown corpus 'Missing'" + expect has=semanticViolations matches="RAG embed dims must be a positive integer" + expect has=semanticViolations matches="RAG retriever 'BadRetriever' references unknown corpus 'Missing'" + expect has=semanticViolations matches="RAG retriever 'BadRetriever' references unknown embed 'MissingEmbed'" + expect has=semanticViolations matches="RAG retriever topK must be a positive integer" + expect has=semanticViolations matches="RAG retriever minScore must be between 0 and 1" + expect has=semanticViolations matches="RAG retriever 'MismatchRetriever' uses embed 'OtherEmbedding'" + expect has=semanticViolations matches="RAG pipeline 'BadRag' references unknown retriever 'MissingRetriever'" + expect has=semanticViolations matches="RAG pipeline 'BadRag' requires citations" + expect has=semanticViolations matches="RAG grounding references unknown rag 'MissingRag'" + expect has=semanticViolations matches="RAG grounding maxContext must be a positive integer" + expect 
has=semanticViolations matches="RAG eval references unknown rag 'MissingRag'" + expect has=semanticViolations matches="RAG eval threshold must be between 0 and 1" diff --git a/examples/native-test/conformance-rag.kern b/examples/native-test/conformance-rag.kern new file mode 100644 index 00000000..652d877f --- /dev/null +++ b/examples/native-test/conformance-rag.kern @@ -0,0 +1,11 @@ +corpus name=Docs title="Support docs" + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + chunking source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens + +embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine + +retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72 + +rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true + grounding name=StrictGrounding requireCitations=true policy=strict maxContext=6000 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 diff --git a/examples/native-test/conformance-rag.test.kern b/examples/native-test/conformance-rag.test.kern new file mode 100644 index 00000000..89b7ac9c --- /dev/null +++ b/examples/native-test/conformance-rag.test.kern @@ -0,0 +1,12 @@ +test name="RAG language conformance" target="./conformance-rag.kern" coverage=false + it name="rag declarations stay schema and semantic valid" + expect no=schemaViolations + expect no=semanticViolations + + it name="rag declarations keep their KERN shape" + expect node=corpus name=Docs child=source childName=manuals + expect node=corpus name=Docs child=chunking + expect node=embed name=DocsEmbedding prop=corpus is=Docs + expect node=retriever name=DocsSearch prop=topK is=8 + expect node=rag name=AnswerDocs child=grounding childName=StrictGrounding + expect node=rag name=AnswerDocs child=ragEval childName=Faithfulness diff --git a/packages/core/src/codegen-core.ts b/packages/core/src/codegen-core.ts index 4a3e962e..7d49750f 100644 --- 
a/packages/core/src/codegen-core.ts +++ b/packages/core/src/codegen-core.ts @@ -721,6 +721,15 @@ export const CORE_NODE_TYPES = new Set([ 'evidence', // Confidence layer 'needs', + // RAG contract layer + 'corpus', + 'source', + 'chunking', + 'embed', + 'retriever', + 'rag', + 'grounding', + 'ragEval', // Backend data layer (graduated nodes) 'model', 'column', @@ -1010,6 +1019,18 @@ export function generateCoreNode(node: IRNode, target?: string, runtime?: KernRu return []; case 'needs': return []; + // RAG declarations are semantic contracts consumed by validators, + // substrate, MCP/review tooling, and future adapters. They intentionally + // emit no JavaScript in core codegen. + case 'corpus': + case 'source': + case 'chunking': + case 'embed': + case 'retriever': + case 'rag': + case 'grounding': + case 'ragEval': + return []; // Graduated nodes — backend data layer case 'model': return generateModel(node); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 309fb323..835b6ff0 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -464,9 +464,25 @@ export type { ClassSemanticMemberKind, ClassSemanticOverrideFact, ClassSemanticOverrideStatus, + RagSemanticChunkingFact, + RagSemanticCorpusFact, + RagSemanticEmbedFact, + RagSemanticEvalFact, + RagSemanticFacts, + RagSemanticGroundingFact, + RagSemanticLocation, + RagSemanticPipelineFact, + RagSemanticRetrieverFact, + RagSemanticSourceFact, SemanticViolation, } from './semantic-validator.js'; -export { collectClassSemanticFacts, validateClassSemantics, validateSemantics } from './semantic-validator.js'; +export { + collectClassSemanticFacts, + collectRagSemanticFacts, + validateClassSemantics, + validateRagSemantics, + validateSemantics, +} from './semantic-validator.js'; export type { ShadowAnalyzeOptions, ShadowDiagnostic } from './shadow-analyzer.js'; export { analyzeShadow } from './shadow-analyzer.js'; export type { SourceMapV3 } from './source-map.js'; diff --git 
a/packages/core/src/schema.ts b/packages/core/src/schema.ts index e10b6ae4..3172e8e1 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2381,6 +2381,114 @@ export const NODE_SCHEMAS: Record = { }, }, + // ── RAG (retrieval-augmented generation) contract nodes ───────────── + + corpus: { + description: + 'RAG corpus declaration — names a document collection and its source/chunking contract without binding to a provider runtime.', + example: + 'corpus name=Docs title="Support docs"\n source name=manuals kind=local uri="./docs/**/*.md"\n chunking strategy=semantic maxTokens=600 overlap=80', + props: { + name: { required: true, kind: 'identifier' }, + title: { kind: 'string' }, + tenant: { kind: 'identifier' }, + refresh: { kind: 'string' }, + }, + allowedChildren: ['source', 'chunking'], + }, + source: { + description: 'RAG corpus source — a raw document location such as local files, S3, HTTP, or an MCP resource.', + example: 'source name=manuals kind=local uri="./docs/**/*.md" media=markdown', + props: { + name: { kind: 'identifier' }, + kind: { kind: 'identifier' }, + uri: { required: true, kind: 'string' }, + media: { kind: 'identifier' }, + acl: { kind: 'identifier' }, + }, + allowedChildren: [], + }, + chunking: { + description: + 'RAG chunking policy — describes document segmentation. Named `chunking` to avoid colliding with the collection `chunk` primitive.', + example: 'chunking corpus=Docs source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens', + props: { + name: { kind: 'identifier' }, + corpus: { kind: 'identifier' }, + source: { kind: 'identifier' }, + strategy: { kind: 'identifier' }, + maxTokens: { kind: 'number' }, + overlap: { kind: 'number' }, + unit: { kind: 'identifier' }, + }, + allowedChildren: [], + }, + embed: { + description: + 'RAG embedding contract — names the embedding model/dimension contract for a corpus. 
Provider execution is adapter-owned.', + example: 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', + props: { + name: { required: true, kind: 'identifier' }, + corpus: { required: true, kind: 'identifier' }, + model: { kind: 'string' }, + dims: { kind: 'number' }, + metric: { kind: 'identifier' }, + }, + allowedChildren: [], + }, + retriever: { + description: + 'RAG retriever declaration — binds a corpus and optional embedding contract to search policy such as topK/minScore.', + example: 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', + props: { + name: { required: true, kind: 'identifier' }, + corpus: { required: true, kind: 'identifier' }, + embed: { kind: 'identifier' }, + mode: { kind: 'identifier' }, + topK: { kind: 'number' }, + minScore: { kind: 'number' }, + rerank: { kind: 'string' }, + }, + allowedChildren: [], + }, + rag: { + description: + 'RAG pipeline declaration — connects a query/answer flow to a retriever and grounding/evaluation requirements.', + example: + 'rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true\n grounding requireCitations=true policy=strict\n ragEval metric=faithfulness threshold=0.85', + props: { + name: { required: true, kind: 'identifier' }, + retriever: { required: true, kind: 'identifier' }, + prompt: { kind: 'string' }, + answer: { kind: 'string' }, + citations: { kind: 'boolean' }, + }, + allowedChildren: ['grounding', 'ragEval'], + }, + grounding: { + description: 'RAG grounding policy — declares citation and context constraints for a RAG pipeline.', + example: 'grounding rag=AnswerDocs requireCitations=true policy=strict maxContext=6000', + props: { + name: { kind: 'identifier' }, + rag: { kind: 'identifier' }, + requireCitations: { kind: 'boolean' }, + policy: { kind: 'identifier' }, + maxContext: { kind: 'number' }, + }, + allowedChildren: [], + }, + ragEval: { + description: 'RAG evaluation contract — 
declares a metric threshold for a RAG pipeline.', + example: 'ragEval rag=AnswerDocs metric=faithfulness threshold=0.85', + props: { + name: { kind: 'identifier' }, + rag: { kind: 'identifier' }, + metric: { kind: 'identifier' }, + threshold: { kind: 'number' }, + }, + allowedChildren: [], + }, + // ── React / UI element nodes ────────────────────────────────────────── screen: { diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index cccc594e..b4bca982 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -12,8 +12,11 @@ import { snapshotRegistry } from './ir/semantics/index.js'; import { type ClassSemanticFacts, collectClassSemanticFacts, + collectRagSemanticFacts, + type RagSemanticFacts, type SemanticViolation, validateClassSemantics, + validateRagSemantics, } from './semantic-validator.js'; import type { IRNode } from './types.js'; @@ -93,6 +96,8 @@ export interface KernSemanticSubstrate { readonly irContracts: readonly KernSemanticIrContract[]; readonly classFacts?: ClassSemanticFacts; readonly classValidationSummary?: KernSemanticValidationSummary; + readonly ragFacts?: RagSemanticFacts; + readonly ragValidationSummary?: KernSemanticValidationSummary; } export interface BuildKernSemanticSubstrateOptions { @@ -100,6 +105,8 @@ export interface BuildKernSemanticSubstrateOptions { readonly irContracts?: ReadonlyMap; readonly documentClasses?: IRNode | readonly IRNode[]; readonly includeClassValidationSummary?: boolean; + readonly documentRag?: IRNode | readonly IRNode[]; + readonly includeRagValidationSummary?: boolean; } export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { @@ -152,6 +159,10 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp ...(options.documentClasses && options.includeClassValidationSummary ? 
{ classValidationSummary: semanticValidationSummary(options.documentClasses) } : {}), + ...(options.documentRag ? { ragFacts: collectRagSemanticFacts(options.documentRag) } : {}), + ...(options.documentRag && options.includeRagValidationSummary + ? { ragValidationSummary: ragValidationSummary(options.documentRag) } + : {}), }; } @@ -236,6 +247,10 @@ function semanticValidationSummary(root: IRNode | readonly IRNode[]): KernSemant return summarizeSemanticViolations(validateClassSemantics(root)); } +function ragValidationSummary(root: IRNode | readonly IRNode[]): KernSemanticValidationSummary { + return summarizeSemanticViolations(validateRagSemantics(root)); +} + function summarizeSemanticViolations(violations: readonly SemanticViolation[]): KernSemanticValidationSummary { const byRule: Record = {}; for (const violation of violations) { diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 397309e3..9d3becb2 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -89,6 +89,102 @@ export interface ClassSemanticFacts { readonly cycles: readonly (readonly string[])[]; } +export interface RagSemanticLocation { + readonly line: number; + readonly col: number; +} + +export interface RagSemanticSourceFact { + readonly name?: string; + readonly corpusName?: string; + readonly kind?: string; + readonly uri: string; + readonly media?: string; + readonly acl?: string; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticChunkingFact { + readonly name?: string; + readonly corpusName?: string; + readonly sourceName?: string; + readonly strategy?: string; + readonly maxTokens?: number; + readonly overlap?: number; + readonly unit?: string; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticEmbedFact { + readonly name: string; + readonly corpusName: string; + readonly model?: string; + readonly dims?: number; + readonly metric?: string; + readonly 
loc?: RagSemanticLocation; +} + +export interface RagSemanticCorpusFact { + readonly name: string; + readonly title?: string; + readonly tenant?: string; + readonly refresh?: string; + readonly sources: readonly RagSemanticSourceFact[]; + readonly chunking: readonly RagSemanticChunkingFact[]; + readonly embeds: readonly RagSemanticEmbedFact[]; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticRetrieverFact { + readonly name: string; + readonly corpusName: string; + readonly embedName?: string; + readonly mode?: string; + readonly topK?: number; + readonly minScore?: number; + readonly rerank?: string; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticGroundingFact { + readonly name?: string; + readonly ragName?: string; + readonly requireCitations: boolean; + readonly policy?: string; + readonly maxContext?: number; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticEvalFact { + readonly name?: string; + readonly ragName?: string; + readonly metric?: string; + readonly threshold?: number; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticPipelineFact { + readonly name: string; + readonly retrieverName: string; + readonly prompt?: string; + readonly answer?: string; + readonly citations: boolean; + readonly groundings: readonly RagSemanticGroundingFact[]; + readonly evals: readonly RagSemanticEvalFact[]; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticFacts { + readonly corpora: readonly RagSemanticCorpusFact[]; + readonly retrievers: readonly RagSemanticRetrieverFact[]; + readonly pipelines: readonly RagSemanticPipelineFact[]; + readonly unresolvedCorpusRefs: readonly string[]; + readonly unresolvedRetrieverRefs: readonly string[]; + readonly unresolvedEmbedRefs: readonly string[]; + readonly unresolvedRagRefs: readonly string[]; + readonly unresolvedSourceRefs: readonly string[]; +} + /** * Run semantic validation on an IR tree. 
* Returns an empty array when the tree is valid. @@ -96,6 +192,7 @@ export interface ClassSemanticFacts { export function validateSemantics(root: IRNode): SemanticViolation[] { const violations: SemanticViolation[] = []; validateClassGraph(root, violations); + validateRagGraph(root, violations); validateNode(root, violations, [], []); return violations; } @@ -106,6 +203,12 @@ export function validateClassSemantics(root: IRNode | readonly IRNode[]): Semant return violations; } +export function validateRagSemantics(root: IRNode | readonly IRNode[]): SemanticViolation[] { + const violations: SemanticViolation[] = []; + validateRagGraphRoots(Array.isArray(root) ? root : [root], violations); + return violations; +} + // True when the *innermost* handler ancestor is opted into native body- // statement mode (`lang="kern"`). Body statements like `let`/`assign`/`do`/ // `if`/`try` nest freely inside that scope, so the let-parent rule has to @@ -507,6 +610,699 @@ function validateNode( } } +interface RagCorpusInfo { + node: IRNode; + rootIndex: number; + name: string; +} + +interface RagSourceInfo { + node: IRNode; + rootIndex: number; + name?: string; + corpusName?: string; +} + +interface RagChunkingInfo { + node: IRNode; + rootIndex: number; + name?: string; + corpusName?: string; + sourceName?: string; +} + +interface RagEmbedInfo { + node: IRNode; + rootIndex: number; + name: string; + corpusName: string; +} + +interface RagRetrieverInfo { + node: IRNode; + rootIndex: number; + name: string; + corpusName: string; + embedName?: string; +} + +interface RagPipelineInfo { + node: IRNode; + rootIndex: number; + name: string; + retrieverName: string; +} + +interface RagGroundingInfo { + node: IRNode; + rootIndex: number; + ragName?: string; +} + +interface RagEvalInfo { + node: IRNode; + rootIndex: number; + ragName?: string; +} + +interface RagInfos { + corpora: RagCorpusInfo[]; + sources: RagSourceInfo[]; + chunking: RagChunkingInfo[]; + embeds: RagEmbedInfo[]; + 
retrievers: RagRetrieverInfo[]; + pipelines: RagPipelineInfo[]; + groundings: RagGroundingInfo[]; + evals: RagEvalInfo[]; +} + +function validateRagGraph(root: IRNode, violations: SemanticViolation[]): void { + validateRagGraphRoots([root], violations); +} + +function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticViolation[]): void { + const infos = collectRagInfosForRoots(roots); + if ( + infos.corpora.length === 0 && + infos.sources.length === 0 && + infos.chunking.length === 0 && + infos.embeds.length === 0 && + infos.retrievers.length === 0 && + infos.pipelines.length === 0 && + infos.groundings.length === 0 && + infos.evals.length === 0 + ) { + return; + } + + const corpusByName = new Map(infos.corpora.map((info) => [info.name, info])); + const embedByName = new Map(infos.embeds.map((info) => [info.name, info])); + const retrieverByName = new Map(infos.retrievers.map((info) => [info.name, info])); + const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); + const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); + const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); + + validateRagUniqueNames(infos, violations); + + for (const source of infos.sources) { + validateRagSource(source, violations); + } + for (const chunking of infos.chunking) { + validateRagChunking(chunking, corpusByName, sourceNamesByCorpus, globalSourceNames, violations); + } + for (const embed of infos.embeds) { + validateRagEmbed(embed, corpusByName, violations); + } + for (const retriever of infos.retrievers) { + validateRagRetriever(retriever, corpusByName, embedByName, violations); + } + for (const pipeline of infos.pipelines) { + validateRagPipeline(pipeline, retrieverByName, infos.groundings, violations); + } + for (const grounding of infos.groundings) { + validateRagGrounding(grounding, ragByName, violations); + } + for (const evaluation of infos.evals) { + 
validateRagEval(evaluation, ragByName, violations); + } +} + +function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { + const out: RagInfos = { + corpora: [], + sources: [], + chunking: [], + embeds: [], + retrievers: [], + pipelines: [], + groundings: [], + evals: [], + }; + for (const [rootIndex, root] of roots.entries()) { + collectRagInfos(root, rootIndex, out); + } + return out; +} + +function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { + function visit(node: IRNode, nearestCorpusName?: string, nearestRagName?: string): void { + const nextCorpusName = node.type === 'corpus' ? stringProp(node, 'name') || nearestCorpusName : nearestCorpusName; + const nextRagName = node.type === 'rag' ? stringProp(node, 'name') || nearestRagName : nearestRagName; + + if (node.type === 'corpus') { + const name = stringProp(node, 'name'); + if (name) out.corpora.push({ node, rootIndex, name }); + } else if (node.type === 'source') { + out.sources.push({ node, rootIndex, name: stringProp(node, 'name'), corpusName: nearestCorpusName }); + } else if (node.type === 'chunking') { + out.chunking.push({ + node, + rootIndex, + name: stringProp(node, 'name'), + corpusName: stringProp(node, 'corpus') || nearestCorpusName, + sourceName: stringProp(node, 'source'), + }); + } else if (node.type === 'embed') { + const name = stringProp(node, 'name'); + const corpusName = stringProp(node, 'corpus') || nearestCorpusName; + if (name && corpusName) out.embeds.push({ node, rootIndex, name, corpusName }); + } else if (node.type === 'retriever') { + const name = stringProp(node, 'name'); + const corpusName = stringProp(node, 'corpus'); + if (name && corpusName) { + out.retrievers.push({ node, rootIndex, name, corpusName, embedName: stringProp(node, 'embed') }); + } + } else if (node.type === 'rag') { + const name = stringProp(node, 'name'); + const retrieverName = stringProp(node, 'retriever'); + if (name && retrieverName) out.pipelines.push({ node, 
rootIndex, name, retrieverName }); + } else if (node.type === 'grounding') { + out.groundings.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); + } else if (node.type === 'ragEval') { + out.evals.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); + } + + for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName); + } + visit(root); +} + +function collectRagSourceNamesByCorpus(sources: readonly RagSourceInfo[]): Map> { + const out = new Map>(); + for (const source of sources) { + if (!source.corpusName || !source.name) continue; + const names = out.get(source.corpusName) ?? new Set(); + names.add(source.name); + out.set(source.corpusName, names); + } + return out; +} + +function validateRagUniqueNames(infos: RagInfos, violations: SemanticViolation[]): void { + validateRagUniqueNameSet('corpus', infos.corpora, violations); + validateRagUniqueSourceNames(infos.sources, violations); + validateRagUniqueNameSet('embed', infos.embeds, violations); + validateRagUniqueNameSet('retriever', infos.retrievers, violations); + validateRagUniqueNameSet('rag', infos.pipelines, violations); +} + +function validateRagUniqueNameSet( + kind: string, + infos: readonly { name: string; node: IRNode }[], + violations: SemanticViolation[], +): void { + const seen = new Map(); + for (const info of infos) { + const prev = seen.get(info.name); + if (prev) { + pushRagViolation( + violations, + `rag-duplicate-${kind}-name`, + info.node, + `Duplicate RAG ${kind} named '${info.name}' — first defined at line ${prev.loc?.line ?? 
'?'}.`, + ); + } else { + seen.set(info.name, info.node); + } + } +} + +function validateRagUniqueSourceNames(sources: readonly RagSourceInfo[], violations: SemanticViolation[]): void { + const seen = new Map(); + for (const source of sources) { + if (!source.name || !source.corpusName) continue; + const key = `${source.corpusName}:${source.name}`; + const prev = seen.get(key); + if (prev) { + pushRagViolation( + violations, + 'rag-duplicate-source-name', + source.node, + `Duplicate RAG source named '${source.name}' in corpus '${source.corpusName}' — first defined at line ${prev.loc?.line ?? '?'}.`, + ); + } else { + seen.set(key, source.node); + } + } +} + +function validateRagSource(source: RagSourceInfo, violations: SemanticViolation[]): void { + if (!source.corpusName) { + pushRagViolation(violations, 'rag-source-missing-corpus', source.node, 'RAG source must be nested under a corpus.'); + } + + const uri = stringProp(source.node, 'uri'); + if (uri !== undefined && uri.trim() === '') { + pushRagViolation( + violations, + 'rag-source-uri-empty', + source.node, + "RAG source 'uri=' must be a non-empty document location.", + ); + } +} + +function validateRagChunking( + chunking: RagChunkingInfo, + corpusByName: ReadonlyMap, + sourceNamesByCorpus: ReadonlyMap>, + globalSourceNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!chunking.corpusName) { + pushRagViolation( + violations, + 'rag-chunking-missing-corpus', + chunking.node, + 'RAG chunking must be nested under a corpus or declare corpus=.', + ); + } + if (chunking.corpusName && !corpusByName.has(chunking.corpusName)) { + pushRagViolation( + violations, + 'rag-chunking-unknown-corpus', + chunking.node, + `RAG chunking references unknown corpus '${chunking.corpusName}'. Declare a corpus before chunking it.`, + ); + } + + if (chunking.sourceName) { + const sourceNames = chunking.corpusName ? 
sourceNamesByCorpus.get(chunking.corpusName) : undefined; + const sourceKnown = chunking.corpusName + ? Boolean(sourceNames?.has(chunking.sourceName)) + : globalSourceNames.has(chunking.sourceName); + if (!sourceKnown) { + pushRagViolation( + violations, + 'rag-chunking-unknown-source', + chunking.node, + `RAG chunking references unknown source '${chunking.sourceName}'. Declare a named source in the same corpus.`, + ); + } + } + + const maxTokens = numberProp(chunking.node, 'maxTokens'); + if ( + invalidNumberProp(chunking.node, 'maxTokens') || + (maxTokens !== undefined && (!Number.isInteger(maxTokens) || maxTokens <= 0)) + ) { + pushRagViolation( + violations, + 'rag-chunking-max-tokens-invalid', + chunking.node, + 'RAG chunking maxTokens must be a positive integer.', + ); + } + + const overlap = numberProp(chunking.node, 'overlap'); + if ( + invalidNumberProp(chunking.node, 'overlap') || + (overlap !== undefined && (!Number.isInteger(overlap) || overlap < 0)) + ) { + pushRagViolation( + violations, + 'rag-chunking-overlap-invalid', + chunking.node, + 'RAG chunking overlap must be a non-negative integer.', + ); + } else if (overlap !== undefined && maxTokens !== undefined && overlap >= maxTokens) { + pushRagViolation( + violations, + 'rag-chunking-overlap-invalid', + chunking.node, + 'RAG chunking overlap must be smaller than maxTokens.', + ); + } +} + +function validateRagEmbed( + embed: RagEmbedInfo, + corpusByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!corpusByName.has(embed.corpusName)) { + pushRagViolation( + violations, + 'rag-embed-unknown-corpus', + embed.node, + `RAG embed '${embed.name}' references unknown corpus '${embed.corpusName}'.`, + ); + } + + const dims = numberProp(embed.node, 'dims'); + if (invalidNumberProp(embed.node, 'dims') || (dims !== undefined && (!Number.isInteger(dims) || dims <= 0))) { + pushRagViolation(violations, 'rag-embed-dims-invalid', embed.node, 'RAG embed dims must be a positive integer.'); + } +} 
+ +function validateRagRetriever( + retriever: RagRetrieverInfo, + corpusByName: ReadonlyMap, + embedByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!corpusByName.has(retriever.corpusName)) { + pushRagViolation( + violations, + 'rag-retriever-unknown-corpus', + retriever.node, + `RAG retriever '${retriever.name}' references unknown corpus '${retriever.corpusName}'.`, + ); + } + + if (retriever.embedName) { + const embed = embedByName.get(retriever.embedName); + if (!embed) { + pushRagViolation( + violations, + 'rag-retriever-unknown-embed', + retriever.node, + `RAG retriever '${retriever.name}' references unknown embed '${retriever.embedName}'.`, + ); + } else if (embed.corpusName !== retriever.corpusName) { + pushRagViolation( + violations, + 'rag-retriever-embed-corpus-mismatch', + retriever.node, + `RAG retriever '${retriever.name}' uses embed '${retriever.embedName}' for corpus '${embed.corpusName}', not '${retriever.corpusName}'.`, + ); + } + } + + const topK = numberProp(retriever.node, 'topK'); + if (invalidNumberProp(retriever.node, 'topK') || (topK !== undefined && (!Number.isInteger(topK) || topK <= 0))) { + pushRagViolation( + violations, + 'rag-retriever-topk-invalid', + retriever.node, + 'RAG retriever topK must be a positive integer.', + ); + } + + const minScore = numberProp(retriever.node, 'minScore'); + if (invalidNumberProp(retriever.node, 'minScore') || (minScore !== undefined && (minScore < 0 || minScore > 1))) { + pushRagViolation( + violations, + 'rag-retriever-minscore-invalid', + retriever.node, + 'RAG retriever minScore must be between 0 and 1.', + ); + } +} + +function validateRagPipeline( + pipeline: RagPipelineInfo, + retrieverByName: ReadonlyMap, + groundings: readonly RagGroundingInfo[], + violations: SemanticViolation[], +): void { + if (!retrieverByName.has(pipeline.retrieverName)) { + pushRagViolation( + violations, + 'rag-unknown-retriever', + pipeline.node, + `RAG pipeline '${pipeline.name}' references 
unknown retriever '${pipeline.retrieverName}'.`, + ); + } + + if (ragBooleanProp(pipeline.node, 'citations')) { + const hasCitationGrounding = groundings.some( + (grounding) => grounding.ragName === pipeline.name && ragBooleanProp(grounding.node, 'requireCitations'), + ); + if (!hasCitationGrounding) { + pushRagViolation( + violations, + 'rag-citations-require-grounding', + pipeline.node, + `RAG pipeline '${pipeline.name}' requires citations but has no grounding requireCitations=true policy.`, + ); + } + } +} + +function validateRagGrounding( + grounding: RagGroundingInfo, + ragByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!grounding.ragName) { + pushRagViolation( + violations, + 'rag-grounding-missing-rag', + grounding.node, + 'RAG grounding must be nested under a rag pipeline or declare rag=.', + ); + } + if (grounding.ragName && !ragByName.has(grounding.ragName)) { + pushRagViolation( + violations, + 'rag-grounding-unknown-rag', + grounding.node, + `RAG grounding references unknown rag '${grounding.ragName}'.`, + ); + } + + const maxContext = numberProp(grounding.node, 'maxContext'); + if ( + invalidNumberProp(grounding.node, 'maxContext') || + (maxContext !== undefined && (!Number.isInteger(maxContext) || maxContext <= 0)) + ) { + pushRagViolation( + violations, + 'rag-grounding-max-context-invalid', + grounding.node, + 'RAG grounding maxContext must be a positive integer.', + ); + } +} + +function validateRagEval( + evaluation: RagEvalInfo, + ragByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!evaluation.ragName) { + pushRagViolation( + violations, + 'rag-eval-missing-rag', + evaluation.node, + 'RAG eval must be nested under a rag pipeline or declare rag=.', + ); + } + if (evaluation.ragName && !ragByName.has(evaluation.ragName)) { + pushRagViolation( + violations, + 'rag-eval-unknown-rag', + evaluation.node, + `RAG eval references unknown rag '${evaluation.ragName}'.`, + ); + } + + const threshold = 
numberProp(evaluation.node, 'threshold'); + if ( + invalidNumberProp(evaluation.node, 'threshold') || + (threshold !== undefined && (threshold < 0 || threshold > 1)) + ) { + pushRagViolation( + violations, + 'rag-eval-threshold-invalid', + evaluation.node, + 'RAG eval threshold must be between 0 and 1.', + ); + } +} + +function pushRagViolation(violations: SemanticViolation[], rule: string, node: IRNode, message: string): void { + violations.push({ rule, nodeType: node.type, message, line: node.loc?.line, col: node.loc?.col }); +} + +export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSemanticFacts { + const roots = Array.isArray(root) ? root : [root]; + const infos = collectRagInfosForRoots(roots); + const corpusNames = new Set(infos.corpora.map((info) => info.name)); + const embedNames = new Set(infos.embeds.map((info) => info.name)); + const retrieverNames = new Set(infos.retrievers.map((info) => info.name)); + const ragNames = new Set(infos.pipelines.map((info) => info.name)); + const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); + const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); + + return { + corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), + retrievers: infos.retrievers.map(ragRetrieverFact), + pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), + unresolvedCorpusRefs: sortedUnique([ + ...infos.chunking + .map((info) => info.corpusName) + .filter((name): name is string => !!name && !corpusNames.has(name)), + ...infos.embeds.map((info) => info.corpusName).filter((name) => !corpusNames.has(name)), + ...infos.retrievers.map((info) => info.corpusName).filter((name) => !corpusNames.has(name)), + ]), + unresolvedRetrieverRefs: sortedUnique( + infos.pipelines.map((info) => info.retrieverName).filter((name) => !retrieverNames.has(name)), + ), + unresolvedEmbedRefs: sortedUnique( + 
infos.retrievers.map((info) => info.embedName).filter((name): name is string => !!name && !embedNames.has(name)), + ), + unresolvedRagRefs: sortedUnique( + [...infos.groundings.map((info) => info.ragName), ...infos.evals.map((info) => info.ragName)].filter( + (name): name is string => !!name && !ragNames.has(name), + ), + ), + unresolvedSourceRefs: sortedUnique( + infos.chunking + .filter((info) => { + if (!info.sourceName) return false; + const sourceNames = info.corpusName ? sourceNamesByCorpus.get(info.corpusName) : undefined; + return info.corpusName ? !sourceNames?.has(info.sourceName) : !globalSourceNames.has(info.sourceName); + }) + .map((info) => info.sourceName) + .filter((name): name is string => !!name), + ), + }; +} + +function ragCorpusFact(info: RagCorpusInfo, all: RagInfos): RagSemanticCorpusFact { + return { + name: info.name, + ...optionalStringFact(info.node, 'title', 'title'), + ...optionalStringFact(info.node, 'tenant', 'tenant'), + ...optionalStringFact(info.node, 'refresh', 'refresh'), + sources: all.sources.filter((source) => source.corpusName === info.name).map(ragSourceFact), + chunking: all.chunking.filter((chunking) => chunking.corpusName === info.name).map(ragChunkingFact), + embeds: all.embeds.filter((embed) => embed.corpusName === info.name).map(ragEmbedFact), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragSourceFact(info: RagSourceInfo): RagSemanticSourceFact { + return { + ...optionalStringValue('name', info.name), + ...optionalStringValue('corpusName', info.corpusName), + ...optionalStringFact(info.node, 'kind', 'kind'), + uri: stringProp(info.node, 'uri') ?? '', + ...optionalStringFact(info.node, 'media', 'media'), + ...optionalStringFact(info.node, 'acl', 'acl'), + ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), + }; +} + +function ragChunkingFact(info: RagChunkingInfo): RagSemanticChunkingFact { + return { + ...optionalStringValue('name', info.name), + ...optionalStringValue('corpusName', info.corpusName), + ...optionalStringValue('sourceName', info.sourceName), + ...optionalStringFact(info.node, 'strategy', 'strategy'), + ...optionalNumberFact(info.node, 'maxTokens', 'maxTokens'), + ...optionalNumberFact(info.node, 'overlap', 'overlap'), + ...optionalStringFact(info.node, 'unit', 'unit'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragEmbedFact(info: RagEmbedInfo): RagSemanticEmbedFact { + return { + name: info.name, + corpusName: info.corpusName, + ...optionalStringFact(info.node, 'model', 'model'), + ...optionalNumberFact(info.node, 'dims', 'dims'), + ...optionalStringFact(info.node, 'metric', 'metric'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragRetrieverFact(info: RagRetrieverInfo): RagSemanticRetrieverFact { + return { + name: info.name, + corpusName: info.corpusName, + ...optionalStringValue('embedName', info.embedName), + ...optionalStringFact(info.node, 'mode', 'mode'), + ...optionalNumberFact(info.node, 'topK', 'topK'), + ...optionalNumberFact(info.node, 'minScore', 'minScore'), + ...optionalStringFact(info.node, 'rerank', 'rerank'), + ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), + }; +} + +function ragPipelineFact( + info: RagPipelineInfo, + groundings: readonly RagGroundingInfo[], + evals: readonly RagEvalInfo[], +): RagSemanticPipelineFact { + return { + name: info.name, + retrieverName: info.retrieverName, + ...optionalStringFact(info.node, 'prompt', 'prompt'), + ...optionalStringFact(info.node, 'answer', 'answer'), + citations: ragBooleanProp(info.node, 'citations'), + groundings: groundings.filter((grounding) => grounding.ragName === info.name).map(ragGroundingFact), + evals: evals.filter((evaluation) => evaluation.ragName === info.name).map(ragEvalFact), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragGroundingFact(info: RagGroundingInfo): RagSemanticGroundingFact { + return { + ...optionalStringFact(info.node, 'name', 'name'), + ...optionalStringValue('ragName', info.ragName), + requireCitations: ragBooleanProp(info.node, 'requireCitations'), + ...optionalStringFact(info.node, 'policy', 'policy'), + ...optionalNumberFact(info.node, 'maxContext', 'maxContext'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragEvalFact(info: RagEvalInfo): RagSemanticEvalFact { + return { + ...optionalStringFact(info.node, 'name', 'name'), + ...optionalStringValue('ragName', info.ragName), + ...optionalStringFact(info.node, 'metric', 'metric'), + ...optionalNumberFact(info.node, 'threshold', 'threshold'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragLocation(node: IRNode): RagSemanticLocation | undefined { + return node.loc ? { line: node.loc.line, col: node.loc.col } : undefined; +} + +function optionalStringFact(node: IRNode, prop: string, factName: string): Record { + return optionalStringValue(factName, stringProp(node, prop)); +} + +function optionalStringValue(factName: string, value: string | undefined): Record { + return value ? 
{ [factName]: value } : {}; +} + +function optionalNumberFact(node: IRNode, prop: string, factName: string): Record { + const value = numberProp(node, prop); + return value === undefined ? {} : { [factName]: value }; +} + +function numberProp(node: IRNode, prop: string): number | undefined { + const raw = node.props?.[prop]; + if (typeof raw === 'number') return Number.isFinite(raw) ? raw : undefined; + if (typeof raw !== 'string' || raw.trim() === '') return undefined; + const value = Number(raw); + return Number.isFinite(value) ? value : undefined; +} + +function invalidNumberProp(node: IRNode, prop: string): boolean { + const raw = node.props?.[prop]; + if (raw === undefined || raw === null || raw === '') return false; + if (typeof raw === 'number') return !Number.isFinite(raw); + if (typeof raw === 'string') return raw.trim() !== '' && !Number.isFinite(Number(raw)); + return true; +} + +function ragBooleanProp(node: IRNode, prop: string): boolean { + const raw = node.props?.[prop]; + return raw === true || (typeof raw === 'string' && raw.trim().toLowerCase() === 'true'); +} + +function sortedUnique(values: readonly string[]): string[] { + return [...new Set(values)].sort(); +} + type ClassMemberKind = 'field' | 'method' | 'getter' | 'setter'; interface ClassInfo { diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index fc777c31..328fa458 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -344,6 +344,15 @@ export const NODE_TYPES = [ 'description', 'sampling', 'elicitation', + // RAG — retrieval, grounding, and evaluation contracts + 'corpus', + 'source', + 'chunking', + 'embed', + 'retriever', + 'rag', + 'grounding', + 'ragEval', 'expression-v1', ] as const; diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts new file mode 100644 index 00000000..abf23434 --- /dev/null +++ b/packages/core/tests/rag-semantics.test.ts @@ -0,0 +1,241 @@ +import { generateCoreNode, isCoreNode } 
from '../src/codegen-core.js'; +import { parseDocumentWithDiagnostics } from '../src/parser.js'; +import { validateSchema } from '../src/schema.js'; +import { collectRagSemanticFacts, validateRagSemantics, validateSemantics } from '../src/semantic-validator.js'; + +function parseRoot(source: string) { + return parseDocumentWithDiagnostics(source).root; +} + +function rulesFor(source: string): string[] { + return validateSemantics(parseRoot(source)).map((violation) => violation.rule); +} + +describe('RAG language semantics', () => { + test('registers RAG declarations as core language nodes', () => { + for (const type of ['corpus', 'source', 'chunking', 'embed', 'retriever', 'rag', 'grounding', 'ragEval']) { + expect(isCoreNode(type)).toBe(true); + expect(generateCoreNode({ type, props: {} })).toEqual([]); + } + }); + + test('accepts a minimal grounded RAG declaration graph', () => { + const source = [ + 'corpus name=Docs title="Support docs"', + ' source name=manuals kind=local uri="./docs/**/*.md" media=markdown', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens', + 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true', + ' grounding requireCitations=true policy=strict maxContext=6000', + ' ragEval metric=faithfulness threshold=0.85', + ].join('\n'); + + expect(validateSchema(parseRoot(source))).toEqual([]); + expect(validateSemantics(parseRoot(source))).toEqual([]); + }); + + test('collects RAG semantic facts for corpus retriever and pipeline contracts', () => { + const root = parseRoot( + [ + 'corpus name=Docs title="Support docs"', + ' source name=manuals kind=local uri="./docs/**/*.md" media=markdown', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens', + 'embed name=DocsEmbedding 
corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true', + ' grounding name=StrictGrounding requireCitations=true policy=strict maxContext=6000', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85', + ].join('\n'), + ); + + const facts = collectRagSemanticFacts(root); + + expect(facts.unresolvedCorpusRefs).toEqual([]); + expect(facts.unresolvedRetrieverRefs).toEqual([]); + expect(facts.corpora).toEqual([ + expect.objectContaining({ + name: 'Docs', + title: 'Support docs', + sources: [ + expect.objectContaining({ + name: 'manuals', + corpusName: 'Docs', + kind: 'local', + uri: './docs/**/*.md', + media: 'markdown', + }), + ], + chunking: [ + expect.objectContaining({ + corpusName: 'Docs', + sourceName: 'manuals', + strategy: 'semantic', + maxTokens: 600, + overlap: 80, + unit: 'tokens', + }), + ], + embeds: [ + expect.objectContaining({ + name: 'DocsEmbedding', + corpusName: 'Docs', + model: 'text-embedding-3-small', + dims: 1536, + metric: 'cosine', + }), + ], + }), + ]); + expect(facts.retrievers).toEqual([ + expect.objectContaining({ + name: 'DocsSearch', + corpusName: 'Docs', + embedName: 'DocsEmbedding', + mode: 'hybrid', + topK: 8, + minScore: 0.72, + }), + ]); + expect(facts.pipelines).toEqual([ + expect.objectContaining({ + name: 'AnswerDocs', + retrieverName: 'DocsSearch', + citations: true, + groundings: [ + expect.objectContaining({ + name: 'StrictGrounding', + ragName: 'AnswerDocs', + requireCitations: true, + policy: 'strict', + maxContext: 6000, + }), + ], + evals: [ + expect.objectContaining({ + name: 'Faithfulness', + ragName: 'AnswerDocs', + metric: 'faithfulness', + threshold: 0.85, + }), + ], + }), + ]); + }); + + test('treats explicit false RAG booleans as false', () => { + const root = parseRoot( + [ + 'corpus name=Docs', + ' source name=manuals 
uri="./docs/**/*.md"', + 'embed name=DocsEmbedding corpus=Docs', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding', + 'rag name=AnswerDocs retriever=DocsSearch citations=false', + ' grounding requireCitations=false', + ].join('\n'), + ); + + expect(validateSemantics(root)).toEqual([]); + expect(collectRagSemanticFacts(root).pipelines[0]).toEqual( + expect.objectContaining({ + citations: false, + groundings: [expect.objectContaining({ requireCitations: false })], + }), + ); + }); + + test('reports invalid RAG references and numeric contracts', () => { + const source = [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + ' chunking source=missing strategy=semantic maxTokens=64 overlap=64', + 'embed name=BadEmbedding corpus=Missing dims=0', + 'embed name=OtherEmbedding corpus=Docs', + 'corpus name=OtherDocs', + 'retriever name=BadRetriever corpus=Missing embed=MissingEmbed topK=0 minScore=1.1', + 'retriever name=MismatchRetriever corpus=OtherDocs embed=OtherEmbedding', + 'rag name=BadRag retriever=MissingRetriever citations=true', + 'grounding rag=MissingRag maxContext=0', + 'ragEval rag=MissingRag threshold=1.1', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'rag-chunking-unknown-source', + 'rag-chunking-overlap-invalid', + 'rag-embed-unknown-corpus', + 'rag-embed-dims-invalid', + 'rag-retriever-unknown-corpus', + 'rag-retriever-unknown-embed', + 'rag-retriever-topk-invalid', + 'rag-retriever-minscore-invalid', + 'rag-retriever-embed-corpus-mismatch', + 'rag-unknown-retriever', + 'rag-citations-require-grounding', + 'rag-grounding-unknown-rag', + 'rag-grounding-max-context-invalid', + 'rag-eval-unknown-rag', + 'rag-eval-threshold-invalid', + ]), + ); + }); + + test('reports disconnected and duplicate RAG declarations', () => { + const source = [ + 'corpus name=Docs', + 'corpus name=Docs', + 'source name=topLevel uri="./loose.md"', + 'corpus name=DuplicatedSources', + ' source name=manuals 
uri="./a.md"', + ' source name=manuals uri="./b.md"', + 'embed name=DocsEmbedding corpus=Docs', + 'embed name=DocsEmbedding corpus=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + 'rag name=AnswerDocs retriever=DocsSearch', + 'chunking source=manuals maxTokens=abc', + 'grounding maxContext=abc', + 'ragEval threshold=abc', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'rag-duplicate-corpus-name', + 'rag-source-missing-corpus', + 'rag-duplicate-source-name', + 'rag-duplicate-embed-name', + 'rag-duplicate-retriever-name', + 'rag-duplicate-rag-name', + 'rag-chunking-missing-corpus', + 'rag-chunking-max-tokens-invalid', + 'rag-grounding-missing-rag', + 'rag-grounding-max-context-invalid', + 'rag-eval-missing-rag', + 'rag-eval-threshold-invalid', + ]), + ); + }); + + test('requires chunking source refs to resolve inside the referenced corpus', () => { + const source = [ + 'corpus name=Docs', + 'corpus name=OtherDocs', + ' source name=manuals uri="./other/**/*.md"', + 'chunking corpus=Docs source=manuals maxTokens=100', + ].join('\n'); + + expect(rulesFor(source)).toContain('rag-chunking-unknown-source'); + expect(collectRagSemanticFacts(parseRoot(source)).unresolvedSourceRefs).toEqual(['manuals']); + }); + + test('can validate only RAG rules when consumers need a focused pass', () => { + const root = parseRoot( + ['machine name=Flow', ' transition name=go from=Missing to=Missing', 'rag name=Bad retriever=Missing'].join( + '\n', + ), + ); + + expect(validateRagSemantics(root).map((violation) => violation.rule)).toEqual(['rag-unknown-retriever']); + }); +}); diff --git a/packages/core/tests/schema-validation.test.ts b/packages/core/tests/schema-validation.test.ts index df1afe75..397b8f3c 100644 --- a/packages/core/tests/schema-validation.test.ts +++ b/packages/core/tests/schema-validation.test.ts @@ -95,6 +95,49 @@ describe('Schema Validation', 
() => { expect(v).toHaveLength(0); }); + it('passes valid RAG declarations and flags missing required graph props', () => { + const valid = validate( + [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', + 'embed name=DocsEmbedding corpus=Docs', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch', + ' grounding requireCitations=true maxContext=6000', + ' ragEval metric=faithfulness threshold=0.85', + ].join('\n'), + ); + expect(valid).toHaveLength(0); + + const missing = validate( + [ + 'corpus', + 'source name=missingUri', + 'embed name=NoCorpus', + 'retriever name=NoCorpus', + 'rag name=NoRetriever', + ].join('\n'), + ); + expect(missing.some((violation) => violation.message.includes("'corpus' requires prop 'name'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'source' requires prop 'uri'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'embed' requires prop 'corpus'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'retriever' requires prop 'corpus'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'rag' requires prop 'retriever'"))).toBe(true); + + const misplaced = validate( + ['retriever name=DocsSearch corpus=Docs', ' grounding requireCitations=true'].join('\n'), + ); + expect( + misplaced.some((violation) => violation.message.includes("'retriever' does not allow child type 'grounding'")), + ).toBe(true); + + const nestedEmbed = validate(['corpus name=Docs', ' embed name=DocsEmbedding corpus=Docs'].join('\n')); + expect( + nestedEmbed.some((violation) => violation.message.includes("'corpus' does not allow child type 'embed'")), + ).toBe(true); + }); + it('passes explicit foreign handler metadata', () => { const v = validate( [ diff --git 
a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index a51d2ccd..35ce6a00 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -63,6 +63,8 @@ describe('KERN semantic substrate', () => { ]); expect(Object.hasOwn(substrate, 'classFacts')).toBe(false); expect(Object.hasOwn(substrate, 'classValidationSummary')).toBe(false); + expect(Object.hasOwn(substrate, 'ragFacts')).toBe(false); + expect(Object.hasOwn(substrate, 'ragValidationSummary')).toBe(false); }); test('exports document class member inheritance and override facts when requested', () => { @@ -234,6 +236,59 @@ describe('KERN semantic substrate', () => { expect(substrate.classValidationSummary?.byRule['machine-transition-from']).toBeUndefined(); }); + test('exports document RAG facts and validation summaries when requested', () => { + const root = parseRoot( + [ + 'corpus name=Docs title="Support docs"', + ' source name=manuals kind=local uri="./docs/**/*.md"', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', + 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true policy=strict maxContext=6000', + ' ragEval metric=faithfulness threshold=0.85', + ].join('\n'), + ); + + const substrate = buildKernSemanticSubstrate({ + documentRag: root, + includeRagValidationSummary: true, + }); + + expect(substrate.ragValidationSummary).toEqual({ total: 0, byRule: {} }); + expect(substrate.ragFacts?.corpora).toEqual([ + expect.objectContaining({ + name: 'Docs', + sources: [expect.objectContaining({ name: 'manuals', uri: './docs/**/*.md' })], + embeds: [expect.objectContaining({ name: 'DocsEmbedding', corpusName: 'Docs' })], + }), + ]); + 
expect(substrate.ragFacts?.retrievers).toEqual([ + expect.objectContaining({ + name: 'DocsSearch', + corpusName: 'Docs', + embedName: 'DocsEmbedding', + topK: 8, + minScore: 0.72, + }), + ]); + expect(substrate.ragFacts?.pipelines).toEqual([ + expect.objectContaining({ + name: 'AnswerDocs', + retrieverName: 'DocsSearch', + citations: true, + groundings: [expect.objectContaining({ requireCitations: true, policy: 'strict' })], + evals: [expect.objectContaining({ metric: 'faithfulness', threshold: 0.85 })], + }), + ]); + + const invalidSubstrate = buildKernSemanticSubstrate({ + documentRag: parseRoot('rag name=Broken retriever=Missing'), + includeRagValidationSummary: true, + }); + expect(invalidSubstrate.ragValidationSummary?.byRule['rag-unknown-retriever']).toBe(1); + }); + test('exports portable review primitives as stable query objects', () => { const substrate = buildKernSemanticSubstrate(); const clamp = lookupSemanticPrimitive(substrate, 'number.clamp'); From 27eff2eaa9996bc47800f485f10a46af7cdb1b92 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 19:34:51 +0200 Subject: [PATCH 19/46] feat(core): bind mcp tools to rag contracts --- .../conformance-mcp-rag-bad-cases.kern | 16 ++ .../conformance-mcp-rag-bad-cases.test.kern | 13 + examples/native-test/conformance-mcp-rag.kern | 17 ++ .../native-test/conformance-mcp-rag.test.kern | 5 + packages/core/src/schema.ts | 20 +- packages/core/src/semantic-validator.ts | 252 +++++++++++++++++- packages/core/src/spec.ts | 1 + packages/core/tests/rag-semantics.test.ts | 122 +++++++++ .../core/tests/semantic-substrate.test.ts | 15 ++ 9 files changed, 453 insertions(+), 8 deletions(-) create mode 100644 examples/native-test/conformance-mcp-rag-bad-cases.kern create mode 100644 examples/native-test/conformance-mcp-rag-bad-cases.test.kern create mode 100644 examples/native-test/conformance-mcp-rag.kern create mode 100644 examples/native-test/conformance-mcp-rag.test.kern diff --git 
a/examples/native-test/conformance-mcp-rag-bad-cases.kern b/examples/native-test/conformance-mcp-rag-bad-cases.kern new file mode 100644 index 00000000..89f629b2 --- /dev/null +++ b/examples/native-test/conformance-mcp-rag-bad-cases.kern @@ -0,0 +1,16 @@ +corpus name=Docs +retriever name=DocsSearch corpus=Docs + +rag name=AnswerDocs retriever=DocsSearch citations=true + grounding requireCitations=true + +mcp name=Support + tool name=badTool + param name=question type=string required=true + retrieve rag=AnswerDocs retriever=MissingRetriever queryParam=missing query={{question}} topK=0 minScore=1.2 requireGrounding=false + retrieve retriever=AlsoMissing queryParam=question + + prompt name=badPrompt + retrieve retriever=DocsSearch + +retrieve rag=MissingRag diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern new file mode 100644 index 00000000..dcbd4a2f --- /dev/null +++ b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern @@ -0,0 +1,13 @@ +test name="Bad MCP RAG conformance" target="./conformance-mcp-rag-bad-cases.kern" coverage=false + it name="mcp rag semantic assertions prove detector coverage" + expect has=semanticViolations matches="MCP retrieve cannot combine retriever= and rag=" + expect has=semanticViolations matches="MCP retrieve references unknown retriever 'MissingRetriever'" + expect has=semanticViolations matches="MCP retrieve queryParam 'missing' is not declared on tool 'badTool'" + expect has=semanticViolations matches="MCP retrieve cannot combine queryParam= and query=" + expect has=semanticViolations matches="MCP retrieve must declare queryParam= or query=" + expect has=semanticViolations matches="MCP retrieve topK must be a positive integer" + expect has=semanticViolations matches="MCP retrieve minScore must be between 0 and 1" + expect has=semanticViolations matches="MCP retrieve references citation-grounded rag 'AnswerDocs' but sets 
requireGrounding=false" + expect has=semanticViolations matches="MCP tool 'badTool' cannot declare more than one retrieve binding" + expect has=semanticViolations matches="MCP retrieve must be nested under a tool or prompt" + expect has=semanticViolations matches="MCP retrieve references unknown rag 'MissingRag'" diff --git a/examples/native-test/conformance-mcp-rag.kern b/examples/native-test/conformance-mcp-rag.kern new file mode 100644 index 00000000..2c4e2585 --- /dev/null +++ b/examples/native-test/conformance-mcp-rag.kern @@ -0,0 +1,17 @@ +corpus name=Docs + source name=manuals uri="./docs/**/*.md" + chunking source=manuals strategy=semantic maxTokens=600 overlap=80 + +retriever name=DocsSearch corpus=Docs mode=hybrid topK=8 minScore=0.72 + +rag name=AnswerDocs retriever=DocsSearch citations=true + grounding requireCitations=true policy=strict + +mcp name=Support + tool name=answerQuestion + param name=question type=string required=true + retrieve rag=AnswerDocs queryParam=question as=context topK=4 + + prompt name=summarizeDocs + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question as=chunks diff --git a/examples/native-test/conformance-mcp-rag.test.kern b/examples/native-test/conformance-mcp-rag.test.kern new file mode 100644 index 00000000..c4bf6d17 --- /dev/null +++ b/examples/native-test/conformance-mcp-rag.test.kern @@ -0,0 +1,5 @@ +test name="MCP RAG conformance" target="./conformance-mcp-rag.kern" coverage=false + it name="mcp retrieve declarations bind to rag contracts" + expect no=schemaViolations + expect no=semanticViolations + expect node=retrieve count=2 diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 3172e8e1..99267c62 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2288,6 +2288,7 @@ export const NODE_SCHEMAS: Record = { 'guard', 'sampling', 'elicitation', + 'retrieve', 'derive', 'effect', 'respond', @@ -2355,7 +2356,24 @@ export const 
NODE_SCHEMAS: Record = { props: { name: { required: true, kind: 'identifier' }, }, - allowedChildren: ['param', 'handler', 'description'], + allowedChildren: ['param', 'handler', 'description', 'retrieve'], + }, + retrieve: { + description: + 'MCP retrieval intent — declaratively binds a tool or prompt to a RAG retriever or pipeline without executing provider retrieval in core.', + example: 'retrieve rag=AnswerDocs queryParam=question as=context requireGrounding=true topK=4', + props: { + name: { kind: 'identifier' }, + retriever: { kind: 'identifier' }, + rag: { kind: 'identifier' }, + queryParam: { kind: 'identifier' }, + query: { kind: 'expression' }, + as: { kind: 'identifier' }, + topK: { kind: 'number' }, + minScore: { kind: 'number' }, + requireGrounding: { kind: 'boolean' }, + }, + allowedChildren: [], }, description: { description: 'Documentation text for a tool, resource, or prompt', diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 9d3becb2..02d93585 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -174,10 +174,26 @@ export interface RagSemanticPipelineFact { readonly loc?: RagSemanticLocation; } +export interface RagSemanticMcpRetrievalFact { + readonly containerKind?: 'tool' | 'prompt'; + readonly containerName?: string; + readonly targetKind: 'retriever' | 'rag'; + readonly targetName: string; + readonly name?: string; + readonly queryParam?: string; + readonly query?: string; + readonly as?: string; + readonly topK?: number; + readonly minScore?: number; + readonly requireGrounding: boolean; + readonly loc?: RagSemanticLocation; +} + export interface RagSemanticFacts { readonly corpora: readonly RagSemanticCorpusFact[]; readonly retrievers: readonly RagSemanticRetrieverFact[]; readonly pipelines: readonly RagSemanticPipelineFact[]; + readonly mcpRetrievals: readonly RagSemanticMcpRetrievalFact[]; readonly unresolvedCorpusRefs: readonly string[]; 
readonly unresolvedRetrieverRefs: readonly string[]; readonly unresolvedEmbedRefs: readonly string[]; @@ -665,6 +681,20 @@ interface RagEvalInfo { ragName?: string; } +interface RagMcpContainerInfo { + node: IRNode; + rootIndex: number; + kind: 'tool' | 'prompt'; + name?: string; + paramNames: ReadonlySet; +} + +interface RagMcpRetrievalInfo { + node: IRNode; + rootIndex: number; + container?: RagMcpContainerInfo; +} + interface RagInfos { corpora: RagCorpusInfo[]; sources: RagSourceInfo[]; @@ -674,6 +704,7 @@ interface RagInfos { pipelines: RagPipelineInfo[]; groundings: RagGroundingInfo[]; evals: RagEvalInfo[]; + mcpRetrievals: RagMcpRetrievalInfo[]; } function validateRagGraph(root: IRNode, violations: SemanticViolation[]): void { @@ -690,7 +721,8 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio infos.retrievers.length === 0 && infos.pipelines.length === 0 && infos.groundings.length === 0 && - infos.evals.length === 0 + infos.evals.length === 0 && + infos.mcpRetrievals.length === 0 ) { return; } @@ -725,6 +757,10 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio for (const evaluation of infos.evals) { validateRagEval(evaluation, ragByName, violations); } + validateRagMcpRetrievalDuplicates(infos.mcpRetrievals, violations); + for (const retrieval of infos.mcpRetrievals) { + validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, violations); + } } function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { @@ -737,6 +773,7 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { pipelines: [], groundings: [], evals: [], + mcpRetrievals: [], }; for (const [rootIndex, root] of roots.entries()) { collectRagInfos(root, rootIndex, out); @@ -745,9 +782,18 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { } function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { - function visit(node: IRNode, nearestCorpusName?: string, 
nearestRagName?: string): void { + function visit( + node: IRNode, + nearestCorpusName?: string, + nearestRagName?: string, + nearestMcpContainer?: RagMcpContainerInfo, + ): void { const nextCorpusName = node.type === 'corpus' ? stringProp(node, 'name') || nearestCorpusName : nearestCorpusName; const nextRagName = node.type === 'rag' ? stringProp(node, 'name') || nearestRagName : nearestRagName; + const nextMcpContainer = + node.type === 'tool' || node.type === 'prompt' + ? ragMcpContainerInfo(node, rootIndex, node.type === 'tool' ? 'tool' : 'prompt') + : nearestMcpContainer; if (node.type === 'corpus') { const name = stringProp(node, 'name'); @@ -780,13 +826,26 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { out.groundings.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); } else if (node.type === 'ragEval') { out.evals.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); + } else if (node.type === 'retrieve') { + out.mcpRetrievals.push({ node, rootIndex, container: nearestMcpContainer }); } - for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName); + for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName, nextMcpContainer); } visit(root); } +function ragMcpContainerInfo(node: IRNode, rootIndex: number, kind: 'tool' | 'prompt'): RagMcpContainerInfo { + const name = stringProp(node, 'name'); + const paramNames = new Set(); + for (const child of node.children ?? 
[]) { + if (child.type !== 'param') continue; + const paramName = stringProp(child, 'name'); + if (paramName) paramNames.add(paramName); + } + return { node, rootIndex, kind, ...optionalStringValue('name', name), paramNames }; +} + function collectRagSourceNamesByCorpus(sources: readonly RagSourceInfo[]): Map> { const out = new Map>(); for (const source of sources) { @@ -1112,6 +1171,139 @@ function validateRagEval( } } +function validateRagMcpRetrievalDuplicates( + retrievals: readonly RagMcpRetrievalInfo[], + violations: SemanticViolation[], +): void { + const seen = new Map(); + for (const retrieval of retrievals) { + const containerNode = retrieval.container?.node; + if (!containerNode) continue; + const prev = seen.get(containerNode); + if (prev) { + pushRagViolation( + violations, + 'mcp-retrieve-duplicate', + retrieval.node, + `MCP ${retrieval.container?.kind} '${retrieval.container?.name ?? ''}' cannot declare more than one retrieve binding — first defined at line ${prev.loc?.line ?? 
'?'}.`, + ); + } else { + seen.set(containerNode, retrieval.node); + } + } +} + +function validateRagMcpRetrieval( + retrieval: RagMcpRetrievalInfo, + retrieverByName: ReadonlyMap, + ragByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!retrieval.container) { + pushRagViolation( + violations, + 'mcp-retrieve-missing-container', + retrieval.node, + 'MCP retrieve must be nested under a tool or prompt.', + ); + } + + const retrieverName = stringProp(retrieval.node, 'retriever'); + const ragName = stringProp(retrieval.node, 'rag'); + if (!retrieverName && !ragName) { + pushRagViolation( + violations, + 'mcp-retrieve-target-required', + retrieval.node, + 'MCP retrieve must declare retriever= or rag=.', + ); + } + if (retrieverName && ragName) { + pushRagViolation( + violations, + 'mcp-retrieve-target-exclusive', + retrieval.node, + 'MCP retrieve cannot combine retriever= and rag=.', + ); + } + if (retrieverName && !retrieverByName.has(retrieverName)) { + pushRagViolation( + violations, + 'mcp-retrieve-unknown-retriever', + retrieval.node, + `MCP retrieve references unknown retriever '${retrieverName}'.`, + ); + } + if (ragName && !ragByName.has(ragName)) { + pushRagViolation( + violations, + 'mcp-retrieve-unknown-rag', + retrieval.node, + `MCP retrieve references unknown rag '${ragName}'.`, + ); + } + + const queryParam = stringProp(retrieval.node, 'queryParam'); + const query = expressionPropText(retrieval.node.props?.query); + if (!queryParam && !query) { + pushRagViolation( + violations, + 'mcp-retrieve-query-required', + retrieval.node, + 'MCP retrieve must declare queryParam= or query={{...}}.', + ); + } + if (queryParam && query) { + pushRagViolation( + violations, + 'mcp-retrieve-query-exclusive', + retrieval.node, + 'MCP retrieve cannot combine queryParam= and query={{...}}.', + ); + } + if (queryParam && retrieval.container && !retrieval.container.paramNames.has(queryParam)) { + pushRagViolation( + violations, + 
'mcp-retrieve-query-param-unknown', + retrieval.node, + `MCP retrieve queryParam '${queryParam}' is not declared on ${retrieval.container.kind} '${retrieval.container.name ?? ''}'.`, + ); + } + + const topK = numberProp(retrieval.node, 'topK'); + if (invalidNumberProp(retrieval.node, 'topK') || (topK !== undefined && (!Number.isInteger(topK) || topK <= 0))) { + pushRagViolation( + violations, + 'mcp-retrieve-topk-invalid', + retrieval.node, + 'MCP retrieve topK must be a positive integer.', + ); + } + + const minScore = numberProp(retrieval.node, 'minScore'); + if (invalidNumberProp(retrieval.node, 'minScore') || (minScore !== undefined && (minScore < 0 || minScore > 1))) { + pushRagViolation( + violations, + 'mcp-retrieve-minscore-invalid', + retrieval.node, + 'MCP retrieve minScore must be between 0 and 1.', + ); + } + + if (ragName && ragBooleanPropIsFalse(retrieval.node, 'requireGrounding')) { + const pipeline = ragByName.get(ragName); + const requiresCitations = pipeline && ragBooleanProp(pipeline.node, 'citations'); + if (requiresCitations) { + pushRagViolation( + violations, + 'mcp-retrieve-citations-require-grounding', + retrieval.node, + `MCP retrieve references citation-grounded rag '${ragName}' but sets requireGrounding=false.`, + ); + } + } +} + function pushRagViolation(violations: SemanticViolation[], rule: string, node: IRNode, message: string): void { violations.push({ rule, nodeType: node.type, message, line: node.loc?.line, col: node.loc?.col }); } @@ -1123,6 +1315,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe const embedNames = new Set(infos.embeds.map((info) => info.name)); const retrieverNames = new Set(infos.retrievers.map((info) => info.name)); const ragNames = new Set(infos.pipelines.map((info) => info.name)); + const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); const globalSourceNames = new 
Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); @@ -1130,6 +1323,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), retrievers: infos.retrievers.map(ragRetrieverFact), pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), + mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, ragByName)), unresolvedCorpusRefs: sortedUnique([ ...infos.chunking .map((info) => info.corpusName) @@ -1138,15 +1332,20 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe ...infos.retrievers.map((info) => info.corpusName).filter((name) => !corpusNames.has(name)), ]), unresolvedRetrieverRefs: sortedUnique( - infos.pipelines.map((info) => info.retrieverName).filter((name) => !retrieverNames.has(name)), + [ + ...infos.pipelines.map((info) => info.retrieverName), + ...infos.mcpRetrievals.map((info) => stringProp(info.node, 'retriever')), + ].filter((name): name is string => !!name && !retrieverNames.has(name)), ), unresolvedEmbedRefs: sortedUnique( infos.retrievers.map((info) => info.embedName).filter((name): name is string => !!name && !embedNames.has(name)), ), unresolvedRagRefs: sortedUnique( - [...infos.groundings.map((info) => info.ragName), ...infos.evals.map((info) => info.ragName)].filter( - (name): name is string => !!name && !ragNames.has(name), - ), + [ + ...infos.groundings.map((info) => info.ragName), + ...infos.evals.map((info) => info.ragName), + ...infos.mcpRetrievals.map((info) => stringProp(info.node, 'rag')), + ].filter((name): name is string => !!name && !ragNames.has(name)), ), unresolvedSourceRefs: sortedUnique( infos.chunking @@ -1261,6 +1460,40 @@ function ragEvalFact(info: RagEvalInfo): RagSemanticEvalFact { }; } +function ragMcpRetrievalFact( + info: RagMcpRetrievalInfo, + ragByName: ReadonlyMap, +): RagSemanticMcpRetrievalFact { + const 
ragName = stringProp(info.node, 'rag'); + const retrieverName = stringProp(info.node, 'retriever'); + const targetKind = ragName ? 'rag' : 'retriever'; + const targetName = ragName || retrieverName || ''; + return { + ...(info.container ? { containerKind: info.container.kind, containerName: info.container.name ?? '' } : {}), + targetKind, + targetName, + ...optionalStringFact(info.node, 'name', 'name'), + ...optionalStringFact(info.node, 'queryParam', 'queryParam'), + ...optionalStringValue('query', expressionPropText(info.node.props?.query)), + ...optionalStringFact(info.node, 'as', 'as'), + ...optionalNumberFact(info.node, 'topK', 'topK'), + ...optionalNumberFact(info.node, 'minScore', 'minScore'), + requireGrounding: ragMcpRetrieveRequiresGrounding(info.node, ragName, ragByName), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragMcpRetrieveRequiresGrounding( + node: IRNode, + ragName: string | undefined, + ragByName: ReadonlyMap, +): boolean { + if (ragBooleanPropIsFalse(node, 'requireGrounding')) return false; + if (ragBooleanProp(node, 'requireGrounding')) return true; + const pipeline = ragName ? ragByName.get(ragName) : undefined; + return pipeline ? ragBooleanProp(pipeline.node, 'citations') : false; +} + function ragLocation(node: IRNode): RagSemanticLocation | undefined { return node.loc ? 
{ line: node.loc.line, col: node.loc.col } : undefined; } @@ -1299,6 +1532,11 @@ function ragBooleanProp(node: IRNode, prop: string): boolean { return raw === true || (typeof raw === 'string' && raw.trim().toLowerCase() === 'true'); } +function ragBooleanPropIsFalse(node: IRNode, prop: string): boolean { + const raw = node.props?.[prop]; + return raw === false || (typeof raw === 'string' && raw.trim().toLowerCase() === 'false'); +} + function sortedUnique(values: readonly string[]): string[] { return [...new Set(values)].sort(); } diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index 328fa458..7b76bff6 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -344,6 +344,7 @@ export const NODE_TYPES = [ 'description', 'sampling', 'elicitation', + 'retrieve', // RAG — retrieval, grounding, and evaluation contracts 'corpus', 'source', diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index abf23434..94d525b9 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -123,6 +123,74 @@ describe('RAG language semantics', () => { ]); }); + test('accepts MCP tool and prompt retrieval intents against RAG contracts', () => { + const source = [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + 'embed name=DocsEmbedding corpus=Docs', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true policy=strict', + 'mcp name=Support', + ' tool name=answerQuestion', + ' param name=question type=string required=true', + ' retrieve rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8', + ' prompt name=summarizeDocs', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true', + ].join('\n'); + + 
expect(validateSchema(parseRoot(source))).toEqual([]); + expect(validateSemantics(parseRoot(source))).toEqual([]); + }); + + test('collects MCP retrieval intent facts from tools and prompts', () => { + const facts = collectRagSemanticFacts( + parseRoot( + [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + 'mcp name=Support', + ' tool name=answerQuestion', + ' param name=question type=string required=true', + ' retrieve name=answerDocs rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8', + ' prompt name=summarizeDocs', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true', + ].join('\n'), + ), + ); + + expect(facts.unresolvedRetrieverRefs).toEqual([]); + expect(facts.unresolvedRagRefs).toEqual([]); + expect(facts.mcpRetrievals).toEqual([ + expect.objectContaining({ + containerKind: 'tool', + containerName: 'answerQuestion', + targetKind: 'rag', + targetName: 'AnswerDocs', + name: 'answerDocs', + queryParam: 'question', + as: 'context', + topK: 4, + minScore: 0.8, + requireGrounding: true, + }), + expect.objectContaining({ + containerKind: 'prompt', + containerName: 'summarizeDocs', + targetKind: 'retriever', + targetName: 'DocsSearch', + queryParam: 'question', + as: 'chunks', + requireGrounding: true, + }), + ]); + }); + test('treats explicit false RAG booleans as false', () => { const root = parseRoot( [ @@ -180,6 +248,60 @@ describe('RAG language semantics', () => { ); }); + test('reports invalid MCP retrieval bindings into RAG contracts', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + 'mcp name=Support', + ' tool name=badTool', + ' param name=question 
type=string required=true', + ' retrieve rag=AnswerDocs retriever=MissingRetriever queryParam=missing query={{question}} topK=0 minScore=1.2 requireGrounding=false', + ' retrieve retriever=AlsoMissing queryParam=question', + 'retrieve rag=MissingRag', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'mcp-retrieve-target-exclusive', + 'mcp-retrieve-unknown-retriever', + 'mcp-retrieve-query-param-unknown', + 'mcp-retrieve-query-exclusive', + 'mcp-retrieve-topk-invalid', + 'mcp-retrieve-minscore-invalid', + 'mcp-retrieve-citations-require-grounding', + 'mcp-retrieve-duplicate', + 'mcp-retrieve-missing-container', + 'mcp-retrieve-unknown-rag', + ]), + ); + + const facts = collectRagSemanticFacts(parseRoot(source)); + expect(facts.unresolvedRetrieverRefs).toEqual(['AlsoMissing', 'MissingRetriever']); + expect(facts.unresolvedRagRefs).toEqual(['MissingRag']); + }); + + test('reports MCP retrieval declarations without a target', () => { + expect( + rulesFor(['mcp name=Support', ' tool name=badTool', ' retrieve queryParam=question'].join('\n')), + ).toContain('mcp-retrieve-target-required'); + }); + + test('reports MCP retrieval declarations without a query source', () => { + expect( + rulesFor( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'mcp name=Support', + ' tool name=badTool', + ' retrieve retriever=DocsSearch', + ].join('\n'), + ), + ).toContain('mcp-retrieve-query-required'); + }); + test('reports disconnected and duplicate RAG declarations', () => { const source = [ 'corpus name=Docs', diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 35ce6a00..3c2862b8 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -247,6 +247,10 @@ describe('KERN semantic substrate', () => { 'rag name=AnswerDocs retriever=DocsSearch citations=true', ' grounding requireCitations=true policy=strict 
maxContext=6000', ' ragEval metric=faithfulness threshold=0.85', + 'mcp name=Support', + ' tool name=answerQuestion', + ' param name=question type=string required=true', + ' retrieve rag=AnswerDocs queryParam=question as=context', ].join('\n'), ); @@ -281,6 +285,17 @@ describe('KERN semantic substrate', () => { evals: [expect.objectContaining({ metric: 'faithfulness', threshold: 0.85 })], }), ]); + expect(substrate.ragFacts?.mcpRetrievals).toEqual([ + expect.objectContaining({ + containerKind: 'tool', + containerName: 'answerQuestion', + targetKind: 'rag', + targetName: 'AnswerDocs', + queryParam: 'question', + as: 'context', + requireGrounding: true, + }), + ]); const invalidSubstrate = buildKernSemanticSubstrate({ documentRag: parseRoot('rag name=Broken retriever=Missing'), From 335225a515489fb2248fa7e0c95209123ab1a14c Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 20:24:22 +0200 Subject: [PATCH 20/46] feat(core): add mcp resource rag ingress --- .../conformance-mcp-rag-bad-cases.kern | 13 ++ .../conformance-mcp-rag-bad-cases.test.kern | 6 + examples/native-test/conformance-mcp-rag.kern | 4 + .../native-test/conformance-mcp-rag.test.kern | 1 + packages/core/src/schema.ts | 1 + packages/core/src/semantic-validator.ts | 145 +++++++++++++++++- packages/core/tests/rag-semantics.test.ts | 73 +++++++++ .../core/tests/semantic-substrate.test.ts | 14 +- 8 files changed, 252 insertions(+), 5 deletions(-) diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.kern b/examples/native-test/conformance-mcp-rag-bad-cases.kern index 89f629b2..b0a62f8d 100644 --- a/examples/native-test/conformance-mcp-rag-bad-cases.kern +++ b/examples/native-test/conformance-mcp-rag-bad-cases.kern @@ -5,6 +5,8 @@ rag name=AnswerDocs retriever=DocsSearch citations=true grounding requireCitations=true mcp name=Support + resource name=DocsResource uri="docs://manuals" + tool name=badTool param name=question type=string required=true retrieve rag=AnswerDocs 
retriever=MissingRetriever queryParam=missing query={{question}} topK=0 minScore=1.2 requireGrounding=false @@ -13,4 +15,15 @@ mcp name=Support prompt name=badPrompt retrieve retriever=DocsSearch +mcp name=OtherSupport + resource name=DocsResource uri="docs://other-manuals" + +corpus name=BadIngress + source name=missingResource kind=mcp uri="mcp://MissingResource" + source name=unknownResource kind=mcp resource=MissingResource uri="mcp://MissingResource" + source name=toolResource kind=mcp resource=badTool uri="mcp://badTool" + source name=promptResource kind=mcp resource=badPrompt uri="mcp://badPrompt" + source name=ambiguousResource kind=mcp resource=DocsResource uri="mcp://DocsResource" + source name=fileResource kind=local resource=DocsResource uri="./docs/**/*.md" + retrieve rag=MissingRag diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern index dcbd4a2f..8f1b1170 100644 --- a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern +++ b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern @@ -11,3 +11,9 @@ test name="Bad MCP RAG conformance" target="./conformance-mcp-rag-bad-cases.kern expect has=semanticViolations matches="MCP tool 'badTool' cannot declare more than one retrieve binding" expect has=semanticViolations matches="MCP retrieve must be nested under a tool or prompt" expect has=semanticViolations matches="MCP retrieve references unknown rag 'MissingRag'" + expect has=semanticViolations matches="RAG source kind=mcp requires resource=" + expect has=semanticViolations matches="RAG source references unknown MCP resource 'MissingResource'" + expect has=semanticViolations matches="RAG source resource 'badTool' resolves to MCP tool" + expect has=semanticViolations matches="RAG source resource 'badPrompt' resolves to MCP prompt" + expect has=semanticViolations matches="RAG source resource 'DocsResource' is ambiguous" + expect has=semanticViolations 
matches="RAG source resource= is only valid with kind=mcp" diff --git a/examples/native-test/conformance-mcp-rag.kern b/examples/native-test/conformance-mcp-rag.kern index 2c4e2585..d367ac46 100644 --- a/examples/native-test/conformance-mcp-rag.kern +++ b/examples/native-test/conformance-mcp-rag.kern @@ -1,6 +1,8 @@ corpus name=Docs source name=manuals uri="./docs/**/*.md" + source name=mcpManuals kind=mcp resource=DocsResource uri="mcp://DocsResource" chunking source=manuals strategy=semantic maxTokens=600 overlap=80 + chunking source=mcpManuals strategy=semantic maxTokens=600 overlap=80 retriever name=DocsSearch corpus=Docs mode=hybrid topK=8 minScore=0.72 @@ -8,6 +10,8 @@ rag name=AnswerDocs retriever=DocsSearch citations=true grounding requireCitations=true policy=strict mcp name=Support + resource name=DocsResource uri="docs://manuals" + tool name=answerQuestion param name=question type=string required=true retrieve rag=AnswerDocs queryParam=question as=context topK=4 diff --git a/examples/native-test/conformance-mcp-rag.test.kern b/examples/native-test/conformance-mcp-rag.test.kern index c4bf6d17..d164bdab 100644 --- a/examples/native-test/conformance-mcp-rag.test.kern +++ b/examples/native-test/conformance-mcp-rag.test.kern @@ -2,4 +2,5 @@ test name="MCP RAG conformance" target="./conformance-mcp-rag.kern" coverage=fal it name="mcp retrieve declarations bind to rag contracts" expect no=schemaViolations expect no=semanticViolations + expect node=source count=2 expect node=retrieve count=2 diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 99267c62..9bf22f8b 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2421,6 +2421,7 @@ export const NODE_SCHEMAS: Record = { name: { kind: 'identifier' }, kind: { kind: 'identifier' }, uri: { required: true, kind: 'string' }, + resource: { kind: 'identifier' }, media: { kind: 'identifier' }, acl: { kind: 'identifier' }, }, diff --git 
a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 02d93585..ea386e93 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -99,6 +99,7 @@ export interface RagSemanticSourceFact { readonly corpusName?: string; readonly kind?: string; readonly uri: string; + readonly resourceName?: string; readonly media?: string; readonly acl?: string; readonly loc?: RagSemanticLocation; @@ -189,16 +190,26 @@ export interface RagSemanticMcpRetrievalFact { readonly loc?: RagSemanticLocation; } +export interface RagSemanticResourceFeedFact { + readonly corpusName?: string; + readonly sourceName?: string; + readonly resourceName: string; + readonly uri: string; + readonly loc?: RagSemanticLocation; +} + export interface RagSemanticFacts { readonly corpora: readonly RagSemanticCorpusFact[]; readonly retrievers: readonly RagSemanticRetrieverFact[]; readonly pipelines: readonly RagSemanticPipelineFact[]; readonly mcpRetrievals: readonly RagSemanticMcpRetrievalFact[]; + readonly resourceFeedsCorpora: readonly RagSemanticResourceFeedFact[]; readonly unresolvedCorpusRefs: readonly string[]; readonly unresolvedRetrieverRefs: readonly string[]; readonly unresolvedEmbedRefs: readonly string[]; readonly unresolvedRagRefs: readonly string[]; readonly unresolvedSourceRefs: readonly string[]; + readonly unresolvedResourceRefs: readonly string[]; } /** @@ -695,6 +706,13 @@ interface RagMcpRetrievalInfo { container?: RagMcpContainerInfo; } +interface RagMcpSymbolInfo { + node: IRNode; + rootIndex: number; + kind: 'resource' | 'tool' | 'prompt'; + name: string; +} + interface RagInfos { corpora: RagCorpusInfo[]; sources: RagSourceInfo[]; @@ -705,6 +723,9 @@ interface RagInfos { groundings: RagGroundingInfo[]; evals: RagEvalInfo[]; mcpRetrievals: RagMcpRetrievalInfo[]; + mcpResources: RagMcpSymbolInfo[]; + mcpTools: RagMcpSymbolInfo[]; + mcpPrompts: RagMcpSymbolInfo[]; } function validateRagGraph(root: IRNode, 
violations: SemanticViolation[]): void { @@ -722,7 +743,10 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio infos.pipelines.length === 0 && infos.groundings.length === 0 && infos.evals.length === 0 && - infos.mcpRetrievals.length === 0 + infos.mcpRetrievals.length === 0 && + infos.mcpResources.length === 0 && + infos.mcpTools.length === 0 && + infos.mcpPrompts.length === 0 ) { return; } @@ -731,13 +755,18 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio const embedByName = new Map(infos.embeds.map((info) => [info.name, info])); const retrieverByName = new Map(infos.retrievers.map((info) => [info.name, info])); const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); + const mcpResourcesByName = collectRagMcpSymbolsByName(infos.mcpResources); + const mcpCallableByName = new Map([ + ...infos.mcpTools.map((info) => [info.name, info] as const), + ...infos.mcpPrompts.map((info) => [info.name, info] as const), + ]); const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); validateRagUniqueNames(infos, violations); for (const source of infos.sources) { - validateRagSource(source, violations); + validateRagSource(source, mcpResourcesByName, mcpCallableByName, violations); } for (const chunking of infos.chunking) { validateRagChunking(chunking, corpusByName, sourceNamesByCorpus, globalSourceNames, violations); @@ -774,6 +803,9 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { groundings: [], evals: [], mcpRetrievals: [], + mcpResources: [], + mcpTools: [], + mcpPrompts: [], }; for (const [rootIndex, root] of roots.entries()) { collectRagInfos(root, rootIndex, out); @@ -787,9 +819,11 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { nearestCorpusName?: string, nearestRagName?: string, 
nearestMcpContainer?: RagMcpContainerInfo, + nearestMcpName?: string, ): void { const nextCorpusName = node.type === 'corpus' ? stringProp(node, 'name') || nearestCorpusName : nearestCorpusName; const nextRagName = node.type === 'rag' ? stringProp(node, 'name') || nearestRagName : nearestRagName; + const nextMcpName = node.type === 'mcp' ? stringProp(node, 'name') || '' : nearestMcpName; const nextMcpContainer = node.type === 'tool' || node.type === 'prompt' ? ragMcpContainerInfo(node, rootIndex, node.type === 'tool' ? 'tool' : 'prompt') @@ -828,9 +862,21 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { out.evals.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); } else if (node.type === 'retrieve') { out.mcpRetrievals.push({ node, rootIndex, container: nearestMcpContainer }); + } else if ( + nextMcpName !== undefined && + (node.type === 'resource' || node.type === 'tool' || node.type === 'prompt') + ) { + const name = stringProp(node, 'name'); + if (name) { + const kind = node.type === 'resource' ? 'resource' : node.type === 'tool' ? 'tool' : 'prompt'; + const info: RagMcpSymbolInfo = { node, rootIndex, kind, name }; + if (node.type === 'resource') out.mcpResources.push(info); + else if (node.type === 'tool') out.mcpTools.push(info); + else out.mcpPrompts.push(info); + } } - for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName, nextMcpContainer); + for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName, nextMcpContainer, nextMcpName); } visit(root); } @@ -857,6 +903,16 @@ function collectRagSourceNamesByCorpus(sources: readonly RagSourceInfo[]): Map { + const out = new Map(); + for (const symbol of symbols) { + const matches = out.get(symbol.name) ?? 
[]; + matches.push(symbol); + out.set(symbol.name, matches); + } + return out; +} + function validateRagUniqueNames(infos: RagInfos, violations: SemanticViolation[]): void { validateRagUniqueNameSet('corpus', infos.corpora, violations); validateRagUniqueSourceNames(infos.sources, violations); @@ -905,11 +961,63 @@ function validateRagUniqueSourceNames(sources: readonly RagSourceInfo[], violati } } -function validateRagSource(source: RagSourceInfo, violations: SemanticViolation[]): void { +function validateRagSource( + source: RagSourceInfo, + mcpResourcesByName: ReadonlyMap, + mcpCallableByName: ReadonlyMap, + violations: SemanticViolation[], +): void { if (!source.corpusName) { pushRagViolation(violations, 'rag-source-missing-corpus', source.node, 'RAG source must be nested under a corpus.'); } + const kind = stringProp(source.node, 'kind'); + const resourceName = stringProp(source.node, 'resource'); + if (kind === 'mcp') { + if (!resourceName) { + pushRagViolation( + violations, + 'rag-source-mcp-resource-required', + source.node, + 'RAG source kind=mcp requires resource=.', + ); + } else { + const resources = mcpResourcesByName.get(resourceName) ?? 
[]; + if (resources.length > 1) { + pushRagViolation( + violations, + 'rag-source-mcp-resource-ambiguous', + source.node, + `RAG source resource '${resourceName}' is ambiguous because multiple MCP resources use that name.`, + ); + } else if (resources.length === 0) { + const callable = mcpCallableByName.get(resourceName); + if (callable) { + pushRagViolation( + violations, + 'rag-source-mcp-resource-kind', + source.node, + `RAG source resource '${resourceName}' resolves to MCP ${callable.kind}, expected MCP resource.`, + ); + } else { + pushRagViolation( + violations, + 'rag-source-mcp-resource-unknown', + source.node, + `RAG source references unknown MCP resource '${resourceName}'.`, + ); + } + } + } + } else if (resourceName) { + pushRagViolation( + violations, + 'rag-source-resource-requires-mcp-kind', + source.node, + 'RAG source resource= is only valid with kind=mcp.', + ); + } + const uri = stringProp(source.node, 'uri'); if (uri !== undefined && uri.trim() === '') { pushRagViolation( @@ -1316,6 +1424,11 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe const retrieverNames = new Set(infos.retrievers.map((info) => info.name)); const ragNames = new Set(infos.pipelines.map((info) => info.name)); const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); + const mcpResourcesByName = collectRagMcpSymbolsByName(infos.mcpResources); + const mcpCallableNames = new Set([ + ...infos.mcpTools.map((info) => info.name), + ...infos.mcpPrompts.map((info) => info.name), + ]); const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); @@ -1324,6 +1437,13 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe retrievers: infos.retrievers.map(ragRetrieverFact), pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), 
mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, ragByName)), + resourceFeedsCorpora: infos.sources + .filter( + (info) => + stringProp(info.node, 'kind') === 'mcp' && + (mcpResourcesByName.get(stringProp(info.node, 'resource') ?? '')?.length ?? 0) === 1, + ) + .map(ragResourceFeedFact), unresolvedCorpusRefs: sortedUnique([ ...infos.chunking .map((info) => info.corpusName) @@ -1357,6 +1477,12 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe .map((info) => info.sourceName) .filter((name): name is string => !!name), ), + unresolvedResourceRefs: sortedUnique( + infos.sources + .filter((info) => stringProp(info.node, 'kind') === 'mcp') + .map((info) => stringProp(info.node, 'resource')) + .filter((name): name is string => !!name && !mcpResourcesByName.has(name) && !mcpCallableNames.has(name)), + ), }; } @@ -1379,12 +1505,23 @@ function ragSourceFact(info: RagSourceInfo): RagSemanticSourceFact { ...optionalStringValue('corpusName', info.corpusName), ...optionalStringFact(info.node, 'kind', 'kind'), uri: stringProp(info.node, 'uri') ?? '', + ...optionalStringFact(info.node, 'resource', 'resourceName'), ...optionalStringFact(info.node, 'media', 'media'), ...optionalStringFact(info.node, 'acl', 'acl'), ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), }; } +function ragResourceFeedFact(info: RagSourceInfo): RagSemanticResourceFeedFact { + return { + ...optionalStringValue('corpusName', info.corpusName), + ...optionalStringValue('sourceName', info.name), + resourceName: stringProp(info.node, 'resource') ?? '', + uri: stringProp(info.node, 'uri') ?? '', + ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), + }; +} + function ragChunkingFact(info: RagChunkingInfo): RagSemanticChunkingFact { return { ...optionalStringValue('name', info.name), diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index 94d525b9..e3523032 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -123,6 +123,40 @@ describe('RAG language semantics', () => { ]); }); + test('accepts MCP resource-backed corpus sources as static ingress contracts', () => { + const source = [ + 'mcp name=Support', + ' resource name=DocsResource uri="docs://manuals"', + 'corpus name=Docs', + ' source name=manuals kind=mcp resource=DocsResource uri="mcp://DocsResource" media=markdown', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', + 'retriever name=DocsSearch corpus=Docs', + ].join('\n'); + + expect(validateSchema(parseRoot(source))).toEqual([]); + expect(validateSemantics(parseRoot(source))).toEqual([]); + + const facts = collectRagSemanticFacts(parseRoot(source)); + expect(facts.unresolvedResourceRefs).toEqual([]); + expect(facts.corpora[0]?.sources).toEqual([ + expect.objectContaining({ + name: 'manuals', + corpusName: 'Docs', + kind: 'mcp', + uri: 'mcp://DocsResource', + resourceName: 'DocsResource', + }), + ]); + expect(facts.resourceFeedsCorpora).toEqual([ + expect.objectContaining({ + corpusName: 'Docs', + sourceName: 'manuals', + resourceName: 'DocsResource', + uri: 'mcp://DocsResource', + }), + ]); + }); + test('accepts MCP tool and prompt retrieval intents against RAG contracts', () => { const source = [ 'corpus name=Docs', @@ -282,6 +316,45 @@ describe('RAG language semantics', () => { expect(facts.unresolvedRagRefs).toEqual(['MissingRag']); }); + test('reports invalid MCP resource-backed corpus source bindings', () => { + const source = [ + 'mcp name=Support', + ' tool name=DocsTool', + ' prompt name=DocsPrompt', + ' resource name=DocsResource 
uri="docs://manuals"', + ' resource name=UniqueResource uri="docs://unique"', + 'mcp name=OtherSupport', + ' resource name=DocsResource uri="docs://other-manuals"', + 'corpus name=Docs', + ' source name=missingResource kind=mcp uri="mcp://MissingResource"', + ' source name=unknownResource kind=mcp resource=MissingResource uri="mcp://MissingResource"', + ' source name=toolResource kind=mcp resource=DocsTool uri="mcp://DocsTool"', + ' source name=promptResource kind=mcp resource=DocsPrompt uri="mcp://DocsPrompt"', + ' source name=ambiguousMcp kind=mcp resource=DocsResource uri="mcp://DocsResource"', + ' source name=validMcp kind=mcp resource=UniqueResource uri="mcp://UniqueResource"', + ' source name=fileResource kind=local resource=DocsResource uri="./docs/**/*.md"', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'rag-source-mcp-resource-required', + 'rag-source-mcp-resource-unknown', + 'rag-source-mcp-resource-kind', + 'rag-source-mcp-resource-ambiguous', + 'rag-source-resource-requires-mcp-kind', + ]), + ); + + const facts = collectRagSemanticFacts(parseRoot(source)); + expect(facts.resourceFeedsCorpora).toEqual([ + expect.objectContaining({ + sourceName: 'validMcp', + resourceName: 'UniqueResource', + }), + ]); + expect(facts.unresolvedResourceRefs).toEqual(['MissingResource']); + }); + test('reports MCP retrieval declarations without a target', () => { expect( rulesFor(['mcp name=Support', ' tool name=badTool', ' retrieve queryParam=question'].join('\n')), diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 3c2862b8..fd0a3e17 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -241,6 +241,7 @@ describe('KERN semantic substrate', () => { [ 'corpus name=Docs title="Support docs"', ' source name=manuals kind=local uri="./docs/**/*.md"', + ' source name=mcpManuals kind=mcp resource=DocsResource 
uri="mcp://DocsResource"', ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', @@ -248,6 +249,7 @@ describe('KERN semantic substrate', () => { ' grounding requireCitations=true policy=strict maxContext=6000', ' ragEval metric=faithfulness threshold=0.85', 'mcp name=Support', + ' resource name=DocsResource uri="docs://manuals"', ' tool name=answerQuestion', ' param name=question type=string required=true', ' retrieve rag=AnswerDocs queryParam=question as=context', @@ -263,7 +265,10 @@ describe('KERN semantic substrate', () => { expect(substrate.ragFacts?.corpora).toEqual([ expect.objectContaining({ name: 'Docs', - sources: [expect.objectContaining({ name: 'manuals', uri: './docs/**/*.md' })], + sources: [ + expect.objectContaining({ name: 'manuals', uri: './docs/**/*.md' }), + expect.objectContaining({ name: 'mcpManuals', resourceName: 'DocsResource', uri: 'mcp://DocsResource' }), + ], embeds: [expect.objectContaining({ name: 'DocsEmbedding', corpusName: 'Docs' })], }), ]); @@ -296,6 +301,13 @@ describe('KERN semantic substrate', () => { requireGrounding: true, }), ]); + expect(substrate.ragFacts?.resourceFeedsCorpora).toEqual([ + expect.objectContaining({ + corpusName: 'Docs', + sourceName: 'mcpManuals', + resourceName: 'DocsResource', + }), + ]); const invalidSubstrate = buildKernSemanticSubstrate({ documentRag: parseRoot('rag name=Broken retriever=Missing'), From 177c69570ce6415cc706467387dba51a7aebae43 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 21:11:26 +0200 Subject: [PATCH 21/46] feat(core): add typed rag retrieval outputs --- .../conformance-mcp-rag-bad-cases.kern | 28 +++ .../conformance-mcp-rag-bad-cases.test.kern | 7 + examples/native-test/conformance-mcp-rag.kern | 4 +- .../native-test/conformance-mcp-rag.test.kern | 1 + 
packages/core/src/schema.ts | 9 +- packages/core/src/semantic-validator.ts | 162 ++++++++++++++++-- packages/core/tests/rag-semantics.test.ts | 77 ++++++++- .../core/tests/semantic-substrate.test.ts | 11 +- 8 files changed, 281 insertions(+), 18 deletions(-) diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.kern b/examples/native-test/conformance-mcp-rag-bad-cases.kern index b0a62f8d..bd111914 100644 --- a/examples/native-test/conformance-mcp-rag-bad-cases.kern +++ b/examples/native-test/conformance-mcp-rag-bad-cases.kern @@ -15,6 +15,34 @@ mcp name=Support prompt name=badPrompt retrieve retriever=DocsSearch + tool name=badOutput + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question output="Foo[]" + + tool name=scalarOutput + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question output=RetrievedChunk + + tool name=fieldWithoutOutput + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question citationField=citation + + tool name=requireCitationsWithoutOutput + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question requireCitations=true + + tool name=missingOutputCitation + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question output="RetrievedChunk[]" requireCitations=true provenance=source + + tool name=missingOutputSource + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question output="RetrievedChunk[]" requireCitations=true citationField=citation + + tool name=weakensRagCitations + param name=question type=string required=true + retrieve rag=AnswerDocs queryParam=question output="RetrievedChunk[]" requireCitations=false + mcp name=OtherSupport resource name=DocsResource uri="docs://other-manuals" diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern 
b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern index 8f1b1170..d6745e79 100644 --- a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern +++ b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern @@ -11,6 +11,13 @@ test name="Bad MCP RAG conformance" target="./conformance-mcp-rag-bad-cases.kern expect has=semanticViolations matches="MCP tool 'badTool' cannot declare more than one retrieve binding" expect has=semanticViolations matches="MCP retrieve must be nested under a tool or prompt" expect has=semanticViolations matches="MCP retrieve references unknown rag 'MissingRag'" + expect has=semanticViolations matches="MCP retrieve output 'Foo\\[\\]' is not supported" + expect has=semanticViolations matches="MCP retrieve output must be RetrievedChunk\\[\\]" + expect has=semanticViolations matches="MCP retrieve output fields require output=RetrievedChunk\\[\\]" + expect has=semanticViolations matches="MCP retrieve requireCitations=.*requires output=RetrievedChunk\\[\\]" + expect has=semanticViolations matches="MCP retrieve output requires citationField=" + expect has=semanticViolations matches="MCP retrieve output requires sourceField=" + expect has=semanticViolations matches="MCP retrieve references citation-grounded rag 'AnswerDocs' but sets requireCitations=false" expect has=semanticViolations matches="RAG source kind=mcp requires resource=" expect has=semanticViolations matches="RAG source references unknown MCP resource 'MissingResource'" expect has=semanticViolations matches="RAG source resource 'badTool' resolves to MCP tool" diff --git a/examples/native-test/conformance-mcp-rag.kern b/examples/native-test/conformance-mcp-rag.kern index d367ac46..25934568 100644 --- a/examples/native-test/conformance-mcp-rag.kern +++ b/examples/native-test/conformance-mcp-rag.kern @@ -14,8 +14,8 @@ mcp name=Support tool name=answerQuestion param name=question type=string required=true - retrieve rag=AnswerDocs queryParam=question as=context topK=4 
+ retrieve rag=AnswerDocs queryParam=question as=context topK=4 output="RetrievedChunk[]" requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score prompt name=summarizeDocs param name=question type=string required=true - retrieve retriever=DocsSearch queryParam=question as=chunks + retrieve retriever=DocsSearch queryParam=question as=chunks output="RetrievedChunk[]" scoreField=score diff --git a/examples/native-test/conformance-mcp-rag.test.kern b/examples/native-test/conformance-mcp-rag.test.kern index d164bdab..c1c0d317 100644 --- a/examples/native-test/conformance-mcp-rag.test.kern +++ b/examples/native-test/conformance-mcp-rag.test.kern @@ -4,3 +4,4 @@ test name="MCP RAG conformance" target="./conformance-mcp-rag.kern" coverage=fal expect no=semanticViolations expect node=source count=2 expect node=retrieve count=2 + expect node=retrieve prop=output is="RetrievedChunk[]" diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 9bf22f8b..062454d6 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2361,7 +2361,8 @@ export const NODE_SCHEMAS: Record = { retrieve: { description: 'MCP retrieval intent — declaratively binds a tool or prompt to a RAG retriever or pipeline without executing provider retrieval in core.', - example: 'retrieve rag=AnswerDocs queryParam=question as=context requireGrounding=true topK=4', + example: + 'retrieve rag=AnswerDocs queryParam=question as=context output="RetrievedChunk[]" requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score', props: { name: { kind: 'identifier' }, retriever: { kind: 'identifier' }, @@ -2372,6 +2373,12 @@ export const NODE_SCHEMAS: Record = { topK: { kind: 'number' }, minScore: { kind: 'number' }, requireGrounding: { kind: 'boolean' }, + output: { kind: 'typeAnnotation' }, + requireCitations: { kind: 'boolean' }, + provenance: { kind: 'identifier' }, + citationField: { kind: 'identifier' 
}, + sourceField: { kind: 'identifier' }, + scoreField: { kind: 'identifier' }, }, allowedChildren: [], }, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index ea386e93..e1699fd8 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -187,6 +187,15 @@ export interface RagSemanticMcpRetrievalFact { readonly topK?: number; readonly minScore?: number; readonly requireGrounding: boolean; + readonly outputShape?: string; + readonly outputItemShape?: string; + readonly requireCitations?: boolean; + readonly effectiveRequiresCitations: boolean; + readonly provenance?: string; + readonly citationField?: string; + readonly sourceField?: string; + readonly scoreField?: string; + readonly contractStatus: 'absent' | 'valid' | 'invalid'; readonly loc?: RagSemanticLocation; } @@ -760,6 +769,7 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio ...infos.mcpTools.map((info) => [info.name, info] as const), ...infos.mcpPrompts.map((info) => [info.name, info] as const), ]); + const citationRequiredRagNames = collectRagCitationRequiredNames(infos.pipelines, infos.groundings); const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); @@ -788,7 +798,7 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio } validateRagMcpRetrievalDuplicates(infos.mcpRetrievals, violations); for (const retrieval of infos.mcpRetrievals) { - validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, violations); + validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, citationRequiredRagNames, violations); } } @@ -1305,6 +1315,7 @@ function validateRagMcpRetrieval( retrieval: RagMcpRetrievalInfo, retrieverByName: ReadonlyMap, ragByName: ReadonlyMap, + citationRequiredRagNames: ReadonlySet, violations: 
SemanticViolation[], ): void { if (!retrieval.container) { @@ -1399,8 +1410,7 @@ function validateRagMcpRetrieval( } if (ragName && ragBooleanPropIsFalse(retrieval.node, 'requireGrounding')) { - const pipeline = ragByName.get(ragName); - const requiresCitations = pipeline && ragBooleanProp(pipeline.node, 'citations'); + const requiresCitations = citationRequiredRagNames.has(ragName); if (requiresCitations) { pushRagViolation( violations, @@ -1410,6 +1420,101 @@ function validateRagMcpRetrieval( ); } } + + validateRagMcpRetrievalOutput(retrieval, ragName, citationRequiredRagNames, violations); +} + +function validateRagMcpRetrievalOutput( + retrieval: RagMcpRetrievalInfo, + ragName: string | undefined, + citationRequiredRagNames: ReadonlySet, + violations: SemanticViolation[], +): void { + const outputShape = stringProp(retrieval.node, 'output'); + const provenance = stringProp(retrieval.node, 'provenance'); + const citationField = stringProp(retrieval.node, 'citationField'); + const sourceField = stringProp(retrieval.node, 'sourceField'); + const scoreField = stringProp(retrieval.node, 'scoreField'); + const hasRequireCitations = Object.hasOwn(retrieval.node.props ?? 
{}, 'requireCitations'); + const hasOutputField = Boolean(provenance || citationField || sourceField || scoreField); + + if (outputShape === RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE) { + pushRagViolation( + violations, + 'mcp-retrieve-output-array-required', + retrieval.node, + 'MCP retrieve output must be RetrievedChunk[] because retrieval bindings expose ranked context sets.', + ); + } else if (outputShape && outputShape !== RAG_MCP_RETRIEVE_OUTPUT_SHAPE) { + pushRagViolation( + violations, + 'mcp-retrieve-output-unknown', + retrieval.node, + `MCP retrieve output '${outputShape}' is not supported; use RetrievedChunk[] for this slice.`, + ); + } + + if (!outputShape && hasOutputField) { + pushRagViolation( + violations, + 'mcp-retrieve-output-field-without-output', + retrieval.node, + 'MCP retrieve output fields require output=RetrievedChunk[].', + ); + } + if (!outputShape && hasRequireCitations) { + pushRagViolation( + violations, + 'mcp-retrieve-output-required', + retrieval.node, + 'MCP retrieve requireCitations= requires output=RetrievedChunk[].', + ); + } + + const targetRequiresCitations = ragName ? 
citationRequiredRagNames.has(ragName) : false; + if (ragBooleanPropIsFalse(retrieval.node, 'requireCitations') && targetRequiresCitations) { + pushRagViolation( + violations, + 'mcp-retrieve-output-citations-cannot-weaken-rag', + retrieval.node, + `MCP retrieve references citation-grounded rag '${ragName}' but sets requireCitations=false.`, + ); + } + + if (outputShape !== RAG_MCP_RETRIEVE_OUTPUT_SHAPE) return; + + const explicitRequiresCitations = ragBooleanProp(retrieval.node, 'requireCitations'); + const effectiveRequiresCitations = explicitRequiresCitations || targetRequiresCitations; + if (effectiveRequiresCitations && !citationField) { + pushRagViolation( + violations, + 'mcp-retrieve-output-citation-field-required', + retrieval.node, + 'MCP retrieve output requires citationField= when citations are required.', + ); + } + if (effectiveRequiresCitations && !sourceField && provenance !== 'source') { + pushRagViolation( + violations, + 'mcp-retrieve-output-source-required', + retrieval.node, + 'MCP retrieve output requires sourceField= or provenance=source when citations are required.', + ); + } +} + +function collectRagCitationRequiredNames( + pipelines: readonly RagPipelineInfo[], + groundings: readonly RagGroundingInfo[], +): ReadonlySet { + const out = new Set(); + for (const pipeline of pipelines) { + if (ragBooleanProp(pipeline.node, 'citations')) out.add(pipeline.name); + } + for (const grounding of groundings) { + if (grounding.ragName && ragBooleanProp(grounding.node, 'requireCitations')) out.add(grounding.ragName); + } + return out; } function pushRagViolation(violations: SemanticViolation[], rule: string, node: IRNode, message: string): void { @@ -1423,7 +1528,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe const embedNames = new Set(infos.embeds.map((info) => info.name)); const retrieverNames = new Set(infos.retrievers.map((info) => info.name)); const ragNames = new Set(infos.pipelines.map((info) => info.name)); 
- const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); + const citationRequiredRagNames = collectRagCitationRequiredNames(infos.pipelines, infos.groundings); const mcpResourcesByName = collectRagMcpSymbolsByName(infos.mcpResources); const mcpCallableNames = new Set([ ...infos.mcpTools.map((info) => info.name), @@ -1436,7 +1541,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), retrievers: infos.retrievers.map(ragRetrieverFact), pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), - mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, ragByName)), + mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, citationRequiredRagNames)), resourceFeedsCorpora: infos.sources .filter( (info) => @@ -1599,12 +1704,15 @@ function ragEvalFact(info: RagEvalInfo): RagSemanticEvalFact { function ragMcpRetrievalFact( info: RagMcpRetrievalInfo, - ragByName: ReadonlyMap, + citationRequiredRagNames: ReadonlySet, ): RagSemanticMcpRetrievalFact { const ragName = stringProp(info.node, 'rag'); const retrieverName = stringProp(info.node, 'retriever'); const targetKind = ragName ? 'rag' : 'retriever'; const targetName = ragName || retrieverName || ''; + const outputShape = stringProp(info.node, 'output'); + const targetRequiresCitations = ragName ? citationRequiredRagNames.has(ragName) : false; + const explicitRequiresCitations = ragBooleanProp(info.node, 'requireCitations'); return { ...(info.container ? { containerKind: info.container.kind, containerName: info.container.name ?? 
'' } : {}), targetKind, @@ -1615,20 +1723,52 @@ function ragMcpRetrievalFact( ...optionalStringFact(info.node, 'as', 'as'), ...optionalNumberFact(info.node, 'topK', 'topK'), ...optionalNumberFact(info.node, 'minScore', 'minScore'), - requireGrounding: ragMcpRetrieveRequiresGrounding(info.node, ragName, ragByName), + requireGrounding: ragMcpRetrieveRequiresGrounding(info.node, ragName, citationRequiredRagNames), + ...optionalStringValue('outputShape', outputShape), + ...(outputShape === RAG_MCP_RETRIEVE_OUTPUT_SHAPE ? { outputItemShape: RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE } : {}), + ...(Object.hasOwn(info.node.props ?? {}, 'requireCitations') + ? { requireCitations: explicitRequiresCitations } + : {}), + effectiveRequiresCitations: explicitRequiresCitations || targetRequiresCitations, + ...optionalStringFact(info.node, 'provenance', 'provenance'), + ...optionalStringFact(info.node, 'citationField', 'citationField'), + ...optionalStringFact(info.node, 'sourceField', 'sourceField'), + ...optionalStringFact(info.node, 'scoreField', 'scoreField'), + contractStatus: ragMcpRetrieveContractStatus(info.node, targetRequiresCitations), ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), }; } +function ragMcpRetrieveContractStatus( + node: IRNode, + targetRequiresCitations: boolean, +): RagSemanticMcpRetrievalFact['contractStatus'] { + const outputShape = stringProp(node, 'output'); + const hasRequireCitations = Object.hasOwn(node.props ?? {}, 'requireCitations'); + const hasOutputField = ['provenance', 'citationField', 'sourceField', 'scoreField'].some((prop) => + Boolean(stringProp(node, prop)), + ); + if (!outputShape) return hasOutputField || hasRequireCitations ? 
'invalid' : 'absent'; + if (outputShape !== RAG_MCP_RETRIEVE_OUTPUT_SHAPE) return 'invalid'; + if (ragBooleanPropIsFalse(node, 'requireCitations') && targetRequiresCitations) return 'invalid'; + if (ragBooleanProp(node, 'requireCitations') || targetRequiresCitations) { + const citationField = stringProp(node, 'citationField'); + const sourceField = stringProp(node, 'sourceField'); + const provenance = stringProp(node, 'provenance'); + if (!citationField) return 'invalid'; + if (!sourceField && provenance !== 'source') return 'invalid'; + } + return 'valid'; +} + function ragMcpRetrieveRequiresGrounding( node: IRNode, ragName: string | undefined, - ragByName: ReadonlyMap, + citationRequiredRagNames: ReadonlySet, ): boolean { if (ragBooleanPropIsFalse(node, 'requireGrounding')) return false; if (ragBooleanProp(node, 'requireGrounding')) return true; - const pipeline = ragName ? ragByName.get(ragName) : undefined; - return pipeline ? ragBooleanProp(pipeline.node, 'citations') : false; + return ragName ? 
citationRequiredRagNames.has(ragName) : false; } function ragLocation(node: IRNode): RagSemanticLocation | undefined { @@ -1699,6 +1839,8 @@ interface ClassMemberInfo { } const BUILTIN_CLASS_BASES = new Set(['Error']); +const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; +const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; const BODY_EXPRESSION_PROPS = [ 'value', 'expr', diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index e3523032..90e8d7a9 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -168,10 +168,10 @@ describe('RAG language semantics', () => { 'mcp name=Support', ' tool name=answerQuestion', ' param name=question type=string required=true', - ' retrieve rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8', + ' retrieve rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8 output="RetrievedChunk[]" requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score', ' prompt name=summarizeDocs', ' param name=question type=string required=true', - ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true', + ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true output="RetrievedChunk[]" scoreField=score', ].join('\n'); expect(validateSchema(parseRoot(source))).toEqual([]); @@ -190,10 +190,10 @@ describe('RAG language semantics', () => { 'mcp name=Support', ' tool name=answerQuestion', ' param name=question type=string required=true', - ' retrieve name=answerDocs rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8', + ' retrieve name=answerDocs rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8 output="RetrievedChunk[]" requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score', ' prompt name=summarizeDocs', ' param name=question type=string required=true', - ' 
retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true', + ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true output="RetrievedChunk[]" scoreField=score', ].join('\n'), ), ); @@ -212,6 +212,15 @@ describe('RAG language semantics', () => { topK: 4, minScore: 0.8, requireGrounding: true, + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + requireCitations: true, + effectiveRequiresCitations: true, + provenance: 'source', + citationField: 'citation', + sourceField: 'uri', + scoreField: 'score', + contractStatus: 'valid', }), expect.objectContaining({ containerKind: 'prompt', @@ -221,6 +230,11 @@ describe('RAG language semantics', () => { queryParam: 'question', as: 'chunks', requireGrounding: true, + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + effectiveRequiresCitations: false, + scoreField: 'score', + contractStatus: 'valid', }), ]); }); @@ -316,6 +330,61 @@ describe('RAG language semantics', () => { expect(facts.unresolvedRagRefs).toEqual(['MissingRag']); }); + test('reports invalid MCP retrieval output contracts', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + 'rag name=PlainAnswer retriever=DocsSearch', + 'mcp name=Support', + ' tool name=badOutput', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question output="Foo[]"', + ' tool name=scalarOutput', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question output=RetrievedChunk', + ' tool name=fieldWithoutOutput', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question citationField=citation', + ' tool name=requireCitationsWithoutOutput', + ' param name=question type=string required=true', + ' retrieve 
retriever=DocsSearch queryParam=question requireCitations=true', + ' tool name=missingCitationField', + ' param name=question type=string required=true', + ' retrieve rag=PlainAnswer queryParam=question output="RetrievedChunk[]" requireCitations=true provenance=source', + ' tool name=missingSourceField', + ' param name=question type=string required=true', + ' retrieve rag=PlainAnswer queryParam=question output="RetrievedChunk[]" requireCitations=true citationField=citation', + ' tool name=weakensCitations', + ' param name=question type=string required=true', + ' retrieve rag=AnswerDocs queryParam=question output="RetrievedChunk[]" requireCitations=false', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'mcp-retrieve-output-unknown', + 'mcp-retrieve-output-array-required', + 'mcp-retrieve-output-field-without-output', + 'mcp-retrieve-output-required', + 'mcp-retrieve-output-citation-field-required', + 'mcp-retrieve-output-source-required', + 'mcp-retrieve-output-citations-cannot-weaken-rag', + ]), + ); + + const facts = collectRagSemanticFacts(parseRoot(source)); + expect(facts.mcpRetrievals.map((fact) => fact.contractStatus)).toEqual([ + 'invalid', + 'invalid', + 'invalid', + 'invalid', + 'invalid', + 'invalid', + 'invalid', + ]); + }); + test('reports invalid MCP resource-backed corpus source bindings', () => { const source = [ 'mcp name=Support', diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index fd0a3e17..87ee071a 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -252,7 +252,7 @@ describe('KERN semantic substrate', () => { ' resource name=DocsResource uri="docs://manuals"', ' tool name=answerQuestion', ' param name=question type=string required=true', - ' retrieve rag=AnswerDocs queryParam=question as=context', + ' retrieve rag=AnswerDocs queryParam=question as=context output="RetrievedChunk[]" 
requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score', ].join('\n'), ); @@ -299,6 +299,15 @@ describe('KERN semantic substrate', () => { queryParam: 'question', as: 'context', requireGrounding: true, + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + requireCitations: true, + effectiveRequiresCitations: true, + provenance: 'source', + citationField: 'citation', + sourceField: 'uri', + scoreField: 'score', + contractStatus: 'valid', }), ]); expect(substrate.ragFacts?.resourceFeedsCorpora).toEqual([ From 922b18b5ea41e9720dad5fa91a44a6e7cc2ae8bf Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 21:50:32 +0200 Subject: [PATCH 22/46] feat(core): add rag eval contract cases --- .../conformance-rag-bad-cases.kern | 17 + .../conformance-rag-bad-cases.test.kern | 17 + examples/native-test/conformance-rag.kern | 6 +- .../native-test/conformance-rag.test.kern | 2 + packages/core/src/codegen-core.ts | 4 + packages/core/src/index.ts | 2 + packages/core/src/schema.ts | 34 +- packages/core/src/semantic-validator.ts | 460 +++++++++++++++++- packages/core/src/spec.ts | 2 + packages/core/tests/rag-semantics.test.ts | 172 ++++++- 10 files changed, 707 insertions(+), 9 deletions(-) diff --git a/examples/native-test/conformance-rag-bad-cases.kern b/examples/native-test/conformance-rag-bad-cases.kern index e989e2e8..84e51325 100644 --- a/examples/native-test/conformance-rag-bad-cases.kern +++ b/examples/native-test/conformance-rag-bad-cases.kern @@ -11,6 +11,23 @@ retriever name=BadRetriever corpus=Missing embed=MissingEmbed topK=0 minScore=1. retriever name=MismatchRetriever corpus=OtherDocs embed=OtherEmbedding rag name=BadRag retriever=MissingRetriever citations=true + ragEval metric=faithfulness threshold=0.85 + ragCase name=badCase query="What changed?" 
topK=0 minScore=1.2 chunkCount=-1 + ragAssert kind=unknownKind + ragAssert kind=scoreGte threshold=1.5 + ragAssert kind=scoreLte + ragAssert kind=chunkHash value=not-a-hash + ragAssert kind=chunkCountEq count=-1 + ragAssert kind=latencyLte valueMs=-1 + ragAssert kind=sourceEq + +rag name=PlainRag retriever=MismatchRetriever + ragEval name=PlainEval metric=faithfulness threshold=0.85 mode=contract + ragCase name=needsCitations query="needs cite" sources="docs/refunds.md" + ragAssert kind=citesRequired grounding rag=MissingRag maxContext=0 ragEval rag=MissingRag threshold=1.1 + +ragCase name=loose query="outside eval" +ragAssert kind=citesRequired diff --git a/examples/native-test/conformance-rag-bad-cases.test.kern b/examples/native-test/conformance-rag-bad-cases.test.kern index c6fb8223..83d7c393 100644 --- a/examples/native-test/conformance-rag-bad-cases.test.kern +++ b/examples/native-test/conformance-rag-bad-cases.test.kern @@ -15,3 +15,20 @@ test name="Bad RAG conformance" target="./conformance-rag-bad-cases.kern" covera expect has=semanticViolations matches="RAG grounding maxContext must be a positive integer" expect has=semanticViolations matches="RAG eval references unknown rag 'MissingRag'" expect has=semanticViolations matches="RAG eval threshold must be between 0 and 1" + expect has=semanticViolations matches="RAG eval with ragCase children must declare name=" + expect has=semanticViolations matches="RAG eval with ragCase children must declare mode=contract" + expect has=semanticViolations matches="RAG eval case topK must be a positive integer" + expect has=semanticViolations matches="RAG eval case minScore must be between 0 and 1" + expect has=semanticViolations matches="RAG eval case chunkCount must be a non-negative integer" + expect has=semanticViolations matches="RAG eval case sources=.*requires a citation-grounded rag" + expect has=semanticViolations matches="RAG eval case must be nested under ragEval" + expect has=semanticViolations matches="RAG 
assert kind must be one of" + expect has=semanticViolations matches="RAG assert kind=scoreLte requires threshold" + expect has=semanticViolations matches="RAG assert kind=scoreGte threshold must be between 0 and 1" + expect has=semanticViolations matches="RAG assert kind=chunkHash value must be a 32-128 character hex hash" + expect has=semanticViolations matches="RAG assert kind=chunkCountEq requires a non-negative integer count" + expect has=semanticViolations matches="RAG assert kind=latencyLte requires a non-negative integer valueMs" + expect has=semanticViolations matches="RAG assert kind=sourceEq requires value=" + expect has=semanticViolations matches="RAG assert kind=citesRequired requires a citation-grounded rag" + expect has=semanticViolations matches="RAG assert must be nested under ragEval" + expect has=semanticViolations matches="RAG assert must be nested under ragCase" diff --git a/examples/native-test/conformance-rag.kern b/examples/native-test/conformance-rag.kern index 652d877f..84654d01 100644 --- a/examples/native-test/conformance-rag.kern +++ b/examples/native-test/conformance-rag.kern @@ -8,4 +8,8 @@ retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 min rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true grounding name=StrictGrounding requireCitations=true policy=strict maxContext=6000 - ragEval name=Faithfulness metric=faithfulness threshold=0.85 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="How do refunds work?" 
tags="smoke,policy" topK=4 minScore=0.72 sources="docs/refunds.md" + ragAssert kind=scoreGte threshold=0.72 required=true + ragAssert kind=sourceGlob value="docs/refunds.md" required=true + ragAssert kind=citesRequired diff --git a/examples/native-test/conformance-rag.test.kern b/examples/native-test/conformance-rag.test.kern index 89b7ac9c..a66308c0 100644 --- a/examples/native-test/conformance-rag.test.kern +++ b/examples/native-test/conformance-rag.test.kern @@ -10,3 +10,5 @@ test name="RAG language conformance" target="./conformance-rag.kern" coverage=fa expect node=retriever name=DocsSearch prop=topK is=8 expect node=rag name=AnswerDocs child=grounding childName=StrictGrounding expect node=rag name=AnswerDocs child=ragEval childName=Faithfulness + expect node=ragEval name=Faithfulness child=ragCase childName=refunds + expect node=ragAssert count=3 diff --git a/packages/core/src/codegen-core.ts b/packages/core/src/codegen-core.ts index 7d49750f..5d3290d2 100644 --- a/packages/core/src/codegen-core.ts +++ b/packages/core/src/codegen-core.ts @@ -730,6 +730,8 @@ export const CORE_NODE_TYPES = new Set([ 'rag', 'grounding', 'ragEval', + 'ragCase', + 'ragAssert', // Backend data layer (graduated nodes) 'model', 'column', @@ -1030,6 +1032,8 @@ export function generateCoreNode(node: IRNode, target?: string, runtime?: KernRu case 'rag': case 'grounding': case 'ragEval': + case 'ragCase': + case 'ragAssert': return []; // Graduated nodes — backend data layer case 'model': diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 835b6ff0..3a624744 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -467,6 +467,8 @@ export type { RagSemanticChunkingFact, RagSemanticCorpusFact, RagSemanticEmbedFact, + RagSemanticEvalAssertFact, + RagSemanticEvalCaseFact, RagSemanticEvalFact, RagSemanticFacts, RagSemanticGroundingFact, diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 062454d6..67ed0bbc 100644 --- 
a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2504,13 +2504,43 @@ export const NODE_SCHEMAS: Record = { allowedChildren: [], }, ragEval: { - description: 'RAG evaluation contract — declares a metric threshold for a RAG pipeline.', - example: 'ragEval rag=AnswerDocs metric=faithfulness threshold=0.85', + description: 'RAG evaluation contract — declares metric thresholds and static eval cases for a RAG pipeline.', + example: + 'ragEval rag=AnswerDocs metric=faithfulness threshold=0.85 mode=contract\n ragCase name=refunds query="How do refunds work?"\n ragAssert kind=sourceGlob value="docs/refunds.md" required=true', props: { name: { kind: 'identifier' }, rag: { kind: 'identifier' }, metric: { kind: 'identifier' }, threshold: { kind: 'number' }, + mode: { kind: 'identifier' }, + }, + allowedChildren: ['ragCase'], + }, + ragCase: { + description: 'RAG evaluation case — declares a single query and expected retrieval contract facts.', + example: + 'ragCase name=refunds query="How do refunds work?" 
tags=smoke minScore=0.72\n ragAssert kind=sourceGlob value="docs/refunds.md" required=true', + props: { + name: { required: true, kind: 'identifier' }, + query: { required: true, kind: 'string' }, + tags: { kind: 'string' }, + topK: { kind: 'number' }, + minScore: { kind: 'number' }, + chunkCount: { kind: 'number' }, + sources: { kind: 'string' }, + }, + allowedChildren: ['ragAssert'], + }, + ragAssert: { + description: 'RAG evaluation assertion — declares a closed static check over retrieved chunks or grounding.', + example: 'ragAssert kind=scoreGte threshold=0.72 required=true', + props: { + kind: { required: true, kind: 'identifier' }, + value: { kind: 'string' }, + threshold: { kind: 'number' }, + count: { kind: 'number' }, + valueMs: { kind: 'number' }, + required: { kind: 'boolean' }, }, allowedChildren: [], }, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index e1699fd8..30e79f67 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -161,6 +161,38 @@ export interface RagSemanticEvalFact { readonly ragName?: string; readonly metric?: string; readonly threshold?: number; + readonly mode?: string; + readonly caseCount?: number; + readonly assertCount?: number; + readonly cases?: readonly RagSemanticEvalCaseFact[]; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticEvalCaseFact { + readonly name: string; + readonly ragName?: string; + readonly evalName?: string; + readonly query: string; + readonly tags: readonly string[]; + readonly expected: { + readonly topK?: number; + readonly minScore?: number; + readonly chunkCount?: number; + readonly sources?: readonly string[]; + }; + readonly asserts: readonly RagSemanticEvalAssertFact[]; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticEvalAssertFact { + readonly ragName?: string; + readonly evalName?: string; + readonly caseName?: string; + readonly kind: string; + readonly 
target: 'retrieved-chunk' | 'retrieved-chunks' | 'grounding' | 'latency'; + readonly op: 'eq' | 'gte' | 'lte' | 'contains' | 'glob' | 'present'; + readonly value?: string | number | boolean; + readonly required: boolean; readonly loc?: RagSemanticLocation; } @@ -701,6 +733,29 @@ interface RagEvalInfo { ragName?: string; } +interface RagCaseInfo { + node: IRNode; + rootIndex: number; + name?: string; + query?: string; + ragName?: string; + evalName?: string; + evalNode?: IRNode; + evalBound: boolean; +} + +interface RagAssertInfo { + node: IRNode; + rootIndex: number; + ragName?: string; + evalName?: string; + caseName?: string; + evalNode?: IRNode; + caseNode?: IRNode; + evalBound: boolean; + caseBound: boolean; +} + interface RagMcpContainerInfo { node: IRNode; rootIndex: number; @@ -731,6 +786,8 @@ interface RagInfos { pipelines: RagPipelineInfo[]; groundings: RagGroundingInfo[]; evals: RagEvalInfo[]; + cases: RagCaseInfo[]; + asserts: RagAssertInfo[]; mcpRetrievals: RagMcpRetrievalInfo[]; mcpResources: RagMcpSymbolInfo[]; mcpTools: RagMcpSymbolInfo[]; @@ -752,6 +809,8 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio infos.pipelines.length === 0 && infos.groundings.length === 0 && infos.evals.length === 0 && + infos.cases.length === 0 && + infos.asserts.length === 0 && infos.mcpRetrievals.length === 0 && infos.mcpResources.length === 0 && infos.mcpTools.length === 0 && @@ -796,6 +855,12 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio for (const evaluation of infos.evals) { validateRagEval(evaluation, ragByName, violations); } + for (const evaluationCase of infos.cases) { + validateRagCase(evaluationCase, citationRequiredRagNames, violations); + } + for (const assertion of infos.asserts) { + validateRagAssert(assertion, citationRequiredRagNames, violations); + } validateRagMcpRetrievalDuplicates(infos.mcpRetrievals, violations); for (const retrieval of infos.mcpRetrievals) { 
validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, citationRequiredRagNames, violations); @@ -812,6 +877,8 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { pipelines: [], groundings: [], evals: [], + cases: [], + asserts: [], mcpRetrievals: [], mcpResources: [], mcpTools: [], @@ -828,11 +895,28 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { node: IRNode, nearestCorpusName?: string, nearestRagName?: string, + nearestRagEvalName?: string, + nearestRagCaseName?: string, + nearestRagEvalBound = false, + nearestRagCaseBound = false, + nearestRagEvalNode?: IRNode, + nearestRagCaseNode?: IRNode, nearestMcpContainer?: RagMcpContainerInfo, nearestMcpName?: string, ): void { const nextCorpusName = node.type === 'corpus' ? stringProp(node, 'name') || nearestCorpusName : nearestCorpusName; - const nextRagName = node.type === 'rag' ? stringProp(node, 'name') || nearestRagName : nearestRagName; + const nextRagName = + node.type === 'rag' + ? stringProp(node, 'name') || nearestRagName + : node.type === 'ragEval' + ? stringProp(node, 'rag') || nearestRagName + : nearestRagName; + const nextRagEvalName = node.type === 'ragEval' ? stringProp(node, 'name') : nearestRagEvalName; + const nextRagCaseName = node.type === 'ragCase' ? stringProp(node, 'name') : nearestRagCaseName; + const nextRagEvalBound = node.type === 'ragEval' || nearestRagEvalBound; + const nextRagCaseBound = node.type === 'ragCase' || nearestRagCaseBound; + const nextRagEvalNode = node.type === 'ragEval' ? node : nearestRagEvalNode; + const nextRagCaseNode = node.type === 'ragCase' ? node : nearestRagCaseNode; const nextMcpName = node.type === 'mcp' ? 
stringProp(node, 'name') || '' : nearestMcpName; const nextMcpContainer = node.type === 'tool' || node.type === 'prompt' @@ -870,6 +954,29 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { out.groundings.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); } else if (node.type === 'ragEval') { out.evals.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); + } else if (node.type === 'ragCase') { + out.cases.push({ + node, + rootIndex, + name: stringProp(node, 'name'), + query: stringProp(node, 'query'), + ragName: nearestRagName, + evalName: nearestRagEvalName, + evalNode: nearestRagEvalNode, + evalBound: nearestRagEvalBound, + }); + } else if (node.type === 'ragAssert') { + out.asserts.push({ + node, + rootIndex, + ragName: nearestRagName, + evalName: nearestRagEvalName, + caseName: nearestRagCaseName, + evalNode: nearestRagEvalNode, + caseNode: nearestRagCaseNode, + evalBound: nearestRagEvalBound, + caseBound: nearestRagCaseBound, + }); } else if (node.type === 'retrieve') { out.mcpRetrievals.push({ node, rootIndex, container: nearestMcpContainer }); } else if ( @@ -886,7 +993,20 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { } } - for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName, nextMcpContainer, nextMcpName); + for (const child of node.children ?? []) + visit( + child, + nextCorpusName, + nextRagName, + nextRagEvalName, + nextRagCaseName, + nextRagEvalBound, + nextRagCaseBound, + nextRagEvalNode, + nextRagCaseNode, + nextMcpContainer, + nextMcpName, + ); } visit(root); } @@ -1287,6 +1407,222 @@ function validateRagEval( 'RAG eval threshold must be between 0 and 1.', ); } + + const mode = stringProp(evaluation.node, 'mode'); + const hasCases = (evaluation.node.children ?? 
[]).some((child) => child.type === 'ragCase'); + if (hasCases && !stringProp(evaluation.node, 'name')) { + pushRagViolation( + violations, + 'rag-eval-name-required', + evaluation.node, + 'RAG eval with ragCase children must declare name= for stable eval facts.', + ); + } + if (hasCases && !mode) { + pushRagViolation( + violations, + 'rag-eval-mode-required', + evaluation.node, + 'RAG eval with ragCase children must declare mode=contract.', + ); + } + if (mode && mode !== 'contract') { + pushRagViolation( + violations, + 'rag-eval-mode-invalid', + evaluation.node, + "RAG eval mode only supports 'contract' in this slice.", + ); + } +} + +function validateRagCase( + evaluationCase: RagCaseInfo, + citationRequiredRagNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!evaluationCase.evalBound) { + pushRagViolation( + violations, + 'rag-case-missing-eval', + evaluationCase.node, + 'RAG eval case must be nested under ragEval.', + ); + } + if (!evaluationCase.name) { + pushRagViolation(violations, 'rag-case-name-required', evaluationCase.node, 'RAG eval case requires name=.'); + } + if (!evaluationCase.query) { + pushRagViolation( + violations, + 'rag-case-query-required', + evaluationCase.node, + 'RAG eval case requires query=.', + ); + } + + const topK = numberProp(evaluationCase.node, 'topK'); + if ( + invalidNumberProp(evaluationCase.node, 'topK') || + (topK !== undefined && (!Number.isInteger(topK) || topK <= 0)) + ) { + pushRagViolation( + violations, + 'rag-case-topk-invalid', + evaluationCase.node, + 'RAG eval case topK must be a positive integer.', + ); + } + + const minScore = numberProp(evaluationCase.node, 'minScore'); + if ( + invalidNumberProp(evaluationCase.node, 'minScore') || + (minScore !== undefined && (minScore < 0 || minScore > 1)) + ) { + pushRagViolation( + violations, + 'rag-case-minscore-invalid', + evaluationCase.node, + 'RAG eval case minScore must be between 0 and 1.', + ); + } + + const chunkCount = 
numberProp(evaluationCase.node, 'chunkCount'); + if ( + invalidNumberProp(evaluationCase.node, 'chunkCount') || + (chunkCount !== undefined && (!Number.isInteger(chunkCount) || chunkCount < 0)) + ) { + pushRagViolation( + violations, + 'rag-case-chunk-count-invalid', + evaluationCase.node, + 'RAG eval case chunkCount must be a non-negative integer.', + ); + } + + if ( + stringProp(evaluationCase.node, 'sources') && + (!evaluationCase.ragName || !citationRequiredRagNames.has(evaluationCase.ragName)) + ) { + pushRagViolation( + violations, + 'rag-case-sources-require-citations', + evaluationCase.node, + 'RAG eval case sources=<...> requires a citation-grounded rag.', + ); + } +} + +function validateRagAssert( + assertion: RagAssertInfo, + citationRequiredRagNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!assertion.evalBound) { + pushRagViolation(violations, 'rag-assert-missing-eval', assertion.node, 'RAG assert must be nested under ragEval.'); + } + if (!assertion.caseBound) { + pushRagViolation(violations, 'rag-assert-missing-case', assertion.node, 'RAG assert must be nested under ragCase.'); + } + + const kind = stringProp(assertion.node, 'kind'); + if (!kind || !RAG_ASSERT_KINDS.has(kind)) { + pushRagViolation( + violations, + 'rag-assert-kind-invalid', + assertion.node, + `RAG assert kind must be one of ${[...RAG_ASSERT_KINDS].join(', ')}.`, + ); + return; + } + + if ( + ['factId', 'chunkHash', 'contains', 'sourceEq', 'sourceGlob'].includes(kind) && + !stringProp(assertion.node, 'value') + ) { + pushRagViolation( + violations, + 'rag-assert-value-required', + assertion.node, + `RAG assert kind=${kind} requires value=.`, + ); + } + + const chunkHash = kind === 'chunkHash' ? 
stringProp(assertion.node, 'value') : undefined; + if (chunkHash && !/^[a-fA-F0-9]{32,128}$/.test(chunkHash)) { + pushRagViolation( + violations, + 'rag-assert-chunk-hash-invalid', + assertion.node, + 'RAG assert kind=chunkHash value must be a 32-128 character hex hash.', + ); + } + + if (kind === 'scoreGte' || kind === 'scoreLte') { + const threshold = numberProp(assertion.node, 'threshold'); + if (threshold === undefined && !invalidNumberProp(assertion.node, 'threshold')) { + pushRagViolation( + violations, + 'rag-assert-threshold-required', + assertion.node, + `RAG assert kind=${kind} requires threshold=.`, + ); + } else if ( + invalidNumberProp(assertion.node, 'threshold') || + threshold === undefined || + threshold < 0 || + threshold > 1 + ) { + pushRagViolation( + violations, + 'rag-assert-threshold-invalid', + assertion.node, + `RAG assert kind=${kind} threshold must be between 0 and 1.`, + ); + } + } + + if ( + kind === 'citesRequired' && + assertion.caseBound && + (!assertion.ragName || !citationRequiredRagNames.has(assertion.ragName)) + ) { + pushRagViolation( + violations, + 'rag-assert-citations-require-grounding', + assertion.node, + 'RAG assert kind=citesRequired requires a citation-grounded rag.', + ); + } + + if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') { + const count = numberProp(assertion.node, 'count'); + if (invalidNumberProp(assertion.node, 'count') || count === undefined || !Number.isInteger(count) || count < 0) { + pushRagViolation( + violations, + 'rag-assert-count-invalid', + assertion.node, + `RAG assert kind=${kind} requires a non-negative integer count.`, + ); + } + } + + if (kind === 'latencyLte') { + const valueMs = numberProp(assertion.node, 'valueMs'); + if ( + invalidNumberProp(assertion.node, 'valueMs') || + valueMs === undefined || + !Number.isInteger(valueMs) || + valueMs < 0 + ) { + pushRagViolation( + violations, + 'rag-assert-value-ms-invalid', + assertion.node, + 'RAG assert kind=latencyLte requires a 
non-negative integer valueMs.', + ); + } + } } function validateRagMcpRetrievalDuplicates( @@ -1540,7 +1876,9 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe return { corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), retrievers: infos.retrievers.map(ragRetrieverFact), - pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), + pipelines: infos.pipelines.map((info) => + ragPipelineFact(info, infos.groundings, infos.evals, infos.cases, infos.asserts), + ), mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, citationRequiredRagNames)), resourceFeedsCorpora: infos.sources .filter( @@ -1668,6 +2006,8 @@ function ragPipelineFact( info: RagPipelineInfo, groundings: readonly RagGroundingInfo[], evals: readonly RagEvalInfo[], + cases: readonly RagCaseInfo[], + asserts: readonly RagAssertInfo[], ): RagSemanticPipelineFact { return { name: info.name, @@ -1676,7 +2016,9 @@ function ragPipelineFact( ...optionalStringFact(info.node, 'answer', 'answer'), citations: ragBooleanProp(info.node, 'citations'), groundings: groundings.filter((grounding) => grounding.ragName === info.name).map(ragGroundingFact), - evals: evals.filter((evaluation) => evaluation.ragName === info.name).map(ragEvalFact), + evals: evals + .filter((evaluation) => evaluation.ragName === info.name) + .map((evaluation) => ragEvalFact(evaluation, cases, asserts)), ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), }; } @@ -1692,16 +2034,111 @@ function ragGroundingFact(info: RagGroundingInfo): RagSemanticGroundingFact { }; } -function ragEvalFact(info: RagEvalInfo): RagSemanticEvalFact { +function ragEvalFact( + info: RagEvalInfo, + cases: readonly RagCaseInfo[], + asserts: readonly RagAssertInfo[], +): RagSemanticEvalFact { + const evalCases = cases.filter((evaluationCase) => evaluationCase.evalNode === info.node); + const caseFacts = evalCases.map((evaluationCase) => ragEvalCaseFact(evaluationCase, asserts)); return { ...optionalStringFact(info.node, 'name', 'name'), ...optionalStringValue('ragName', info.ragName), ...optionalStringFact(info.node, 'metric', 'metric'), ...optionalNumberFact(info.node, 'threshold', 'threshold'), + ...optionalStringFact(info.node, 'mode', 'mode'), + caseCount: caseFacts.length, + assertCount: caseFacts.reduce((count, evaluationCase) => count + evaluationCase.asserts.length, 0), + cases: caseFacts, + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragEvalCaseFact(info: RagCaseInfo, asserts: readonly RagAssertInfo[]): RagSemanticEvalCaseFact { + const caseAsserts = asserts.filter((assertion) => assertion.caseNode === info.node); + return { + name: info.name ?? '', + ...optionalStringValue('ragName', info.ragName), + ...optionalStringValue('evalName', info.evalName), + query: info.query ?? '', + tags: splitRagList(stringProp(info.node, 'tags')), + expected: { + ...optionalNumberFact(info.node, 'topK', 'topK'), + ...optionalNumberFact(info.node, 'minScore', 'minScore'), + ...optionalNumberFact(info.node, 'chunkCount', 'chunkCount'), + ...(stringProp(info.node, 'sources') ? { sources: splitRagList(stringProp(info.node, 'sources')) } : {}), + }, + asserts: caseAsserts.map(ragEvalAssertFact), ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), }; } +function ragEvalAssertFact(info: RagAssertInfo): RagSemanticEvalAssertFact { + const kind = stringProp(info.node, 'kind') ?? 
''; + return { + ...optionalStringValue('ragName', info.ragName), + ...optionalStringValue('evalName', info.evalName), + ...optionalStringValue('caseName', info.caseName), + kind, + target: ragAssertTarget(kind), + op: ragAssertOp(kind), + ...ragAssertValueFact(info.node, kind), + required: ragBooleanProp(info.node, 'required'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragAssertTarget(kind: string): RagSemanticEvalAssertFact['target'] { + if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') return 'retrieved-chunks'; + if (kind === 'latencyLte') return 'latency'; + if (kind === 'citesRequired') return 'grounding'; + return 'retrieved-chunk'; +} + +function ragAssertOp(kind: string): RagSemanticEvalAssertFact['op'] { + switch (kind) { + case 'scoreGte': + case 'uniqueSourcesGte': + return 'gte'; + case 'scoreLte': + case 'latencyLte': + return 'lte'; + case 'contains': + return 'contains'; + case 'sourceGlob': + return 'glob'; + case 'citesRequired': + return 'present'; + default: + return 'eq'; + } +} + +function ragAssertValueFact(node: IRNode, kind: string): Record { + if (kind === 'scoreGte' || kind === 'scoreLte') { + const threshold = numberProp(node, 'threshold'); + return threshold === undefined ? {} : { value: threshold }; + } + if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') { + const count = numberProp(node, 'count'); + return count === undefined ? {} : { value: count }; + } + if (kind === 'latencyLte') { + const valueMs = numberProp(node, 'valueMs'); + return valueMs === undefined ? 
{} : { value: valueMs }; + } + if (kind === 'citesRequired') return { value: true }; + return optionalStringFact(node, 'value', 'value'); +} + +function splitRagList(value: string | undefined): string[] { + if (!value) return []; + return value + .split(',') + .map((item) => item.trim()) + .filter((item) => item.length > 0); +} + function ragMcpRetrievalFact( info: RagMcpRetrievalInfo, citationRequiredRagNames: ReadonlySet, @@ -1841,6 +2278,19 @@ interface ClassMemberInfo { const BUILTIN_CLASS_BASES = new Set(['Error']); const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; +const RAG_ASSERT_KINDS = new Set([ + 'factId', + 'chunkHash', + 'scoreGte', + 'scoreLte', + 'contains', + 'sourceEq', + 'sourceGlob', + 'uniqueSourcesGte', + 'chunkCountEq', + 'latencyLte', + 'citesRequired', +]); const BODY_EXPRESSION_PROPS = [ 'value', 'expr', diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index 7b76bff6..5bded53f 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -354,6 +354,8 @@ export const NODE_TYPES = [ 'rag', 'grounding', 'ragEval', + 'ragCase', + 'ragAssert', 'expression-v1', ] as const; diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index 90e8d7a9..5771851e 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -13,7 +13,18 @@ function rulesFor(source: string): string[] { describe('RAG language semantics', () => { test('registers RAG declarations as core language nodes', () => { - for (const type of ['corpus', 'source', 'chunking', 'embed', 'retriever', 'rag', 'grounding', 'ragEval']) { + for (const type of [ + 'corpus', + 'source', + 'chunking', + 'embed', + 'retriever', + 'rag', + 'grounding', + 'ragEval', + 'ragCase', + 'ragAssert', + ]) { expect(isCoreNode(type)).toBe(true); expect(generateCoreNode({ type, props: {} })).toEqual([]); } @@ -123,6 +134,123 @@ 
describe('RAG language semantics', () => { ]); }); + test('collects RAG eval case and assertion contracts as semantic facts', () => { + const facts = collectRagSemanticFacts( + parseRoot( + [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + ' ragEval name=SupportEval metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="How do refunds work?" tags="smoke,policy" topK=4 minScore=0.72 chunkCount=2 sources="docs/refunds.md,docs/policies.md"', + ' ragAssert kind=scoreGte threshold=0.72 required=true', + ' ragAssert kind=sourceGlob value="docs/refunds.md" required=true', + ' ragAssert kind=uniqueSourcesGte count=2', + ' ragAssert kind=latencyLte valueMs=250', + ' ragAssert kind=citesRequired', + ].join('\n'), + ), + ); + + expect(facts.pipelines[0]?.evals).toEqual([ + expect.objectContaining({ + name: 'SupportEval', + ragName: 'AnswerDocs', + metric: 'faithfulness', + threshold: 0.85, + mode: 'contract', + caseCount: 1, + assertCount: 5, + cases: [ + expect.objectContaining({ + name: 'refunds', + ragName: 'AnswerDocs', + evalName: 'SupportEval', + query: 'How do refunds work?', + tags: ['smoke', 'policy'], + expected: { + topK: 4, + minScore: 0.72, + chunkCount: 2, + sources: ['docs/refunds.md', 'docs/policies.md'], + }, + asserts: [ + expect.objectContaining({ + kind: 'scoreGte', + target: 'retrieved-chunk', + op: 'gte', + value: 0.72, + required: true, + }), + expect.objectContaining({ + kind: 'sourceGlob', + target: 'retrieved-chunk', + op: 'glob', + value: 'docs/refunds.md', + required: true, + }), + expect.objectContaining({ + kind: 'uniqueSourcesGte', + target: 'retrieved-chunks', + op: 'gte', + value: 2, + required: false, + }), + expect.objectContaining({ + kind: 'latencyLte', + target: 'latency', + op: 'lte', + value: 250, + required: false, + }), + 
expect.objectContaining({ + kind: 'citesRequired', + target: 'grounding', + op: 'present', + value: true, + required: false, + }), + ], + }), + ], + }), + ]); + }); + + test('keeps RAG eval case facts scoped to their parent eval node', () => { + const facts = collectRagSemanticFacts( + parseRoot( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + ' ragEval name=SupportEval metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=nested query="nested case"', + ' ragAssert kind=contains value="nested"', + 'ragEval rag=AnswerDocs name=SupportEval metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=topLevel query="top-level case"', + ' ragAssert kind=contains value="top-level"', + ].join('\n'), + ), + ); + + expect(facts.pipelines[0]?.evals).toEqual([ + expect.objectContaining({ + caseCount: 1, + assertCount: 1, + cases: [expect.objectContaining({ name: 'nested', query: 'nested case' })], + }), + expect.objectContaining({ + caseCount: 1, + assertCount: 1, + cases: [expect.objectContaining({ name: 'topLevel', query: 'top-level case' })], + }), + ]); + }); + test('accepts MCP resource-backed corpus sources as static ingress contracts', () => { const source = [ 'mcp name=Support', @@ -296,6 +424,48 @@ describe('RAG language semantics', () => { ); }); + test('reports invalid RAG eval case and assertion contracts', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragEval metric=faithfulness threshold=0.85', + ' ragCase name=missingMode query="What changed?" 
sources="docs/refunds.md" topK=0 minScore=1.2 chunkCount=-1', + ' ragAssert kind=unknownKind', + ' ragAssert kind=scoreGte threshold=1.5', + ' ragAssert kind=scoreLte', + ' ragAssert kind=chunkHash value=not-a-hash', + ' ragAssert kind=chunkCountEq count=-1', + ' ragAssert kind=latencyLte valueMs=-1', + ' ragAssert kind=sourceEq', + ' ragAssert kind=citesRequired', + 'ragCase name=loose query="outside eval"', + 'ragAssert kind=citesRequired', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'rag-eval-name-required', + 'rag-eval-mode-required', + 'rag-case-topk-invalid', + 'rag-case-minscore-invalid', + 'rag-case-chunk-count-invalid', + 'rag-case-sources-require-citations', + 'rag-case-missing-eval', + 'rag-assert-kind-invalid', + 'rag-assert-threshold-required', + 'rag-assert-threshold-invalid', + 'rag-assert-chunk-hash-invalid', + 'rag-assert-count-invalid', + 'rag-assert-value-ms-invalid', + 'rag-assert-value-required', + 'rag-assert-citations-require-grounding', + 'rag-assert-missing-eval', + 'rag-assert-missing-case', + ]), + ); + }); + test('reports invalid MCP retrieval bindings into RAG contracts', () => { const source = [ 'corpus name=Docs', From 99422a5b6b39713f3c546939f9a0f5a7d3238446 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 22:04:14 +0200 Subject: [PATCH 23/46] feat(core): harden rag eval contracts --- .../conformance-rag-bad-cases.kern | 4 ++ .../conformance-rag-bad-cases.test.kern | 2 + packages/core/src/index.ts | 2 + packages/core/src/rag-assertions.ts | 17 ++++++ packages/core/src/schema.ts | 21 ++++++- packages/core/src/semantic-validator.ts | 60 ++++++++++++++----- packages/core/tests/rag-semantics.test.ts | 32 ++++++++++ packages/core/tests/schema-validation.test.ts | 18 +++++- 8 files changed, 139 insertions(+), 17 deletions(-) create mode 100644 packages/core/src/rag-assertions.ts diff --git a/examples/native-test/conformance-rag-bad-cases.kern 
b/examples/native-test/conformance-rag-bad-cases.kern index 84e51325..27b4463a 100644 --- a/examples/native-test/conformance-rag-bad-cases.kern +++ b/examples/native-test/conformance-rag-bad-cases.kern @@ -25,6 +25,10 @@ rag name=PlainRag retriever=MismatchRetriever ragEval name=PlainEval metric=faithfulness threshold=0.85 mode=contract ragCase name=needsCitations query="needs cite" sources="docs/refunds.md" ragAssert kind=citesRequired + ragCase name=needsCitations query="duplicate case" + +ragEval rag=PlainRag name=PlainEval metric=faithfulness threshold=0.85 mode=contract + ragCase name=outside query="duplicate eval" grounding rag=MissingRag maxContext=0 ragEval rag=MissingRag threshold=1.1 diff --git a/examples/native-test/conformance-rag-bad-cases.test.kern b/examples/native-test/conformance-rag-bad-cases.test.kern index 83d7c393..5362de69 100644 --- a/examples/native-test/conformance-rag-bad-cases.test.kern +++ b/examples/native-test/conformance-rag-bad-cases.test.kern @@ -11,6 +11,8 @@ test name="Bad RAG conformance" target="./conformance-rag-bad-cases.kern" covera expect has=semanticViolations matches="RAG retriever 'MismatchRetriever' uses embed 'OtherEmbedding'" expect has=semanticViolations matches="RAG pipeline 'BadRag' references unknown retriever 'MissingRetriever'" expect has=semanticViolations matches="RAG pipeline 'BadRag' requires citations" + expect has=semanticViolations matches="Duplicate RAG eval named 'PlainEval'" + expect has=semanticViolations matches="Duplicate RAG eval case named 'needsCitations'" expect has=semanticViolations matches="RAG grounding references unknown rag 'MissingRag'" expect has=semanticViolations matches="RAG grounding maxContext must be a positive integer" expect has=semanticViolations matches="RAG eval references unknown rag 'MissingRag'" diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3a624744..01b18119 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -427,6 
+427,8 @@ export { validatePortablePredicateAST, } from './portable-predicate.js'; export { parsePortableNonNegativeIntLiteral, parsePortablePathSegments } from './portable-route-collection.js'; +export type { RagAssertionKind } from './rag-assertions.js'; +export { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; export type { ParserHintsConfig } from './runtime.js'; // Runtime (instance-based state) export { defaultRuntime, KernRuntime } from './runtime.js'; diff --git a/packages/core/src/rag-assertions.ts b/packages/core/src/rag-assertions.ts new file mode 100644 index 00000000..5e812c26 --- /dev/null +++ b/packages/core/src/rag-assertions.ts @@ -0,0 +1,17 @@ +export const RAG_ASSERTION_KINDS = [ + 'factId', + 'chunkHash', + 'scoreGte', + 'scoreLte', + 'contains', + 'sourceEq', + 'sourceGlob', + 'uniqueSourcesGte', + 'chunkCountEq', + 'latencyLte', + 'citesRequired', +] as const; + +export type RagAssertionKind = (typeof RAG_ASSERTION_KINDS)[number]; + +export const RAG_ASSERTION_KIND_SET: ReadonlySet = new Set(RAG_ASSERTION_KINDS); diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 67ed0bbc..9e456045 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -24,6 +24,7 @@ import { import { type KernTarget, VALID_TARGETS } from './config.js'; import { validateCapabilityMetadata, validateImportMetadata } from './import-metadata.js'; import { parsePortablePredicateProp, validatePortablePredicateAST } from './portable-predicate.js'; +import { RAG_ASSERTION_KINDS } from './rag-assertions.js'; import { defaultRuntime, type KernRuntime } from './runtime.js'; import { KERN_VERSION, NODE_TYPES, STYLE_SHORTHANDS, VALUE_SHORTHANDS } from './spec.js'; import type { IRNode } from './types.js'; @@ -43,6 +44,7 @@ export type PropKind = export interface PropSchema { required?: boolean; kind: PropKind; + values?: readonly string[]; } export interface NodeSchema { @@ -2535,7 +2537,7 @@ export const 
NODE_SCHEMAS: Record = { description: 'RAG evaluation assertion — declares a closed static check over retrieved chunks or grounding.', example: 'ragAssert kind=scoreGte threshold=0.72 required=true', props: { - kind: { required: true, kind: 'identifier' }, + kind: { required: true, kind: 'identifier', values: RAG_ASSERTION_KINDS }, value: { kind: 'string' }, threshold: { kind: 'number' }, count: { kind: 'number' }, @@ -4316,11 +4318,28 @@ function checkAllowedChildren(node: IRNode, schema: NodeSchema, violations: Sche } } +function checkAllowedPropValues(node: IRNode, schema: NodeSchema, violations: SchemaViolation[]): void { + const props = node.props || {}; + for (const [propName, propSchema] of Object.entries(schema.props)) { + if (!propSchema.values || !(propName in props)) continue; + const value = props[propName]; + if (typeof value !== 'string' || !propSchema.values.includes(value)) { + violations.push({ + nodeType: node.type, + message: `'${node.type}' prop '${propName}' must be one of ${propSchema.values.join(', ')}`, + line: node.loc?.line, + col: node.loc?.col, + }); + } + } +} + function validateNode(node: IRNode, violations: SchemaViolation[], parent?: IRNode): void { const schema = Object.hasOwn(NODE_SCHEMAS, node.type) ? 
NODE_SCHEMAS[node.type] : undefined; if (schema) { checkRequiredProps(node, schema, violations, parent); checkCrossProps(node, violations, parent); + checkAllowedPropValues(node, schema, violations); checkAllowedChildren(node, schema, violations); } if (node.children) { diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 30e79f67..de067972 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -19,6 +19,7 @@ import { collectExternalImportSymbols, type ExternalImportSymbolTable } from './ import { importRegistryOf } from './import-metadata.js'; import { parseExpression } from './parser-expression.js'; import { splitPortableExpressionList } from './portable-expression-list.js'; +import { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; import type { IRNode } from './types.js'; import type { ValueIR } from './value-ir.js'; @@ -1049,6 +1050,8 @@ function validateRagUniqueNames(infos: RagInfos, violations: SemanticViolation[] validateRagUniqueNameSet('embed', infos.embeds, violations); validateRagUniqueNameSet('retriever', infos.retrievers, violations); validateRagUniqueNameSet('rag', infos.pipelines, violations); + validateRagUniqueEvalNames(infos.evals, violations); + validateRagUniqueCaseNames(infos.cases, violations); } function validateRagUniqueNameSet( @@ -1091,6 +1094,46 @@ function validateRagUniqueSourceNames(sources: readonly RagSourceInfo[], violati } } +function validateRagUniqueEvalNames(evals: readonly RagEvalInfo[], violations: SemanticViolation[]): void { + const seen = new Map(); + for (const evaluation of evals) { + const name = stringProp(evaluation.node, 'name'); + if (!name || !evaluation.ragName) continue; + const key = `${evaluation.ragName}:${name}`; + const prev = seen.get(key); + if (prev) { + pushRagViolation( + violations, + 'rag-duplicate-eval-name', + evaluation.node, + `Duplicate RAG eval named '${name}' in rag 
'${evaluation.ragName}' — first defined at line ${prev.loc?.line ?? '?'}.`, + ); + } else { + seen.set(key, evaluation.node); + } + } +} + +function validateRagUniqueCaseNames(cases: readonly RagCaseInfo[], violations: SemanticViolation[]): void { + const seen = new Map>(); + for (const evaluationCase of cases) { + if (!evaluationCase.name || !evaluationCase.evalNode) continue; + const evalCases = seen.get(evaluationCase.evalNode) ?? new Map(); + const prev = evalCases.get(evaluationCase.name); + if (prev) { + pushRagViolation( + violations, + 'rag-duplicate-case-name', + evaluationCase.node, + `Duplicate RAG eval case named '${evaluationCase.name}' in eval '${evaluationCase.evalName ?? '?'}' — first defined at line ${prev.loc?.line ?? '?'}.`, + ); + } else { + evalCases.set(evaluationCase.name, evaluationCase.node); + seen.set(evaluationCase.evalNode, evalCases); + } + } +} + function validateRagSource( source: RagSourceInfo, mcpResourcesByName: ReadonlyMap, @@ -1526,12 +1569,12 @@ function validateRagAssert( } const kind = stringProp(assertion.node, 'kind'); - if (!kind || !RAG_ASSERT_KINDS.has(kind)) { + if (!kind || !RAG_ASSERTION_KIND_SET.has(kind)) { pushRagViolation( violations, 'rag-assert-kind-invalid', assertion.node, - `RAG assert kind must be one of ${[...RAG_ASSERT_KINDS].join(', ')}.`, + `RAG assert kind must be one of ${RAG_ASSERTION_KINDS.join(', ')}.`, ); return; } @@ -2278,19 +2321,6 @@ interface ClassMemberInfo { const BUILTIN_CLASS_BASES = new Set(['Error']); const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; -const RAG_ASSERT_KINDS = new Set([ - 'factId', - 'chunkHash', - 'scoreGte', - 'scoreLte', - 'contains', - 'sourceEq', - 'sourceGlob', - 'uniqueSourcesGte', - 'chunkCountEq', - 'latencyLte', - 'citesRequired', -]); const BODY_EXPRESSION_PROPS = [ 'value', 'expr', diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index 
5771851e..41e81290 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -651,6 +651,38 @@ describe('RAG language semantics', () => { ); }); + test('reports duplicate RAG eval and case names in their contract namespaces', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="first"', + ' ragCase name=refunds query="duplicate"', + 'ragEval rag=AnswerDocs name=Faithfulness metric=faithfulness threshold=0.9 mode=contract', + ' ragCase name=external query="duplicate eval"', + ].join('\n'); + + expect(rulesFor(source)).toEqual(expect.arrayContaining(['rag-duplicate-eval-name', 'rag-duplicate-case-name'])); + }); + + test('allows RAG eval and case name reuse across separate namespaces', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="answer docs"', + 'rag name=AuditDocs retriever=DocsSearch', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="audit docs"', + 'ragEval rag=AnswerDocs name=Relevance metric=relevance threshold=0.85 mode=contract', + ' ragCase name=refunds query="same case name, different eval"', + ].join('\n'); + + expect(validateSemantics(parseRoot(source))).toEqual([]); + }); + test('requires chunking source refs to resolve inside the referenced corpus', () => { const source = [ 'corpus name=Docs', diff --git a/packages/core/tests/schema-validation.test.ts b/packages/core/tests/schema-validation.test.ts index 397b8f3c..46242a26 100644 --- a/packages/core/tests/schema-validation.test.ts +++ b/packages/core/tests/schema-validation.test.ts @@ -105,7 
+105,9 @@ describe('Schema Validation', () => { 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding topK=8 minScore=0.72', 'rag name=AnswerDocs retriever=DocsSearch', ' grounding requireCitations=true maxContext=6000', - ' ragEval metric=faithfulness threshold=0.85', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="How do refunds work?"', + ' ragAssert kind=scoreGte threshold=0.72', ].join('\n'), ); expect(valid).toHaveLength(0); @@ -136,6 +138,20 @@ describe('Schema Validation', () => { expect( nestedEmbed.some((violation) => violation.message.includes("'corpus' does not allow child type 'embed'")), ).toBe(true); + + const invalidAssertKind = validate( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="How do refunds work?"', + ' ragAssert kind=unsupported', + ].join('\n'), + ); + expect( + invalidAssertKind.some((violation) => violation.message.includes("'ragAssert' prop 'kind' must be one of")), + ).toBe(true); }); it('passes explicit foreign handler metadata', () => { From 0d7b5f3ec030d796445880934a0e4994d1a86f2d Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 22:22:55 +0200 Subject: [PATCH 24/46] feat(core): add in-memory rag runtime --- packages/core/src/index.ts | 15 ++ packages/core/src/rag-runtime.ts | 185 ++++++++++++++++++++++++ packages/core/tests/rag-runtime.test.ts | 167 +++++++++++++++++++++ 3 files changed, 367 insertions(+) create mode 100644 packages/core/src/rag-runtime.ts create mode 100644 packages/core/tests/rag-runtime.test.ts diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 01b18119..e4364a0f 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -429,6 +429,21 @@ export { export { parsePortableNonNegativeIntLiteral, 
parsePortablePathSegments } from './portable-route-collection.js'; export type { RagAssertionKind } from './rag-assertions.js'; export { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; +export type { + InMemoryRagRetriever, + RagChunkInput, + RagCitation, + RetrievedChunk, + RetrieveOptions, + RetrieveResult, +} from './rag-runtime.js'; +export { + createInMemoryRetriever, + InMemoryRagCorpus, + MAX_IN_MEMORY_RAG_TOP_K, + retrieveFromInMemoryCorpus, + tokenizeForRetrieval, +} from './rag-runtime.js'; export type { ParserHintsConfig } from './runtime.js'; // Runtime (instance-based state) export { defaultRuntime, KernRuntime } from './runtime.js'; diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts new file mode 100644 index 00000000..74999209 --- /dev/null +++ b/packages/core/src/rag-runtime.ts @@ -0,0 +1,185 @@ +export interface RagCitation { + readonly uri?: string; + readonly locator?: string; +} + +export interface RagChunkInput { + readonly id: string; + readonly text: string; + readonly source: string; + readonly citation?: RagCitation; + readonly metadata?: Record; +} + +export interface RetrievedChunk { + readonly id: string; + readonly text: string; + readonly score: number; + readonly source: string; + readonly citation: RagCitation; + readonly metadata?: Record; +} + +export interface RetrieveOptions { + readonly topK?: number; + readonly minScore?: number; +} + +export interface RetrieveResult { + readonly query: string; + readonly chunks: RetrievedChunk[]; +} + +export type InMemoryRagRetriever = (query: string, options?: RetrieveOptions) => RetrieveResult; + +export const MAX_IN_MEMORY_RAG_TOP_K = 1000; + +interface StoredRagChunk { + readonly chunk: RagChunkInput; + readonly terms: ReadonlySet; +} + +export class InMemoryRagCorpus { + private readonly chunks = new Map(); + + constructor(chunks: Iterable = []) { + for (const chunk of chunks) this.add(chunk); + } + + get size(): number { + return 
this.chunks.size; + } + + add(chunk: RagChunkInput): void { + if (typeof chunk.id !== 'string' || !chunk.id.trim()) { + throw new Error('KERN RAG runtime chunk id must be a non-empty string.'); + } + if (typeof chunk.text !== 'string' || !chunk.text.trim()) { + throw new Error(`KERN RAG runtime chunk '${chunk.id}' text must be a non-empty string.`); + } + if (typeof chunk.source !== 'string' || !chunk.source.trim()) { + throw new Error(`KERN RAG runtime chunk '${chunk.id}' source must be a non-empty string.`); + } + const storedChunk = { + ...chunk, + citation: chunk.citation ? { ...chunk.citation } : undefined, + metadata: chunk.metadata ? cloneMetadata(chunk.metadata) : undefined, + }; + this.chunks.set(chunk.id, { chunk: storedChunk, terms: tokenizeForRetrieval(storedChunk.text) }); + } + + get(id: string): RagChunkInput | undefined { + const stored = this.chunks.get(id); + return stored ? cloneChunkInput(stored.chunk) : undefined; + } + + all(): RagChunkInput[] { + return Array.from(this.chunks.values(), (stored) => cloneChunkInput(stored.chunk)); + } + + retrieve(query: string, options: RetrieveOptions = {}): RetrieveResult { + if (typeof query !== 'string') throw new Error('KERN RAG runtime query must be a string.'); + const { topK, minScore } = normalizeRetrieveOptions(options); + const queryTerms = tokenizeForRetrieval(query); + if (queryTerms.size === 0) return { query, chunks: [] }; + + const chunks = Array.from(this.chunks.values()) + .map((stored) => ({ chunk: stored.chunk, score: jaccardScore(queryTerms, stored.terms) })) + .filter((candidate) => candidate.score > 0 && candidate.score >= minScore) + .sort((a, b) => b.score - a.score || a.chunk.id.localeCompare(b.chunk.id)) + .slice(0, topK) + .map(({ chunk, score }) => retrievedChunk(chunk, score)); + + return { query, chunks }; + } +} + +export function createInMemoryRetriever(corpus: InMemoryRagCorpus): InMemoryRagRetriever { + return (query: string, options: RetrieveOptions = {}): RetrieveResult => 
corpus.retrieve(query, options); +} + +export function retrieveFromInMemoryCorpus( + corpus: InMemoryRagCorpus, + query: string, + options: RetrieveOptions = {}, +): RetrieveResult { + return corpus.retrieve(query, options); +} + +function normalizeRetrieveOptions(options: RetrieveOptions): Required { + const topK = options.topK ?? 5; + const minScore = options.minScore ?? 0; + if (!Number.isInteger(topK) || topK <= 0 || topK > MAX_IN_MEMORY_RAG_TOP_K) { + throw new Error(`KERN RAG runtime topK must be a positive integer up to ${MAX_IN_MEMORY_RAG_TOP_K}.`); + } + if (!Number.isFinite(minScore) || minScore < 0 || minScore > 1) { + throw new Error('KERN RAG runtime minScore must be between 0 and 1.'); + } + return { topK, minScore }; +} + +export function tokenizeForRetrieval(value: string): ReadonlySet { + return new Set(value.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? []); +} + +function jaccardScore(queryTerms: ReadonlySet, chunkTerms: ReadonlySet): number { + if (queryTerms.size === 0 || chunkTerms.size === 0) return 0; + let intersection = 0; + for (const term of queryTerms) { + if (chunkTerms.has(term)) intersection += 1; + } + const union = queryTerms.size + chunkTerms.size - intersection; + return union === 0 ? 0 : intersection / union; +} + +function retrievedChunk(chunk: RagChunkInput, score: number): RetrievedChunk { + return { + id: chunk.id, + text: chunk.text, + score, + source: chunk.source, + citation: chunk.citation ? { ...chunk.citation } : { uri: chunk.source }, + ...(chunk.metadata ? { metadata: cloneMetadata(chunk.metadata) } : {}), + }; +} + +function cloneChunkInput(chunk: RagChunkInput): RagChunkInput { + return { + ...chunk, + citation: chunk.citation ? { ...chunk.citation } : undefined, + metadata: chunk.metadata ? 
cloneMetadata(chunk.metadata) : undefined, + }; +} + +function cloneMetadata(metadata: Record): Record { + return cloneMetadataValue(metadata, new WeakMap()) as Record; +} + +function cloneMetadataValue(value: unknown, seen: WeakMap): unknown { + if (Array.isArray(value)) { + const existing = seen.get(value); + if (existing) return existing; + const out: unknown[] = []; + seen.set(value, out); + for (const item of value) out.push(cloneMetadataValue(item, seen)); + return out; + } + if (isPlainMetadataObject(value)) { + const existing = seen.get(value); + if (existing) return existing; + const out: Record = {}; + seen.set(value, out); + for (const [key, entry] of Object.entries(value)) { + if (key === '__proto__' || key === 'constructor' || key === 'prototype') continue; + out[key] = cloneMetadataValue(entry, seen); + } + return out; + } + return value; +} + +function isPlainMetadataObject(value: unknown): value is Record { + if (value === null || typeof value !== 'object') return false; + const prototype = Object.getPrototypeOf(value); + return prototype === Object.prototype || prototype === null; +} diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts new file mode 100644 index 00000000..28bca45e --- /dev/null +++ b/packages/core/tests/rag-runtime.test.ts @@ -0,0 +1,167 @@ +import { + createInMemoryRetriever, + InMemoryRagCorpus, + MAX_IN_MEMORY_RAG_TOP_K, + retrieveFromInMemoryCorpus, + tokenizeForRetrieval, +} from '../src/index.js'; + +describe('RAG in-memory runtime retrieval', () => { + test('ranks exact lexical matches first', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'b', text: 'refund policy', source: 'docs/refunds.md' }, + { id: 'a', text: 'shipping policy', source: 'docs/shipping.md' }, + ]); + + const result = retrieveFromInMemoryCorpus(corpus, 'refund policy'); + + expect(result.query).toBe('refund policy'); + expect(result.chunks[0]).toEqual( + expect.objectContaining({ + id: 'b', + score: 1, + 
source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }), + ); + }); + + test('limits results by topK', () => { + const corpus = new InMemoryRagCorpus( + Array.from({ length: 10 }, (_, index) => ({ + id: `chunk-${index}`, + text: `refund policy ${index}`, + source: `docs/${index}.md`, + })), + ); + + expect(retrieveFromInMemoryCorpus(corpus, 'refund policy', { topK: 3 }).chunks).toHaveLength(3); + }); + + test('filters results by minScore', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'weak', text: 'refund unrelated unrelated unrelated', source: 'docs/weak.md' }, + { id: 'none', text: 'shipping delivery', source: 'docs/none.md' }, + ]); + + expect(retrieveFromInMemoryCorpus(corpus, 'refund policy', { minScore: 0.5 }).chunks).toEqual([]); + }); + + test('orders results by descending score', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'partial', text: 'refund shipping', source: 'docs/partial.md' }, + { id: 'exact', text: 'refund policy', source: 'docs/exact.md' }, + { id: 'weak', text: 'refund shipping returns', source: 'docs/weak.md' }, + ]); + + const scores = retrieveFromInMemoryCorpus(corpus, 'refund policy').chunks.map((chunk) => chunk.score); + + expect(scores.length).toBeGreaterThan(1); + for (let index = 0; index < scores.length - 1; index += 1) { + expect(scores[index]).toBeGreaterThanOrEqual(scores[index + 1]); + } + }); + + test('breaks score ties by chunk id deterministically', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'b', text: 'refund', source: 'docs/b.md' }, + { id: 'a', text: 'refund', source: 'docs/a.md' }, + ]); + const retrieve = createInMemoryRetriever(corpus); + + expect(retrieve('refund').chunks.map((chunk) => chunk.id)).toEqual(['a', 'b']); + expect(retrieve('refund').chunks.map((chunk) => chunk.id)).toEqual(['a', 'b']); + }); + + test('returns empty results for empty corpus and empty queries', () => { + const empty = new InMemoryRagCorpus(); + expect(retrieveFromInMemoryCorpus(empty, 
'refund').chunks).toEqual([]); + + const corpus = new InMemoryRagCorpus([{ id: 'refunds', text: 'refund policy', source: 'docs/refunds.md' }]); + expect(retrieveFromInMemoryCorpus(corpus, ' ').chunks).toEqual([]); + }); + + test('preserves citation and metadata provenance', () => { + const corpus = new InMemoryRagCorpus([ + { + id: 'refunds', + text: 'refund policy', + source: 'docs/refunds.md', + citation: { uri: 'file:///docs/refunds.md', locator: 'L10-L20' }, + metadata: { section: 'policy' }, + }, + ]); + + expect(retrieveFromInMemoryCorpus(corpus, 'refund policy').chunks[0]).toEqual( + expect.objectContaining({ + id: 'refunds', + citation: { uri: 'file:///docs/refunds.md', locator: 'L10-L20' }, + metadata: { section: 'policy' }, + }), + ); + }); + + test('returns defensive copies from corpus reads', () => { + const circularMetadata: Record = { section: 'policy', nested: { owner: 'support' } }; + circularMetadata.self = circularMetadata; + const corpus = new InMemoryRagCorpus([ + { + id: 'refunds', + text: 'refund policy', + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + metadata: circularMetadata, + }, + ]); + + const snapshot = corpus.get('refunds'); + if (!snapshot) throw new Error('missing fixture chunk'); + (snapshot.metadata as Record).section = 'mutated'; + ((snapshot.metadata as Record).nested as Record).owner = 'mutated'; + (snapshot.citation as Record).uri = 'mutated'; + + expect(corpus.retrieve('refund policy').chunks[0]).toEqual( + expect.objectContaining({ + citation: { uri: 'docs/refunds.md' }, + metadata: expect.objectContaining({ section: 'policy', nested: { owner: 'support' } }), + }), + ); + }); + + test('upserts chunks by id without changing retrieval determinism', () => { + const corpus = new InMemoryRagCorpus([{ id: 'refunds', text: 'old refund policy', source: 'docs/old.md' }]); + + corpus.add({ id: 'refunds', text: 'updated return policy', source: 'docs/new.md' }); + + expect(corpus.size).toBe(1); + 
expect(corpus.retrieve('updated return policy').chunks[0]).toEqual( + expect.objectContaining({ id: 'refunds', source: 'docs/new.md' }), + ); + }); + + test('validates retrieval options and chunk identity inputs', () => { + const corpus = new InMemoryRagCorpus([{ id: 'refunds', text: 'refund policy', source: 'docs/refunds.md' }]); + + expect(() => retrieveFromInMemoryCorpus(corpus, 'refund', { topK: 0 })).toThrow('topK'); + expect(() => retrieveFromInMemoryCorpus(corpus, 'refund', { topK: MAX_IN_MEMORY_RAG_TOP_K + 1 })).toThrow('topK'); + expect(() => retrieveFromInMemoryCorpus(corpus, 'refund', { minScore: 1.1 })).toThrow('minScore'); + expect(() => retrieveFromInMemoryCorpus(corpus, 1 as unknown as string)).toThrow('query'); + expect(() => corpus.add({ id: ' ', text: 'bad', source: 'docs/bad.md' })).toThrow('chunk id'); + expect(() => corpus.add({ id: 'bad', text: ' ', source: 'docs/bad.md' })).toThrow('text'); + expect(() => corpus.add({ id: 'bad', text: 'bad', source: ' ' })).toThrow('source'); + expect(() => + corpus.add({ id: 1, text: 'bad', source: 'docs/bad.md' } as unknown as Parameters[0]), + ).toThrow('chunk id'); + }); + + test('tokenizes Unicode text for non-English retrieval', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'resume', text: 'résumé policy', source: 'docs/resume.md' }, + { id: 'jp', text: '日本語 ガイド', source: 'docs/jp.md' }, + ]); + + expect([...tokenizeForRetrieval('résumé 日本語')]).toEqual(['résumé', '日本語']); + expect(retrieveFromInMemoryCorpus(corpus, 'résumé').chunks[0]?.id).toBe('resume'); + expect(retrieveFromInMemoryCorpus(corpus, '日本語').chunks[0]?.id).toBe('jp'); + }); +}); From 970bb843c293f83e2fb9d12a6b9c396341ce3554 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 23:13:40 +0200 Subject: [PATCH 25/46] feat(core): evaluate rag runtime contracts --- .../kernlang-typescript-surface.kern | 1 + packages/core/src/index.ts | 8 + packages/core/src/rag-runtime.ts | 498 +++++++++++++++++- 
packages/core/tests/rag-runtime.test.ts | 252 +++++++++ packages/core/tests/type-guards.test.ts | 2 +- 5 files changed, 759 insertions(+), 2 deletions(-) diff --git a/packages/core/native-test/kernlang-typescript-surface.kern b/packages/core/native-test/kernlang-typescript-surface.kern index 40f73103..e1ba92cb 100644 --- a/packages/core/native-test/kernlang-typescript-surface.kern +++ b/packages/core/native-test/kernlang-typescript-surface.kern @@ -71,6 +71,7 @@ fn name=assertUser params="x:unknown" returns="asserts x is User" throw value="new Error(\"not a user\")" class name=Account + field name=role type=string method name=isAdmin returns="this is AdminUser" handler return value="this.role === \"admin\"" diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index e4364a0f..3fa0ae67 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -433,12 +433,20 @@ export type { InMemoryRagRetriever, RagChunkInput, RagCitation, + RagContractRetriever, + RagEvalAssertionCode, + RagEvalAssertionResult, + RagEvalCaseResult, + RagEvalContractOptions, + RagEvalContractResult, RetrievedChunk, RetrieveOptions, RetrieveResult, } from './rag-runtime.js'; export { createInMemoryRetriever, + evaluateRagEvalContract, + hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, retrieveFromInMemoryCorpus, diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts index 74999209..8bcb6294 100644 --- a/packages/core/src/rag-runtime.ts +++ b/packages/core/src/rag-runtime.ts @@ -1,3 +1,5 @@ +import type { RagSemanticEvalAssertFact, RagSemanticEvalCaseFact, RagSemanticEvalFact } from './semantic-validator.js'; + export interface RagCitation { readonly uri?: string; readonly locator?: string; @@ -31,9 +33,54 @@ export interface RetrieveResult { } export type InMemoryRagRetriever = (query: string, options?: RetrieveOptions) => RetrieveResult; +export type RagContractRetriever = (query: string, options?: RetrieveOptions) => 
RetrieveResult; export const MAX_IN_MEMORY_RAG_TOP_K = 1000; +export type RagEvalAssertionCode = + | 'PASS' + | 'ASSERTION_FAIL' + | 'INVALID_ASSERTION' + | 'RETRIEVER_ERROR' + | 'UNSUPPORTED_ASSERTION'; + +export interface RagEvalContractOptions { + readonly sourceGlobCaseSensitive?: boolean; + readonly now?: () => number; +} + +export interface RagEvalAssertionResult { + readonly kind: string; + readonly required?: boolean; + readonly passed: boolean; + readonly code: RagEvalAssertionCode; + readonly message: string; + readonly expected?: unknown; + readonly actual?: unknown; +} + +export interface RagEvalCaseResult { + readonly name: string; + readonly query: string; + readonly passed: boolean; + readonly durationMs: number; + readonly retrieveOptions: RetrieveOptions; + readonly chunks: readonly RetrievedChunk[]; + readonly assertions: readonly RagEvalAssertionResult[]; +} + +export interface RagEvalContractResult { + readonly passed: boolean; + readonly ragName?: string; + readonly evalName?: string; + readonly caseCount: number; + readonly passedCaseCount: number; + readonly assertionCount: number; + readonly passedAssertionCount: number; + readonly durationMs: number; + readonly cases: readonly RagEvalCaseResult[]; +} + interface StoredRagChunk { readonly chunk: RagChunkInput; readonly terms: ReadonlySet; @@ -106,6 +153,47 @@ export function retrieveFromInMemoryCorpus( return corpus.retrieve(query, options); } +export function evaluateRagEvalContract( + evaluation: RagSemanticEvalFact, + retriever: RagContractRetriever, + options: RagEvalContractOptions = {}, +): RagEvalContractResult { + const startedAt = runtimeNow(options); + const cases = (evaluation.cases ?? 
[]).map((evaluationCase) => + evaluateRagCase(evaluation, evaluationCase, retriever, options), + ); + const assertionCount = cases.reduce((count, evaluationCase) => count + evaluationCase.assertions.length, 0); + const passedAssertionCount = cases.reduce( + (count, evaluationCase) => count + evaluationCase.assertions.filter((assertion) => assertion.passed).length, + 0, + ); + return { + // Empty eval contracts fail closed; a vacuous pass would hide unconfigured evals. + passed: cases.length > 0 && cases.every((evaluationCase) => evaluationCase.passed), + ...optionalStringValue('ragName', evaluation.ragName), + ...optionalStringValue('evalName', evaluation.name), + caseCount: cases.length, + passedCaseCount: cases.filter((evaluationCase) => evaluationCase.passed).length, + assertionCount, + passedAssertionCount, + durationMs: runtimeNow(options) - startedAt, + cases, + }; +} + +export function hashRetrievedChunkText(text: string): string { + let left = 0xcbf29ce484222325n; + let right = 0x84222325cbf29ce4n; + for (const byte of new TextEncoder().encode(text)) { + const value = BigInt(byte); + left ^= value; + left = BigInt.asUintN(64, left * 0x100000001b3n); + right ^= value + 0x9en; + right = BigInt.asUintN(64, right * 0x100000001b3n); + } + return `${left.toString(16).padStart(16, '0')}${right.toString(16).padStart(16, '0')}`; +} + function normalizeRetrieveOptions(options: RetrieveOptions): Required { const topK = options.topK ?? 5; const minScore = options.minScore ?? 
0; @@ -118,8 +206,416 @@ function normalizeRetrieveOptions(options: RetrieveOptions): Required + evaluateRagAssertion(evaluation, evaluationCase, assertion, chunks, durationMs, options), + ), + ]; + } + return { + name: evaluationCase.name, + query: evaluationCase.query, + passed: assertions.every(isPassingOrAdvisoryAssertion), + durationMs, + retrieveOptions, + chunks, + assertions, + }; +} + +function caseRetrieveOptions(evaluationCase: RagSemanticEvalCaseFact): RetrieveOptions { + return { + ...optionalNumberValue('topK', evaluationCase.expected?.topK), + ...optionalNumberValue('minScore', evaluationCase.expected?.minScore), + }; +} + +function evaluateExpectedCaseContracts( + evaluationCase: RagSemanticEvalCaseFact, + chunks: readonly RetrievedChunk[], +): RagEvalAssertionResult[] { + const results: RagEvalAssertionResult[] = []; + const { topK, minScore, chunkCount, sources } = evaluationCase.expected ?? {}; + if (topK !== undefined) { + results.push( + assertionResult('expected.topK', chunks.length <= topK, `expected at most ${topK} chunks`, topK, chunks.length), + ); + } + if (minScore !== undefined) { + const actual = + chunks.length === 0 + ? 
0 + : chunks.reduce((minimumScore, chunk) => Math.min(minimumScore, chunk.score), Number.POSITIVE_INFINITY); + results.push( + assertionResult( + 'expected.minScore', + chunks.length > 0 && chunks.every((chunk) => chunk.score >= minScore), + `expected all retrieved chunks to score >= ${minScore}`, + minScore, + actual, + ), + ); + } + if (chunkCount !== undefined) { + results.push( + assertionResult( + 'expected.chunkCount', + chunks.length === chunkCount, + `expected ${chunkCount} chunks`, + chunkCount, + chunks.length, + ), + ); + } + if (sources?.length) { + const actualSources = [...new Set(chunks.map((chunk) => chunk.source))].sort(); + const expectedSources = [...sources].sort(); + const allowed = new Set(expectedSources); + results.push( + assertionResult( + 'expected.sources', + expectedSources.every((source) => actualSources.includes(source)) && + chunks.every((chunk) => allowed.has(chunk.source)), + `expected retrieved chunks to cover only sources ${expectedSources.join(', ')}`, + expectedSources, + actualSources, + ), + ); + } + return results; +} + +function evaluateRagAssertion( + evaluation: RagSemanticEvalFact, + evaluationCase: RagSemanticEvalCaseFact, + assertion: RagSemanticEvalAssertFact, + chunks: readonly RetrievedChunk[], + durationMs: number, + options: RagEvalContractOptions, +): RagEvalAssertionResult { + switch (assertion.kind) { + case 'scoreGte': + return withRagAssertRequired( + assertion, + numericChunkAssertion(assertion, chunks, (chunk, value) => chunk.score >= value, 'score >='), + ); + case 'scoreLte': + return withRagAssertRequired( + assertion, + numericChunkAssertion(assertion, chunks, (chunk, value) => chunk.score <= value, 'score <='), + ); + case 'contains': + return withRagAssertRequired( + assertion, + stringChunkAssertion( + assertion, + chunks, + 'text', + (chunk, value) => chunk.text.toLowerCase().includes(value.toLowerCase()), + 'text contains', + ), + ); + case 'sourceEq': + return withRagAssertRequired( + assertion, + 
stringChunkAssertion(assertion, chunks, 'source', (chunk, value) => chunk.source === value, 'source equals'), + ); + case 'sourceGlob': + return withRagAssertRequired( + assertion, + stringChunkAssertion( + assertion, + chunks, + 'source', + (chunk, value) => globMatches(value, chunk.source, options.sourceGlobCaseSensitive ?? false), + 'source matches', + ), + ); + case 'uniqueSourcesGte': { + const expected = numberAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires numeric value.'); + const actual = new Set(chunks.map((chunk) => chunk.source)).size; + return withRagAssertRequired( + assertion, + assertionResult( + assertion.kind, + actual >= expected, + `expected at least ${expected} unique sources`, + expected, + actual, + ), + ); + } + case 'chunkCountEq': { + const expected = numberAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires numeric value.'); + return withRagAssertRequired( + assertion, + assertionResult( + assertion.kind, + chunks.length === expected, + `expected exactly ${expected} chunks`, + expected, + chunks.length, + ), + ); + } + case 'citesRequired': { + const actual = chunks.length > 0 && chunks.every((chunk) => !!chunk.citation.uri || !!chunk.citation.locator); + return withRagAssertRequired( + assertion, + assertionResult(assertion.kind, actual, 'expected every chunk to carry citation data', true, actual), + ); + } + case 'factId': { + const expected = stringAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires string value.'); + const actual = ragEvalCaseFactId(evaluation, evaluationCase); + return withRagAssertRequired( + assertion, + assertionResult(assertion.kind, actual === expected, `expected fact id ${expected}`, expected, actual), + ); + } + case 'chunkHash': { + const expected = stringAssertionValue(assertion); + if (expected === undefined) return 
invalidAssertionResult(assertion, 'requires string value.'); + const actual = chunks.map((chunk) => hashRetrievedChunkText(chunk.text)); + return withRagAssertRequired( + assertion, + assertionResult( + assertion.kind, + actual.includes(expected), + `expected retrieved chunk hash ${expected}`, + expected, + actual, + ), + ); + } + case 'latencyLte': { + const expected = numberAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires numeric value.'); + return withRagAssertRequired( + assertion, + assertionResult( + assertion.kind, + durationMs <= expected, + `expected retrieval latency <= ${expected}ms`, + expected, + durationMs, + ), + ); + } + default: + return { + kind: assertion.kind, + required: assertion.required, + passed: false, + code: 'UNSUPPORTED_ASSERTION', + message: `Unsupported RAG eval assertion kind '${assertion.kind}'.`, + }; + } +} + +function numericChunkAssertion( + assertion: RagSemanticEvalAssertFact, + chunks: readonly RetrievedChunk[], + check: (chunk: RetrievedChunk, value: number) => boolean, + label: string, +): RagEvalAssertionResult { + const expected = numberAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires numeric value.'); + const actual = chunks.map((chunk) => chunk.score); + return assertionResult( + assertion.kind, + chunks.length > 0 && chunks.every((chunk) => check(chunk, expected)), + `expected every retrieved chunk ${label} ${expected}`, + expected, + actual, + ); +} + +function stringChunkAssertion( + assertion: RagSemanticEvalAssertFact, + chunks: readonly RetrievedChunk[], + actualField: 'source' | 'text', + check: (chunk: RetrievedChunk, value: string) => boolean, + label: string, +): RagEvalAssertionResult { + const expected = stringAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires non-empty string value.'); + const actual = chunks.map((chunk) => 
(actualField === 'source' ? chunk.source : chunk.text)); + return assertionResult( + assertion.kind, + chunks.some((chunk) => check(chunk, expected)), + `expected a retrieved chunk ${label} ${expected}`, + expected, + actual, + ); +} + +function invalidAssertionResult(assertion: RagSemanticEvalAssertFact, reason: string): RagEvalAssertionResult { + return { + kind: assertion.kind, + required: assertion.required, + passed: false, + code: 'INVALID_ASSERTION', + message: `RAG eval assertion kind=${assertion.kind} ${reason}`, + }; +} + +function assertionResult( + kind: string, + passed: boolean, + message: string, + expected?: unknown, + actual?: unknown, +): RagEvalAssertionResult { + return { + kind, + passed, + code: passed ? 'PASS' : 'ASSERTION_FAIL', + message, + ...optionalAssertionValue('expected', expected), + ...optionalAssertionValue('actual', actual), + }; +} + +function numberAssertionValue(assertion: RagSemanticEvalAssertFact): number | undefined { + return typeof assertion.value === 'number' && Number.isFinite(assertion.value) ? assertion.value : undefined; +} + +function stringAssertionValue(assertion: RagSemanticEvalAssertFact): string | undefined { + return typeof assertion.value === 'string' && assertion.value.length > 0 ? assertion.value : undefined; +} + +function withRagAssertRequired( + assertion: RagSemanticEvalAssertFact, + result: RagEvalAssertionResult, +): RagEvalAssertionResult { + return { ...result, required: assertion.required }; +} + +function isPassingOrAdvisoryAssertion(assertion: RagEvalAssertionResult): boolean { + return assertion.passed || (assertion.required === false && assertion.code === 'ASSERTION_FAIL'); +} + +function ragEvalCaseFactId(evaluation: RagSemanticEvalFact, evaluationCase: RagSemanticEvalCaseFact): string { + return [evaluationCase.ragName ?? evaluation.ragName, evaluationCase.evalName ?? 
evaluation.name, evaluationCase.name] + .filter((part): part is string => !!part) + .join(':'); +} + +function globMatches(pattern: string, value: string, caseSensitive: boolean): boolean { + const normalizedPattern = caseSensitive ? pattern : pattern.toLowerCase(); + const normalizedValue = caseSensitive ? value : value.toLowerCase(); + return wildcardMatches(normalizedPattern, normalizedValue); +} + +function wildcardMatches(pattern: string, value: string): boolean { + let patternIndex = 0; + let valueIndex = 0; + let starIndex = -1; + let starValueIndex = 0; + while (valueIndex < value.length) { + if ( + patternIndex < pattern.length && + (pattern[patternIndex] === '?' || pattern[patternIndex] === value[valueIndex]) + ) { + patternIndex += 1; + valueIndex += 1; + } else if (patternIndex < pattern.length && pattern[patternIndex] === '*') { + starIndex = patternIndex; + starValueIndex = valueIndex; + patternIndex += 1; + } else if (starIndex !== -1) { + patternIndex = starIndex + 1; + starValueIndex += 1; + valueIndex = starValueIndex; + } else { + return false; + } + } + while (patternIndex < pattern.length && pattern[patternIndex] === '*') patternIndex += 1; + return patternIndex === pattern.length; +} + +function runtimeNow(options: RagEvalContractOptions): number { + return options.now?.() ?? Date.now(); +} + +function optionalStringValue(key: string, value: string | undefined): Record { + return value === undefined ? {} : { [key]: value }; +} + +function optionalNumberValue(key: string, value: number | undefined): Record { + return value === undefined ? {} : { [key]: value }; +} + +function optionalAssertionValue(key: 'expected' | 'actual', value: unknown): Record { + return value === undefined ? 
{} : { [key]: value }; +} + +function validateRetrieveResult(result: RetrieveResult): RetrieveResult { + if (!result || !Array.isArray(result.chunks)) throw new Error('retriever result must include chunks array.'); + for (const [index, chunk] of result.chunks.entries()) { + if ( + !chunk || + typeof chunk.id !== 'string' || + typeof chunk.text !== 'string' || + typeof chunk.score !== 'number' || + !Number.isFinite(chunk.score) || + chunk.score < 0 || + chunk.score > 1 || + typeof chunk.source !== 'string' || + !isValidCitation(chunk.citation) + ) { + throw new Error(`retriever chunk at index ${index} is not a RetrievedChunk.`); + } + } + return result; +} + +function isValidCitation(value: unknown): value is RagCitation { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const citation = value as RagCitation; + return ( + (citation.uri === undefined || typeof citation.uri === 'string') && + (citation.locator === undefined || typeof citation.locator === 'string') + ); +} + export function tokenizeForRetrieval(value: string): ReadonlySet { - return new Set(value.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? []); + return new Set( + value + .normalize('NFKC') + .toLowerCase() + .match(/[\p{L}\p{M}\p{N}]+/gu) ?? 
[], + ); } function jaccardScore(queryTerms: ReadonlySet, chunkTerms: ReadonlySet): number { diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts index 28bca45e..3bebf945 100644 --- a/packages/core/tests/rag-runtime.test.ts +++ b/packages/core/tests/rag-runtime.test.ts @@ -1,5 +1,8 @@ +import type { RagSemanticEvalFact } from '../src/index.js'; import { createInMemoryRetriever, + evaluateRagEvalContract, + hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, retrieveFromInMemoryCorpus, @@ -161,7 +164,256 @@ describe('RAG in-memory runtime retrieval', () => { ]); expect([...tokenizeForRetrieval('résumé 日本語')]).toEqual(['résumé', '日本語']); + expect([...tokenizeForRetrieval('résumé')]).toEqual(['résumé']); expect(retrieveFromInMemoryCorpus(corpus, 'résumé').chunks[0]?.id).toBe('resume'); + expect(retrieveFromInMemoryCorpus(corpus, 'résumé').chunks[0]?.id).toBe('resume'); expect(retrieveFromInMemoryCorpus(corpus, '日本語').chunks[0]?.id).toBe('jp'); }); }); + +describe('RAG eval runtime contracts', () => { + test('evaluates passing RAG eval cases against retrieved chunks', () => { + const corpus = new InMemoryRagCorpus([ + { + id: 'refunds', + text: 'refund policy', + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md', locator: 'L1-L2' }, + metadata: { section: 'policy' }, + }, + { + id: 'policy', + text: 'policy details', + source: 'docs/policies.md', + citation: { uri: 'docs/policies.md' }, + }, + ]); + const refundHash = hashRetrievedChunkText('refund policy'); + expect(refundHash).toMatch(/^[a-f0-9]{32}$/); + const evalFact: RagSemanticEvalFact = { + name: 'Faithfulness', + ragName: 'AnswerDocs', + mode: 'contract', + cases: [ + { + name: 'refunds', + ragName: 'AnswerDocs', + evalName: 'Faithfulness', + query: 'refund policy', + tags: ['smoke'], + expected: { topK: 1, minScore: 0.25, sources: ['docs/refunds.md'] }, + asserts: [ + assertFact('scoreGte', 0.25), + assertFact('sourceGlob', 'docs/*.md'), 
+ assertFact('contains', 'refund'), + assertFact('uniqueSourcesGte', 1), + assertFact('chunkCountEq', 1), + assertFact('citesRequired', true), + assertFact('factId', 'AnswerDocs:Faithfulness:refunds'), + assertFact('chunkHash', refundHash), + assertFact('latencyLte', 1), + ], + }, + ], + }; + let now = 10; + + const result = evaluateRagEvalContract(evalFact, createInMemoryRetriever(corpus), { + now: () => now++, + }); + + expect(result.passed).toBe(true); + expect(result.caseCount).toBe(1); + expect(result.passedAssertionCount).toBe(result.assertionCount); + expect(result.cases[0]?.retrieveOptions).toEqual({ topK: 1, minScore: 0.25 }); + expect(result.cases[0]?.assertions.map((assertion) => assertion.code)).toEqual( + new Array(result.cases[0]?.assertions.length).fill('PASS'), + ); + expect(JSON.parse(JSON.stringify(result))).toEqual(result); + }); + + test('reports failing RAG eval contracts and retriever errors as structured diagnostics', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'shipping', text: 'shipping details', source: 'docs/shipping.md', citation: { uri: 'docs/shipping.md' } }, + ]); + const evalFact: RagSemanticEvalFact = { + name: 'Faithfulness', + ragName: 'AnswerDocs', + mode: 'contract', + cases: [ + { + name: 'refunds', + query: 'refund policy', + tags: [], + expected: { chunkCount: 1, sources: ['docs/refunds.md'] }, + asserts: [ + assertFact('scoreGte', 0.5), + assertFact('sourceEq', 'docs/refunds.md'), + assertFact('scoreLte', 0.1), + { ...assertFact('contains', ''), value: '' }, + { ...assertFact('unknownKind', 'x'), kind: 'unknownKind' }, + ], + }, + ], + }; + + const result = evaluateRagEvalContract(evalFact, createInMemoryRetriever(corpus)); + const errorResult = evaluateRagEvalContract(evalFact, () => { + throw new Error('offline'); + }); + + expect(result.passed).toBe(false); + expect(result.cases[0]?.assertions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ kind: 'expected.chunkCount', passed: false, code: 
'ASSERTION_FAIL' }), + expect.objectContaining({ kind: 'expected.sources', passed: false, code: 'ASSERTION_FAIL' }), + expect.objectContaining({ kind: 'contains', required: false, passed: false, code: 'INVALID_ASSERTION' }), + expect.objectContaining({ kind: 'unknownKind', required: false, passed: false, code: 'UNSUPPORTED_ASSERTION' }), + ]), + ); + expect(errorResult.cases[0]?.assertions).toEqual([ + expect.objectContaining({ kind: 'retriever', passed: false, code: 'RETRIEVER_ERROR' }), + ]); + const missingChunksResult = evaluateRagEvalContract( + evalFact, + () => + ({ + query: 'refund policy', + }) as unknown as ReturnType>, + ); + const malformedChunkResult = evaluateRagEvalContract( + evalFact, + () => + ({ + query: 'refund policy', + chunks: [{ id: 'bad' }], + }) as unknown as ReturnType>, + ); + const invalidScoreAndCitationResult = evaluateRagEvalContract( + evalFact, + () => + ({ + query: 'refund policy', + chunks: [{ id: 'bad', text: 'bad', score: Number.NaN, source: 'docs/bad.md', citation: { uri: 1 } }], + }) as unknown as ReturnType>, + ); + + for (const invalidResult of [missingChunksResult, malformedChunkResult, invalidScoreAndCitationResult]) { + expect(invalidResult).toEqual(expect.objectContaining({ passed: false })); + expect(invalidResult.cases[0]?.assertions).toEqual([ + expect.objectContaining({ kind: 'retriever', passed: false, code: 'RETRIEVER_ERROR' }), + ]); + } + }); + + test('handles empty and assertion-less eval facts without crashing', () => { + const corpus = new InMemoryRagCorpus([{ id: 'refunds', text: 'refund policy', source: 'docs/refunds.md' }]); + + expect(evaluateRagEvalContract({ name: 'Empty', cases: [] }, createInMemoryRetriever(corpus))).toEqual( + expect.objectContaining({ passed: false, caseCount: 0 }), + ); + expect( + evaluateRagEvalContract( + { + name: 'NoAsserts', + cases: [ + { + name: 'refunds', + query: 'refund policy', + tags: [], + expected: {}, + } as unknown as NonNullable[number], + ], + }, + 
createInMemoryRetriever(corpus), + ), + ).toEqual(expect.objectContaining({ passed: true, caseCount: 1 })); + }); + + test('treats non-required assertion failures as advisory diagnostics', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'refunds', text: 'refund policy', source: 'docs/refunds.md', citation: { uri: 'docs/refunds.md' } }, + ]); + const optionalFailure = evaluateRagEvalContract( + { + name: 'Advisory', + ragName: 'AnswerDocs', + cases: [ + { + name: 'refunds', + query: 'refund policy', + tags: [], + expected: { chunkCount: 1 }, + asserts: [assertFact('sourceEq', 'docs/missing.md')], + }, + ], + }, + createInMemoryRetriever(corpus), + ); + const requiredFailure = evaluateRagEvalContract( + { + name: 'Required', + ragName: 'AnswerDocs', + cases: [ + { + name: 'refunds', + query: 'refund policy', + tags: [], + expected: { chunkCount: 1 }, + asserts: [{ ...assertFact('sourceEq', 'docs/missing.md'), required: true }], + }, + ], + }, + createInMemoryRetriever(corpus), + ); + + expect(optionalFailure).toEqual(expect.objectContaining({ passed: true, passedCaseCount: 1 })); + expect(optionalFailure.cases[0]?.assertions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ kind: 'sourceEq', required: false, passed: false, code: 'ASSERTION_FAIL' }), + ]), + ); + expect(requiredFailure).toEqual(expect.objectContaining({ passed: false, passedCaseCount: 0 })); + expect(requiredFailure.cases[0]?.assertions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ kind: 'sourceEq', required: true, passed: false, code: 'ASSERTION_FAIL' }), + ]), + ); + }); +}); + +function assertFact(kind: string, value: string | number | boolean) { + return { + kind, + target: ragAssertTarget(kind), + op: ragAssertOp(kind), + value, + required: false, + }; +} + +function ragAssertTarget(kind: string) { + if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') return 'retrieved-chunks' as const; + if (kind === 'latencyLte') return 'latency' as const; + if 
(kind === 'citesRequired') return 'grounding' as const; + return 'retrieved-chunk' as const; +} + +function ragAssertOp(kind: string) { + switch (kind) { + case 'scoreGte': + case 'uniqueSourcesGte': + return 'gte' as const; + case 'scoreLte': + case 'latencyLte': + return 'lte' as const; + case 'contains': + return 'contains' as const; + case 'sourceGlob': + return 'glob' as const; + case 'citesRequired': + return 'present' as const; + default: + return 'eq' as const; + } +} diff --git a/packages/core/tests/type-guards.test.ts b/packages/core/tests/type-guards.test.ts index 7af95c8e..51362cb5 100644 --- a/packages/core/tests/type-guards.test.ts +++ b/packages/core/tests/type-guards.test.ts @@ -51,7 +51,7 @@ describe('Type guards (Slice 2d)', () => { // Gemini review of slice 2d: confirm method-in-class path also preserves // predicate return types via emitClassBody's emitTypeAnnotation route. const src = - 'class name=User\n method name=isAdmin returns="this is AdminUser"\n handler <<<\n return this.role === "admin";\n >>>'; + 'class name=User\n field name=role type=string\n method name=isAdmin returns="this is AdminUser"\n handler <<<\n return this.role === "admin";\n >>>'; const out = gen(src); expect(out).toContain('export class User {'); expect(out).toContain('isAdmin(): this is AdminUser {'); From 479a88c30a1168adbcf4cc6416835407c9a8267c Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 00:09:28 +0200 Subject: [PATCH 26/46] feat(core): add rag runtime provenance --- packages/core/src/index.ts | 11 ++ packages/core/src/rag-runtime.ts | 230 +++++++++++++++++++++- packages/core/src/semantic-validator.ts | 4 +- packages/core/tests/rag-runtime.test.ts | 250 ++++++++++++++++++++++++ 4 files changed, 490 insertions(+), 5 deletions(-) diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3fa0ae67..2f9110f8 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -431,6 +431,7 @@ export type { RagAssertionKind } from 
'./rag-assertions.js'; export { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; export type { InMemoryRagRetriever, + ProvenancedRetrieveResult, RagChunkInput, RagCitation, RagContractRetriever, @@ -439,18 +440,25 @@ export type { RagEvalCaseResult, RagEvalContractOptions, RagEvalContractResult, + RagMcpRetrieveProvenanceMapping, + RagRuntimeProvenance, + RagRuntimeProvenanceOptions, + RagRuntimeProvenanceStatus, RetrievedChunk, RetrieveOptions, RetrieveResult, } from './rag-runtime.js'; export { createInMemoryRetriever, + createRagRuntimeProvenance, evaluateRagEvalContract, hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, + ragMcpRetrieveProvenanceMapping, retrieveFromInMemoryCorpus, tokenizeForRetrieval, + withRagRuntimeProvenance, } from './rag-runtime.js'; export type { ParserHintsConfig } from './runtime.js'; // Runtime (instance-based state) @@ -498,6 +506,7 @@ export type { RagSemanticFacts, RagSemanticGroundingFact, RagSemanticLocation, + RagSemanticMcpRetrievalFact, RagSemanticPipelineFact, RagSemanticRetrieverFact, RagSemanticSourceFact, @@ -506,6 +515,8 @@ export type { export { collectClassSemanticFacts, collectRagSemanticFacts, + RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE, + RAG_MCP_RETRIEVE_OUTPUT_SHAPE, validateClassSemantics, validateRagSemantics, validateSemantics, diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts index 8bcb6294..a7af688e 100644 --- a/packages/core/src/rag-runtime.ts +++ b/packages/core/src/rag-runtime.ts @@ -1,4 +1,10 @@ -import type { RagSemanticEvalAssertFact, RagSemanticEvalCaseFact, RagSemanticEvalFact } from './semantic-validator.js'; +import type { + RagSemanticEvalAssertFact, + RagSemanticEvalCaseFact, + RagSemanticEvalFact, + RagSemanticMcpRetrievalFact, +} from './semantic-validator.js'; +import { RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE, RAG_MCP_RETRIEVE_OUTPUT_SHAPE } from './semantic-validator.js'; export interface RagCitation { readonly uri?: string; @@ 
-29,7 +35,7 @@ export interface RetrieveOptions { export interface RetrieveResult { readonly query: string; - readonly chunks: RetrievedChunk[]; + readonly chunks: readonly RetrievedChunk[]; } export type InMemoryRagRetriever = (query: string, options?: RetrieveOptions) => RetrieveResult; @@ -37,6 +43,52 @@ export type RagContractRetriever = (query: string, options?: RetrieveOptions) => export const MAX_IN_MEMORY_RAG_TOP_K = 1000; +export type RagRuntimeProvenanceStatus = 'success' | 'retriever_error' | 'eval_failed'; + +export interface RagRuntimeProvenance { + readonly runId: string; + readonly retrieverName?: string; + readonly targetKind?: 'retriever' | 'rag'; + readonly targetName?: string; + readonly query: string; + readonly retrieveOptions: RetrieveOptions; + readonly citationsRequired: boolean; + readonly startedAtMs: number; + readonly durationMs: number; + readonly chunkCount: number; + readonly chunkHashes: readonly string[]; + readonly sources: readonly string[]; + readonly contractStatus: RagRuntimeProvenanceStatus; +} + +export interface RagRuntimeProvenanceOptions { + readonly runId?: string; + readonly retrieverName?: string; + readonly targetKind?: 'retriever' | 'rag'; + readonly targetName?: string; + readonly retrieveOptions?: RetrieveOptions; + readonly citationsRequired?: boolean; + readonly startedAtMs?: number; + readonly durationMs?: number; + readonly contractStatus?: RagRuntimeProvenanceStatus; +} + +export interface ProvenancedRetrieveResult extends RetrieveResult { + readonly provenance: RagRuntimeProvenance; +} + +export interface RagMcpRetrieveProvenanceMapping { + readonly outputShape?: string; + readonly outputItemShape?: string; + readonly citationField?: string; + readonly sourceField?: string; + readonly scoreField?: string; + readonly provenance?: string; + readonly citationsRequired: boolean; + readonly contractStatus: RagSemanticMcpRetrievalFact['contractStatus']; + readonly compatible: boolean; +} + export type 
RagEvalAssertionCode = | 'PASS' | 'ASSERTION_FAIL' @@ -153,6 +205,87 @@ export function retrieveFromInMemoryCorpus( return corpus.retrieve(query, options); } +export function createRagRuntimeProvenance( + result: RetrieveResult, + options: RagRuntimeProvenanceOptions = {}, +): RagRuntimeProvenance { + const validResult = validateRetrieveResult(result); + const retrieveOptions = normalizeProvenanceRetrieveOptions(options.retrieveOptions); + const chunkHashes = validResult.chunks.map((chunk) => hashRetrievedChunkText(chunk.text)); + const chunkProvenance = validResult.chunks.map((chunk, index) => ({ + index, + id: chunk.id, + source: chunk.source, + score: chunk.score, + citation: { ...chunk.citation }, + textHash: chunkHashes[index], + })); + const sources = uniqueOrdered(validResult.chunks.map((chunk) => chunk.source)); + const startedAtMs = options.startedAtMs ?? Date.now(); + const durationMs = options.durationMs ?? 0; + const contractStatus = options.contractStatus ?? 'success'; + return { + runId: + options.runId ?? + hashRetrievedChunkText( + stableStringify({ + retrieverName: options.retrieverName, + targetKind: options.targetKind, + targetName: options.targetName, + query: validResult.query, + retrieveOptions, + citationsRequired: options.citationsRequired ?? false, + chunks: chunkProvenance, + contractStatus, + }), + ), + ...optionalStringValue('retrieverName', options.retrieverName), + ...(options.targetKind ? { targetKind: options.targetKind } : {}), + ...optionalStringValue('targetName', options.targetName), + query: validResult.query, + retrieveOptions, + citationsRequired: options.citationsRequired ?? 
false, + startedAtMs, + durationMs, + chunkCount: validResult.chunks.length, + chunkHashes, + sources, + contractStatus, + }; +} + +export function withRagRuntimeProvenance( + result: RetrieveResult, + options: RagRuntimeProvenanceOptions = {}, +): ProvenancedRetrieveResult { + const validResult = validateRetrieveResult(result); + return { + query: validResult.query, + chunks: validResult.chunks.map(cloneRetrievedChunk), + provenance: createRagRuntimeProvenance(validResult, options), + }; +} + +export function ragMcpRetrieveProvenanceMapping( + retrieval: RagSemanticMcpRetrievalFact | null | undefined, +): RagMcpRetrieveProvenanceMapping { + if (!retrieval) throw new Error('KERN RAG MCP provenance mapping requires a retrieval fact.'); + return { + ...optionalStringValue('outputShape', retrieval.outputShape), + ...optionalStringValue('outputItemShape', retrieval.outputItemShape), + ...optionalStringValue('citationField', retrieval.citationField), + ...optionalStringValue('sourceField', retrieval.sourceField), + ...optionalStringValue('scoreField', retrieval.scoreField), + ...optionalStringValue('provenance', retrieval.provenance), + citationsRequired: retrieval.effectiveRequiresCitations, + contractStatus: retrieval.contractStatus, + compatible: + retrieval.contractStatus === 'valid' && + retrieval.outputShape === RAG_MCP_RETRIEVE_OUTPUT_SHAPE && + (retrieval.outputItemShape === undefined || retrieval.outputItemShape === RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE), + }; +} + export function evaluateRagEvalContract( evaluation: RagSemanticEvalFact, retriever: RagContractRetriever, @@ -580,9 +713,92 @@ function optionalAssertionValue(key: 'expected' | 'actual', value: unknown): Rec return value === undefined ? 
{} : { [key]: value }; } +function normalizeProvenanceRetrieveOptions(options: RetrieveOptions | undefined): RetrieveOptions { + if (options === undefined) return {}; + const out: { topK?: number; minScore?: number } = {}; + if (options.topK !== undefined) { + if (!Number.isInteger(options.topK) || options.topK <= 0 || options.topK > MAX_IN_MEMORY_RAG_TOP_K) { + throw new Error(`KERN RAG runtime topK must be a positive integer up to ${MAX_IN_MEMORY_RAG_TOP_K}.`); + } + out.topK = options.topK; + } + if (options.minScore !== undefined) { + if (!Number.isFinite(options.minScore) || options.minScore < 0 || options.minScore > 1) { + throw new Error('KERN RAG runtime minScore must be between 0 and 1.'); + } + out.minScore = options.minScore; + } + return out; +} + +function uniqueOrdered(values: readonly string[]): string[] { + const seen = new Set(); + const out: string[] = []; + for (const value of values) { + if (seen.has(value)) continue; + seen.add(value); + out.push(value); + } + return out; +} + +function stableStringify(value: unknown): string { + return JSON.stringify(stableJsonValue(value, new WeakSet())); +} + +function stableJsonValue(value: unknown, seen: WeakSet): unknown { + if (typeof value === 'bigint') return value.toString(); + if (typeof value === 'symbol') return value.description ?? 
value.toString(); + if (typeof value === 'function') return `[Function:${value.name || 'anonymous'}]`; + if (value === null || typeof value !== 'object') return value; + if (seen.has(value)) return '[Circular]'; + seen.add(value); + try { + if (Array.isArray(value)) return value.map((item) => stableJsonValue(item, seen)); + if (value instanceof Date) return value.toISOString(); + if (value instanceof RegExp) return value.toString(); + if (value instanceof Map) { + return Array.from(value.entries()) + .map(([key, entry]) => [stableJsonValue(key, seen), stableJsonValue(entry, seen)] as const) + .sort(([left], [right]) => stableStringCompare(left, right)); + } + if (value instanceof Set) { + return Array.from(value.values()) + .map((entry) => stableJsonValue(entry, seen)) + .sort(stableStringCompare); + } + if (isPlainMetadataObject(value)) { + const out: Record = {}; + for (const key of Object.keys(value).sort()) { + const entry = value[key]; + if (entry !== undefined) out[key] = stableJsonValue(entry, seen); + } + return out; + } + return String(value); + } finally { + seen.delete(value); + } +} + +function stableStringCompare(left: unknown, right: unknown): number { + const leftText = String(left); + const rightText = String(right); + return leftText < rightText ? -1 : leftText > rightText ? 
1 : 0; +} + function validateRetrieveResult(result: RetrieveResult): RetrieveResult { - if (!result || !Array.isArray(result.chunks)) throw new Error('retriever result must include chunks array.'); + if (!result || typeof result.query !== 'string' || !Array.isArray(result.chunks)) { + throw new Error('retriever result must include query string and chunks array.'); + } for (const [index, chunk] of result.chunks.entries()) { + if ( + chunk && + typeof chunk.score === 'number' && + (!Number.isFinite(chunk.score) || chunk.score < 0 || chunk.score > 1) + ) { + throw new Error(`retriever chunk at index ${index} score must be between 0 and 1.`); + } if ( !chunk || typeof chunk.id !== 'string' || @@ -639,6 +855,14 @@ function retrievedChunk(chunk: RagChunkInput, score: number): RetrievedChunk { }; } +function cloneRetrievedChunk(chunk: RetrievedChunk): RetrievedChunk { + return { + ...chunk, + citation: { ...chunk.citation }, + ...(chunk.metadata ? { metadata: cloneMetadata(chunk.metadata) } : {}), + }; +} + function cloneChunkInput(chunk: RagChunkInput): RagChunkInput { return { ...chunk, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index de067972..4a59e838 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -2319,8 +2319,8 @@ interface ClassMemberInfo { } const BUILTIN_CLASS_BASES = new Set(['Error']); -const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; -const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; +export const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; +export const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; const BODY_EXPRESSION_PROPS = [ 'value', 'expr', diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts index 3bebf945..a39c5191 100644 --- a/packages/core/tests/rag-runtime.test.ts +++ b/packages/core/tests/rag-runtime.test.ts @@ -1,12 +1,15 @@ import type { RagSemanticEvalFact } 
from '../src/index.js'; import { createInMemoryRetriever, + createRagRuntimeProvenance, evaluateRagEvalContract, hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, + ragMcpRetrieveProvenanceMapping, retrieveFromInMemoryCorpus, tokenizeForRetrieval, + withRagRuntimeProvenance, } from '../src/index.js'; describe('RAG in-memory runtime retrieval', () => { @@ -382,6 +385,253 @@ describe('RAG eval runtime contracts', () => { }); }); +describe('RAG runtime provenance envelopes', () => { + test('creates deterministic provenance for retrieved chunks', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'refunds', text: 'refund policy', source: 'docs/refunds.md', citation: { uri: 'docs/refunds.md' } }, + { id: 'shipping', text: 'refund shipping', source: 'docs/refunds.md', citation: { uri: 'docs/refunds.md' } }, + { id: 'policy', text: 'refund terms', source: 'docs/policies.md', citation: { uri: 'docs/policies.md' } }, + ]); + const result = retrieveFromInMemoryCorpus(corpus, 'refund', { topK: 3, minScore: 0.25 }); + const firstChunk = result.chunks[0]; + if (!firstChunk) throw new Error('missing provenance fixture chunk'); + + const provenance = createRagRuntimeProvenance(result, { + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: 'AnswerDocs', + retrieveOptions: { minScore: 0.25, topK: 3 }, + citationsRequired: true, + startedAtMs: 100, + durationMs: 7, + }); + const sameProvenance = createRagRuntimeProvenance(result, { + targetName: 'AnswerDocs', + targetKind: 'rag', + retrieverName: 'DocsSearch', + retrieveOptions: { topK: 3, minScore: 0.25 }, + citationsRequired: true, + startedAtMs: 999, + durationMs: 1, + }); + const differentQuery = createRagRuntimeProvenance( + { query: 'shipping', chunks: result.chunks }, + { retrieverName: 'DocsSearch', targetKind: 'rag', targetName: 'AnswerDocs' }, + ); + const differentSource = createRagRuntimeProvenance( + { + query: 'refund', + chunks: [ + { + ...firstChunk, + id: 'mirror', + source: 
'docs/mirror.md', + citation: { uri: 'docs/mirror.md' }, + }, + ], + }, + { + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: 'AnswerDocs', + retrieveOptions: { topK: 3, minScore: 0.25 }, + }, + ); + const citationUriThenLocator = createRagRuntimeProvenance( + { + query: 'refund', + chunks: [ + { + ...firstChunk, + citation: { uri: 'docs/refunds.md', locator: 'L1' }, + }, + ], + }, + { retrieverName: 'DocsSearch', targetKind: 'rag', targetName: 'AnswerDocs' }, + ); + const citationLocatorThenUri = createRagRuntimeProvenance( + { + query: 'refund', + chunks: [ + { + ...firstChunk, + citation: { locator: 'L1', uri: 'docs/refunds.md' }, + }, + ], + }, + { retrieverName: 'DocsSearch', targetKind: 'rag', targetName: 'AnswerDocs' }, + ); + + expect(provenance).toEqual( + expect.objectContaining({ + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: 'AnswerDocs', + query: 'refund', + retrieveOptions: { topK: 3, minScore: 0.25 }, + citationsRequired: true, + startedAtMs: 100, + durationMs: 7, + chunkCount: 3, + sources: ['docs/policies.md', 'docs/refunds.md'], + contractStatus: 'success', + }), + ); + expect(provenance.runId).toMatch(/^[a-f0-9]{32}$/); + expect(sameProvenance.runId).toBe(provenance.runId); + expect(differentQuery.runId).not.toBe(provenance.runId); + expect(differentSource.runId).not.toBe(provenance.runId); + expect(citationUriThenLocator.runId).toBe(citationLocatorThenUri.runId); + expect(provenance.chunkHashes).toHaveLength(result.chunks.length); + expect(JSON.parse(JSON.stringify(provenance))).toEqual(provenance); + }); + + test('records only retrieve options supplied for provenance', () => { + const result = { + query: 'refund', + chunks: [ + { + id: 'refunds', + text: 'refund policy', + score: 0.9, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }; + + expect(createRagRuntimeProvenance(result).retrieveOptions).toEqual({}); + expect(createRagRuntimeProvenance(result, { retrieveOptions: { 
topK: 1 } }).retrieveOptions).toEqual({ topK: 1 }); + }); + + test('wraps retrieval results with provenance without mutating chunks', () => { + const result = { + query: 'refund', + chunks: [ + { + id: 'refunds', + text: 'refund policy', + score: 0.9, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md', locator: 'L1' }, + metadata: { section: 'policy' }, + }, + ], + }; + + const wrapped = withRagRuntimeProvenance(result, { + retrieverName: 'DocsSearch', + targetKind: 'retriever', + targetName: 'DocsSearch', + retrieveOptions: { topK: 1 }, + }); + (wrapped.chunks[0]?.metadata as Record).section = 'mutated'; + (wrapped.chunks[0]?.citation as Record).uri = 'mutated'; + + expect(result.chunks[0]?.metadata).toEqual({ section: 'policy' }); + expect(result.chunks[0]?.citation).toEqual({ uri: 'docs/refunds.md', locator: 'L1' }); + expect(wrapped.provenance).toEqual( + expect.objectContaining({ + targetKind: 'retriever', + targetName: 'DocsSearch', + chunkCount: 1, + sources: ['docs/refunds.md'], + }), + ); + }); + + test('validates malformed retrieval results before provenance creation', () => { + expect(() => + createRagRuntimeProvenance({ + query: 1 as unknown as string, + chunks: [], + }), + ).toThrow('query string'); + expect(() => + createRagRuntimeProvenance({ + query: 'refund', + chunks: [ + { + id: 'bad', + text: 'bad', + score: 1.5, + source: 'docs/bad.md', + citation: { uri: 'docs/bad.md' }, + }, + ], + }), + ).toThrow('score'); + expect(() => createRagRuntimeProvenance({ query: 'refund', chunks: [] }, { retrieveOptions: { topK: 0 } })).toThrow( + 'topK', + ); + expect(() => + createRagRuntimeProvenance( + { query: 'refund', chunks: [] }, + { retrieveOptions: { topK: MAX_IN_MEMORY_RAG_TOP_K + 1 } }, + ), + ).toThrow('topK'); + expect(() => + createRagRuntimeProvenance({ query: 'refund', chunks: [] }, { retrieveOptions: { minScore: Number.NaN } }), + ).toThrow('minScore'); + }); + + test('maps MCP retrieve facts to provenance-compatible output 
contracts', () => { + expect( + ragMcpRetrieveProvenanceMapping({ + targetKind: 'rag', + targetName: 'AnswerDocs', + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + citationField: 'citation', + sourceField: 'uri', + scoreField: 'score', + provenance: 'source', + requireGrounding: true, + effectiveRequiresCitations: true, + contractStatus: 'valid', + }), + ).toEqual({ + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + citationField: 'citation', + sourceField: 'uri', + scoreField: 'score', + provenance: 'source', + citationsRequired: true, + contractStatus: 'valid', + compatible: true, + }); + expect( + ragMcpRetrieveProvenanceMapping({ + targetKind: 'retriever', + targetName: 'DocsSearch', + requireGrounding: false, + effectiveRequiresCitations: false, + contractStatus: 'absent', + }), + ).toEqual({ citationsRequired: false, contractStatus: 'absent', compatible: false }); + expect( + ragMcpRetrieveProvenanceMapping({ + targetKind: 'retriever', + targetName: 'DocsSearch', + outputShape: 'RetrievedChunk[]', + outputItemShape: 'OtherChunk', + requireGrounding: false, + effectiveRequiresCitations: false, + contractStatus: 'valid', + }), + ).toEqual({ + outputShape: 'RetrievedChunk[]', + outputItemShape: 'OtherChunk', + citationsRequired: false, + contractStatus: 'valid', + compatible: false, + }); + expect(() => ragMcpRetrieveProvenanceMapping(undefined)).toThrow('retrieval fact'); + expect(() => ragMcpRetrieveProvenanceMapping(null)).toThrow('retrieval fact'); + }); +}); + function assertFact(kind: string, value: string | number | boolean) { return { kind, From c9e1737f40861d4bb72e1a501d464784e1474c65 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 00:47:35 +0200 Subject: [PATCH 27/46] feat(core): add rag answer contracts --- packages/core/src/index.ts | 9 + packages/core/src/rag-runtime.ts | 314 ++++++++++++++++++ packages/core/src/semantic-substrate.ts | 136 +++++++- packages/core/tests/rag-runtime.test.ts | 
235 +++++++++++++ .../core/tests/semantic-substrate.test.ts | 34 +- 5 files changed, 725 insertions(+), 3 deletions(-) diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 2f9110f8..4f432b34 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -432,6 +432,12 @@ export { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js export type { InMemoryRagRetriever, ProvenancedRetrieveResult, + RagAnswerContract, + RagAnswerContractDiagnostic, + RagAnswerContractDiagnosticCode, + RagAnswerContractResult, + RagAnswerContractStatus, + RagAnswerGroundingSpan, RagChunkInput, RagCitation, RagContractRetriever, @@ -451,6 +457,7 @@ export type { export { createInMemoryRetriever, createRagRuntimeProvenance, + evaluateRagAnswerContract, evaluateRagEvalContract, hashRetrievedChunkText, InMemoryRagCorpus, @@ -475,6 +482,8 @@ export type { KernSemanticCoreType, KernSemanticIrContract, KernSemanticPrimitive, + KernSemanticRagAnswerReviewFact, + KernSemanticRagAnswerReviewStatus, KernSemanticStdlibOperation, KernSemanticSubstrate, KernSemanticSubstrateSource, diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts index a7af688e..fa2295f1 100644 --- a/packages/core/src/rag-runtime.ts +++ b/packages/core/src/rag-runtime.ts @@ -89,6 +89,61 @@ export interface RagMcpRetrieveProvenanceMapping { readonly compatible: boolean; } +export type RagAnswerContractStatus = 'grounded' | 'partially_grounded' | 'ungrounded' | 'invalid'; + +export type RagAnswerContractDiagnosticCode = + | 'ANSWER_EMPTY' + | 'QUERY_MISMATCH' + | 'RETRIEVER_ERROR' + | 'PROVENANCE_MISMATCH' + | 'SPAN_INVALID' + | 'SPAN_UNGROUNDED' + | 'CHUNK_REF_UNKNOWN' + | 'CITATION_REQUIRED' + | 'GROUNDING_BELOW_THRESHOLD'; + +export interface RagAnswerGroundingSpan { + readonly start: number; + readonly end: number; + readonly chunkIds: readonly string[]; + readonly required?: boolean; +} + +export interface RagAnswerContract { + readonly id?: 
string; + readonly ragName?: string; + readonly prompt?: string; + readonly query: string; + readonly answer: string; + readonly retrieval: RetrieveResult | ProvenancedRetrieveResult; + readonly provenance?: RagRuntimeProvenance; + readonly groundingSpans?: readonly RagAnswerGroundingSpan[]; + readonly requireCitations?: boolean; + readonly minGroundingCoverage?: number; +} + +export interface RagAnswerContractDiagnostic { + readonly code: RagAnswerContractDiagnosticCode; + readonly message: string; + readonly spanIndex?: number; + readonly chunkId?: string; +} + +export interface RagAnswerContractResult { + readonly id?: string; + readonly ragName?: string; + readonly query: string; + readonly passed: boolean; + readonly status: RagAnswerContractStatus; + readonly groundingCoverage: number; + readonly groundedChars: number; + readonly answerChars: number; + readonly citedChunkIds: readonly string[]; + readonly sources: readonly string[]; + readonly provenance?: RagRuntimeProvenance; + readonly diagnostics: readonly RagAnswerContractDiagnostic[]; +} + export type RagEvalAssertionCode = | 'PASS' | 'ASSERTION_FAIL' @@ -286,6 +341,155 @@ export function ragMcpRetrieveProvenanceMapping( }; } +export function evaluateRagAnswerContract(contract: RagAnswerContract): RagAnswerContractResult { + const base = { + ...optionalStringValue('id', contract.id), + ...optionalStringValue('ragName', contract.ragName), + query: typeof contract.query === 'string' ? contract.query : '', + }; + let retrieval: RetrieveResult; + try { + retrieval = validateRetrieveResult(contract.retrieval); + } catch (error) { + return { + ...base, + passed: false, + status: 'invalid', + groundingCoverage: 0, + groundedChars: 0, + answerChars: 0, + citedChunkIds: [], + sources: [], + ...(contract.provenance ? { provenance: contract.provenance } : {}), + diagnostics: [ + { + code: 'RETRIEVER_ERROR', + message: `RAG answer contract retrieval failed: ${error instanceof Error ? 
error.message : String(error)}.`, + }, + ], + }; + } + + const answer = typeof contract.answer === 'string' ? contract.answer : ''; + const answerChars = countAnswerChars(answer); + const diagnostics: RagAnswerContractDiagnostic[] = []; + if (typeof contract.answer !== 'string' || answerChars === 0) { + diagnostics.push({ code: 'ANSWER_EMPTY', message: 'RAG answer contract answer must be a non-empty string.' }); + } + + const minGroundingCoverage = normalizeGroundingCoverageThreshold(contract.minGroundingCoverage); + const provenance = contract.provenance ?? retrieveResultProvenance(contract.retrieval); + if (contract.query !== retrieval.query) { + diagnostics.push({ + code: 'QUERY_MISMATCH', + message: 'RAG answer contract query does not match the retrieval result query.', + }); + } + if (provenance && !provenanceMatchesRetrieval(provenance, retrieval)) { + diagnostics.push({ + code: 'PROVENANCE_MISMATCH', + message: 'RAG answer contract provenance does not match the retrieval result.', + }); + } + + const chunkById = new Map(retrieval.chunks.map((chunk) => [chunk.id, chunk])); + const grounded = new Array(answer.length).fill(false) as boolean[]; + const citedChunkIds = new Set(); + const citationBearingChunkIds = new Set(); + const groundingSpans = Array.isArray(contract.groundingSpans) ? 
contract.groundingSpans : []; + if (contract.groundingSpans !== undefined && !Array.isArray(contract.groundingSpans)) { + diagnostics.push({ + code: 'SPAN_INVALID', + message: 'RAG answer contract groundingSpans must be an array.', + }); + } + + for (const [spanIndex, span] of groundingSpans.entries()) { + if (!isValidGroundingSpan(span, answer.length)) { + diagnostics.push({ + code: 'SPAN_INVALID', + spanIndex, + message: `RAG answer grounding span at index ${spanIndex} is invalid or outside the answer text.`, + }); + continue; + } + + const validChunkIds: string[] = []; + let spanHasCitation = false; + for (const chunkId of span.chunkIds) { + if (chunkById.has(chunkId)) { + validChunkIds.push(chunkId); + citedChunkIds.add(chunkId); + if (chunkHasCitation(chunkById.get(chunkId))) { + spanHasCitation = true; + citationBearingChunkIds.add(chunkId); + } + } else { + diagnostics.push({ + code: 'CHUNK_REF_UNKNOWN', + spanIndex, + chunkId, + message: `RAG answer grounding span at index ${spanIndex} references unknown chunk '${chunkId}'.`, + }); + } + } + + if (validChunkIds.length === 0) { + diagnostics.push({ + code: span.required ? 'CITATION_REQUIRED' : 'SPAN_UNGROUNDED', + spanIndex, + message: `RAG answer grounding span at index ${spanIndex} has no valid retrieved chunk citation.`, + }); + continue; + } + + if (contract.requireCitations && !spanHasCitation) { + diagnostics.push({ + code: 'CITATION_REQUIRED', + spanIndex, + message: `RAG answer grounding span at index ${spanIndex} requires a non-empty retrieved chunk citation.`, + }); + continue; + } + + for (let index = span.start; index < span.end; index += 1) grounded[index] = true; + } + + const groundedChars = countGroundedAnswerChars(answer, grounded); + const groundingCoverage = answerChars === 0 ? 
0 : groundedChars / answerChars; + if ( + answerChars > 0 && + contract.requireCitations && + citationBearingChunkIds.size === 0 && + groundingSpans.length === 0 + ) { + diagnostics.push({ + code: 'CITATION_REQUIRED', + message: 'RAG answer contract requires citations but no retrieved chunks were cited.', + }); + } + if (answerChars > 0 && groundingCoverage < minGroundingCoverage) { + diagnostics.push({ + code: 'GROUNDING_BELOW_THRESHOLD', + message: `RAG answer grounding coverage ${groundingCoverage.toFixed(3)} is below required threshold ${minGroundingCoverage.toFixed(3)}.`, + }); + } + + const passed = diagnostics.length === 0; + return { + ...base, + passed, + status: passed ? 'grounded' : ragAnswerStatus(diagnostics, groundingCoverage), + groundingCoverage, + groundedChars, + answerChars, + citedChunkIds: [...citedChunkIds].sort(stableStringCompare), + sources: uniqueOrdered([...citedChunkIds].map((chunkId) => chunkById.get(chunkId)?.source).filter(isString)), + ...(provenance ? { provenance } : {}), + diagnostics, + }; +} + export function evaluateRagEvalContract( evaluation: RagSemanticEvalFact, retriever: RagContractRetriever, @@ -339,6 +543,101 @@ function normalizeRetrieveOptions(options: RetrieveOptions): Required 1) { + throw new Error('KERN RAG answer contract minGroundingCoverage must be between 0 and 1.'); + } + return value; +} + +function retrieveResultProvenance( + result: RetrieveResult | ProvenancedRetrieveResult, +): RagRuntimeProvenance | undefined { + return result && typeof result === 'object' && 'provenance' in result ? 
result.provenance : undefined; +} + +function provenanceMatchesRetrieval(provenance: RagRuntimeProvenance, retrieval: RetrieveResult): boolean { + if ( + !provenance || + typeof provenance.query !== 'string' || + typeof provenance.chunkCount !== 'number' || + !Array.isArray(provenance.chunkHashes) || + !Array.isArray(provenance.sources) + ) { + return false; + } + const chunkHashes = retrieval.chunks.map((chunk) => hashRetrievedChunkText(chunk.text)); + const sources = uniqueOrdered(retrieval.chunks.map((chunk) => chunk.source)); + return ( + provenance.query === retrieval.query && + provenance.chunkCount === retrieval.chunks.length && + arraysEqual(provenance.chunkHashes, chunkHashes) && + arraysEqual(provenance.sources, sources) + ); +} + +function chunkHasCitation(chunk: RetrievedChunk | undefined): boolean { + return ( + !!chunk && + ((typeof chunk.citation.uri === 'string' && chunk.citation.uri.trim().length > 0) || + (typeof chunk.citation.locator === 'string' && chunk.citation.locator.trim().length > 0)) + ); +} + +function isValidGroundingSpan(span: RagAnswerGroundingSpan, answerLength: number): boolean { + return ( + !!span && + Number.isInteger(span.start) && + Number.isInteger(span.end) && + span.start >= 0 && + span.end > span.start && + span.end <= answerLength && + Array.isArray(span.chunkIds) && + span.chunkIds.every(isString) + ); +} + +function countAnswerChars(answer: string): number { + if (typeof answer !== 'string') return 0; + let count = 0; + for (let index = 0; index < answer.length; index += 1) { + if (!/\s/u.test(answer[index] ?? '')) count += 1; + } + return count; +} + +function countGroundedAnswerChars(answer: string, grounded: readonly boolean[]): number { + let count = 0; + for (let index = 0; index < answer.length; index += 1) { + if (grounded[index] && !/\s/u.test(answer[index] ?? 
'')) count += 1; + } + return count; +} + +function ragAnswerStatus( + diagnostics: readonly RagAnswerContractDiagnostic[], + groundingCoverage: number, +): RagAnswerContractStatus { + if ( + diagnostics.some((diagnostic) => + [ + 'ANSWER_EMPTY', + 'CITATION_REQUIRED', + 'QUERY_MISMATCH', + 'RETRIEVER_ERROR', + 'PROVENANCE_MISMATCH', + 'SPAN_INVALID', + 'CHUNK_REF_UNKNOWN', + ].includes(diagnostic.code), + ) + ) { + return 'invalid'; + } + if (groundingCoverage === 0) return 'ungrounded'; + return 'partially_grounded'; +} + function evaluateRagCase( evaluation: RagSemanticEvalFact, evaluationCase: RagSemanticEvalCaseFact, @@ -787,6 +1086,10 @@ function stableStringCompare(left: unknown, right: unknown): number { return leftText < rightText ? -1 : leftText > rightText ? 1 : 0; } +function arraysEqual(left: readonly string[], right: readonly string[]): boolean { + return left.length === right.length && left.every((value, index) => value === right[index]); +} + function validateRetrieveResult(result: RetrieveResult): RetrieveResult { if (!result || typeof result.query !== 'string' || !Array.isArray(result.chunks)) { throw new Error('retriever result must include query string and chunks array.'); @@ -813,6 +1116,13 @@ function validateRetrieveResult(result: RetrieveResult): RetrieveResult { throw new Error(`retriever chunk at index ${index} is not a RetrievedChunk.`); } } + const chunkIds = new Set(); + for (const [index, chunk] of result.chunks.entries()) { + if (chunkIds.has(chunk.id)) { + throw new Error(`retriever chunk at index ${index} duplicates chunk id '${chunk.id}'.`); + } + chunkIds.add(chunk.id); + } return result; } @@ -825,6 +1135,10 @@ function isValidCitation(value: unknown): value is RagCitation { ); } +function isString(value: unknown): value is string { + return typeof value === 'string'; +} + export function tokenizeForRetrieval(value: string): ReadonlySet { return new Set( value diff --git a/packages/core/src/semantic-substrate.ts 
b/packages/core/src/semantic-substrate.ts index b4bca982..fb2f0758 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -13,7 +13,10 @@ import { type ClassSemanticFacts, collectClassSemanticFacts, collectRagSemanticFacts, + RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE, + RAG_MCP_RETRIEVE_OUTPUT_SHAPE, type RagSemanticFacts, + type RagSemanticMcpRetrievalFact, type SemanticViolation, validateClassSemantics, validateRagSemantics, @@ -79,6 +82,25 @@ export interface KernSemanticValidationSummary { readonly byRule: Readonly>; } +export type KernSemanticRagAnswerReviewStatus = 'ready' | 'incomplete' | 'invalid'; + +export interface KernSemanticRagAnswerReviewFact { + readonly pipelineName: string; + readonly retrieverName: string; + readonly prompt?: string; + readonly answer?: string; + readonly citationsRequired: boolean; + readonly groundingCount: number; + readonly evalCount: number; + readonly evalCaseCount: number; + readonly mcpRetrievalCount: number; + readonly compatibleMcpRetrievalCount: number; + readonly provenanceRequired: boolean; + readonly provenanceComplete: boolean; + readonly validationStatus: KernSemanticRagAnswerReviewStatus; + readonly issues: readonly string[]; +} + export interface KernSemanticSubstrate { readonly schemaVersion: 1; readonly generatedBy: 'kern-semantic-substrate'; @@ -98,6 +120,7 @@ export interface KernSemanticSubstrate { readonly classValidationSummary?: KernSemanticValidationSummary; readonly ragFacts?: RagSemanticFacts; readonly ragValidationSummary?: KernSemanticValidationSummary; + readonly ragAnswerReviewFacts?: readonly KernSemanticRagAnswerReviewFact[]; } export interface BuildKernSemanticSubstrateOptions { @@ -110,6 +133,7 @@ export interface BuildKernSemanticSubstrateOptions { } export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { + const ragFacts = options.documentRag ? 
collectRagSemanticFacts(options.documentRag) : undefined; const coreTypes = Object.values(CORE_TYPE_CONTRACTS.types).map((contract) => ({ id: `core.type.${contract.name}`, name: contract.name, @@ -159,7 +183,7 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp ...(options.documentClasses && options.includeClassValidationSummary ? { classValidationSummary: semanticValidationSummary(options.documentClasses) } : {}), - ...(options.documentRag ? { ragFacts: collectRagSemanticFacts(options.documentRag) } : {}), + ...(ragFacts ? { ragFacts, ragAnswerReviewFacts: buildRagAnswerReviewFacts(ragFacts) } : {}), ...(options.documentRag && options.includeRagValidationSummary ? { ragValidationSummary: ragValidationSummary(options.documentRag) } : {}), @@ -259,6 +283,116 @@ function summarizeSemanticViolations(violations: readonly SemanticViolation[]): return { total: violations.length, byRule }; } +function buildRagAnswerReviewFacts(facts: RagSemanticFacts): KernSemanticRagAnswerReviewFact[] { + const unresolvedRetrievers = new Set(facts.unresolvedRetrieverRefs); + return facts.pipelines.map((pipeline) => { + const citationsRequired = pipeline.citations || pipeline.groundings.some((grounding) => grounding.requireCitations); + const mcpRetrievals = facts.mcpRetrievals.filter( + (retrieval) => retrieval.targetKind === 'rag' && retrieval.targetName === pipeline.name, + ); + const compatibleMcpRetrievals = mcpRetrievals.filter((retrieval) => + isRagAnswerCompatibleMcpRetrieval(retrieval, citationsRequired), + ); + const evalCaseCount = pipeline.evals.reduce((count, evaluation) => count + (evaluation.caseCount ?? 
0), 0); + const issues = ragAnswerReviewIssues( + facts, + pipeline.name, + pipeline.retrieverName, + pipeline.prompt, + pipeline.answer, + citationsRequired, + pipeline.groundings.length, + pipeline.groundings.some((grounding) => grounding.requireCitations), + pipeline.evals.length, + evalCaseCount, + mcpRetrievals.length, + compatibleMcpRetrievals.length, + unresolvedRetrievers.has(pipeline.retrieverName), + ); + return { + pipelineName: pipeline.name, + retrieverName: pipeline.retrieverName, + ...optionalStringValue('prompt', pipeline.prompt), + ...optionalStringValue('answer', pipeline.answer), + citationsRequired, + groundingCount: pipeline.groundings.length, + evalCount: pipeline.evals.length, + evalCaseCount, + mcpRetrievalCount: mcpRetrievals.length, + compatibleMcpRetrievalCount: compatibleMcpRetrievals.length, + provenanceRequired: citationsRequired || mcpRetrievals.some((retrieval) => retrieval.requireGrounding), + provenanceComplete: mcpRetrievals.length === 0 || compatibleMcpRetrievals.length === mcpRetrievals.length, + validationStatus: ragAnswerReviewStatus(issues), + issues, + }; + }); +} + +function ragAnswerReviewIssues( + facts: RagSemanticFacts, + pipelineName: string, + retrieverName: string, + prompt: string | undefined, + answer: string | undefined, + citationsRequired: boolean, + groundingCount: number, + hasCitationGrounding: boolean, + evalCount: number, + evalCaseCount: number, + mcpRetrievalCount: number, + compatibleMcpRetrievalCount: number, + unresolvedRetriever: boolean, +): string[] { + const issues: string[] = []; + if (unresolvedRetriever) issues.push(`unresolved-retriever:${retrieverName}`); + if (!prompt && !answer) issues.push('missing-answer-surface'); + if (citationsRequired && groundingCount === 0) issues.push('missing-grounding'); + if (citationsRequired && !hasCitationGrounding) issues.push('missing-citation-grounding'); + if (evalCount === 0) issues.push('missing-eval'); + if (evalCount > 0 && evalCaseCount === 0) 
issues.push('missing-eval-case'); + if ( + facts.mcpRetrievals.some( + (retrieval) => + retrieval.targetKind === 'rag' && + retrieval.targetName === pipelineName && + retrieval.contractStatus === 'invalid', + ) + ) { + issues.push('invalid-mcp-retrieve-contract'); + } + if (mcpRetrievalCount > 0 && compatibleMcpRetrievalCount === 0) issues.push('missing-compatible-mcp-retrieve'); + return issues; +} + +function ragAnswerReviewStatus(issues: readonly string[]): KernSemanticRagAnswerReviewStatus { + if (issues.some((issue) => INVALID_RAG_ANSWER_REVIEW_ISSUES.has(issue.split(':', 1)[0] ?? ''))) return 'invalid'; + return issues.length === 0 ? 'ready' : 'incomplete'; +} + +function isRagAnswerCompatibleMcpRetrieval( + retrieval: RagSemanticMcpRetrievalFact, + citationsRequired: boolean, +): boolean { + if (retrieval.contractStatus !== 'valid') return false; + if (retrieval.outputShape !== RAG_MCP_RETRIEVE_OUTPUT_SHAPE) return false; + if (retrieval.outputItemShape !== undefined && retrieval.outputItemShape !== RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE) { + return false; + } + if (citationsRequired || retrieval.effectiveRequiresCitations) { + return ( + !!retrieval.citationField && + (!!retrieval.sourceField || retrieval.provenance === 'source' || retrieval.provenance === 'citation') + ); + } + return true; +} + +function optionalStringValue(key: string, value: string | undefined): Record { + return value === undefined ? 
{} : { [key]: value }; +} + +const INVALID_RAG_ANSWER_REVIEW_ISSUES = new Set(['unresolved-retriever', 'invalid-mcp-retrieve-contract']); + const KERN_PRIMITIVE_NAMES: Record = { 'collection.has': 'includes', 'collection.count': 'count', diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts index a39c5191..b0a1f6b6 100644 --- a/packages/core/tests/rag-runtime.test.ts +++ b/packages/core/tests/rag-runtime.test.ts @@ -2,6 +2,7 @@ import type { RagSemanticEvalFact } from '../src/index.js'; import { createInMemoryRetriever, createRagRuntimeProvenance, + evaluateRagAnswerContract, evaluateRagEvalContract, hashRetrievedChunkText, InMemoryRagCorpus, @@ -632,6 +633,240 @@ describe('RAG runtime provenance envelopes', () => { }); }); +describe('RAG answer runtime contracts', () => { + test('validates a grounded answer against retrieved chunks and provenance', () => { + const retrieval = withRagRuntimeProvenance( + { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds are allowed for thirty days.', + score: 0.95, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md', locator: 'L1-L2' }, + }, + ], + }, + { + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: 'AnswerDocs', + retrieveOptions: { topK: 1, minScore: 0.8 }, + citationsRequired: true, + startedAtMs: 100, + }, + ); + const answer = 'Refunds are allowed for thirty days.'; + + const result = evaluateRagAnswerContract({ + id: 'AnswerDocs:refunds', + ragName: 'AnswerDocs', + prompt: './answer.md', + query: retrieval.query, + answer, + retrieval, + requireCitations: true, + minGroundingCoverage: 1, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: ['refunds'], required: true }], + }); + + expect(result).toEqual( + expect.objectContaining({ + id: 'AnswerDocs:refunds', + ragName: 'AnswerDocs', + query: 'refund policy', + passed: true, + status: 'grounded', + groundingCoverage: 1, + citedChunkIds: ['refunds'], + sources: 
['docs/refunds.md'], + provenance: retrieval.provenance, + diagnostics: [], + }), + ); + expect(JSON.parse(JSON.stringify(result))).toEqual(result); + }); + + test('reports partial and ungrounded answer contract failures', () => { + const retrieval = { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds are allowed.', + score: 0.9, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }; + const answer = 'Refunds are allowed. Shipping is separate.'; + const partial = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + minGroundingCoverage: 0.9, + groundingSpans: [{ start: 0, end: 'Refunds are allowed.'.length, chunkIds: ['refunds'] }], + }); + const ungrounded = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + requireCitations: true, + groundingSpans: [], + }); + + expect(partial.passed).toBe(false); + expect(partial.status).toBe('partially_grounded'); + expect(partial.diagnostics).toEqual([expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' })]); + expect(ungrounded.passed).toBe(false); + expect(ungrounded.status).toBe('invalid'); + expect(ungrounded.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'CITATION_REQUIRED' }), + expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' }), + ]), + ); + }); + + test('reports invalid answer contracts for bad spans chunk refs and provenance mismatches', () => { + const retrieval = withRagRuntimeProvenance( + { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds are allowed.', + score: 0.9, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }, + { targetKind: 'rag', targetName: 'AnswerDocs' }, + ); + const answer = 'Refunds are allowed.'; + const staleChunk = retrieval.chunks[0]; + if (!staleChunk) throw new Error('missing answer contract fixture chunk'); + + const invalid = evaluateRagAnswerContract({ + query: 
retrieval.query, + answer, + retrieval, + provenance: { ...retrieval.provenance, query: 'other query' }, + groundingSpans: [ + { start: 0, end: answer.length + 1, chunkIds: ['refunds'] }, + { start: 0, end: answer.length, chunkIds: ['missing'], required: true }, + ], + }); + const queryMismatch = evaluateRagAnswerContract({ + query: 'shipping policy', + answer, + retrieval, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: ['refunds'] }], + }); + const staleProvenance = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval: { + ...retrieval, + chunks: [{ ...staleChunk, text: 'Different retrieved text.' }], + }, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: ['refunds'] }], + }); + const badAnswer = evaluateRagAnswerContract({ + query: retrieval.query, + answer: undefined as unknown as string, + retrieval, + }); + const badGroundingSpans = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + groundingSpans: {} as unknown as [], + }); + const emptyCitation = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval: { + ...retrieval, + chunks: [{ ...staleChunk, citation: { uri: '' } }], + }, + requireCitations: true, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: ['refunds'] }], + }); + const nonStringChunkRef = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: [1 as unknown as string] }], + }); + + expect(invalid.passed).toBe(false); + expect(invalid.status).toBe('invalid'); + expect(invalid.diagnostics.map((diagnostic) => diagnostic.code)).toEqual( + expect.arrayContaining([ + 'PROVENANCE_MISMATCH', + 'SPAN_INVALID', + 'CHUNK_REF_UNKNOWN', + 'CITATION_REQUIRED', + 'GROUNDING_BELOW_THRESHOLD', + ]), + ); + expect(queryMismatch.diagnostics).toEqual([expect.objectContaining({ code: 'QUERY_MISMATCH' })]); + expect(staleProvenance.diagnostics).toEqual([expect.objectContaining({ 
code: 'PROVENANCE_MISMATCH' })]); + expect(badAnswer.diagnostics).toEqual([expect.objectContaining({ code: 'ANSWER_EMPTY' })]); + expect(badGroundingSpans.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'SPAN_INVALID' }), + expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' }), + ]), + ); + expect(emptyCitation.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'CITATION_REQUIRED' }), + expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' }), + ]), + ); + expect(nonStringChunkRef.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'SPAN_INVALID' }), + expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' }), + ]), + ); + expect(() => + evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + minGroundingCoverage: 1.1, + }), + ).toThrow('minGroundingCoverage'); + expect(() => + createRagRuntimeProvenance({ + query: 'refund', + chunks: [ + { + id: 'dupe', + text: 'one', + score: 0.5, + source: 'docs/one.md', + citation: { uri: 'docs/one.md' }, + }, + { + id: 'dupe', + text: 'two', + score: 0.5, + source: 'docs/two.md', + citation: { uri: 'docs/two.md' }, + }, + ], + }), + ).toThrow('duplicates chunk id'); + }); +}); + function assertFact(kind: string, value: string | number | boolean) { return { kind, diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 87ee071a..82308957 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -65,6 +65,7 @@ describe('KERN semantic substrate', () => { expect(Object.hasOwn(substrate, 'classValidationSummary')).toBe(false); expect(Object.hasOwn(substrate, 'ragFacts')).toBe(false); expect(Object.hasOwn(substrate, 'ragValidationSummary')).toBe(false); + expect(Object.hasOwn(substrate, 'ragAnswerReviewFacts')).toBe(false); }); test('exports document class member inheritance and 
override facts when requested', () => { @@ -245,9 +246,10 @@ describe('KERN semantic substrate', () => { ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', - 'rag name=AnswerDocs retriever=DocsSearch citations=true', + 'rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" answer="grounded" citations=true', ' grounding requireCitations=true policy=strict maxContext=6000', - ' ragEval metric=faithfulness threshold=0.85', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="refund policy"', 'mcp name=Support', ' resource name=DocsResource uri="docs://manuals"', ' tool name=answerQuestion', @@ -285,6 +287,8 @@ describe('KERN semantic substrate', () => { expect.objectContaining({ name: 'AnswerDocs', retrieverName: 'DocsSearch', + prompt: './answer.md', + answer: 'grounded', citations: true, groundings: [expect.objectContaining({ requireCitations: true, policy: 'strict' })], evals: [expect.objectContaining({ metric: 'faithfulness', threshold: 0.85 })], @@ -317,12 +321,38 @@ describe('KERN semantic substrate', () => { resourceName: 'DocsResource', }), ]); + expect(substrate.ragAnswerReviewFacts).toEqual([ + { + pipelineName: 'AnswerDocs', + retrieverName: 'DocsSearch', + prompt: './answer.md', + answer: 'grounded', + citationsRequired: true, + groundingCount: 1, + evalCount: 1, + evalCaseCount: 1, + mcpRetrievalCount: 1, + compatibleMcpRetrievalCount: 1, + provenanceRequired: true, + provenanceComplete: true, + validationStatus: 'ready', + issues: [], + }, + ]); const invalidSubstrate = buildKernSemanticSubstrate({ documentRag: parseRoot('rag name=Broken retriever=Missing'), includeRagValidationSummary: true, }); expect(invalidSubstrate.ragValidationSummary?.byRule['rag-unknown-retriever']).toBe(1); + 
expect(invalidSubstrate.ragAnswerReviewFacts).toEqual([ + expect.objectContaining({ + pipelineName: 'Broken', + retrieverName: 'Missing', + validationStatus: 'invalid', + issues: expect.arrayContaining(['unresolved-retriever:Missing', 'missing-answer-surface', 'missing-eval']), + }), + ]); }); test('exports portable review primitives as stable query objects', () => { From 398bcac9a3439a243413d1c3e4fa2a1ab8839a2a Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 01:18:38 +0200 Subject: [PATCH 28/46] feat(core): add rag answer contract surface --- .../native-test/kernlang-rag-contracts.kern | 16 + .../kernlang-rag-contracts.test.kern | 10 + packages/core/src/codegen-core.ts | 4 + packages/core/src/decompiler.ts | 42 +++ packages/core/src/index.ts | 5 + packages/core/src/rag-runtime.ts | 37 ++ packages/core/src/schema.ts | 29 +- packages/core/src/semantic-substrate.ts | 7 +- packages/core/src/semantic-validator.ts | 320 +++++++++++++++++- packages/core/src/spec.ts | 2 + packages/core/tests/rag-runtime.test.ts | 53 ++- packages/core/tests/rag-semantics.test.ts | 83 +++++ packages/core/tests/schema-validation.test.ts | 11 + .../core/tests/semantic-substrate.test.ts | 10 + 14 files changed, 625 insertions(+), 4 deletions(-) create mode 100644 packages/core/native-test/kernlang-rag-contracts.kern create mode 100644 packages/core/native-test/kernlang-rag-contracts.test.kern diff --git a/packages/core/native-test/kernlang-rag-contracts.kern b/packages/core/native-test/kernlang-rag-contracts.kern new file mode 100644 index 00000000..35ce7940 --- /dev/null +++ b/packages/core/native-test/kernlang-rag-contracts.kern @@ -0,0 +1,16 @@ +corpus name=Docs title="Support docs" + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + chunking source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens + +embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine + +retriever name=DocsSearch corpus=Docs 
embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72 + +rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true + grounding requireCitations=true policy=strict maxContext=6000 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="How do refunds work?" tags="smoke,policy" topK=4 minScore=0.72 chunkCount=1 sources="docs/refunds.md" + ragAssert kind=sourceGlob value="docs/refunds.md" required=true + ragAssert kind=citesRequired + ragAnswerContract name=RefundAnswer query="How do refunds work?" answer="Refunds follow the refund policy." requireCitations=true minGroundingCoverage=0.8 + answerSpan start=0 end=33 chunks=refunds required=true diff --git a/packages/core/native-test/kernlang-rag-contracts.test.kern b/packages/core/native-test/kernlang-rag-contracts.test.kern new file mode 100644 index 00000000..824bc443 --- /dev/null +++ b/packages/core/native-test/kernlang-rag-contracts.test.kern @@ -0,0 +1,10 @@ +test name="KERNlang RAG contract surface parity" target="./kernlang-rag-contracts.kern" coverage=false + it name="RAG contracts stay valid and roundtrip through native decompile" + expect no=schemaViolations + expect no=semanticViolations + expect decompile contains="corpus name=Docs title=\"Support docs\"" + expect decompile contains="retriever name=DocsSearch corpus=Docs" + expect decompile contains="rag name=AnswerDocs retriever=DocsSearch" + expect decompile contains="ragAnswerContract name=RefundAnswer" + expect decompile contains="answerSpan start=0 end=33 chunks=refunds required=true" + expect roundtrip=true diff --git a/packages/core/src/codegen-core.ts b/packages/core/src/codegen-core.ts index 5d3290d2..c1eda47d 100644 --- a/packages/core/src/codegen-core.ts +++ b/packages/core/src/codegen-core.ts @@ -732,6 +732,8 @@ export const CORE_NODE_TYPES = new Set([ 'ragEval', 'ragCase', 'ragAssert', + 'ragAnswerContract', + 'answerSpan', // Backend data layer (graduated nodes) 'model', 
'column', @@ -1034,6 +1036,8 @@ export function generateCoreNode(node: IRNode, target?: string, runtime?: KernRu case 'ragEval': case 'ragCase': case 'ragAssert': + case 'ragAnswerContract': + case 'answerSpan': return []; // Graduated nodes — backend data layer case 'model': diff --git a/packages/core/src/decompiler.ts b/packages/core/src/decompiler.ts index 4cb505a8..2b9efccf 100644 --- a/packages/core/src/decompiler.ts +++ b/packages/core/src/decompiler.ts @@ -106,6 +106,23 @@ export function decompile(root: IRNode): DecompileResult { renderIndexer(node, indent); return; } + if ( + node.type === 'corpus' || + node.type === 'source' || + node.type === 'chunking' || + node.type === 'embed' || + node.type === 'retriever' || + node.type === 'rag' || + node.type === 'grounding' || + node.type === 'ragEval' || + node.type === 'ragCase' || + node.type === 'ragAssert' || + node.type === 'ragAnswerContract' || + node.type === 'answerSpan' + ) { + renderRagNode(node, indent); + return; + } if (node.type === 'handler') { pushHandler(node, indent); return; @@ -359,6 +376,31 @@ export function decompile(root: IRNode): DecompileResult { lines.push(`${indent}${parts.join(' ')}`); } + function renderRagNode(node: IRNode, indent: string): void { + const propOrderByType: Record = { + corpus: ['name', 'title', 'tenant', 'refresh'], + source: ['name', 'kind', 'uri', 'resource', 'media', 'acl'], + chunking: ['name', 'corpus', 'source', 'strategy', 'maxTokens', 'overlap', 'unit'], + embed: ['name', 'corpus', 'model', 'dims', 'metric'], + retriever: ['name', 'corpus', 'embed', 'mode', 'topK', 'minScore', 'rerank'], + rag: ['name', 'retriever', 'prompt', 'answer', 'citations'], + grounding: ['name', 'rag', 'requireCitations', 'policy', 'maxContext'], + ragEval: ['name', 'rag', 'metric', 'threshold', 'mode'], + ragCase: ['name', 'query', 'tags', 'topK', 'minScore', 'chunkCount', 'sources'], + ragAssert: ['kind', 'value', 'threshold', 'count', 'valueMs', 'required'], + ragAnswerContract: 
['name', 'rag', 'query', 'answer', 'prompt', 'requireCitations', 'minGroundingCoverage'], + answerSpan: ['start', 'end', 'chunks', 'required'], + }; + const props = node.props || {}; + const quoted = node.__quotedProps ?? []; + const parts = [node.type]; + for (const propName of propOrderByType[node.type] ?? []) { + if (props[propName] !== undefined) parts.push(renderScalarProp(propName, props[propName], quoted)); + } + lines.push(`${indent}${parts.join(' ')}`); + for (const child of node.children || []) render(child, `${indent} `); + } + function renderClassLike(node: IRNode, indent: string): void { const props = node.props || {}; const quoted = node.__quotedProps ?? []; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 4f432b34..177e4d7d 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -450,6 +450,7 @@ export type { RagRuntimeProvenance, RagRuntimeProvenanceOptions, RagRuntimeProvenanceStatus, + RagSemanticAnswerContractOptions, RetrievedChunk, RetrieveOptions, RetrieveResult, @@ -459,9 +460,11 @@ export { createRagRuntimeProvenance, evaluateRagAnswerContract, evaluateRagEvalContract, + evaluateRagSemanticAnswerContract, hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, + ragAnswerContractFromSemanticFact, ragMcpRetrieveProvenanceMapping, retrieveFromInMemoryCorpus, tokenizeForRetrieval, @@ -506,6 +509,8 @@ export type { ClassSemanticMemberKind, ClassSemanticOverrideFact, ClassSemanticOverrideStatus, + RagSemanticAnswerContractFact, + RagSemanticAnswerSpanFact, RagSemanticChunkingFact, RagSemanticCorpusFact, RagSemanticEmbedFact, diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts index fa2295f1..f7c99639 100644 --- a/packages/core/src/rag-runtime.ts +++ b/packages/core/src/rag-runtime.ts @@ -1,4 +1,5 @@ import type { + RagSemanticAnswerContractFact, RagSemanticEvalAssertFact, RagSemanticEvalCaseFact, RagSemanticEvalFact, @@ -188,6 +189,10 @@ export interface 
RagEvalContractResult { readonly cases: readonly RagEvalCaseResult[]; } +export interface RagSemanticAnswerContractOptions { + readonly provenance?: RagRuntimeProvenance; +} + interface StoredRagChunk { readonly chunk: RagChunkInput; readonly terms: ReadonlySet; @@ -490,6 +495,38 @@ export function evaluateRagAnswerContract(contract: RagAnswerContract): RagAnswe }; } +export function ragAnswerContractFromSemanticFact( + fact: RagSemanticAnswerContractFact, + retrieval: RetrieveResult | ProvenancedRetrieveResult, + options: RagSemanticAnswerContractOptions = {}, +): RagAnswerContract { + return { + id: fact.name, + ...optionalStringValue('ragName', fact.ragName), + ...optionalStringValue('prompt', fact.prompt), + query: fact.query, + answer: fact.answer, + retrieval, + ...(options.provenance ? { provenance: options.provenance } : {}), + groundingSpans: fact.spans.map((span) => ({ + start: span.start, + end: span.end, + chunkIds: [...span.chunkIds], + ...(span.required ? { required: true } : {}), + })), + requireCitations: fact.requireCitations, + ...optionalNumberValue('minGroundingCoverage', fact.minGroundingCoverage), + }; +} + +export function evaluateRagSemanticAnswerContract( + fact: RagSemanticAnswerContractFact, + retrieval: RetrieveResult | ProvenancedRetrieveResult, + options: RagSemanticAnswerContractOptions = {}, +): RagAnswerContractResult { + return evaluateRagAnswerContract(ragAnswerContractFromSemanticFact(fact, retrieval, options)); +} + export function evaluateRagEvalContract( evaluation: RagSemanticEvalFact, retriever: RagContractRetriever, diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 9e456045..4f33dedd 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2491,7 +2491,7 @@ export const NODE_SCHEMAS: Record = { answer: { kind: 'string' }, citations: { kind: 'boolean' }, }, - allowedChildren: ['grounding', 'ragEval'], + allowedChildren: ['grounding', 'ragEval', 'ragAnswerContract'], }, 
grounding: { description: 'RAG grounding policy — declares citation and context constraints for a RAG pipeline.', @@ -2546,6 +2546,33 @@ export const NODE_SCHEMAS: Record = { }, allowedChildren: [], }, + ragAnswerContract: { + description: + 'RAG answer contract — declares the provider-free answer grounding shape evaluated by the core RAG runtime.', + example: + 'ragAnswerContract name=RefundAnswer query="How do refunds work?" answer="Refunds follow the refund policy." requireCitations=true minGroundingCoverage=0.8\n answerSpan start=0 end=34 chunks=refunds required=true', + props: { + name: { required: true, kind: 'identifier' }, + rag: { kind: 'identifier' }, + query: { required: true, kind: 'string' }, + answer: { required: true, kind: 'string' }, + prompt: { kind: 'string' }, + requireCitations: { kind: 'boolean' }, + minGroundingCoverage: { kind: 'number' }, + }, + allowedChildren: ['answerSpan'], + }, + answerSpan: { + description: 'RAG answer grounding span — maps answer text character ranges to retrieved chunk ids.', + example: 'answerSpan start=0 end=34 chunks="refunds,policy" required=true', + props: { + start: { required: true, kind: 'number' }, + end: { required: true, kind: 'number' }, + chunks: { required: true, kind: 'string' }, + required: { kind: 'boolean' }, + }, + allowedChildren: [], + }, // ── React / UI element nodes ────────────────────────────────────────── diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index fb2f0758..44892036 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -93,6 +93,7 @@ export interface KernSemanticRagAnswerReviewFact { readonly groundingCount: number; readonly evalCount: number; readonly evalCaseCount: number; + readonly answerContractCount: number; readonly mcpRetrievalCount: number; readonly compatibleMcpRetrievalCount: number; readonly provenanceRequired: boolean; @@ -294,12 +295,14 @@ function 
buildRagAnswerReviewFacts(facts: RagSemanticFacts): KernSemanticRagAnsw isRagAnswerCompatibleMcpRetrieval(retrieval, citationsRequired), ); const evalCaseCount = pipeline.evals.reduce((count, evaluation) => count + (evaluation.caseCount ?? 0), 0); + const answerContractCount = pipeline.answerContracts.length; const issues = ragAnswerReviewIssues( facts, pipeline.name, pipeline.retrieverName, pipeline.prompt, pipeline.answer, + answerContractCount, citationsRequired, pipeline.groundings.length, pipeline.groundings.some((grounding) => grounding.requireCitations), @@ -318,6 +321,7 @@ function buildRagAnswerReviewFacts(facts: RagSemanticFacts): KernSemanticRagAnsw groundingCount: pipeline.groundings.length, evalCount: pipeline.evals.length, evalCaseCount, + answerContractCount, mcpRetrievalCount: mcpRetrievals.length, compatibleMcpRetrievalCount: compatibleMcpRetrievals.length, provenanceRequired: citationsRequired || mcpRetrievals.some((retrieval) => retrieval.requireGrounding), @@ -334,6 +338,7 @@ function ragAnswerReviewIssues( retrieverName: string, prompt: string | undefined, answer: string | undefined, + answerContractCount: number, citationsRequired: boolean, groundingCount: number, hasCitationGrounding: boolean, @@ -345,7 +350,7 @@ function ragAnswerReviewIssues( ): string[] { const issues: string[] = []; if (unresolvedRetriever) issues.push(`unresolved-retriever:${retrieverName}`); - if (!prompt && !answer) issues.push('missing-answer-surface'); + if (!prompt && !answer && answerContractCount === 0) issues.push('missing-answer-surface'); if (citationsRequired && groundingCount === 0) issues.push('missing-grounding'); if (citationsRequired && !hasCitationGrounding) issues.push('missing-citation-grounding'); if (evalCount === 0) issues.push('missing-eval'); diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 4a59e838..78b09946 100644 --- a/packages/core/src/semantic-validator.ts +++ 
b/packages/core/src/semantic-validator.ts @@ -197,6 +197,26 @@ export interface RagSemanticEvalAssertFact { readonly loc?: RagSemanticLocation; } +export interface RagSemanticAnswerSpanFact { + readonly start: number; + readonly end: number; + readonly chunkIds: readonly string[]; + readonly required: boolean; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticAnswerContractFact { + readonly name: string; + readonly ragName?: string; + readonly query: string; + readonly answer: string; + readonly prompt?: string; + readonly requireCitations: boolean; + readonly minGroundingCoverage?: number; + readonly spans: readonly RagSemanticAnswerSpanFact[]; + readonly loc?: RagSemanticLocation; +} + export interface RagSemanticPipelineFact { readonly name: string; readonly retrieverName: string; @@ -205,6 +225,7 @@ export interface RagSemanticPipelineFact { readonly citations: boolean; readonly groundings: readonly RagSemanticGroundingFact[]; readonly evals: readonly RagSemanticEvalFact[]; + readonly answerContracts: readonly RagSemanticAnswerContractFact[]; readonly loc?: RagSemanticLocation; } @@ -757,6 +778,21 @@ interface RagAssertInfo { caseBound: boolean; } +interface RagAnswerContractInfo { + node: IRNode; + rootIndex: number; + name?: string; + ragName?: string; +} + +interface RagAnswerSpanInfo { + node: IRNode; + rootIndex: number; + contractName?: string; + contractNode?: IRNode; + contractBound: boolean; +} + interface RagMcpContainerInfo { node: IRNode; rootIndex: number; @@ -789,6 +825,8 @@ interface RagInfos { evals: RagEvalInfo[]; cases: RagCaseInfo[]; asserts: RagAssertInfo[]; + answerContracts: RagAnswerContractInfo[]; + answerSpans: RagAnswerSpanInfo[]; mcpRetrievals: RagMcpRetrievalInfo[]; mcpResources: RagMcpSymbolInfo[]; mcpTools: RagMcpSymbolInfo[]; @@ -812,6 +850,8 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio infos.evals.length === 0 && infos.cases.length === 0 && infos.asserts.length === 0 && 
+ infos.answerContracts.length === 0 && + infos.answerSpans.length === 0 && infos.mcpRetrievals.length === 0 && infos.mcpResources.length === 0 && infos.mcpTools.length === 0 && @@ -862,6 +902,12 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio for (const assertion of infos.asserts) { validateRagAssert(assertion, citationRequiredRagNames, violations); } + for (const contract of infos.answerContracts) { + validateRagAnswerContract(contract, infos.answerSpans, ragByName, citationRequiredRagNames, violations); + } + for (const span of infos.answerSpans) { + validateRagAnswerSpan(span, violations); + } validateRagMcpRetrievalDuplicates(infos.mcpRetrievals, violations); for (const retrieval of infos.mcpRetrievals) { validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, citationRequiredRagNames, violations); @@ -880,6 +926,8 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { evals: [], cases: [], asserts: [], + answerContracts: [], + answerSpans: [], mcpRetrievals: [], mcpResources: [], mcpTools: [], @@ -902,6 +950,9 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { nearestRagCaseBound = false, nearestRagEvalNode?: IRNode, nearestRagCaseNode?: IRNode, + nearestRagAnswerContractName?: string, + nearestRagAnswerContractNode?: IRNode, + nearestRagAnswerContractBound = false, nearestMcpContainer?: RagMcpContainerInfo, nearestMcpName?: string, ): void { @@ -918,6 +969,10 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { const nextRagCaseBound = node.type === 'ragCase' || nearestRagCaseBound; const nextRagEvalNode = node.type === 'ragEval' ? node : nearestRagEvalNode; const nextRagCaseNode = node.type === 'ragCase' ? node : nearestRagCaseNode; + const nextRagAnswerContractName = + node.type === 'ragAnswerContract' ? stringProp(node, 'name') : nearestRagAnswerContractName; + const nextRagAnswerContractNode = node.type === 'ragAnswerContract' ? 
node : nearestRagAnswerContractNode; + const nextRagAnswerContractBound = node.type === 'ragAnswerContract' || nearestRagAnswerContractBound; const nextMcpName = node.type === 'mcp' ? stringProp(node, 'name') || '' : nearestMcpName; const nextMcpContainer = node.type === 'tool' || node.type === 'prompt' @@ -978,6 +1033,21 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { evalBound: nearestRagEvalBound, caseBound: nearestRagCaseBound, }); + } else if (node.type === 'ragAnswerContract') { + out.answerContracts.push({ + node, + rootIndex, + name: stringProp(node, 'name'), + ragName: stringProp(node, 'rag') || nearestRagName, + }); + } else if (node.type === 'answerSpan') { + out.answerSpans.push({ + node, + rootIndex, + contractName: nearestRagAnswerContractName, + contractNode: nearestRagAnswerContractNode, + contractBound: nearestRagAnswerContractBound, + }); } else if (node.type === 'retrieve') { out.mcpRetrievals.push({ node, rootIndex, container: nearestMcpContainer }); } else if ( @@ -1005,6 +1075,9 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { nextRagCaseBound, nextRagEvalNode, nextRagCaseNode, + nextRagAnswerContractName, + nextRagAnswerContractNode, + nextRagAnswerContractBound, nextMcpContainer, nextMcpName, ); @@ -1052,6 +1125,7 @@ function validateRagUniqueNames(infos: RagInfos, violations: SemanticViolation[] validateRagUniqueNameSet('rag', infos.pipelines, violations); validateRagUniqueEvalNames(infos.evals, violations); validateRagUniqueCaseNames(infos.cases, violations); + validateRagUniqueAnswerContractNames(infos.answerContracts, violations); } function validateRagUniqueNameSet( @@ -1134,6 +1208,28 @@ function validateRagUniqueCaseNames(cases: readonly RagCaseInfo[], violations: S } } +function validateRagUniqueAnswerContractNames( + contracts: readonly RagAnswerContractInfo[], + violations: SemanticViolation[], +): void { + const seen = new Map(); + for (const contract of 
contracts) { + if (!contract.name || !contract.ragName) continue; + const key = `${contract.ragName}:${contract.name}`; + const prev = seen.get(key); + if (prev) { + pushRagViolation( + violations, + 'rag-duplicate-answer-contract-name', + contract.node, + `Duplicate RAG answer contract named '${contract.name}' in rag '${contract.ragName}' — first defined at line ${prev.loc?.line ?? '?'}.`, + ); + } else { + seen.set(key, contract.node); + } + } +} + function validateRagSource( source: RagSourceInfo, mcpResourcesByName: ReadonlyMap, @@ -1668,6 +1764,170 @@ function validateRagAssert( } } +function validateRagAnswerContract( + contract: RagAnswerContractInfo, + spans: readonly RagAnswerSpanInfo[], + ragByName: ReadonlyMap, + citationRequiredRagNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!contract.ragName) { + pushRagViolation( + violations, + 'rag-answer-contract-missing-rag', + contract.node, + 'RAG answer contract must be nested under a rag pipeline or declare rag=.', + ); + } else if (!ragByName.has(contract.ragName)) { + pushRagViolation( + violations, + 'rag-answer-contract-unknown-rag', + contract.node, + `RAG answer contract references unknown rag '${contract.ragName}'.`, + ); + } + + if (!contract.name) { + pushRagViolation( + violations, + 'rag-answer-contract-name-required', + contract.node, + 'RAG answer contract requires name=.', + ); + } + if (!stringProp(contract.node, 'query')) { + pushRagViolation( + violations, + 'rag-answer-contract-query-required', + contract.node, + 'RAG answer contract requires query=.', + ); + } + if (!stringProp(contract.node, 'answer')) { + pushRagViolation( + violations, + 'rag-answer-contract-answer-required', + contract.node, + 'RAG answer contract requires answer=.', + ); + } + + const minGroundingCoverage = numberProp(contract.node, 'minGroundingCoverage'); + if ( + invalidNumberProp(contract.node, 'minGroundingCoverage') || + (minGroundingCoverage !== undefined && (minGroundingCoverage < 0 || 
minGroundingCoverage > 1)) + ) { + pushRagViolation( + violations, + 'rag-answer-contract-min-grounding-coverage-invalid', + contract.node, + 'RAG answer contract minGroundingCoverage must be between 0 and 1.', + ); + } + + validateRagAnswerContractCoverage(contract, spans, minGroundingCoverage, violations); + + if ( + ragBooleanProp(contract.node, 'requireCitations') && + (!contract.ragName || !citationRequiredRagNames.has(contract.ragName)) + ) { + pushRagViolation( + violations, + 'rag-answer-contract-citations-require-grounding', + contract.node, + 'RAG answer contract requireCitations=true requires a citation-grounded rag.', + ); + } +} + +function validateRagAnswerContractCoverage( + contract: RagAnswerContractInfo, + spans: readonly RagAnswerSpanInfo[], + minGroundingCoverage: number | undefined, + violations: SemanticViolation[], +): void { + const answer = stringProp(contract.node, 'answer'); + if (!answer) return; + + const contractSpans = spans.filter((span) => span.contractNode === contract.node); + const grounded = new Array(answer.length).fill(false) as boolean[]; + for (const span of contractSpans) { + const start = numberProp(span.node, 'start'); + const end = numberProp(span.node, 'end'); + if ( + start !== undefined && + end !== undefined && + Number.isInteger(start) && + Number.isInteger(end) && + start >= 0 && + end > start && + end <= answer.length + ) { + for (let index = start; index < end; index += 1) grounded[index] = true; + } else if (end !== undefined && end > answer.length) { + pushRagViolation( + violations, + 'rag-answer-span-range-invalid', + span.node, + 'RAG answer span end must not exceed the parent answer length.', + ); + } + } + + if (minGroundingCoverage === undefined || minGroundingCoverage < 0 || minGroundingCoverage > 1) return; + const answerChars = countRagAnswerChars(answer); + const groundedChars = countRagGroundedAnswerChars(answer, grounded); + const coverage = answerChars === 0 ? 
0 : groundedChars / answerChars; + if (answerChars > 0 && coverage < minGroundingCoverage) { + pushRagViolation( + violations, + 'rag-answer-contract-grounding-coverage-insufficient', + contract.node, + `RAG answer contract grounding coverage ${coverage.toFixed(3)} is below minGroundingCoverage ${minGroundingCoverage.toFixed(3)}.`, + ); + } +} + +function validateRagAnswerSpan(span: RagAnswerSpanInfo, violations: SemanticViolation[]): void { + if (!span.contractBound) { + pushRagViolation( + violations, + 'rag-answer-span-missing-contract', + span.node, + 'RAG answer span must be nested under ragAnswerContract.', + ); + } + + const start = numberProp(span.node, 'start'); + const end = numberProp(span.node, 'end'); + if ( + invalidNumberProp(span.node, 'start') || + invalidNumberProp(span.node, 'end') || + start === undefined || + end === undefined || + !Number.isInteger(start) || + !Number.isInteger(end) || + start < 0 || + end <= start + ) { + pushRagViolation( + violations, + 'rag-answer-span-range-invalid', + span.node, + 'RAG answer span start/end must be non-negative integers with start < end.', + ); + } + + if (splitRagList(stringProp(span.node, 'chunks')).length === 0) { + pushRagViolation( + violations, + 'rag-answer-span-chunks-required', + span.node, + 'RAG answer span requires chunks=.', + ); + } +} + function validateRagMcpRetrievalDuplicates( retrievals: readonly RagMcpRetrievalInfo[], violations: SemanticViolation[], @@ -1920,7 +2180,15 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), retrievers: infos.retrievers.map(ragRetrieverFact), pipelines: infos.pipelines.map((info) => - ragPipelineFact(info, infos.groundings, infos.evals, infos.cases, infos.asserts), + ragPipelineFact( + info, + infos.groundings, + infos.evals, + infos.cases, + infos.asserts, + infos.answerContracts, + infos.answerSpans, + ), ), mcpRetrievals: infos.mcpRetrievals.map((info) => 
ragMcpRetrievalFact(info, citationRequiredRagNames)), resourceFeedsCorpora: infos.sources @@ -1950,6 +2218,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe [ ...infos.groundings.map((info) => info.ragName), ...infos.evals.map((info) => info.ragName), + ...infos.answerContracts.map((info) => info.ragName), ...infos.mcpRetrievals.map((info) => stringProp(info.node, 'rag')), ].filter((name): name is string => !!name && !ragNames.has(name)), ), @@ -2051,6 +2320,8 @@ function ragPipelineFact( evals: readonly RagEvalInfo[], cases: readonly RagCaseInfo[], asserts: readonly RagAssertInfo[], + answerContracts: readonly RagAnswerContractInfo[], + answerSpans: readonly RagAnswerSpanInfo[], ): RagSemanticPipelineFact { return { name: info.name, @@ -2062,6 +2333,9 @@ function ragPipelineFact( evals: evals .filter((evaluation) => evaluation.ragName === info.name) .map((evaluation) => ragEvalFact(evaluation, cases, asserts)), + answerContracts: answerContracts + .filter((contract) => contract.ragName === info.name) + .map((contract) => ragAnswerContractFact(contract, answerSpans)), ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), }; } @@ -2131,6 +2405,34 @@ function ragEvalAssertFact(info: RagAssertInfo): RagSemanticEvalAssertFact { }; } +function ragAnswerContractFact( + info: RagAnswerContractInfo, + spans: readonly RagAnswerSpanInfo[], +): RagSemanticAnswerContractFact { + const contractSpans = spans.filter((span) => span.contractNode === info.node); + return { + name: info.name ?? '', + ...optionalStringValue('ragName', info.ragName), + query: stringProp(info.node, 'query') ?? '', + answer: stringProp(info.node, 'answer') ?? '', + ...optionalStringFact(info.node, 'prompt', 'prompt'), + requireCitations: ragBooleanProp(info.node, 'requireCitations'), + ...optionalNumberFact(info.node, 'minGroundingCoverage', 'minGroundingCoverage'), + spans: contractSpans.map(ragAnswerSpanFact), + ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), + }; +} + +function ragAnswerSpanFact(info: RagAnswerSpanInfo): RagSemanticAnswerSpanFact { + return { + start: numberProp(info.node, 'start') ?? 0, + end: numberProp(info.node, 'end') ?? 0, + chunkIds: splitRagList(stringProp(info.node, 'chunks')), + required: ragBooleanProp(info.node, 'required'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + function ragAssertTarget(kind: string): RagSemanticEvalAssertFact['target'] { if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') return 'retrieved-chunks'; if (kind === 'latencyLte') return 'latency'; @@ -2182,6 +2484,22 @@ function splitRagList(value: string | undefined): string[] { .filter((item) => item.length > 0); } +function countRagAnswerChars(answer: string): number { + let count = 0; + for (let index = 0; index < answer.length; index += 1) { + if (!/\s/u.test(answer[index] ?? '')) count += 1; + } + return count; +} + +function countRagGroundedAnswerChars(answer: string, grounded: readonly boolean[]): number { + let count = 0; + for (let index = 0; index < answer.length; index += 1) { + if (grounded[index] && !/\s/u.test(answer[index] ?? 
'')) count += 1; + } + return count; +} + function ragMcpRetrievalFact( info: RagMcpRetrievalInfo, citationRequiredRagNames: ReadonlySet, diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index 5bded53f..5bdaf48d 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -356,6 +356,8 @@ export const NODE_TYPES = [ 'ragEval', 'ragCase', 'ragAssert', + 'ragAnswerContract', + 'answerSpan', 'expression-v1', ] as const; diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts index b0a1f6b6..1bf7ee30 100644 --- a/packages/core/tests/rag-runtime.test.ts +++ b/packages/core/tests/rag-runtime.test.ts @@ -1,12 +1,14 @@ -import type { RagSemanticEvalFact } from '../src/index.js'; +import type { RagSemanticAnswerContractFact, RagSemanticEvalFact } from '../src/index.js'; import { createInMemoryRetriever, createRagRuntimeProvenance, evaluateRagAnswerContract, evaluateRagEvalContract, + evaluateRagSemanticAnswerContract, hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, + ragAnswerContractFromSemanticFact, ragMcpRetrieveProvenanceMapping, retrieveFromInMemoryCorpus, tokenizeForRetrieval, @@ -688,6 +690,55 @@ describe('RAG answer runtime contracts', () => { expect(JSON.parse(JSON.stringify(result))).toEqual(result); }); + test('evaluates semantic answer contract facts through the runtime contract engine', () => { + const answer = 'Refunds follow the refund policy.'; + const fact: RagSemanticAnswerContractFact = { + name: 'RefundAnswer', + ragName: 'AnswerDocs', + query: 'refund policy', + answer, + prompt: './answer.md', + requireCitations: true, + minGroundingCoverage: 1, + spans: [{ start: 0, end: answer.length, chunkIds: ['refunds'], required: true }], + }; + const retrieval = { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds follow the refund policy.', + score: 1, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }; + + const 
contract = ragAnswerContractFromSemanticFact(fact, retrieval); + const result = evaluateRagSemanticAnswerContract(fact, retrieval); + + expect(contract).toEqual( + expect.objectContaining({ + id: 'RefundAnswer', + ragName: 'AnswerDocs', + prompt: './answer.md', + requireCitations: true, + minGroundingCoverage: 1, + }), + ); + expect(contract.groundingSpans).toEqual([{ start: 0, end: answer.length, chunkIds: ['refunds'], required: true }]); + expect(result).toEqual( + expect.objectContaining({ + id: 'RefundAnswer', + passed: true, + status: 'grounded', + citedChunkIds: ['refunds'], + sources: ['docs/refunds.md'], + }), + ); + }); + test('reports partial and ungrounded answer contract failures', () => { const retrieval = { query: 'refund policy', diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index 41e81290..b7b9c184 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -24,12 +24,28 @@ describe('RAG language semantics', () => { 'ragEval', 'ragCase', 'ragAssert', + 'ragAnswerContract', + 'answerSpan', ]) { expect(isCoreNode(type)).toBe(true); expect(generateCoreNode({ type, props: {} })).toEqual([]); } }); + test('parses RAG answer contract nodes without unknown-node diagnostics', () => { + const diagnostics = parseDocumentWithDiagnostics( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragAnswerContract name=RefundAnswer query="q" answer="a"', + ' answerSpan start=0 end=1 chunks=refunds', + ].join('\n'), + ).diagnostics; + + expect(diagnostics.filter((diagnostic) => diagnostic.code === 'UNKNOWN_NODE_TYPE')).toEqual([]); + }); + test('accepts a minimal grounded RAG declaration graph', () => { const source = [ 'corpus name=Docs title="Support docs"', @@ -219,6 +235,73 @@ describe('RAG language semantics', () => { ]); }); + test('collects RAG answer contracts as semantic facts', () => { + const 
facts = collectRagSemanticFacts( + parseRoot( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + ' ragAnswerContract name=RefundAnswer query="How do refunds work?" answer="Refunds follow the refund policy." prompt="./answer.md" requireCitations=true minGroundingCoverage=0.8', + ' answerSpan start=0 end=33 chunks="refunds,policy" required=true', + ].join('\n'), + ), + ); + + expect(facts.pipelines[0]?.answerContracts).toEqual([ + expect.objectContaining({ + name: 'RefundAnswer', + ragName: 'AnswerDocs', + query: 'How do refunds work?', + answer: 'Refunds follow the refund policy.', + prompt: './answer.md', + requireCitations: true, + minGroundingCoverage: 0.8, + spans: [ + expect.objectContaining({ + start: 0, + end: 33, + chunkIds: ['refunds', 'policy'], + required: true, + }), + ], + }), + ]); + }); + + test('reports invalid RAG answer contract declarations', () => { + const rules = rulesFor( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragAnswerContract name=Bad query="" answer="" requireCitations=true minGroundingCoverage=1.5', + ' answerSpan start=4 end=4 chunks=""', + ' ragAnswerContract name=LowCoverage query="q" answer="abcd" minGroundingCoverage=1', + ' answerSpan start=0 end=2 chunks=half', + ' ragAnswerContract name=LongSpan query="q" answer="abcd"', + ' answerSpan start=0 end=10 chunks=tooLong', + 'ragAnswerContract name=Detached rag=Missing query="q" answer="a"', + 'answerSpan start=0 end=1 chunks=orphan', + ].join('\n'), + ); + + expect(rules).toEqual( + expect.arrayContaining([ + 'rag-answer-contract-query-required', + 'rag-answer-contract-answer-required', + 'rag-answer-contract-min-grounding-coverage-invalid', + 'rag-answer-contract-citations-require-grounding', + 'rag-answer-span-range-invalid', + 'rag-answer-span-chunks-required', + 
'rag-answer-contract-grounding-coverage-insufficient', + 'rag-answer-contract-unknown-rag', + 'rag-answer-span-missing-contract', + ]), + ); + }); + test('keeps RAG eval case facts scoped to their parent eval node', () => { const facts = collectRagSemanticFacts( parseRoot( diff --git a/packages/core/tests/schema-validation.test.ts b/packages/core/tests/schema-validation.test.ts index 46242a26..b259a609 100644 --- a/packages/core/tests/schema-validation.test.ts +++ b/packages/core/tests/schema-validation.test.ts @@ -108,6 +108,8 @@ describe('Schema Validation', () => { ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', ' ragCase name=refunds query="How do refunds work?"', ' ragAssert kind=scoreGte threshold=0.72', + ' ragAnswerContract name=RefundAnswer query="How do refunds work?" answer="Refunds follow policy." minGroundingCoverage=0.8', + ' answerSpan start=0 end=22 chunks=refunds required=true', ].join('\n'), ); expect(valid).toHaveLength(0); @@ -119,6 +121,8 @@ describe('Schema Validation', () => { 'embed name=NoCorpus', 'retriever name=NoCorpus', 'rag name=NoRetriever', + 'ragAnswerContract query="q"', + 'answerSpan start=0 end=1', ].join('\n'), ); expect(missing.some((violation) => violation.message.includes("'corpus' requires prop 'name'"))).toBe(true); @@ -126,6 +130,13 @@ describe('Schema Validation', () => { expect(missing.some((violation) => violation.message.includes("'embed' requires prop 'corpus'"))).toBe(true); expect(missing.some((violation) => violation.message.includes("'retriever' requires prop 'corpus'"))).toBe(true); expect(missing.some((violation) => violation.message.includes("'rag' requires prop 'retriever'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'ragAnswerContract' requires prop 'name'"))).toBe( + true, + ); + expect( + missing.some((violation) => violation.message.includes("'ragAnswerContract' requires prop 'answer'")), + ).toBe(true); + expect(missing.some((violation) => 
violation.message.includes("'answerSpan' requires prop 'chunks'"))).toBe(true); const misplaced = validate( ['retriever name=DocsSearch corpus=Docs', ' grounding requireCitations=true'].join('\n'), diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 82308957..4850943c 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -250,6 +250,8 @@ describe('KERN semantic substrate', () => { ' grounding requireCitations=true policy=strict maxContext=6000', ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', ' ragCase name=refunds query="refund policy"', + ' ragAnswerContract name=RefundAnswer query="refund policy" answer="Refunds are policy-backed." requireCitations=true minGroundingCoverage=0.8', + ' answerSpan start=0 end=26 chunks=refunds required=true', 'mcp name=Support', ' resource name=DocsResource uri="docs://manuals"', ' tool name=answerQuestion', @@ -292,6 +294,13 @@ describe('KERN semantic substrate', () => { citations: true, groundings: [expect.objectContaining({ requireCitations: true, policy: 'strict' })], evals: [expect.objectContaining({ metric: 'faithfulness', threshold: 0.85 })], + answerContracts: [ + expect.objectContaining({ + name: 'RefundAnswer', + query: 'refund policy', + spans: [expect.objectContaining({ chunkIds: ['refunds'] })], + }), + ], }), ]); expect(substrate.ragFacts?.mcpRetrievals).toEqual([ @@ -331,6 +340,7 @@ describe('KERN semantic substrate', () => { groundingCount: 1, evalCount: 1, evalCaseCount: 1, + answerContractCount: 1, mcpRetrievalCount: 1, compatibleMcpRetrievalCount: 1, provenanceRequired: true, From 3aa88134e40d02c61bbcef3683adb9f7b9eeba4d Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 01:33:41 +0200 Subject: [PATCH 29/46] test(core): add rag contract conformance fixtures --- packages/core/src/decompiler.ts | 5 + .../rag-answer-contracts/full-grounded.kern | 14 + 
.../multi-span-grounded.kern | 14 + .../rag-answer-contracts/unknown-chunk.kern | 12 + packages/core/tests/rag-conformance.test.ts | 270 ++++++++++++++++++ 5 files changed, 315 insertions(+) create mode 100644 packages/core/tests/fixtures/rag-answer-contracts/full-grounded.kern create mode 100644 packages/core/tests/fixtures/rag-answer-contracts/multi-span-grounded.kern create mode 100644 packages/core/tests/fixtures/rag-answer-contracts/unknown-chunk.kern create mode 100644 packages/core/tests/rag-conformance.test.ts diff --git a/packages/core/src/decompiler.ts b/packages/core/src/decompiler.ts index 2b9efccf..05f9afe2 100644 --- a/packages/core/src/decompiler.ts +++ b/packages/core/src/decompiler.ts @@ -64,6 +64,11 @@ export function decompile(root: IRNode): DecompileResult { } const props = node.props || {}; + if (node.type === 'document') { + for (const child of node.children || []) render(child, indent); + return; + } + // Canonical-grammar cases — emit re-parseable KERN. Other node types // still fall through to the debug-shape serializer below; make them // canonical in a follow-up PR. 
diff --git a/packages/core/tests/fixtures/rag-answer-contracts/full-grounded.kern b/packages/core/tests/fixtures/rag-answer-contracts/full-grounded.kern new file mode 100644 index 00000000..a1a07821 --- /dev/null +++ b/packages/core/tests/fixtures/rag-answer-contracts/full-grounded.kern @@ -0,0 +1,14 @@ +corpus name=Docs + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + chunking source=manuals strategy=semantic maxTokens=600 overlap=80 + +retriever name=DocsSearch corpus=Docs mode=hybrid topK=2 minScore=0.5 + +rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true + grounding requireCitations=true policy=strict maxContext=6000 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="refund policy" topK=2 minScore=0.5 chunkCount=1 sources="docs/refunds.md" + ragAssert kind=sourceGlob value="docs/refunds.md" required=true + ragAssert kind=citesRequired + ragAnswerContract name=RefundAnswer query="refund policy" answer="Refunds follow the refund policy." 
requireCitations=true minGroundingCoverage=1 + answerSpan start=0 end=33 chunks=refunds required=true diff --git a/packages/core/tests/fixtures/rag-answer-contracts/multi-span-grounded.kern b/packages/core/tests/fixtures/rag-answer-contracts/multi-span-grounded.kern new file mode 100644 index 00000000..7b4587ae --- /dev/null +++ b/packages/core/tests/fixtures/rag-answer-contracts/multi-span-grounded.kern @@ -0,0 +1,14 @@ +corpus name=Docs + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + +retriever name=DocsSearch corpus=Docs topK=2 + +rag name=AnswerDocs retriever=DocsSearch answer="grounded" citations=true + grounding requireCitations=true policy=strict + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="refund policy" chunkCount=2 sources="docs/refunds.md,docs/policies.md" + ragAssert kind=uniqueSourcesGte count=2 + ragAssert kind=citesRequired + ragAnswerContract name=MultiSpanAnswer query="refund policy" answer="Refunds follow policy. Shipping is separate." 
requireCitations=true minGroundingCoverage=1 + answerSpan start=0 end=22 chunks=refunds required=true + answerSpan start=23 end=44 chunks=shipping required=true diff --git a/packages/core/tests/fixtures/rag-answer-contracts/unknown-chunk.kern b/packages/core/tests/fixtures/rag-answer-contracts/unknown-chunk.kern new file mode 100644 index 00000000..8c3f9e29 --- /dev/null +++ b/packages/core/tests/fixtures/rag-answer-contracts/unknown-chunk.kern @@ -0,0 +1,12 @@ +corpus name=Docs + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + +retriever name=DocsSearch corpus=Docs topK=1 + +rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" + grounding policy=strict maxContext=6000 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="refund policy" chunkCount=1 + ragAssert kind=contains value="refund" required=true + ragAnswerContract name=MissingChunkAnswer query="refund policy" answer="Refunds follow the refund policy." 
minGroundingCoverage=1 + answerSpan start=0 end=33 chunks=missing required=true diff --git a/packages/core/tests/rag-conformance.test.ts b/packages/core/tests/rag-conformance.test.ts new file mode 100644 index 00000000..8cf83361 --- /dev/null +++ b/packages/core/tests/rag-conformance.test.ts @@ -0,0 +1,270 @@ +import { readFileSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { + collectRagSemanticFacts, + decompile, + evaluateRagEvalContract, + evaluateRagSemanticAnswerContract, + generateCoreNode, + parseDocumentWithDiagnostics, + type RagSemanticFacts, + type RetrieveResult, + validateRagSemantics, + validateSchema, + withRagRuntimeProvenance, +} from '../src/index.js'; +import type { IRNode } from '../src/types.js'; + +interface RagConformanceFixture { + readonly file: string; + readonly answerContractName: string; + readonly evalContractName: string; + readonly expectedAnswerPassed: boolean; + readonly expectedAnswerStatus: string; + readonly expectedEvalPassed: boolean; + readonly retrieval: RetrieveResult; +} + +const FIXTURE_DIR = resolve(dirname(fileURLToPath(import.meta.url)), 'fixtures/rag-answer-contracts'); + +const FIXTURES: readonly RagConformanceFixture[] = [ + { + file: 'full-grounded.kern', + answerContractName: 'RefundAnswer', + evalContractName: 'Faithfulness', + expectedAnswerPassed: true, + expectedAnswerStatus: 'grounded', + expectedEvalPassed: true, + retrieval: { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds follow the refund policy.', + score: 1, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md', locator: 'L1-L2' }, + }, + ], + }, + }, + { + file: 'multi-span-grounded.kern', + answerContractName: 'MultiSpanAnswer', + evalContractName: 'Faithfulness', + expectedAnswerPassed: true, + expectedAnswerStatus: 'grounded', + expectedEvalPassed: true, + retrieval: { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 
'Refunds follow policy.', + score: 0.95, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + { + id: 'shipping', + text: 'Shipping is separate.', + score: 0.9, + source: 'docs/policies.md', + citation: { uri: 'docs/policies.md' }, + }, + ], + }, + }, + { + file: 'unknown-chunk.kern', + answerContractName: 'MissingChunkAnswer', + evalContractName: 'Faithfulness', + expectedAnswerPassed: false, + expectedAnswerStatus: 'invalid', + expectedEvalPassed: true, + retrieval: { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'refund policy', + score: 1, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }, + }, +]; + +describe('RAG eval and answer contract conformance', () => { + for (const fixture of FIXTURES) { + test(`${fixture.file} agrees across semantic, decompile, codegen, and runtime views`, () => { + const source = readFixture(fixture.file); + const original = parseValidRagSource(source, fixture.file); + const decompiled = decompile(original).code; + expect(decompiled, `${fixture.file} decompiled answer contract`).toContain( + `ragAnswerContract name=${fixture.answerContractName}`, + ); + expect(decompiled, `${fixture.file} decompiled answer span`).toContain('answerSpan '); + expect(decompiled, `${fixture.file} decompiled eval contract`).toContain( + `ragEval name=${fixture.evalContractName}`, + ); + const reparsed = parseValidRagSource(decompiled, `${fixture.file}:decompiled`); + const originalFacts = collectRagSemanticFacts(original); + const decompiledFacts = collectRagSemanticFacts(reparsed); + + expect(normalizedRagFacts(decompiledFacts)).toEqual(normalizedRagFacts(originalFacts)); + expect(generateCoreNode(original)).toEqual([]); + expect(generateCoreNode(reparsed)).toEqual([]); + + const originalResult = evaluateFixtureAnswerContract(originalFacts, fixture); + const decompiledResult = evaluateFixtureAnswerContract(decompiledFacts, fixture); + 
expect(normalizedAnswerResult(decompiledResult)).toEqual(normalizedAnswerResult(originalResult)); + expect(originalResult.passed).toBe(fixture.expectedAnswerPassed); + expect(originalResult.status).toBe(fixture.expectedAnswerStatus); + + const originalEval = evaluateFixtureEval(originalFacts, fixture); + const decompiledEval = evaluateFixtureEval(decompiledFacts, fixture); + expect(normalizedEvalResult(decompiledEval)).toEqual(normalizedEvalResult(originalEval)); + expect(originalEval.passed).toBe(fixture.expectedEvalPassed); + }); + } +}); + +function readFixture(file: string): string { + const path = resolve(FIXTURE_DIR, file); + try { + return readFileSync(path, 'utf-8'); + } catch (error) { + throw new Error(`failed to read RAG conformance fixture ${path}`, { cause: error }); + } +} + +function parseValidRagSource(source: string, label: string): IRNode { + const parsed = parseDocumentWithDiagnostics(source); + const parseErrors = parsed.diagnostics.filter((diagnostic) => diagnostic.severity === 'error'); + expect(parseErrors, `${label} parse errors`).toEqual([]); + expect(parsed.diagnostics.filter((diagnostic) => diagnostic.code === 'UNKNOWN_NODE_TYPE')).toEqual([]); + expect(validateSchema(parsed.root), `${label} schema violations`).toEqual([]); + expect(validateRagSemantics(parsed.root), `${label} RAG semantic violations`).toEqual([]); + return parsed.root; +} + +function evaluateFixtureAnswerContract(facts: RagSemanticFacts, fixture: RagConformanceFixture) { + const answerContract = facts.pipelines + .flatMap((pipeline) => pipeline.answerContracts) + .find((contract) => contract.name === fixture.answerContractName); + if (!answerContract) throw new Error(`missing answer contract ${fixture.answerContractName}`); + const retrieval = withRagRuntimeProvenance(fixture.retrieval, { + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: answerContract.ragName, + citationsRequired: answerContract.requireCitations, + startedAtMs: 100, + durationMs: 5, + 
}); + return evaluateRagSemanticAnswerContract(answerContract, retrieval); +} + +function evaluateFixtureEval(facts: RagSemanticFacts, fixture: RagConformanceFixture) { + const evaluation = facts.pipelines + .flatMap((pipeline) => pipeline.evals) + .find((contract) => contract.name === fixture.evalContractName); + if (!evaluation) throw new Error(`missing eval ${fixture.evalContractName} in ${fixture.file}`); + return evaluateRagEvalContract(evaluation, () => fixture.retrieval, { now: fixedNow() }); +} + +function fixedNow(): () => number { + let now = 1000; + return () => { + now += 5; + return now; + }; +} + +function normalizedRagFacts(facts: RagSemanticFacts) { + return { + corpora: facts.corpora.map((corpus) => ({ + name: corpus.name, + sources: corpus.sources.map((source) => source.name), + chunking: corpus.chunking.length, + })), + retrievers: facts.retrievers.map((retriever) => ({ + name: retriever.name, + corpusName: retriever.corpusName, + topK: retriever.topK, + minScore: retriever.minScore, + })), + pipelines: facts.pipelines.map((pipeline) => ({ + name: pipeline.name, + retrieverName: pipeline.retrieverName, + citations: pipeline.citations, + groundingCount: pipeline.groundings.length, + evals: pipeline.evals.map((evaluation) => ({ + name: evaluation.name, + caseCount: evaluation.caseCount, + assertCount: evaluation.assertCount, + })), + answerContracts: pipeline.answerContracts.map((contract) => ({ + name: contract.name, + ragName: contract.ragName, + query: contract.query, + answer: contract.answer, + requireCitations: contract.requireCitations, + minGroundingCoverage: contract.minGroundingCoverage, + spans: contract.spans.map((span) => ({ + start: span.start, + end: span.end, + chunkIds: [...span.chunkIds], + required: span.required, + })), + })), + })), + }; +} + +function normalizedAnswerResult(result: ReturnType) { + return { + id: result.id, + ragName: result.ragName, + query: result.query, + passed: result.passed, + status: result.status, + 
groundingCoverage: result.groundingCoverage, + groundedChars: result.groundedChars, + answerChars: result.answerChars, + citedChunkIds: [...result.citedChunkIds], + sources: [...result.sources], + diagnostics: result.diagnostics.map((diagnostic) => ({ + code: diagnostic.code, + spanIndex: diagnostic.spanIndex, + chunkId: diagnostic.chunkId, + })), + }; +} + +function normalizedEvalResult(result: ReturnType) { + return { + passed: result.passed, + ragName: result.ragName, + evalName: result.evalName, + caseCount: result.caseCount, + passedCaseCount: result.passedCaseCount, + assertionCount: result.assertionCount, + passedAssertionCount: result.passedAssertionCount, + cases: result.cases.map((evaluationCase) => ({ + name: evaluationCase.name, + query: evaluationCase.query, + passed: evaluationCase.passed, + retrieveOptions: evaluationCase.retrieveOptions, + chunks: evaluationCase.chunks.map((chunk) => ({ id: chunk.id, source: chunk.source })), + assertions: evaluationCase.assertions.map((assertion) => ({ + kind: assertion.kind, + passed: assertion.passed, + code: assertion.code, + })), + })), + }; +} From 7d287b7ea2b409708cd9cedfedfc83c1d43fc29b Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 02:15:30 +0200 Subject: [PATCH 30/46] feat(core): add declared shape validators --- .../core/src/core-runtime/shape-validator.ts | 633 ++++++++++++++++++ packages/core/src/index.ts | 10 + packages/core/src/semantic-substrate.ts | 4 + .../core/tests/core-shape-runtime.test.ts | 291 ++++++++ .../core/tests/semantic-substrate.test.ts | 40 ++ 5 files changed, 978 insertions(+) create mode 100644 packages/core/src/core-runtime/shape-validator.ts create mode 100644 packages/core/tests/core-shape-runtime.test.ts diff --git a/packages/core/src/core-runtime/shape-validator.ts b/packages/core/src/core-runtime/shape-validator.ts new file mode 100644 index 00000000..670df190 --- /dev/null +++ b/packages/core/src/core-runtime/shape-validator.ts @@ -0,0 +1,633 @@ +import type { 
IRNode } from '../types.js'; +import type { KernInstanceValue, KernValue } from './index.js'; + +export type CoreShapeDiagnosticCode = + | 'shape-extends-cycle' + | 'shape-extends-unknown' + | 'shape-field-conflict' + | 'shape-field-duplicate' + | 'shape-field-missing' + | 'shape-field-type' + | 'shape-generic-unsupported' + | 'shape-indexer-key-unsupported' + | 'shape-interface-not-found' + | 'shape-object-expected' + | 'shape-type-reference-unknown' + | 'shape-type-unsupported' + | 'shape-unexpected-field' + | 'shape-value-cycle'; + +export interface CoreShapeDiagnostic { + readonly code: CoreShapeDiagnosticCode; + readonly message: string; + readonly interfaceName?: string; + readonly fieldName?: string; + readonly path?: string; + readonly expected?: string; + readonly actual?: string; +} + +export interface CoreShapeFieldFact { + readonly name: string; + readonly type?: string; + readonly optional: boolean; + readonly inheritedFrom?: string; +} + +export interface CoreShapeIndexerFact { + readonly keyName: string; + readonly keyType: string; + readonly type: string; + readonly readonly: boolean; +} + +export interface CoreShapeInterfaceFact { + readonly name: string; + readonly extends: readonly string[]; + readonly fields: readonly CoreShapeFieldFact[]; + readonly indexers: readonly CoreShapeIndexerFact[]; + readonly generic: boolean; + readonly validatorAvailable: boolean; + readonly unsupportedReasons: readonly string[]; +} + +export interface CoreShapeFacts { + readonly interfaces: readonly CoreShapeInterfaceFact[]; + readonly extendsEdges: readonly { + readonly from: string; + readonly to: string; + readonly resolved: boolean; + }[]; + readonly validationDiagnostics: readonly CoreShapeDiagnostic[]; +} + +export interface CoreShapeValidationResult { + readonly passed: boolean; + readonly interfaceName: string; + readonly diagnostics: readonly CoreShapeDiagnostic[]; +} + +interface ShapeInterface { + readonly name: string; + readonly extendsNames: readonly 
string[]; + readonly fields: readonly ShapeField[]; + readonly indexers: readonly ShapeIndexer[]; + readonly generic: boolean; +} + +interface ShapeField { + readonly name: string; + readonly type?: string; + readonly optional: boolean; + readonly inheritedFrom?: string; +} + +interface ShapeIndexer { + readonly keyName: string; + readonly keyType: string; + readonly type: string; + readonly readonly: boolean; +} + +interface ShapeRegistry { + readonly interfaces: ReadonlyMap; + readonly diagnostics: readonly CoreShapeDiagnostic[]; +} + +interface ResolvedShape { + readonly fields: readonly ShapeField[]; + readonly indexers: readonly ShapeIndexer[]; + readonly diagnostics: readonly CoreShapeDiagnostic[]; +} + +/** + * Validate a runtime record or class instance against a declared interface shape. + * V1 supports primitives, arrays, nested interfaces, extends, and indexers. + * Class instances are checked against initialized fields only; getters and + * methods are not invoked during validation. 
+ */ +export function validateCoreShape( + value: KernValue, + interfaceName: string, + rootOrNodes: IRNode | readonly IRNode[], +): CoreShapeValidationResult { + const registry = collectShapeRegistry(rootOrNodes); + const diagnostics: CoreShapeDiagnostic[] = []; + const shape = registry.interfaces.get(interfaceName); + if (!shape) { + diagnostics.push({ + code: 'shape-interface-not-found', + message: `KERN core shape '${interfaceName}' is not declared.`, + interfaceName, + }); + return { passed: false, interfaceName, diagnostics }; + } + diagnostics.push(...validateAgainstInterface(value, shape, registry, interfaceName, [], new WeakMap())); + return { passed: diagnostics.length === 0, interfaceName, diagnostics }; +} + +export function assertCoreShape( + value: KernValue, + interfaceName: string, + rootOrNodes: IRNode | readonly IRNode[], +): void { + const result = validateCoreShape(value, interfaceName, rootOrNodes); + if (result.passed) return; + throw new Error( + `KERN core shape validation failed for ${interfaceName}:\n${result.diagnostics + .map((diagnostic) => diagnostic.message) + .join('\n')}`, + ); +} + +/** + * Collect review/substrate facts for declared interface shapes without + * changing runtime behavior. The facts include effective inherited fields and + * indexers plus diagnostics for unsupported v1 contracts. 
+ */ +export function collectCoreShapeFacts(rootOrNodes: IRNode | readonly IRNode[]): CoreShapeFacts { + const registry = collectShapeRegistry(rootOrNodes); + const resolvedByName = new Map(); + const resolvedShape = (shape: ShapeInterface): ResolvedShape => { + const cached = resolvedByName.get(shape.name); + if (cached) return cached; + const resolved = resolveShape(shape, registry, []); + resolvedByName.set(shape.name, resolved); + return resolved; + }; + const interfaces = Array.from(registry.interfaces.values()).map((shape) => { + const resolved = resolvedShape(shape); + const unsupportedReasons = shapeUnsupportedReasons(shape, resolved, registry); + return { + name: shape.name, + extends: [...shape.extendsNames], + fields: resolved.fields.map((field) => ({ ...field })), + indexers: resolved.indexers.map((indexer) => ({ ...indexer })), + generic: shape.generic, + validatorAvailable: unsupportedReasons.length === 0, + unsupportedReasons, + }; + }); + return { + interfaces, + extendsEdges: Array.from(registry.interfaces.values()).flatMap((shape) => + shape.extendsNames.map((base) => ({ + from: shape.name, + to: base, + resolved: registry.interfaces.has(base) && !extendsEdgeParticipatesInCycle(shape.name, base, registry), + })), + ), + validationDiagnostics: dedupeDiagnostics([ + ...registry.diagnostics, + ...Array.from(registry.interfaces.values()).flatMap((shape) => resolvedShape(shape).diagnostics), + ]), + }; +} + +function collectShapeRegistry(rootOrNodes: IRNode | readonly IRNode[]): ShapeRegistry { + const diagnostics: CoreShapeDiagnostic[] = []; + const interfaces = new Map(); + for (const node of interfaceNodes(rootOrNodes)) { + if (node.type !== 'interface') continue; + const name = stringProp(node.props?.name); + if (!name) continue; + const shape: ShapeInterface = { + name, + extendsNames: splitExtends(node.props?.extends), + fields: (node.children ?? 
[]).filter((child) => child.type === 'field').map((field) => shapeField(field)), + indexers: (node.children ?? []) + .filter((child) => child.type === 'indexer') + .map((indexer) => shapeIndexer(indexer)), + generic: !!stringProp(node.props?.generics), + }; + if (shape.generic) { + diagnostics.push({ + code: 'shape-generic-unsupported', + message: `KERN core shape '${shape.name}' uses generics, which are not executable shape contracts in v1.`, + interfaceName: shape.name, + }); + } + interfaces.set(name, shape); + } + return { interfaces, diagnostics }; +} + +function validateAgainstInterface( + value: KernValue, + shape: ShapeInterface, + registry: ShapeRegistry, + path: string, + stack: readonly string[], + visited: WeakMap, Set>, +): CoreShapeDiagnostic[] { + const diagnostics: CoreShapeDiagnostic[] = []; + if (shape.generic) { + diagnostics.push({ + code: 'shape-generic-unsupported', + message: `KERN core shape '${shape.name}' uses generics, which are not executable shape contracts in v1.`, + interfaceName: shape.name, + path, + }); + } + const object = recordEntries(value); + if (!object) { + return [ + { + code: 'shape-object-expected', + message: `KERN core shape '${shape.name}' expected a record or instance at ${path}.`, + interfaceName: shape.name, + path, + expected: shape.name, + actual: value.kind, + }, + ]; + } + const activeForValue = visited.get(object) ?? 
new Set(); + if (activeForValue.has(shape.name)) { + return [ + { + code: 'shape-value-cycle', + message: `KERN core shape '${shape.name}' encountered a recursive value at ${path}.`, + interfaceName: shape.name, + path, + }, + ]; + } + activeForValue.add(shape.name); + visited.set(object, activeForValue); + const resolved = resolveShape(shape, registry, stack); + diagnostics.push(...resolved.diagnostics); + + try { + const declaredFieldNames = new Set(resolved.fields.map((field) => field.name)); + for (const field of resolved.fields) { + if (!Object.hasOwn(object, field.name)) { + if (!field.optional) { + diagnostics.push({ + code: 'shape-field-missing', + message: `KERN core shape '${shape.name}' missing required field ${fieldPath(path, field.name)}.`, + interfaceName: shape.name, + fieldName: field.name, + path: fieldPath(path, field.name), + expected: field.type, + }); + } + continue; + } + diagnostics.push( + ...validateType( + object[field.name] ?? kUndefinedValue(), + field.type, + registry, + fieldPath(path, field.name), + stack, + visited, + ), + ); + } + + for (const [key, entry] of Object.entries(object)) { + if (declaredFieldNames.has(key)) continue; + const matchingIndexers = resolved.indexers.filter((candidate) => keyMatchesIndexer(key, candidate)); + if (matchingIndexers.length === 0) { + diagnostics.push({ + code: 'shape-unexpected-field', + message: `KERN core shape '${shape.name}' does not declare field ${fieldPath(path, key)}.`, + interfaceName: shape.name, + fieldName: key, + path: fieldPath(path, key), + }); + continue; + } + for (const indexer of matchingIndexers) { + diagnostics.push(...validateType(entry, indexer.type, registry, fieldPath(path, key), stack, visited)); + } + } + } finally { + activeForValue.delete(shape.name); + if (activeForValue.size === 0) visited.delete(object); + } + + return diagnostics; +} + +function validateType( + value: KernValue, + rawType: string | undefined, + registry: ShapeRegistry, + path: string, + stack: 
readonly string[], + visited: WeakMap, Set>, +): CoreShapeDiagnostic[] { + const type = normalizeType(rawType); + if (!type || type === 'any' || type === 'unknown') return []; + if (type.endsWith('[]')) return validateArrayType(value, type.slice(0, -2), registry, path, stack, visited); + const arrayMatch = /^Array<(.+)>$/.exec(type); + if (arrayMatch) return validateArrayType(value, arrayMatch[1] ?? '', registry, path, stack, visited); + if (isPrimitiveType(type)) { + if (value.kind === type) return []; + return [ + { + code: 'shape-field-type', + message: `KERN core shape expected ${path} to be ${type}, got ${value.kind}.`, + path, + expected: type, + actual: value.kind, + }, + ]; + } + if (isSimpleIdentifier(type)) { + const nested = registry.interfaces.get(type); + if (!nested) { + return [ + { + code: 'shape-type-reference-unknown', + message: `KERN core shape field ${path} references unknown interface '${type}'.`, + path, + expected: type, + }, + ]; + } + return validateAgainstInterface(value, nested, registry, path, stack, visited); + } + return [ + { + code: 'shape-type-unsupported', + message: `KERN core shape field ${path} uses unsupported v1 type '${type}'.`, + path, + expected: type, + actual: value.kind, + }, + ]; +} + +function validateArrayType( + value: KernValue, + itemType: string, + registry: ShapeRegistry, + path: string, + stack: readonly string[], + visited: WeakMap, Set>, +): CoreShapeDiagnostic[] { + if (value.kind !== 'array') { + return [ + { + code: 'shape-field-type', + message: `KERN core shape expected ${path} to be array, got ${value.kind}.`, + path, + expected: `${itemType}[]`, + actual: value.kind, + }, + ]; + } + return value.items.flatMap((item, index) => + validateType(item, itemType, registry, `${path}[${index}]`, stack, visited), + ); +} + +function resolveShape(shape: ShapeInterface, registry: ShapeRegistry, stack: readonly string[]): ResolvedShape { + const diagnostics: CoreShapeDiagnostic[] = []; + if 
(stack.includes(shape.name)) { + return { + fields: [], + indexers: [], + diagnostics: [ + { + code: 'shape-extends-cycle', + message: `KERN core shape inheritance cycle: ${[...stack, shape.name].join(' -> ')}.`, + interfaceName: shape.name, + }, + ], + }; + } + const fields = new Map(); + const indexers: ShapeIndexer[] = []; + for (const baseName of shape.extendsNames) { + const base = registry.interfaces.get(baseName); + if (!base) { + diagnostics.push({ + code: 'shape-extends-unknown', + message: `KERN core shape '${shape.name}' extends unknown interface '${baseName}'.`, + interfaceName: shape.name, + expected: baseName, + }); + continue; + } + const resolved = resolveShape(base, registry, [...stack, shape.name]); + diagnostics.push(...resolved.diagnostics); + for (const field of resolved.fields) { + const inheritedField = { ...field, inheritedFrom: field.inheritedFrom ?? base.name }; + const existing = fields.get(field.name); + if (existing && !sameShapeField(existing, inheritedField)) { + diagnostics.push({ + code: 'shape-field-conflict', + message: `KERN core shape '${shape.name}' has conflicting inherited field '${field.name}'.`, + interfaceName: shape.name, + fieldName: field.name, + expected: existing.type, + actual: field.type, + }); + continue; + } + fields.set(field.name, inheritedField); + } + indexers.push(...resolved.indexers); + } + const ownFieldNames = new Set(); + for (const field of shape.fields) { + if (ownFieldNames.has(field.name)) { + diagnostics.push({ + code: 'shape-field-duplicate', + message: `KERN core shape '${shape.name}' declares duplicate field '${field.name}'.`, + interfaceName: shape.name, + fieldName: field.name, + expected: field.type, + actual: field.type, + }); + continue; + } + ownFieldNames.add(field.name); + const existing = fields.get(field.name); + if (existing && (existing.type !== field.type || existing.optional !== field.optional)) { + diagnostics.push({ + code: 'shape-field-conflict', + message: `KERN core shape 
'${shape.name}' conflicts with inherited field '${field.name}'.`, + interfaceName: shape.name, + fieldName: field.name, + expected: existing.type, + actual: field.type, + }); + } + fields.set(field.name, field); + } + indexers.push(...shape.indexers); + diagnostics.push(...indexers.flatMap((indexer) => validateIndexerShape(shape.name, indexer))); + return { fields: Array.from(fields.values()), indexers, diagnostics }; +} + +function validateIndexerShape(interfaceName: string, indexer: ShapeIndexer): CoreShapeDiagnostic[] { + if (indexer.keyType === 'string' || indexer.keyType === 'number') return []; + return [ + { + code: 'shape-indexer-key-unsupported', + message: `KERN core shape '${interfaceName}' indexer key type '${indexer.keyType}' is not supported in v1.`, + interfaceName, + expected: 'string|number', + actual: indexer.keyType, + }, + ]; +} + +function shapeUnsupportedReasons( + shape: ShapeInterface, + resolved: ResolvedShape, + registry: ShapeRegistry, +): readonly string[] { + const reasons = new Set(); + if (shape.generic) reasons.add('generic-interface'); + for (const diagnostic of resolved.diagnostics) reasons.add(diagnostic.code); + for (const field of resolved.fields) { + for (const issue of unsupportedTypeReasons(field.type, registry)) reasons.add(issue); + } + for (const indexer of resolved.indexers) { + if (indexer.keyType !== 'string' && indexer.keyType !== 'number') reasons.add('shape-indexer-key-unsupported'); + for (const issue of unsupportedTypeReasons(indexer.type, registry)) reasons.add(issue); + } + return [...reasons].sort(); +} + +function unsupportedTypeReasons(rawType: string | undefined, registry: ShapeRegistry): string[] { + const type = normalizeType(rawType); + if (!type || type === 'any' || type === 'unknown' || isPrimitiveType(type)) return []; + if (type.endsWith('[]')) return unsupportedTypeReasons(type.slice(0, -2), registry); + const arrayMatch = /^Array<(.+)>$/.exec(type); + if (arrayMatch) return 
unsupportedTypeReasons(arrayMatch[1], registry); + if (isSimpleIdentifier(type)) return registry.interfaces.has(type) ? [] : [`unknown-type:${type}`]; + return [`unsupported-type:${type}`]; +} + +function interfaceNodes(rootOrNodes: IRNode | readonly IRNode[]): readonly IRNode[] { + const found: IRNode[] = []; + for (const node of topLevelNodes(rootOrNodes)) visitInterfaceNodes(node, found); + return found; +} + +function topLevelNodes(rootOrNodes: IRNode | readonly IRNode[]): readonly IRNode[] { + return isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]; +} + +function visitInterfaceNodes(node: IRNode, found: IRNode[]): void { + if (node.type === 'interface') found.push(node); + for (const child of node.children ?? []) visitInterfaceNodes(child, found); +} + +function isIRNodeArray(value: IRNode | readonly IRNode[]): value is readonly IRNode[] { + return Array.isArray(value); +} + +function shapeField(node: IRNode): ShapeField { + return { + name: stringProp(node.props?.name) ?? '', + type: stringProp(node.props?.type), + optional: trueFlag(node.props?.optional), + }; +} + +function shapeIndexer(node: IRNode): ShapeIndexer { + return { + keyName: stringProp(node.props?.keyName) ?? 'key', + keyType: normalizeType(stringProp(node.props?.keyType)) ?? '', + type: normalizeType(stringProp(node.props?.type)) ?? 
'', + readonly: trueFlag(node.props?.readonly), + }; +} + +function recordEntries(value: KernValue): Record | undefined { + if (value.kind === 'record') return value.entries; + if (value.kind === 'instance') return instanceEntries(value); + return undefined; +} + +function instanceEntries(value: KernInstanceValue): Record { + return value.fields; +} + +function sameShapeField(left: ShapeField, right: ShapeField): boolean { + return left.type === right.type && left.optional === right.optional; +} + +function keyMatchesIndexer(key: string, indexer: ShapeIndexer): boolean { + if (indexer.keyType === 'string') return true; + return indexer.keyType === 'number' && /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(key); +} + +function fieldPath(path: string, field: string): string { + return `${path}.${field}`; +} + +function isPrimitiveType(type: string): type is KernValue['kind'] { + return type === 'string' || type === 'number' || type === 'boolean' || type === 'null' || type === 'undefined'; +} + +function isSimpleIdentifier(type: string): boolean { + return /^[A-Za-z_$][\w$]*$/.test(type); +} + +function normalizeType(value: string | undefined): string | undefined { + const trimmed = value?.trim(); + return trimmed ? 
trimmed : undefined; +} + +function splitExtends(value: unknown): string[] { + const raw = stringProp(value); + if (!raw) return []; + return raw + .split(',') + .map((part) => part.trim()) + .filter(Boolean); +} + +function extendsEdgeParticipatesInCycle(from: string, to: string, registry: ShapeRegistry): boolean { + return reachesInterface(to, from, registry, new Set()); +} + +function reachesInterface(current: string, target: string, registry: ShapeRegistry, seen: Set): boolean { + if (current === target) return true; + if (seen.has(current)) return false; + seen.add(current); + const shape = registry.interfaces.get(current); + if (!shape) return false; + return shape.extendsNames.some((base) => reachesInterface(base, target, registry, seen)); +} + +function dedupeDiagnostics(diagnostics: readonly CoreShapeDiagnostic[]): CoreShapeDiagnostic[] { + const seen = new Set(); + const unique: CoreShapeDiagnostic[] = []; + for (const diagnostic of diagnostics) { + const key = [ + diagnostic.code, + diagnostic.interfaceName, + diagnostic.fieldName, + diagnostic.path, + diagnostic.expected, + diagnostic.actual, + diagnostic.message, + ].join('\0'); + if (seen.has(key)) continue; + seen.add(key); + unique.push(diagnostic); + } + return unique; +} + +function stringProp(value: unknown): string | undefined { + return typeof value === 'string' ? 
value : undefined; +} + +function trueFlag(value: unknown): boolean { + return value === true || value === 'true'; +} + +function kUndefinedValue(): KernValue { + return { kind: 'undefined' }; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 177e4d7d..c200561e 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -205,6 +205,16 @@ export { runCoreRuntime, toHostValue, } from './core-runtime/index.js'; +export type { + CoreShapeDiagnostic, + CoreShapeDiagnosticCode, + CoreShapeFacts, + CoreShapeFieldFact, + CoreShapeIndexerFact, + CoreShapeInterfaceFact, + CoreShapeValidationResult, +} from './core-runtime/shape-validator.js'; +export { assertCoreShape, collectCoreShapeFacts, validateCoreShape } from './core-runtime/shape-validator.js'; export type { CoverageGap } from './coverage-gap.js'; // Coverage gap emitter (v3) export { collectCoverageGaps, readCoverageGaps, writeCoverageGaps } from './coverage-gap.js'; diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index 44892036..dd0a9e6f 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -7,6 +7,7 @@ import { type PortableLogicTarget, } from './codegen/portable-logic-primitives.js'; import { CORE_TYPE_CONTRACTS, type CoreOperationReturns, contractToGraphEdges } from './core-contracts/index.js'; +import { type CoreShapeFacts, collectCoreShapeFacts } from './core-runtime/shape-validator.js'; import type { NodeContract } from './ir/semantics/index.js'; import { snapshotRegistry } from './ir/semantics/index.js'; import { @@ -122,6 +123,7 @@ export interface KernSemanticSubstrate { readonly ragFacts?: RagSemanticFacts; readonly ragValidationSummary?: KernSemanticValidationSummary; readonly ragAnswerReviewFacts?: readonly KernSemanticRagAnswerReviewFact[]; + readonly coreShapeFacts?: CoreShapeFacts; } export interface BuildKernSemanticSubstrateOptions { @@ -131,6 +133,7 @@ export 
interface BuildKernSemanticSubstrateOptions { readonly includeClassValidationSummary?: boolean; readonly documentRag?: IRNode | readonly IRNode[]; readonly includeRagValidationSummary?: boolean; + readonly documentShapes?: IRNode | readonly IRNode[]; } export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { @@ -188,6 +191,7 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp ...(options.documentRag && options.includeRagValidationSummary ? { ragValidationSummary: ragValidationSummary(options.documentRag) } : {}), + ...(options.documentShapes ? { coreShapeFacts: collectCoreShapeFacts(options.documentShapes) } : {}), }; } diff --git a/packages/core/tests/core-shape-runtime.test.ts b/packages/core/tests/core-shape-runtime.test.ts new file mode 100644 index 00000000..1d2db4ac --- /dev/null +++ b/packages/core/tests/core-shape-runtime.test.ts @@ -0,0 +1,291 @@ +import { + assertCoreShape, + collectCoreShapeFacts, + createCoreRuntimeEnv, + evalCoreExpression, + fromHostValue, + type KernValue, + runCoreRuntime, + toHostValue, + validateCoreShape, +} from '../src/index.js'; +import { parse } from '../src/parser.js'; +import type { IRNode } from '../src/types.js'; + +function codes(result: ReturnType): string[] { + return result.diagnostics.map((diagnostic) => diagnostic.code); +} + +function classNodes(root: IRNode): IRNode[] { + return (root.children ?? 
[]).filter((child) => child.type === 'class'); +} + +function cyclicRecord(): KernValue { + const entries = Object.create(null) as Record; + const value = { kind: 'record' as const, entries }; + entries.id = fromHostValue('n1'); + entries.next = value; + return value; +} + +describe('KERN core declared shape validators', () => { + test('validates required, optional, array, nested, and inherited fields', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Profile', + ' field name=age type=number optional=true', + 'interface name=User extends=Entity', + ' field name=name type=string', + ' field name=active type=boolean', + ' field name=tags type="string[]"', + ' field name=profile type=Profile optional=true', + ].join('\n'), + ); + + const result = validateCoreShape( + fromHostValue({ id: 'u1', name: 'Ada', active: true, tags: ['admin'], profile: {} }), + 'User', + root, + ); + + expect(result).toEqual({ passed: true, interfaceName: 'User', diagnostics: [] }); + expect(() => + assertCoreShape(fromHostValue({ id: 'u1', name: 'Ada', active: true, tags: [] }), 'User', root), + ).not.toThrow(); + }); + + test('reports missing required and wrong primitive fields with stable paths', () => { + const root = parse( + ['interface name=User', ' field name=id type=string', ' field name=count type=number'].join('\n'), + ); + + const result = validateCoreShape(fromHostValue({ id: 7 }), 'User', root); + + expect(result.passed).toBe(false); + expect(result.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + code: 'shape-field-type', + path: 'User.id', + expected: 'string', + actual: 'number', + }), + expect.objectContaining({ + code: 'shape-field-missing', + path: 'User.count', + expected: 'number', + }), + ]), + ); + }); + + test('keeps runtime records open unless explicit shape validation is requested', () => { + const runtimeResult = runCoreRuntime({ + type: 'handler', + props: { lang: 'kern' }, 
+ children: [ + { type: 'let', props: { name: 'record', value: '{ id: "u1" }' } }, + { type: 'assign', props: { target: 'record.extra', value: '2' } }, + { type: 'return', props: { value: 'record.extra' } }, + ], + }); + expect(toHostValue(runtimeResult.completion.value)).toBe(2); + + const root = parse('interface name=User\n field name=id type=string'); + const explicit = validateCoreShape(fromHostValue({ id: 'u1', extra: 2 }), 'User', root); + expect(codes(explicit)).toContain('shape-unexpected-field'); + }); + + test('allows explicit extra fields only through compatible indexers', () => { + const root = parse( + ['interface name=Scores', ' field name=id type=string', ' indexer keyType=string type=number'].join('\n'), + ); + + expect(validateCoreShape(fromHostValue({ id: 'u1', math: 10 }), 'Scores', root).passed).toBe(true); + + const result = validateCoreShape(fromHostValue({ id: 'u1', math: 'A' }), 'Scores', root); + expect(result.passed).toBe(false); + expect(result.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + code: 'shape-field-type', + path: 'Scores.math', + expected: 'number', + actual: 'string', + }), + ]), + ); + }); + + test('matches number indexers against numeric record keys', () => { + const root = parse('interface name=NumericMap\n indexer keyType=number type=string'); + + expect( + validateCoreShape(fromHostValue({ '-1': 'left', '1.5': 'half', '2e3': 'large' }), 'NumericMap', root).passed, + ).toBe(true); + expect(codes(validateCoreShape(fromHostValue({ label: 'not numeric' }), 'NumericMap', root))).toContain( + 'shape-unexpected-field', + ); + }); + + test('validates numeric keys against both string and number indexers', () => { + const root = parse( + ['interface name=DualMap', ' indexer keyType=string type=unknown', ' indexer keyType=number type=number'].join( + '\n', + ), + ); + + expect(validateCoreShape(fromHostValue({ label: 'free-form', 1: 7 }), 'DualMap', root).passed).toBe(true); + 
expect(codes(validateCoreShape(fromHostValue({ 1: 'bad' }), 'DualMap', root))).toContain('shape-field-type'); + }); + + test('reports inherited field conflicts and unknown type references', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=User extends=Entity', + ' field name=id type=number', + ' field name=profile type=MissingProfile', + ].join('\n'), + ); + + const result = validateCoreShape(fromHostValue({ id: 1, profile: {} }), 'User', root); + + expect(result.passed).toBe(false); + expect(codes(result)).toEqual(expect.arrayContaining(['shape-field-conflict', 'shape-type-reference-unknown'])); + }); + + test('reports recursive values instead of recursing through self-referential shape fields', () => { + const root = parse( + ['interface name=Node', ' field name=id type=string', ' field name=next type=Node optional=true'].join('\n'), + ); + + const result = validateCoreShape(cyclicRecord(), 'Node', root); + + expect(result.passed).toBe(false); + expect(result.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + code: 'shape-value-cycle', + path: 'Node.next', + interfaceName: 'Node', + }), + ]), + ); + }); + + test('reports unsupported generic and complex type contracts instead of silently passing', () => { + const root = parse( + [ + 'interface name=Box generics=""', + ' field name=value type=T', + 'interface name=MaybeName', + ' field name=name type="string | null"', + ].join('\n'), + ); + + expect(codes(validateCoreShape(fromHostValue({ value: 'x' }), 'Box', root))).toContain('shape-generic-unsupported'); + const maybeNameCodes = codes(validateCoreShape(fromHostValue({ name: 'Ada' }), 'MaybeName', root)); + expect(maybeNameCodes).toContain('shape-type-unsupported'); + expect(maybeNameCodes).not.toContain('shape-generic-unsupported'); + }); + + test('accepts field-backed class instances through declared shape validation', () => { + const root = parse( + [ + 'interface 
name=UserLike', + ' field name=id type=string', + 'class name=User', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(classNodes(root), env); + + expect(validateCoreShape(evalCoreExpression('new User()', env), 'UserLike', root).passed).toBe(true); + }); + + test('exports shape facts for review and guard consumers', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=User extends=Entity', + ' field name=name type=string optional=true', + ' indexer keyType=string type=unknown', + ].join('\n'), + ); + + expect(collectCoreShapeFacts(root)).toEqual( + expect.objectContaining({ + extendsEdges: [{ from: 'User', to: 'Entity', resolved: true }], + validationDiagnostics: [], + interfaces: expect.arrayContaining([ + expect.objectContaining({ + name: 'User', + extends: ['Entity'], + validatorAvailable: true, + fields: expect.arrayContaining([ + expect.objectContaining({ name: 'id', type: 'string', inheritedFrom: 'Entity' }), + expect.objectContaining({ name: 'name', type: 'string', optional: true }), + ]), + }), + ]), + }), + ); + }); + + test('collects nested interface declarations and invalid graph diagnostics into facts', () => { + const root = parse( + [ + 'module name=Domain', + ' interface name=Nested', + ' field name=id type=string', + 'interface name=Left', + ' field name=id type=string', + 'interface name=Right', + ' field name=id type=number', + 'interface name=Joined extends=Left,Right', + 'interface name=Broken extends=Missing', + ' indexer keyType=symbol type=string', + 'interface name=Indexed', + ' indexer keyType=string type=unknown', + 'interface name=IndexedChild extends=Indexed', + 'interface name=Duplicate', + ' field name=id type=string', + ' field name=id type=string', + ].join('\n'), + ); + + const facts = collectCoreShapeFacts(root); + + expect(facts.interfaces.map((shape) => shape.name)).toEqual( + 
expect.arrayContaining(['Nested', 'Left', 'Right', 'Joined', 'Broken', 'Indexed', 'IndexedChild', 'Duplicate']), + ); + expect(facts.extendsEdges).toEqual( + expect.arrayContaining([ + { from: 'Joined', to: 'Left', resolved: true }, + { from: 'Joined', to: 'Right', resolved: true }, + { from: 'Broken', to: 'Missing', resolved: false }, + { from: 'IndexedChild', to: 'Indexed', resolved: true }, + ]), + ); + expect(facts.validationDiagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'shape-field-conflict', interfaceName: 'Joined' }), + expect.objectContaining({ code: 'shape-extends-unknown', interfaceName: 'Broken' }), + expect.objectContaining({ code: 'shape-indexer-key-unsupported', interfaceName: 'Broken' }), + expect.objectContaining({ code: 'shape-field-duplicate', interfaceName: 'Duplicate' }), + ]), + ); + expect(facts.interfaces.find((shape) => shape.name === 'Broken')?.unsupportedReasons).toEqual( + expect.arrayContaining(['shape-extends-unknown', 'shape-indexer-key-unsupported']), + ); + expect(facts.interfaces.find((shape) => shape.name === 'IndexedChild')?.indexers).toEqual([ + expect.objectContaining({ keyType: 'string', type: 'unknown' }), + ]); + }); +}); diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 4850943c..c35929ff 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -66,6 +66,7 @@ describe('KERN semantic substrate', () => { expect(Object.hasOwn(substrate, 'ragFacts')).toBe(false); expect(Object.hasOwn(substrate, 'ragValidationSummary')).toBe(false); expect(Object.hasOwn(substrate, 'ragAnswerReviewFacts')).toBe(false); + expect(Object.hasOwn(substrate, 'coreShapeFacts')).toBe(false); }); test('exports document class member inheritance and override facts when requested', () => { @@ -365,6 +366,45 @@ describe('KERN semantic substrate', () => { ]); }); + test('exports declared interface shape 
facts when requested', () => { + const root = parseRoot( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=User extends=Entity', + ' field name=name type=string optional=true', + ' indexer keyType=string type=unknown', + 'interface name=Box generics=""', + ' field name=value type=T', + ].join('\n'), + ); + + const substrate = buildKernSemanticSubstrate({ documentShapes: root }); + + expect(substrate.coreShapeFacts?.extendsEdges).toEqual([{ from: 'User', to: 'Entity', resolved: true }]); + expect(substrate.coreShapeFacts?.interfaces).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + name: 'User', + extends: ['Entity'], + generic: false, + validatorAvailable: true, + fields: expect.arrayContaining([ + expect.objectContaining({ name: 'id', type: 'string', inheritedFrom: 'Entity' }), + expect.objectContaining({ name: 'name', type: 'string', optional: true }), + ]), + indexers: [expect.objectContaining({ keyType: 'string', type: 'unknown' })], + }), + expect.objectContaining({ + name: 'Box', + generic: true, + validatorAvailable: false, + unsupportedReasons: expect.arrayContaining(['generic-interface', 'unknown-type:T']), + }), + ]), + ); + }); + test('exports portable review primitives as stable query objects', () => { const substrate = buildKernSemanticSubstrate(); const clamp = lookupSemanticPrimitive(substrate, 'number.clamp'); From 2a2059a8ad7e5861b50f42ff770cea3f7680da83 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 02:43:37 +0200 Subject: [PATCH 31/46] feat(core): harden declared shape validators --- .../core/src/core-runtime/shape-validator.ts | 103 +++++++++++++++--- .../core/tests/core-shape-runtime.test.ts | 76 ++++++++++++- 2 files changed, 158 insertions(+), 21 deletions(-) diff --git a/packages/core/src/core-runtime/shape-validator.ts b/packages/core/src/core-runtime/shape-validator.ts index 670df190..2d5618e8 100644 --- a/packages/core/src/core-runtime/shape-validator.ts +++ 
b/packages/core/src/core-runtime/shape-validator.ts @@ -10,6 +10,7 @@ export type CoreShapeDiagnosticCode = | 'shape-field-type' | 'shape-generic-unsupported' | 'shape-indexer-key-unsupported' + | 'shape-interface-duplicate' | 'shape-interface-not-found' | 'shape-object-expected' | 'shape-type-reference-unknown' @@ -114,6 +115,7 @@ export function validateCoreShape( const registry = collectShapeRegistry(rootOrNodes); const diagnostics: CoreShapeDiagnostic[] = []; const shape = registry.interfaces.get(interfaceName); + diagnostics.push(...interfaceDuplicateDiagnostics(registry, interfaceName)); if (!shape) { diagnostics.push({ code: 'shape-interface-not-found', @@ -123,7 +125,8 @@ export function validateCoreShape( return { passed: false, interfaceName, diagnostics }; } diagnostics.push(...validateAgainstInterface(value, shape, registry, interfaceName, [], new WeakMap())); - return { passed: diagnostics.length === 0, interfaceName, diagnostics }; + const uniqueDiagnostics = dedupeDiagnostics(diagnostics); + return { passed: uniqueDiagnostics.length === 0, interfaceName, diagnostics: uniqueDiagnostics }; } export function assertCoreShape( @@ -191,6 +194,13 @@ function collectShapeRegistry(rootOrNodes: IRNode | readonly IRNode[]): ShapeReg if (node.type !== 'interface') continue; const name = stringProp(node.props?.name); if (!name) continue; + if (interfaces.has(name)) { + diagnostics.push({ + code: 'shape-interface-duplicate', + message: `KERN core shape '${name}' is declared more than once; the last declaration is used.`, + interfaceName: name, + }); + } const shape: ShapeInterface = { name, extendsNames: splitExtends(node.props?.extends), @@ -274,16 +284,9 @@ function validateAgainstInterface( } continue; } - diagnostics.push( - ...validateType( - object[field.name] ?? kUndefinedValue(), - field.type, - registry, - fieldPath(path, field.name), - stack, - visited, - ), - ); + const value = object[field.name] ?? 
kUndefinedValue(); + if (field.optional && value.kind === 'undefined') continue; + diagnostics.push(...validateType(value, field.type, registry, fieldPath(path, field.name), stack, visited)); } for (const [key, entry] of Object.entries(object)) { @@ -348,7 +351,10 @@ function validateType( }, ]; } - return validateAgainstInterface(value, nested, registry, path, stack, visited); + return [ + ...interfaceDuplicateDiagnostics(registry, type), + ...validateAgainstInterface(value, nested, registry, path, stack, visited), + ]; } return [ { @@ -413,6 +419,11 @@ function resolveShape(shape: ShapeInterface, registry: ShapeRegistry, stack: rea }); continue; } + diagnostics.push( + ...registry.diagnostics.filter( + (diagnostic) => diagnostic.code === 'shape-interface-duplicate' && diagnostic.interfaceName === baseName, + ), + ); const resolved = resolveShape(base, registry, [...stack, shape.name]); diagnostics.push(...resolved.diagnostics); for (const field of resolved.fields) { @@ -485,6 +496,7 @@ function shapeUnsupportedReasons( ): readonly string[] { const reasons = new Set(); if (shape.generic) reasons.add('generic-interface'); + for (const diagnostic of interfaceDuplicateDiagnostics(registry, shape.name)) reasons.add(diagnostic.code); for (const diagnostic of resolved.diagnostics) reasons.add(diagnostic.code); for (const field of resolved.fields) { for (const issue of unsupportedTypeReasons(field.type, registry)) reasons.add(issue); @@ -496,13 +508,22 @@ function shapeUnsupportedReasons( return [...reasons].sort(); } +function interfaceDuplicateDiagnostics(registry: ShapeRegistry, interfaceName: string): readonly CoreShapeDiagnostic[] { + return registry.diagnostics.filter( + (diagnostic) => diagnostic.interfaceName === interfaceName && diagnostic.code === 'shape-interface-duplicate', + ); +} + function unsupportedTypeReasons(rawType: string | undefined, registry: ShapeRegistry): string[] { const type = normalizeType(rawType); if (!type || type === 'any' || type === 
'unknown' || isPrimitiveType(type)) return []; if (type.endsWith('[]')) return unsupportedTypeReasons(type.slice(0, -2), registry); const arrayMatch = /^Array<(.+)>$/.exec(type); if (arrayMatch) return unsupportedTypeReasons(arrayMatch[1], registry); - if (isSimpleIdentifier(type)) return registry.interfaces.has(type) ? [] : [`unknown-type:${type}`]; + if (isSimpleIdentifier(type)) { + if (!registry.interfaces.has(type)) return [`unknown-type:${type}`]; + return interfaceDuplicateDiagnostics(registry, type).map((diagnostic) => diagnostic.code); + } return [`unsupported-type:${type}`]; } @@ -558,7 +579,7 @@ function sameShapeField(left: ShapeField, right: ShapeField): boolean { function keyMatchesIndexer(key: string, indexer: ShapeIndexer): boolean { if (indexer.keyType === 'string') return true; - return indexer.keyType === 'number' && /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(key); + return indexer.keyType === 'number' && /^-?(?:0|[1-9]\d*)$/.test(key); } function fieldPath(path: string, field: string): string { @@ -581,10 +602,56 @@ function normalizeType(value: string | undefined): string | undefined { function splitExtends(value: unknown): string[] { const raw = stringProp(value); if (!raw) return []; - return raw - .split(',') - .map((part) => part.trim()) - .filter(Boolean); + return splitTopLevelCommas(raw); +} + +function splitTopLevelCommas(value: string): string[] { + const parts: string[] = []; + let current = ''; + let angleDepth = 0; + let parenDepth = 0; + let bracketDepth = 0; + let braceDepth = 0; + let quote: '"' | "'" | '`' | undefined; + let escaped = false; + for (const char of value) { + if (quote) { + current += char; + if (escaped) { + escaped = false; + continue; + } + if (char === '\\') { + escaped = true; + continue; + } + if (char === quote) quote = undefined; + continue; + } + if (char === '"' || char === "'" || char === '`') { + quote = char; + current += char; + continue; + } + if (char === '<') angleDepth += 1; + else if 
(char === '>' && angleDepth > 0) angleDepth -= 1; + else if (char === '(') parenDepth += 1; + else if (char === ')' && parenDepth > 0) parenDepth -= 1; + else if (char === '[') bracketDepth += 1; + else if (char === ']' && bracketDepth > 0) bracketDepth -= 1; + else if (char === '{') braceDepth += 1; + else if (char === '}' && braceDepth > 0) braceDepth -= 1; + if (char === ',' && angleDepth === 0 && parenDepth === 0 && bracketDepth === 0 && braceDepth === 0) { + const part = current.trim(); + if (part) parts.push(part); + current = ''; + continue; + } + current += char; + } + const part = current.trim(); + if (part) parts.push(part); + return parts; } function extendsEdgeParticipatesInCycle(from: string, to: string, registry: ShapeRegistry): boolean { diff --git a/packages/core/tests/core-shape-runtime.test.ts b/packages/core/tests/core-shape-runtime.test.ts index 1d2db4ac..d9db8a40 100644 --- a/packages/core/tests/core-shape-runtime.test.ts +++ b/packages/core/tests/core-shape-runtime.test.ts @@ -123,11 +123,14 @@ describe('KERN core declared shape validators', () => { const root = parse('interface name=NumericMap\n indexer keyType=number type=string'); expect( - validateCoreShape(fromHostValue({ '-1': 'left', '1.5': 'half', '2e3': 'large' }), 'NumericMap', root).passed, + validateCoreShape(fromHostValue({ '-1': 'left', '0': 'zero', '42': 'answer' }), 'NumericMap', root).passed, ).toBe(true); expect(codes(validateCoreShape(fromHostValue({ label: 'not numeric' }), 'NumericMap', root))).toContain( 'shape-unexpected-field', ); + expect(codes(validateCoreShape(fromHostValue({ '1.5': 'half', '2e3': 'large' }), 'NumericMap', root))).toEqual( + expect.arrayContaining(['shape-unexpected-field', 'shape-unexpected-field']), + ); }); test('validates numeric keys against both string and number indexers', () => { @@ -158,6 +161,17 @@ describe('KERN core declared shape validators', () => { expect(codes(result)).toEqual(expect.arrayContaining(['shape-field-conflict', 
'shape-type-reference-unknown'])); }); + test('allows explicit undefined for optional fields', () => { + const root = parse( + ['interface name=User', ' field name=id type=string', ' field name=nickname type=string optional=true'].join( + '\n', + ), + ); + + expect(validateCoreShape(fromHostValue({ id: 'u1', nickname: undefined }), 'User', root).passed).toBe(true); + expect(codes(validateCoreShape(fromHostValue({ id: undefined }), 'User', root))).toContain('shape-field-type'); + }); + test('reports recursive values instead of recursing through self-referential shape fields', () => { const root = parse( ['interface name=Node', ' field name=id type=string', ' field name=next type=Node optional=true'].join('\n'), @@ -187,7 +201,9 @@ describe('KERN core declared shape validators', () => { ].join('\n'), ); - expect(codes(validateCoreShape(fromHostValue({ value: 'x' }), 'Box', root))).toContain('shape-generic-unsupported'); + const boxCodes = codes(validateCoreShape(fromHostValue({ value: 'x' }), 'Box', root)); + expect(boxCodes).toContain('shape-generic-unsupported'); + expect(boxCodes.filter((code) => code === 'shape-generic-unsupported')).toHaveLength(1); const maybeNameCodes = codes(validateCoreShape(fromHostValue({ name: 'Ada' }), 'MaybeName', root)); expect(maybeNameCodes).toContain('shape-type-unsupported'); expect(maybeNameCodes).not.toContain('shape-generic-unsupported'); @@ -249,6 +265,8 @@ describe('KERN core declared shape validators', () => { 'interface name=Right', ' field name=id type=number', 'interface name=Joined extends=Left,Right', + 'interface name=GenericChild extends="Pair,Left"', + 'interface name=ObjectGenericChild extends="Wrapper<{ a: string, b: number }>,Left"', 'interface name=Broken extends=Missing', ' indexer keyType=symbol type=string', 'interface name=Indexed', @@ -257,28 +275,57 @@ describe('KERN core declared shape validators', () => { 'interface name=Duplicate', ' field name=id type=string', ' field name=id type=string', + 'interface 
name=Shadowed', + ' field name=id type=string', + 'interface name=Shadowed', + ' field name=id type=number', + 'interface name=ShadowedChild extends=Shadowed', + 'interface name=UsesShadowed', + ' field name=child type=Shadowed', ].join('\n'), ); const facts = collectCoreShapeFacts(root); expect(facts.interfaces.map((shape) => shape.name)).toEqual( - expect.arrayContaining(['Nested', 'Left', 'Right', 'Joined', 'Broken', 'Indexed', 'IndexedChild', 'Duplicate']), + expect.arrayContaining([ + 'Nested', + 'Left', + 'Right', + 'Joined', + 'GenericChild', + 'ObjectGenericChild', + 'Broken', + 'Indexed', + 'IndexedChild', + 'Duplicate', + 'Shadowed', + 'ShadowedChild', + 'UsesShadowed', + ]), ); expect(facts.extendsEdges).toEqual( expect.arrayContaining([ { from: 'Joined', to: 'Left', resolved: true }, { from: 'Joined', to: 'Right', resolved: true }, + { from: 'GenericChild', to: 'Pair', resolved: false }, + { from: 'GenericChild', to: 'Left', resolved: true }, + { from: 'ObjectGenericChild', to: 'Wrapper<{ a: string, b: number }>', resolved: false }, + { from: 'ObjectGenericChild', to: 'Left', resolved: true }, { from: 'Broken', to: 'Missing', resolved: false }, { from: 'IndexedChild', to: 'Indexed', resolved: true }, + { from: 'ShadowedChild', to: 'Shadowed', resolved: true }, ]), ); expect(facts.validationDiagnostics).toEqual( expect.arrayContaining([ expect.objectContaining({ code: 'shape-field-conflict', interfaceName: 'Joined' }), expect.objectContaining({ code: 'shape-extends-unknown', interfaceName: 'Broken' }), + expect.objectContaining({ code: 'shape-extends-unknown', interfaceName: 'GenericChild' }), + expect.objectContaining({ code: 'shape-extends-unknown', interfaceName: 'ObjectGenericChild' }), expect.objectContaining({ code: 'shape-indexer-key-unsupported', interfaceName: 'Broken' }), expect.objectContaining({ code: 'shape-field-duplicate', interfaceName: 'Duplicate' }), + expect.objectContaining({ code: 'shape-interface-duplicate', interfaceName: 
'Shadowed' }), ]), ); expect(facts.interfaces.find((shape) => shape.name === 'Broken')?.unsupportedReasons).toEqual( @@ -287,5 +334,28 @@ describe('KERN core declared shape validators', () => { expect(facts.interfaces.find((shape) => shape.name === 'IndexedChild')?.indexers).toEqual([ expect.objectContaining({ keyType: 'string', type: 'unknown' }), ]); + expect(facts.interfaces.find((shape) => shape.name === 'Shadowed')).toEqual( + expect.objectContaining({ + validatorAvailable: false, + unsupportedReasons: expect.arrayContaining(['shape-interface-duplicate']), + }), + ); + expect(facts.interfaces.find((shape) => shape.name === 'UsesShadowed')).toEqual( + expect.objectContaining({ + validatorAvailable: false, + unsupportedReasons: expect.arrayContaining(['shape-interface-duplicate']), + }), + ); + const shadowed = validateCoreShape(fromHostValue({ id: 1 }), 'Shadowed', root); + expect(shadowed.passed).toBe(false); + expect(shadowed.diagnostics).toEqual( + expect.arrayContaining([expect.objectContaining({ code: 'shape-interface-duplicate' })]), + ); + expect(codes(validateCoreShape(fromHostValue({ id: 1 }), 'ShadowedChild', root))).toContain( + 'shape-interface-duplicate', + ); + expect(codes(validateCoreShape(fromHostValue({ child: { id: 1 } }), 'UsesShadowed', root))).toContain( + 'shape-interface-duplicate', + ); }); }); From 36db5f0e6a2b62bcee405dddcae5d40b679a0cee Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 02:59:26 +0200 Subject: [PATCH 32/46] feat(core): expose effective class member facts --- packages/core/src/semantic-validator.ts | 81 +++++++++++++++-- .../core/tests/semantic-substrate.test.ts | 88 ++++++++++++++++++- 2 files changed, 163 insertions(+), 6 deletions(-) diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 78b09946..809e5311 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -49,6 +49,7 @@ export interface ClassSemanticMemberFact 
{ readonly arity: number; readonly readable: boolean; readonly writable: boolean; + readonly inheritedFrom?: string; readonly loc?: ClassSemanticLocation; } @@ -58,6 +59,7 @@ export interface ClassSemanticClassFact { readonly hasConstructor: boolean; readonly constructorCount: number; readonly members: readonly ClassSemanticMemberFact[]; + readonly effectiveMembers: readonly ClassSemanticMemberFact[]; readonly loc?: ClassSemanticLocation; } @@ -2755,7 +2757,7 @@ export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): Cla } return { - classes: classes.map(classSemanticFact), + classes: classes.map((info) => classSemanticFact(info, classByName)), inheritanceEdges, overrides: collectClassOverrideFacts(classes, classByName), unresolvedBases: [...unresolvedBases].sort(), @@ -2763,20 +2765,25 @@ export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): Cla }; } -function classSemanticFact(info: ClassInfo): ClassSemanticClassFact { +function classSemanticFact(info: ClassInfo, classByName: ReadonlyMap): ClassSemanticClassFact { return { name: info.name, ...(info.baseName ? { baseName: info.baseName } : {}), hasConstructor: info.constructors.length > 0, constructorCount: info.constructors.length, - members: info.members.map(classMemberSemanticFact), + members: info.members.map((member) => classMemberSemanticFact(member)), + effectiveMembers: effectiveClassMemberFacts(info, classByName), ...(info.node.loc ? 
{ loc: semanticLocation(info.node) } : {}), }; } -function classMemberSemanticFact(member: ClassMemberInfo): ClassSemanticMemberFact { +function classMemberSemanticFact( + member: ClassMemberInfo, + className = member.owner, + inheritedFrom?: string, +): ClassSemanticMemberFact { return { - className: member.owner, + className, owner: member.owner, name: member.name, kind: member.kind, @@ -2784,10 +2791,74 @@ function classMemberSemanticFact(member: ClassMemberInfo): ClassSemanticMemberFa arity: member.arity, readable: member.kind === 'field' || member.kind === 'getter' || member.kind === 'method', writable: member.kind === 'field' || member.kind === 'setter', + ...(inheritedFrom ? { inheritedFrom } : {}), ...(member.node.loc ? { loc: semanticLocation(member.node) } : {}), }; } +function effectiveClassMemberFacts( + info: ClassInfo, + classByName: ReadonlyMap, + seen: ReadonlySet = new Set(), +): ClassSemanticMemberFact[] { + const effective = new Map(); + if (seen.has(info.name) || classInfoParticipatesInCycle(info, classByName)) { + return info.members.map((member) => classMemberSemanticFact(member, info.name)); + } + const nextSeen = new Set(seen); + nextSeen.add(info.name); + const base = info.baseName ? classByName.get(info.baseName) : undefined; + if (base) { + for (const member of effectiveClassMemberFacts(base, classByName, nextSeen)) { + effective.set(classMemberEffectiveKey(member), { + ...member, + className: info.name, + inheritedFrom: member.inheritedFrom ?? member.owner, + }); + } + } + const ownGroups = new Map(); + for (const member of info.members) { + const group = ownGroups.get(classMemberShapeKey(member)) ?? []; + group.push(member); + ownGroups.set(classMemberShapeKey(member), group); + } + for (const [shapeKey, members] of ownGroups) { + const first = members[0]; + if (!first) continue; + for (const key of [...effective.keys()]) { + if (classMemberShapeKey(effective.get(key) ?? 
first) === shapeKey) effective.delete(key); + } + for (const member of members) { + effective.set(classMemberEffectiveKey(member), classMemberSemanticFact(member, info.name)); + } + } + return [...effective.values()]; +} + +function classMemberShapeKey(member: { readonly static: boolean; readonly name: string }): string { + return `${member.static ? 'static' : 'instance'}:${member.name}`; +} + +function classMemberEffectiveKey(member: { + readonly static: boolean; + readonly name: string; + readonly kind: ClassSemanticMemberKind | ClassMemberKind; +}): string { + return `${classMemberShapeKey(member)}:${member.kind}`; +} + +function classInfoParticipatesInCycle(info: ClassInfo, classByName: ReadonlyMap): boolean { + const seen = new Set(); + let current: ClassInfo | undefined = info; + while (current) { + if (seen.has(current.name)) return true; + seen.add(current.name); + current = current.baseName ? classByName.get(current.baseName) : undefined; + } + return false; +} + function collectClassOverrideFacts( classes: readonly ClassInfo[], classByName: ReadonlyMap, diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index c35929ff..f7f0f290 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -74,9 +74,13 @@ describe('KERN semantic substrate', () => { [ 'class name=Base', ' field name=id type=string', + ' field name=version type=number static=true', ' method name=load returns=string', ' param name=id type=string', ' getter name=label returns=string', + ' getter name=status returns=string', + ' setter name=status', + ' param name=value type=string', 'class name=Derived extends=Base', ' constructor', ' handler lang=kern', @@ -128,6 +132,79 @@ describe('KERN semantic substrate', () => { }), ]), ); + expect(derived?.effectiveMembers).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + inheritedFrom: 
'Base', + name: 'id', + kind: 'field', + static: false, + readable: true, + writable: true, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + inheritedFrom: 'Base', + name: 'status', + kind: 'getter', + static: false, + readable: true, + writable: false, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + inheritedFrom: 'Base', + name: 'status', + kind: 'setter', + static: false, + readable: false, + writable: true, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + inheritedFrom: 'Base', + name: 'version', + kind: 'field', + static: true, + readable: true, + writable: true, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Derived', + name: 'load', + kind: 'method', + arity: 2, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Derived', + name: 'label', + kind: 'setter', + readable: false, + writable: true, + }), + ]), + ); + expect(derived?.effectiveMembers).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + name: 'load', + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + name: 'label', + }), + ]), + ); expect(substrate.classFacts?.overrides).toEqual( expect.arrayContaining([ @@ -156,7 +233,13 @@ describe('KERN semantic substrate', () => { test('reports unresolved bases and inheritance cycles as class facts', () => { const facts = collectClassSemanticFacts( parseRoot( - ['class name=UsesExternal extends=ExternalBase', 'class name=A extends=B', 'class name=B extends=A'].join('\n'), + [ + 'class name=UsesExternal extends=ExternalBase', + 'class name=A extends=B', + ' field name=onlyA type=string', + 'class name=B extends=A', + ' field name=onlyB type=string', + ].join('\n'), ), ); @@ -168,6 +251,9 @@ describe('KERN semantic substrate', () => { ]), ); expect(facts.cycles).toEqual([['A', 'B', 'A']]); + expect(facts.classes.find((candidate) => candidate.name === 
'A')?.effectiveMembers).toEqual([ + expect.objectContaining({ className: 'A', owner: 'A', name: 'onlyA' }), + ]); }); test('resolves imported and cross-root class bases consistently with validation', () => { From fc06188e01b93189202cc0a3db8fe19be6e68b3b Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 03:37:33 +0200 Subject: [PATCH 33/46] feat(core): add class implements conformance facts --- .../kernlang-typescript-surface.test.kern | 1 + packages/core/src/index.ts | 3 + packages/core/src/semantic-validator.ts | 425 ++++++++++++++++++ packages/core/tests/class-semantics.test.ts | 110 +++++ .../core/tests/semantic-substrate.test.ts | 72 +++ 5 files changed, 611 insertions(+) diff --git a/packages/core/native-test/kernlang-typescript-surface.test.kern b/packages/core/native-test/kernlang-typescript-surface.test.kern index 2f8cf122..7069878d 100644 --- a/packages/core/native-test/kernlang-typescript-surface.test.kern +++ b/packages/core/native-test/kernlang-typescript-surface.test.kern @@ -66,4 +66,5 @@ test name="KERNlang TypeScript surface parity" target="./kernlang-typescript-sur expect decompile contains="indexer keyType=string type=number" expect decompile contains="fn name=add" expect decompile contains="overload params=\"a:number,b:number\" returns=number" + expect decompile contains="field name=role type=string" expect roundtrip=true diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index c200561e..6b657d35 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -513,12 +513,15 @@ export { export type { ClassSemanticClassFact, ClassSemanticFacts, + ClassSemanticImplementsEdge, ClassSemanticInheritanceEdge, ClassSemanticLocation, ClassSemanticMemberFact, ClassSemanticMemberKind, ClassSemanticOverrideFact, ClassSemanticOverrideStatus, + ClassSemanticProtocolConformanceFact, + ClassSemanticProtocolStatus, RagSemanticAnswerContractFact, RagSemanticAnswerSpanFact, RagSemanticChunkingFact, diff --git 
a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 809e5311..81f3ba6e 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -15,6 +15,11 @@ * symbols that the resolver proved exist. */ +import { + type CoreShapeDiagnostic, + type CoreShapeInterfaceFact, + collectCoreShapeFacts, +} from './core-runtime/shape-validator.js'; import { collectExternalImportSymbols, type ExternalImportSymbolTable } from './external-symbols.js'; import { importRegistryOf } from './import-metadata.js'; import { parseExpression } from './parser-expression.js'; @@ -46,6 +51,8 @@ export interface ClassSemanticMemberFact { readonly name: string; readonly kind: ClassSemanticMemberKind; readonly static: boolean; + readonly type?: string; + readonly returns?: string; readonly arity: number; readonly readable: boolean; readonly writable: boolean; @@ -71,6 +78,14 @@ export interface ClassSemanticInheritanceEdge { readonly builtin: boolean; } +export interface ClassSemanticImplementsEdge { + readonly from: string; + readonly to: string; + readonly relation: 'implements'; + readonly resolved: boolean; + readonly external: boolean; +} + export interface ClassSemanticOverrideFact { readonly className: string; readonly memberName: string; @@ -84,11 +99,33 @@ export interface ClassSemanticOverrideFact { readonly loc?: ClassSemanticLocation; } +export type ClassSemanticProtocolStatus = + | 'satisfied' + | 'missing-members' + | 'external' + | 'unknown-interface' + | 'invalid-interface' + | 'unsupported-protocol'; + +export interface ClassSemanticProtocolConformanceFact { + readonly className: string; + readonly interfaceName: string; + readonly status: ClassSemanticProtocolStatus; + readonly missingMembers: readonly string[]; + readonly satisfiedMembers: readonly string[]; + readonly diagnostics?: readonly string[]; + readonly unsupportedReasons?: readonly string[]; + readonly loc?: ClassSemanticLocation; +} + export 
interface ClassSemanticFacts { readonly classes: readonly ClassSemanticClassFact[]; readonly inheritanceEdges: readonly ClassSemanticInheritanceEdge[]; + readonly implementsEdges: readonly ClassSemanticImplementsEdge[]; readonly overrides: readonly ClassSemanticOverrideFact[]; readonly unresolvedBases: readonly string[]; + readonly unresolvedImplements: readonly string[]; + readonly protocolConformance: readonly ClassSemanticProtocolConformanceFact[]; readonly cycles: readonly (readonly string[])[]; } @@ -2625,6 +2662,8 @@ interface ClassInfo { rootIndex: number; name: string; baseName?: string; + implementsNames: string[]; + implementsMalformed: boolean; members: ClassMemberInfo[]; constructors: IRNode[]; } @@ -2635,9 +2674,38 @@ interface ClassMemberInfo { name: string; kind: ClassMemberKind; static: boolean; + type?: string; + returns?: string; arity: number; } +interface InterfaceInfo { + node: IRNode; + rootIndex: number; + name: string; + extendsNames: string[]; + fields: InterfaceFieldInfo[]; +} + +interface InterfaceFieldInfo { + name: string; + type?: string; + optional: boolean; +} + +interface ClassProtocolShapeContext { + shapeByName: ReadonlyMap; + diagnosticsByName: ReadonlyMap; +} + +interface ClassInterfaceConformanceResult { + status: Exclude; + missingMembers: string[]; + satisfiedMembers: string[]; + diagnostics: string[]; + unsupportedReasons: string[]; +} + const BUILTIN_CLASS_BASES = new Set(['Error']); export const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; export const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; @@ -2667,6 +2735,12 @@ function validateClassGraphRoots(roots: readonly IRNode[], violations: SemanticV const classes = classesByRoot.flat(); if (classes.length === 0) return; + const interfaces = roots.flatMap((root, rootIndex) => collectInterfaceInfos(root, rootIndex)); + const interfaceByName = new Map(); + for (const info of interfaces) { + if (!interfaceByName.has(info.name)) interfaceByName.set(info.name, 
info); + } + const protocolShapeContext = collectClassProtocolShapeContext(roots); const classByName = new Map(); const declaredClassNames = new Set(); for (const info of classes) { @@ -2681,9 +2755,18 @@ function validateClassGraphRoots(roots: readonly IRNode[], violations: SemanticV for (const className of declaredClassNames) visibleNames.add(className); return visibleNames; }); + const visibleProtocolNamesByRoot = roots.map((root) => collectVisibleProtocolNames(root)); for (const info of classes) { validateClassBaseReference(info, visibleNamesByRoot[info.rootIndex] ?? declaredClassNames, violations); + validateClassImplements( + info, + interfaceByName, + visibleProtocolNamesByRoot[info.rootIndex] ?? new Set(), + protocolShapeContext, + classByName, + violations, + ); validateClassConstructors(info, violations); validateClassMemberConflicts(info, violations); validateClassSuperUsage(info, violations); @@ -2705,6 +2788,8 @@ function collectClassInfos(root: IRNode, rootIndex = 0): ClassInfo[] { rootIndex, name, baseName: classBaseName(node.props?.extends), + implementsNames: classReferenceNames(node.props?.implements, 'class implements='), + implementsMalformed: classReferenceListMalformed(node.props?.implements, 'class implements='), members: collectClassMembers(node, name), constructors: (node.children ?? 
[]).filter((child) => child.type === 'constructor'), }); @@ -2712,6 +2797,52 @@ function collectClassInfos(root: IRNode, rootIndex = 0): ClassInfo[] { return out; } +function collectInterfaceInfos(root: IRNode, rootIndex = 0): InterfaceInfo[] { + const out: InterfaceInfo[] = []; + walkSemanticTree(root, (node) => { + if (node.type !== 'interface') return; + const name = stringProp(node, 'name'); + if (!name) return; + out.push({ + node, + rootIndex, + name, + extendsNames: classReferenceNames(node.props?.extends, 'interface extends='), + fields: collectInterfaceFields(node), + }); + }); + return out; +} + +function collectInterfaceFields(node: IRNode): InterfaceFieldInfo[] { + const fields: InterfaceFieldInfo[] = []; + for (const child of node.children ?? []) { + if (child.type !== 'field') continue; + const name = stringProp(child, 'name'); + if (!name) continue; + fields.push({ + name, + ...(stringProp(child, 'type') ? { type: stringProp(child, 'type') } : {}), + optional: isTrueFlag(child.props?.optional), + }); + } + return fields; +} + +function collectClassProtocolShapeContext(roots: readonly IRNode[]): ClassProtocolShapeContext { + const facts = collectCoreShapeFacts(roots); + const shapeByName = new Map(); + for (const shape of facts.interfaces) shapeByName.set(shape.name, shape); + const diagnosticsByName = new Map(); + for (const diagnostic of facts.validationDiagnostics) { + if (!diagnostic.interfaceName) continue; + const diagnostics = diagnosticsByName.get(diagnostic.interfaceName) ?? []; + diagnostics.push(diagnostic); + diagnosticsByName.set(diagnostic.interfaceName, diagnostics); + } + return { shapeByName, diagnosticsByName }; +} + function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { const members: ClassMemberInfo[] = []; for (const child of node.children ?? 
[]) { @@ -2724,6 +2855,8 @@ function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { name, kind: child.type, static: isTrueFlag(child.props?.static), + ...(stringProp(child, 'type') ? { type: stringProp(child, 'type') } : {}), + ...(stringProp(child, 'returns') ? { returns: stringProp(child, 'returns') } : {}), arity: memberArity(child), }); } @@ -2733,11 +2866,18 @@ function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): ClassSemanticFacts { const roots = Array.isArray(root) ? root : [root]; const classes = roots.flatMap((candidate, rootIndex) => collectClassInfos(candidate, rootIndex)); + const interfaces = roots.flatMap((candidate, rootIndex) => collectInterfaceInfos(candidate, rootIndex)); const classByName = new Map(); for (const info of classes) { if (!classByName.has(info.name)) classByName.set(info.name, info); } + const interfaceByName = new Map(); + for (const info of interfaces) { + if (!interfaceByName.has(info.name)) interfaceByName.set(info.name, info); + } + const protocolShapeContext = collectClassProtocolShapeContext(roots); const visibleNamesByRoot = roots.map((candidate) => collectVisibleClassBaseNames(candidate)); + const visibleProtocolNamesByRoot = roots.map((candidate) => collectVisibleProtocolNames(candidate)); const inheritanceEdges: ClassSemanticInheritanceEdge[] = []; const unresolvedBases = new Set(); @@ -2756,11 +2896,39 @@ export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): Cla if (!resolved) unresolvedBases.add(info.baseName); } + const implementsEdges: ClassSemanticImplementsEdge[] = []; + const unresolvedImplements = new Set(); + for (const info of classes) { + for (const interfaceName of info.implementsNames) { + const external = + !interfaceByName.has(interfaceName) && + (visibleProtocolNamesByRoot[info.rootIndex] ?? 
new Set()).has(interfaceName); + const resolved = interfaceByName.has(interfaceName) || external; + implementsEdges.push({ + from: info.name, + to: interfaceName, + relation: 'implements', + resolved, + external, + }); + if (!resolved) unresolvedImplements.add(interfaceName); + } + } + return { classes: classes.map((info) => classSemanticFact(info, classByName)), inheritanceEdges, + implementsEdges, overrides: collectClassOverrideFacts(classes, classByName), unresolvedBases: [...unresolvedBases].sort(), + unresolvedImplements: [...unresolvedImplements].sort(), + protocolConformance: collectClassProtocolConformanceFacts( + classes, + interfaceByName, + visibleProtocolNamesByRoot, + protocolShapeContext, + classByName, + ), cycles: collectClassCycleFacts(classes, classByName), }; } @@ -2788,6 +2956,8 @@ function classMemberSemanticFact( name: member.name, kind: member.kind, static: member.static, + ...(member.type ? { type: member.type } : {}), + ...(member.returns ? { returns: member.returns } : {}), arity: member.arity, readable: member.kind === 'field' || member.kind === 'getter' || member.kind === 'method', writable: member.kind === 'field' || member.kind === 'setter', @@ -2922,6 +3092,111 @@ function collectClassCycleFacts( return cycles; } +function collectClassProtocolConformanceFacts( + classes: readonly ClassInfo[], + interfaceByName: ReadonlyMap, + visibleProtocolNamesByRoot: readonly ReadonlySet[], + protocolShapeContext: ClassProtocolShapeContext, + classByName: ReadonlyMap, +): ClassSemanticProtocolConformanceFact[] { + const facts: ClassSemanticProtocolConformanceFact[] = []; + for (const info of classes) { + for (const interfaceName of info.implementsNames) { + const protocol = interfaceByName.get(interfaceName); + if (!protocol) { + const visible = (visibleProtocolNamesByRoot[info.rootIndex] ?? new Set()).has(interfaceName); + facts.push({ + className: info.name, + interfaceName, + status: visible ? 
'external' : 'unknown-interface', + missingMembers: [], + satisfiedMembers: [], + ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), + }); + continue; + } + const result = classInterfaceConformance(info, protocol, protocolShapeContext, classByName); + facts.push({ + className: info.name, + interfaceName, + status: result.status, + missingMembers: result.missingMembers, + satisfiedMembers: result.satisfiedMembers, + ...(result.diagnostics.length > 0 ? { diagnostics: result.diagnostics } : {}), + ...(result.unsupportedReasons.length > 0 ? { unsupportedReasons: result.unsupportedReasons } : {}), + ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), + }); + } + } + return facts; +} + +function classInterfaceConformance( + info: ClassInfo, + protocol: InterfaceInfo, + protocolShapeContext: ClassProtocolShapeContext, + classByName: ReadonlyMap, +): ClassInterfaceConformanceResult { + const shape = protocolShapeContext.shapeByName.get(protocol.name); + const diagnostics = (protocolShapeContext.diagnosticsByName.get(protocol.name) ?? []).map( + (diagnostic) => diagnostic.code, + ); + if (diagnostics.length > 0) { + return { + status: 'invalid-interface', + missingMembers: [], + satisfiedMembers: [], + diagnostics: sortedUnique(diagnostics), + unsupportedReasons: [], + }; + } + if (shape && (shape.indexers.length > 0 || !shape.validatorAvailable)) { + return { + status: 'unsupported-protocol', + missingMembers: [], + satisfiedMembers: [], + diagnostics: [], + unsupportedReasons: sortedUnique([ + ...shape.unsupportedReasons, + ...(shape.indexers.length > 0 ? ['indexer'] : []), + ]), + }; + } + const effectiveMembers = effectiveClassMemberFacts(info, classByName); + const fields = shape?.fields ?? 
protocol.fields; + const requiredFields = fields.filter((field) => !field.optional); + const missingMembers: string[] = []; + const satisfiedMembers: string[] = []; + for (const field of requiredFields) { + if (classHasReadableInstanceMember(effectiveMembers, field)) { + satisfiedMembers.push(field.name); + } else { + missingMembers.push(field.name); + } + } + const missing = sortedUnique(missingMembers); + const satisfied = sortedUnique(satisfiedMembers); + return { + status: missing.length > 0 ? 'missing-members' : 'satisfied', + missingMembers: missing, + satisfiedMembers: satisfied, + diagnostics: [], + unsupportedReasons: [], + }; +} + +function classHasReadableInstanceMember( + members: readonly ClassSemanticMemberFact[], + field: { readonly name: string; readonly type?: string }, +): boolean { + return members.some((member) => { + if (member.name !== field.name || member.static) return false; + if (member.kind !== 'field' && member.kind !== 'getter') return false; + const actualType = member.kind === 'getter' ? member.returns : member.type; + return !field.type || actualType === field.type; + }); +} + function semanticLocation(node: IRNode): ClassSemanticLocation | undefined { return node.loc ? { line: node.loc.line, col: node.loc.col } : undefined; } @@ -2946,6 +3221,69 @@ function validateClassBaseReference( }); } +function validateClassImplements( + info: ClassInfo, + interfaceByName: ReadonlyMap, + visibleProtocolNames: ReadonlySet, + protocolShapeContext: ClassProtocolShapeContext, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (info.implementsMalformed) { + violations.push({ + rule: 'class-implements-invalid-reference-list', + nodeType: 'class', + message: `Class '${info.name}' has an invalid implements= reference list. 
Use a comma-separated list of interface names.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + } + for (const interfaceName of info.implementsNames) { + const protocol = interfaceByName.get(interfaceName); + if (!protocol) { + if (!visibleProtocolNames.has(interfaceName)) { + violations.push({ + rule: 'class-implements-unknown', + nodeType: 'class', + message: `Class '${info.name}' implements unknown interface '${interfaceName}'. Declare or import the interface before implementing it.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + } + continue; + } + const conformance = classInterfaceConformance(info, protocol, protocolShapeContext, classByName); + if (conformance.status === 'invalid-interface') { + violations.push({ + rule: 'class-implements-invalid-interface', + nodeType: 'class', + message: `Class '${info.name}' implements invalid interface '${interfaceName}' (${conformance.diagnostics.join(', ')}). Fix the interface shape before relying on protocol conformance.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + continue; + } + if (conformance.status === 'unsupported-protocol') { + violations.push({ + rule: 'class-implements-unsupported-protocol', + nodeType: 'class', + message: `Class '${info.name}' implements interface '${interfaceName}' whose shape is not class-satisfiable in protocol v1 (${conformance.unsupportedReasons.join(', ')}).`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + continue; + } + if (conformance.missingMembers.length === 0) continue; + violations.push({ + rule: 'class-implements-missing-member', + nodeType: 'class', + message: `Class '${info.name}' does not satisfy interface '${interfaceName}'. 
Missing readable instance member(s): ${conformance.missingMembers.join(', ')}.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + } +} + function validateClassConstructors(info: ClassInfo, violations: SemanticViolation[]): void { if (info.constructors.length <= 1) return; for (const extra of info.constructors.slice(1)) { @@ -3514,6 +3852,27 @@ function collectVisibleClassBaseNames(root: IRNode): Set { return names; } +function collectVisibleProtocolNames(root: IRNode): Set { + const names = new Set(); + walkSemanticTree(root, (node) => { + const name = stringProp(node, 'name'); + if (name && node.type === 'interface') names.add(name); + if (node.type === 'import') { + for (const binding of importLocalBindings(node)) names.add(binding.name); + } + if (node.type === 'use') { + for (const child of node.children ?? []) { + if (child.type !== 'from') continue; + const kind = stringProp(child, 'kind'); + if (kind && kind !== 'interface' && kind !== 'type') continue; + const localName = stringProp(child, 'as') ?? 
stringProp(child, 'name'); + if (localName) names.add(localName); + } + } + }); + return names; +} + function isVisibleClassBaseDeclaration(nodeType: string): boolean { return nodeType === 'class' || nodeType === 'error'; } @@ -3635,6 +3994,72 @@ function classBaseName(value: unknown): string | undefined { return match?.[1]; } +function classReferenceNames(value: unknown, propName: string): string[] { + if (typeof value !== 'string' || !value.trim()) return []; + let parts: string[]; + try { + parts = splitClassReferenceList(value, propName); + } catch { + parts = []; + } + const names = new Set(); + for (const part of parts) { + const name = classBaseName(part); + if (name) names.add(name); + } + return [...names]; +} + +function classReferenceListMalformed(value: unknown, propName: string): boolean { + if (typeof value !== 'string' || !value.trim()) return false; + try { + splitClassReferenceList(value, propName); + return false; + } catch { + return true; + } +} + +function splitClassReferenceList(raw: string, propName: string): string[] { + const out: string[] = []; + let current = ''; + let depth = 0; + let angleDepth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < raw.length; index++) { + const ch = raw[index]; + if (quote !== null) { + current += ch; + if (ch === '\\' && index + 1 < raw.length) current += raw[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + current += ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth++; + else if (ch === ')' || ch === ']' || ch === '}') depth--; + else if (ch === '<') angleDepth++; + else if (ch === '>' && angleDepth > 0) angleDepth--; + if (depth < 0 || angleDepth < 0) throw new Error(`${propName} has unbalanced delimiters.`); + if (ch === ',' && depth === 0 && angleDepth === 0) { + const part = current.trim(); + if (part.length === 0) throw new Error(`${propName} contains an empty 
reference.`); + out.push(part); + current = ''; + continue; + } + current += ch; + } + if (quote !== null || depth !== 0 || angleDepth !== 0) throw new Error(`${propName} has unbalanced delimiters.`); + const tail = current.trim(); + if (tail.length === 0 && raw.trim().endsWith(',')) throw new Error(`${propName} contains an empty reference.`); + if (tail.length > 0) out.push(tail); + return out; +} + function stringProp(node: IRNode, prop: string): string | undefined; function stringProp(props: IRNode['props'] | undefined, prop: string): string | undefined; function stringProp(nodeOrProps: IRNode | IRNode['props'] | undefined, prop: string): string | undefined { diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 50cb7d0b..31e5f544 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -54,6 +54,116 @@ describe('semantic-validator — class object model', () => { expect(rulesFor(source)).not.toContain('class-extends-unknown'); }); + test('accepts class implements when effective readable instance members satisfy local interfaces', () => { + const source = [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Named extends=Entity', + ' field name=name type=string', + ' field name=nickname type=string optional=true', + 'class name=Base', + ' field name=id type=string', + 'class name=User extends=Base implements=Named', + ' getter name=name returns=string', + ' handler lang=kern', + ' return value="this.id"', + ].join('\n'); + + const rules = rulesFor(source); + expect(rules).not.toContain('class-implements-unknown'); + expect(rules).not.toContain('class-implements-missing-member'); + }); + + test('reports unknown class implements targets unless imported', () => { + const localRules = rulesFor('class name=User implements=MissingProtocol'); + expect(localRules).toContain('class-implements-unknown'); + + const importedRules = rulesFor( + 
['import from="./protocols" names=ExternalProtocol', 'class name=User implements=ExternalProtocol'].join('\n'), + ); + expect(importedRules).not.toContain('class-implements-unknown'); + }); + + test('reports malformed class implements reference lists', () => { + const rules = rulesFor('class name=User implements="Known,"'); + + expect(rules).toContain('class-implements-invalid-reference-list'); + }); + + test('reports missing required readable instance members for class implements', () => { + const violations = violationsFor( + [ + 'interface name=RoleBearing', + ' field name=role type=string', + ' field name=status type=string optional=true', + 'class name=Account implements=RoleBearing', + ' field name=role type=string static=true', + ].join('\n'), + ); + + expect(violations).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + rule: 'class-implements-missing-member', + message: expect.stringContaining('role'), + }), + ]), + ); + }); + + test('does not satisfy interface fields with methods or mismatched field types', () => { + const rules = rulesFor( + [ + 'interface name=RoleBearing', + ' field name=role type=string', + 'class name=MethodRole implements=RoleBearing', + ' method name=role returns=string', + ' handler lang=kern', + ' return value="\'admin\'"', + 'class name=NumberRole implements=RoleBearing', + ' field name=role type=number', + ].join('\n'), + ); + + expect(rules.filter((rule) => rule === 'class-implements-missing-member')).toHaveLength(2); + }); + + test('reports invalid interface shapes before class implements conformance', () => { + const unknownBaseRules = rulesFor( + [ + 'interface name=Protocol extends=MissingProtocol', + ' field name=id type=string', + 'class name=User implements=Protocol', + ' field name=id type=string', + ].join('\n'), + ); + expect(unknownBaseRules).toContain('class-implements-invalid-interface'); + + const optionalityConflictRules = rulesFor( + [ + 'interface name=BaseProtocol', + ' field name=id 
type=string', + 'interface name=Protocol extends=BaseProtocol', + ' field name=id type=string optional=true', + 'class name=User implements=Protocol', + ' field name=id type=string', + ].join('\n'), + ); + expect(optionalityConflictRules).toContain('class-implements-invalid-interface'); + }); + + test('reports interface indexers as unsupported class implements protocols in v1', () => { + const rules = rulesFor( + [ + 'interface name=DictionaryProtocol', + ' indexer keyName=key keyType=string type=number', + 'class name=Dictionary implements=DictionaryProtocol', + ].join('\n'), + ); + + expect(rules).toContain('class-implements-unsupported-protocol'); + }); + test('reports unknown base class names', () => { const violations = violationsFor('class name=User extends=MissingBase'); diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index f7f0f290..03f68a86 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -293,6 +293,78 @@ describe('KERN semantic substrate', () => { expect(invalidSubstrate.classValidationSummary?.byRule['class-extends-unknown']).toBe(1); }); + test('exports class implements edges and protocol conformance facts', () => { + const facts = collectClassSemanticFacts( + parseRoot( + [ + 'import from="./protocols" names=ExternalProtocol', + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Named extends=Entity', + ' field name=name type=string', + 'interface name=BrokenProtocol extends=MissingBaseProtocol', + ' field name=id type=string', + 'interface name=DictionaryProtocol', + ' indexer keyName=key keyType=string type=number', + 'class name=Base', + ' field name=id type=string', + 'class name=User extends=Base implements="Named,ExternalProtocol,MissingProtocol"', + ' getter name=name returns=string', + ' handler lang=kern', + ' return value="this.id"', + 'class name=Broken implements=Named', + ' field name=id 
type=string', + 'class name=Invalid implements=BrokenProtocol', + ' field name=id type=string', + 'class name=Dictionary implements=DictionaryProtocol', + ].join('\n'), + ), + ); + + expect(facts.implementsEdges).toEqual( + expect.arrayContaining([ + { from: 'User', to: 'Named', relation: 'implements', resolved: true, external: false }, + { from: 'User', to: 'ExternalProtocol', relation: 'implements', resolved: true, external: true }, + { from: 'User', to: 'MissingProtocol', relation: 'implements', resolved: false, external: false }, + ]), + ); + expect(facts.unresolvedImplements).toEqual(['MissingProtocol']); + expect(facts.protocolConformance).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'User', + interfaceName: 'Named', + status: 'satisfied', + satisfiedMembers: ['id', 'name'], + missingMembers: [], + }), + expect.objectContaining({ + className: 'User', + interfaceName: 'ExternalProtocol', + status: 'external', + }), + expect.objectContaining({ + className: 'Broken', + interfaceName: 'Named', + status: 'missing-members', + missingMembers: ['name'], + }), + expect.objectContaining({ + className: 'Invalid', + interfaceName: 'BrokenProtocol', + status: 'invalid-interface', + diagnostics: ['shape-extends-unknown'], + }), + expect.objectContaining({ + className: 'Dictionary', + interfaceName: 'DictionaryProtocol', + status: 'unsupported-protocol', + unsupportedReasons: ['indexer'], + }), + ]), + ); + }); + test('can summarize class validation rules alongside class facts', () => { const root = parseRoot( [ From 189122193de8b615937ea37fc95a5ffedb411b13 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 04:21:45 +0200 Subject: [PATCH 34/46] feat(core): add constructor discipline facts --- packages/core/src/index.ts | 2 + packages/core/src/semantic-validator.ts | 266 ++++++++++++++++++ .../core/tests/semantic-substrate.test.ts | 225 +++++++++++++++ 3 files changed, 493 insertions(+) diff --git a/packages/core/src/index.ts 
b/packages/core/src/index.ts index 6b657d35..a83c3795 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -512,6 +512,8 @@ export { // Semantic validation export type { ClassSemanticClassFact, + ClassSemanticConstructorFact, + ClassSemanticConstructorSuperStatus, ClassSemanticFacts, ClassSemanticImplementsEdge, ClassSemanticInheritanceEdge, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 81f3ba6e..b9cef9f5 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -70,6 +70,30 @@ export interface ClassSemanticClassFact { readonly loc?: ClassSemanticLocation; } +export type ClassSemanticConstructorSuperStatus = + | 'not-required' + | 'satisfied' + | 'missing' + | 'conditional' + | 'double' + | 'this-before-super'; + +export interface ClassSemanticConstructorFact { + readonly className: string; + readonly hasConstructor: boolean; + readonly constructorCount: number; + readonly hasBase: boolean; + readonly requiresSuper: boolean; + readonly superStatus: ClassSemanticConstructorSuperStatus; + readonly superCallCount: number; + readonly thisBeforeSuper: boolean; + readonly declaredFields: readonly string[]; + readonly initializedFields: readonly string[]; + readonly uninitializedRequiredFields: readonly string[]; + readonly provenance: 'static-analysis'; + readonly loc?: ClassSemanticLocation; +} + export interface ClassSemanticInheritanceEdge { readonly from: string; readonly to: string; @@ -120,6 +144,7 @@ export interface ClassSemanticProtocolConformanceFact { export interface ClassSemanticFacts { readonly classes: readonly ClassSemanticClassFact[]; + readonly constructorFacts: readonly ClassSemanticConstructorFact[]; readonly inheritanceEdges: readonly ClassSemanticInheritanceEdge[]; readonly implementsEdges: readonly ClassSemanticImplementsEdge[]; readonly overrides: readonly ClassSemanticOverrideFact[]; @@ -2917,6 +2942,7 @@ export function 
collectClassSemanticFacts(root: IRNode | readonly IRNode[]): Cla return { classes: classes.map((info) => classSemanticFact(info, classByName)), + constructorFacts: classes.map((info) => classConstructorSemanticFact(info)), inheritanceEdges, implementsEdges, overrides: collectClassOverrideFacts(classes, classByName), @@ -2945,6 +2971,37 @@ function classSemanticFact(info: ClassInfo, classByName: ReadonlyMap + count + constructorBodyStatements(ctor).reduce((sum, statement) => sum + superCallCountInNode(statement), 0), + 0, + ); + const declaredFields = declaredInstanceFieldNames(info); + const declaredFieldSet = new Set(declaredFields); + const initializedFields = sortedUnique( + [...fieldInitializerNames(info), ...constructorThisAssignmentNames(info)].filter((name) => + declaredFieldSet.has(name), + ), + ); + return { + className: info.name, + hasConstructor: info.constructors.length > 0, + constructorCount: info.constructors.length, + hasBase: Boolean(info.baseName), + requiresSuper: Boolean(info.baseName) && info.constructors.length > 0, + superStatus: constructorSuperStatus(info, superDiagnostics), + superCallCount, + thisBeforeSuper: superDiagnostics.some((diagnostic) => diagnostic.rule === 'class-constructor-this-before-super'), + declaredFields, + initializedFields, + uninitializedRequiredFields: uninitializedRequiredFieldNames(info, initializedFields), + provenance: 'static-analysis', + ...(info.node.loc ? 
{ loc: semanticLocation(info.node) } : {}), + }; +} + function classMemberSemanticFact( member: ClassMemberInfo, className = member.owner, @@ -2966,6 +3023,215 @@ function classMemberSemanticFact( }; } +function constructorSuperDiagnostics(info: ClassInfo): SemanticViolation[] { + const violations: SemanticViolation[] = []; + if (!info.baseName) return violations; + for (const ctor of info.constructors) validateDerivedConstructorDiscipline(info, ctor, violations); + return violations; +} + +function constructorSuperStatus( + info: ClassInfo, + diagnostics: readonly SemanticViolation[], +): ClassSemanticConstructorSuperStatus { + if (!info.baseName || info.constructors.length === 0) return 'not-required'; + const rules = new Set(diagnostics.map((diagnostic) => diagnostic.rule)); + if (rules.has('class-constructor-this-before-super')) return 'this-before-super'; + if (rules.has('class-constructor-double-super')) return 'double'; + if (rules.has('class-constructor-conditional-super')) return 'conditional'; + if (rules.has('class-constructor-missing-super')) return 'missing'; + return 'satisfied'; +} + +function declaredInstanceFieldNames(info: ClassInfo): string[] { + return sortedUnique( + info.members.filter((member) => member.kind === 'field' && !member.static).map((member) => member.name), + ); +} + +function requiredInstanceFieldNames(info: ClassInfo): string[] { + return sortedUnique( + info.members + .filter((member) => member.kind === 'field' && !member.static && !isTrueFlag(member.node.props?.optional)) + .map((member) => member.name), + ); +} + +function fieldInitializerNames(info: ClassInfo): string[] { + return sortedUnique( + info.members + .filter( + (member) => + member.kind === 'field' && + !member.static && + (Object.hasOwn(member.node.props ?? {}, 'value') || Object.hasOwn(member.node.props ?? 
{}, 'default')), + ) + .map((member) => member.name), + ); +} + +function constructorThisAssignmentNames(info: ClassInfo): string[] { + if (info.constructors.length === 0) return []; + const constructorAssignments: string[][] = []; + for (const ctor of info.constructors) { + constructorAssignments.push([...definiteThisAssignmentsInStatements(constructorBodyStatements(ctor))]); + } + const [first = [], ...rest] = constructorAssignments; + return sortedUnique([...rest.reduce((common, names) => setIntersection(common, new Set(names)), new Set(first))]); +} + +interface ThisAssignmentPathStates { + readonly continuing: Set[]; + readonly exited: Set[]; +} + +function definiteThisAssignmentsInStatements(statements: readonly IRNode[], initial = new Set()): Set { + const states = thisAssignmentPathStatesInStatements(statements, [new Set(initial)]); + const [first = new Set(), ...rest] = states.exited.concat(states.continuing); + return rest.reduce((common, names) => setIntersection(common, names), new Set(first)); +} + +function thisAssignmentPathStatesInStatements( + statements: readonly IRNode[], + initialStates: readonly ReadonlySet[], +): ThisAssignmentPathStates { + let continuing = initialStates.map((state) => new Set(state)); + const exited: Set[] = []; + for (let index = 0; index < statements.length; index += 1) { + const statement = statements[index]; + if (statement.type === 'else') continue; + const nextContinuing: Set[] = []; + if (statement.type === 'if') { + const maybeElse = statements[index + 1]?.type === 'else' ? statements[index + 1] : undefined; + for (const state of continuing) { + const thenStates = thisAssignmentPathStatesInStatements(statement.children ?? [], [state]); + const elseStates = maybeElse + ? thisAssignmentPathStatesInStatements(maybeElse.children ?? 
[], [state]) + : { continuing: [new Set(state)], exited: [] }; + nextContinuing.push(...thenStates.continuing, ...elseStates.continuing); + exited.push(...thenStates.exited, ...elseStates.exited); + } + continuing = nextContinuing; + if (maybeElse) index += 1; + continue; + } + if (statement.type === 'try') { + const tryStates = thisAssignmentTryPathStates(statement, continuing); + continuing = tryStates.continuing; + exited.push(...tryStates.exited); + continue; + } + if (statement.type === 'return') { + exited.push(...continuing.map((state) => new Set(state))); + continuing = []; + continue; + } + if (statement.type === 'throw' || statement.type === 'break' || statement.type === 'continue') { + continuing = []; + continue; + } + if (statement.type === 'while' || statement.type === 'for' || statement.type === 'each') { + continue; + } + for (const state of continuing) { + const next = new Set(state); + const directName = + statement.type === 'assign' && isSimpleAssignment(statement) + ? thisMemberName(expressionPropText(statement.props?.target)) + : undefined; + if (directName) next.add(directName); + nextContinuing.push(next); + } + continuing = nextContinuing; + } + return { continuing, exited }; +} + +function thisAssignmentTryPathStates( + statement: IRNode, + initialStates: readonly ReadonlySet[], +): ThisAssignmentPathStates { + const children = statement.children ?? []; + const catchNode = children.find((child) => child.type === 'catch'); + const finallyNode = children.find((child) => child.type === 'finally'); + const tryChildren = children.filter((child) => child.type !== 'catch' && child.type !== 'finally'); + const tryStates = thisAssignmentPathStatesInStatements(tryChildren, initialStates); + const catchStates = catchNode + ? thisAssignmentPathStatesInStatements(catchNode.children ?? 
[], initialStates) + : { continuing: [], exited: [] }; + const continuing = [...tryStates.continuing, ...catchStates.continuing]; + const exited = [...tryStates.exited, ...catchStates.exited]; + if (!finallyNode) return { continuing, exited }; + + const continuingAfterFinally = thisAssignmentPathStatesInStatements(finallyNode.children ?? [], continuing); + const exitingAfterFinally = thisAssignmentPathStatesInStatements(finallyNode.children ?? [], exited); + return { + continuing: continuingAfterFinally.continuing, + exited: [...continuingAfterFinally.exited, ...exitingAfterFinally.continuing, ...exitingAfterFinally.exited], + }; +} + +function isSimpleAssignment(statement: IRNode): boolean { + const op = statement.props?.op; + return op === undefined || op === null || op === '' || op === '='; +} + +function setIntersection(left: ReadonlySet, right: ReadonlySet): Set { + const out = new Set(); + for (const value of left) { + if (right.has(value)) out.add(value); + } + return out; +} + +function uninitializedRequiredFieldNames(info: ClassInfo, initializedFields: readonly string[]): string[] { + const initialized = new Set(initializedFields); + return requiredInstanceFieldNames(info).filter((name) => !initialized.has(name)); +} + +function thisMemberName(text: string | undefined): string | undefined { + if (!text) return undefined; + try { + const value = parseExpression(text); + if (value.kind === 'member' && value.object.kind === 'ident' && value.object.name === 'this') { + return value.property; + } + if ( + value.kind === 'index' && + value.object.kind === 'ident' && + value.object.name === 'this' && + value.index.kind === 'strLit' + ) { + return value.index.value; + } + return undefined; + } catch { + return undefined; + } +} + +function superCallCountInNode(node: IRNode): number { + let count = 0; + walkSemanticTreeUntil(node, (candidate) => { + if (candidate !== node && candidate.type === 'class') return 'stop'; + for (const prop of BODY_EXPRESSION_PROPS) { + 
const text = expressionPropText(candidate.props?.[prop]); + if (!text) continue; + try { + count += valueIRSuperConstructorCallCount(parseExpression(text)); + } catch {} + } + return 'continue'; + }); + return count; +} + +function valueIRSuperConstructorCallCount(value: ValueIR): number { + if (value.kind === 'lambda') return 0; + const own = value.kind === 'call' && value.callee.kind === 'ident' && value.callee.name === 'super' ? 1 : 0; + return own + valueIRChildren(value).reduce((count, child) => count + valueIRSuperConstructorCallCount(child), 0); +} + function effectiveClassMemberFacts( info: ClassInfo, classByName: ReadonlyMap, diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 03f68a86..fc8a1649 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -230,6 +230,231 @@ describe('KERN semantic substrate', () => { ); }); + test('exports constructor discipline and field initialization facts', () => { + const facts = collectClassSemanticFacts( + parseRoot( + [ + 'class name=Base', + ' field name=id type=string', + 'class name=Good extends=Base', + ' field name=name type=string', + ' constructor', + ' param name=id type=string', + ' param name=name type=string', + ' handler lang=kern', + ' do value="super(id)"', + ' assign target="this.name" value="name"', + 'class name=MissingSuper extends=Base', + ' constructor', + ' handler lang=kern', + ' do value=1', + 'class name=DoubleSuper extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' do value="super()"', + 'class name=ConditionalSuper extends=Base', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + 'class name=ThisBeforeSuper extends=Base', + ' constructor', + ' handler lang=kern', + ' assign target="this.name" value="\'Ada\'"', + ' do value="super()"', + 'class name=DelayedSuper 
extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="(() => super())"', + 'class name=BranchInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' assign target="this.name" value="\'Ada\'"', + 'class name=CompleteBranchInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' assign target="this.name" value="\'Ada\'"', + ' else', + ' assign target="this.name" value="\'Grace\'"', + 'class name=CompoundInit', + ' field name=count type=number', + ' constructor', + ' handler lang=kern', + ' assign target="this.count" op="+=" value=1', + 'class name=IndexInit', + ' field name=name type=string', + ' constructor', + ' handler lang=kern', + ' assign target="this[\'name\']" value="\'Ada\'"', + 'class name=UndeclaredInit', + ' field name=name type=string', + ' constructor', + ' handler lang=kern', + ' assign target="this.extra" value="\'ignored\'"', + ' assign target="this.name" value="\'Ada\'"', + 'class name=EarlyReturnInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' return value=undefined', + ' assign target="this.name" value="\'Ada\'"', + 'class name=TryCatchInit', + ' field name=name type=string', + ' constructor', + ' handler lang=kern', + ' try', + ' assign target="this.name" value="\'Ada\'"', + ' catch name=err', + ' assign target="this.name" value="\'Grace\'"', + 'class name=FinallyReturnInit', + ' field name=name type=string', + ' constructor', + ' handler lang=kern', + ' try', + ' return value=undefined', + ' finally', + ' assign target="this.name" value="\'Ada\'"', + 'class name=LoopInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' while cond=ready', + ' assign target="this.name" value="\'Ada\'"', + 'class 
name=ThrowBranchInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' throw value="new Error(\'stop\')"', + ' assign target="this.name" value="\'Ada\'"', + 'class name=Defaults', + ' field name=ready type=boolean value=true', + ' field name=optionalName type=string optional=true', + ' field name=missing type=string', + ].join('\n'), + ), + ); + + expect(facts.constructorFacts).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'Base', + requiresSuper: false, + superStatus: 'not-required', + declaredFields: ['id'], + initializedFields: [], + uninitializedRequiredFields: ['id'], + provenance: 'static-analysis', + }), + expect.objectContaining({ + className: 'Good', + requiresSuper: true, + superStatus: 'satisfied', + superCallCount: 1, + thisBeforeSuper: false, + declaredFields: ['name'], + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'MissingSuper', + superStatus: 'missing', + superCallCount: 0, + }), + expect.objectContaining({ + className: 'DoubleSuper', + superStatus: 'double', + superCallCount: 2, + }), + expect.objectContaining({ + className: 'ConditionalSuper', + superStatus: 'conditional', + superCallCount: 1, + }), + expect.objectContaining({ + className: 'ThisBeforeSuper', + superStatus: 'this-before-super', + thisBeforeSuper: true, + }), + expect.objectContaining({ + className: 'DelayedSuper', + superStatus: 'missing', + superCallCount: 0, + }), + expect.objectContaining({ + className: 'BranchInit', + initializedFields: [], + uninitializedRequiredFields: ['name'], + }), + expect.objectContaining({ + className: 'CompleteBranchInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'CompoundInit', + initializedFields: [], + uninitializedRequiredFields: ['count'], + }), + expect.objectContaining({ + className: 
'IndexInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'UndeclaredInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'EarlyReturnInit', + initializedFields: [], + uninitializedRequiredFields: ['name'], + }), + expect.objectContaining({ + className: 'TryCatchInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'FinallyReturnInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'LoopInit', + initializedFields: [], + uninitializedRequiredFields: ['name'], + }), + expect.objectContaining({ + className: 'ThrowBranchInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'Defaults', + requiresSuper: false, + superStatus: 'not-required', + declaredFields: ['missing', 'optionalName', 'ready'], + initializedFields: ['ready'], + uninitializedRequiredFields: ['missing'], + }), + ]), + ); + }); + test('reports unresolved bases and inheritance cycles as class facts', () => { const facts = collectClassSemanticFacts( parseRoot( From fb1febf0477eb575224ba1511884d23ded06fdce Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 05:14:34 +0200 Subject: [PATCH 35/46] feat(core): enforce class implements at runtime --- packages/core/src/core-runtime/index.ts | 174 ++++++++++++++- packages/core/tests/core-runtime.test.ts | 273 +++++++++++++++++++++++ 2 files changed, 446 insertions(+), 1 deletion(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 0f68395f..fc6b7863 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -13,6 +13,7 @@ import { coreFixtureValueToKernValue, kernValueToCoreFixtureValue, } from './contract-adapter.js'; +import { 
collectCoreShapeFacts, validateCoreShape } from './shape-validator.js'; import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; @@ -55,6 +56,7 @@ export interface KernClassValue { node: IRNode; env: CoreRuntimeEnv; staticFields: Record; + runtimeRootContext?: IRNode | readonly IRNode[]; } export interface KernInstanceValue { @@ -104,6 +106,7 @@ export interface CreateCoreRuntimeEnvOptions { export class CoreRuntimeEnv { private readonly bindings = new Map(); + private runtimeRootContext?: IRNode | readonly IRNode[]; constructor(readonly parent?: CoreRuntimeEnv) {} @@ -135,6 +138,14 @@ export class CoreRuntimeEnv { child(): CoreRuntimeEnv { return new CoreRuntimeEnv(this); } + + setRuntimeRootContext(root: IRNode | readonly IRNode[]): void { + this.runtimeRootContext = root; + } + + getRuntimeRootContext(): IRNode | readonly IRNode[] | undefined { + return this.runtimeRootContext ?? this.parent?.getRuntimeRootContext(); + } } export const kNull = (): KernValue => brandValue({ kind: 'null' }); @@ -247,6 +258,7 @@ export function runCoreRuntime( nodeOrNodes: IRNode | readonly IRNode[], env = createCoreRuntimeEnv(), ): CoreRuntimeResult { + env.setRuntimeRootContext(nodeOrNodes); const nodes: readonly IRNode[] = isIRNodeArray(nodeOrNodes) ? nodeOrNodes : runtimeChildren(nodeOrNodes); return { completion: executeSequence(nodes, env), env }; } @@ -279,6 +291,10 @@ function executeSequence(nodes: readonly IRNode[], env: CoreRuntimeEnv): CoreCom function executeNode(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { switch (node.type) { + case 'interface': + case 'import': + case 'use': + return { kind: 'normal', value: kUndefined() }; case 'handler': case '__block': return executeSequence(node.children ?? [], env); @@ -723,6 +739,7 @@ function makeClass(node: IRNode, env: CoreRuntimeEnv): KernClassValue { node, env, staticFields: createRecordEntries(), + ...(env.getRuntimeRootContext() ? 
{ runtimeRootContext: env.getRuntimeRootContext() } : {}), }); } @@ -746,9 +763,102 @@ function constructClassValue(klass: KernClassValue, args: readonly KernValue[]): initializedClasses: new Set(), }); initializeClassLayer(instance, klass, args, true); + validateImplementedClassProtocols(instance, klass); return instance; } +function validateImplementedClassProtocols(instance: KernInstanceValue, klass: KernClassValue): void { + const factsByRoot = new Map>(); + for (const layer of classHierarchyFromBase(klass)) { + const root = layer.runtimeRootContext ?? layer.env.getRuntimeRootContext(); + if (!root) continue; + const facts = factsByRoot.get(root) ?? collectCoreShapeFacts(root); + factsByRoot.set(root, facts); + const shapeByName = new Map(facts.interfaces.map((shape) => [shape.name, shape])); + const importedProtocolNames = runtimeImportedProtocolNames(root); + for (const interfaceName of runtimeClassReferenceNames(layer.node.props?.implements)) { + const shape = shapeByName.get(interfaceName); + if (!shape) { + if (importedProtocolNames.has(interfaceName)) continue; + throw new Error(`KERN core runtime class '${klass.name}' implements unknown interface '${interfaceName}'.`); + } + if (!shape.validatorAvailable || shape.indexers.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' implements interface '${interfaceName}' that is not executable as a class protocol in v1.`, + ); + } + const projection = classProtocolProjection( + instance, + shape.fields.map((field) => field.name), + ); + const result = validateCoreShape(projection, interfaceName, root); + if (result.passed) continue; + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}':\n${result.diagnostics + .map((diagnostic) => diagnostic.message) + .join('\n')}`, + ); + } + } +} + +function classProtocolProjection(instance: KernInstanceValue, fieldNames: readonly string[]): KernValue { + const entries = createRecordEntries(); + 
for (const fieldName of fieldNames) { + if (Object.hasOwn(instance.fields, fieldName)) { + entries[fieldName] = instance.fields[fieldName] ?? kUndefined(); + continue; + } + const member = findReadableClassShapeMember(instance.classValue, fieldName, false); + if (member?.kind !== 'getter') continue; + entries[fieldName] = evalInstanceMember(instance, fieldName); + } + return brandValue({ kind: 'record', entries }); +} + +function classHierarchyFromBase(klass: KernClassValue): KernClassValue[] { + const base = resolveBaseClass(klass); + return base ? [...classHierarchyFromBase(base), klass] : [klass]; +} + +function runtimeImportedProtocolNames(rootOrNodes: IRNode | readonly IRNode[]): Set { + const names = new Set(); + const visit = (node: IRNode): void => { + if (node.type === 'import') { + for (const name of runtimeImportLocalNames(node)) names.add(name); + } + if (node.type === 'use') { + for (const child of node.children ?? []) { + if (child.type !== 'from') continue; + const kind = runtimeStringProp(child.props?.kind); + if (kind && kind !== 'interface' && kind !== 'type') continue; + const localName = runtimeStringProp(child.props?.as) ?? runtimeStringProp(child.props?.name); + if (localName) names.add(localName); + } + } + for (const child of node.children ?? []) visit(child); + }; + for (const node of isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]) visit(node); + return names; +} + +function runtimeImportLocalNames(node: IRNode): string[] { + const names: string[] = []; + const props = node.props ?? {}; + const defaultName = runtimeStringProp(props.default); + if (defaultName && defaultName !== 'true') names.push(defaultName); + const rawNames = runtimeStringProp(props.names); + if (rawNames) { + for (const raw of rawNames.split(',')) { + const name = raw.trim(); + const aliasMatch = /^([A-Za-z_$][\w$]*)(?:\s+as\s+([A-Za-z_$][\w$]*))?$/u.exec(name); + if (aliasMatch) names.push(aliasMatch[2] ?? 
aliasMatch[1]); + else if (/^[A-Za-z_$][\w$]*$/u.test(name)) names.push(name); + } + } + return names; +} + function initializeClassLayer( instance: KernInstanceValue, klass: KernClassValue, @@ -1217,6 +1327,68 @@ function classBaseName(value: unknown): string | undefined { return match?.[1]; } +function runtimeStringProp(value: unknown): string | undefined { + return typeof value === 'string' && value.length > 0 ? value : undefined; +} + +function runtimeClassReferenceNames(value: unknown): string[] { + if (typeof value !== 'string' || !value.trim()) return []; + const parts = splitRuntimeClassReferenceList(value); + const names = new Set(); + for (const part of parts) { + const name = runtimeClassReferenceName(part); + if (!name) throw new Error(`implements= contains an invalid reference: ${part}.`); + names.add(name); + } + return [...names]; +} + +function runtimeClassReferenceName(value: string): string | undefined { + const trimmed = value.trim(); + const match = /^([A-Za-z_$][\w$]*)(?:\s*<[\s\S]*>)?$/u.exec(trimmed); + return match?.[1]; +} + +function splitRuntimeClassReferenceList(raw: string): string[] { + const out: string[] = []; + let current = ''; + let depth = 0; + let angleDepth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + current += ch; + if (ch === '\\' && index + 1 < raw.length) current += raw[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + current += ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if (ch === ')' || ch === ']' || ch === '}') depth -= 1; + else if (ch === '<') angleDepth += 1; + else if (ch === '>' && angleDepth > 0) angleDepth -= 1; + if (depth < 0 || angleDepth < 0) throw new Error('implements= has unbalanced delimiters.'); + if (ch === ',' && depth === 0 && angleDepth === 0) { + const part = 
current.trim(); + if (part.length === 0) throw new Error('implements= contains an empty reference.'); + out.push(part); + current = ''; + continue; + } + current += ch; + } + if (quote !== null || depth !== 0 || angleDepth !== 0) throw new Error('implements= has unbalanced delimiters.'); + const tail = current.trim(); + if (tail.length === 0 && raw.trim().endsWith(',')) throw new Error('implements= contains an empty reference.'); + if (tail.length > 0) out.push(tail); + return out; +} + function classThisEnv(klass: KernClassValue, receiver: KernInstanceValue): CoreRuntimeEnv { const env = klass.env.child(); env.define('this', receiver); @@ -1510,7 +1682,7 @@ function isKernValueShape(value: unknown, seen: WeakSet): value is KernV ); case 'class': return ( - hasOnlyKeys(value, ['kind', 'name', 'node', 'env', 'staticFields']) && + hasOnlyKeys(value, ['kind', 'name', 'node', 'env', 'staticFields'], ['runtimeRootContext']) && typeof value.name === 'string' && isPlainRecord(value.node) && value.env instanceof CoreRuntimeEnv && diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 8c9cbd17..10d7e6bd 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -359,6 +359,279 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('make()', env))).toBe(6); }); + test('enforces implemented interface fields after class construction', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements=Named', + ' field name=id type=string value="unset"', + ' field name=name type=string value="Ada"', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User("u1").id', env))).toBe('u1'); + 
expect(toHostValue(evalCoreExpression('new User("u1").name', env))).toBe('Ada'); + }); + + test('rejects constructed classes that miss implemented interface fields', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=name type=string', + 'class name=User implements=Named', + ' constructor', + ' handler', + ' do value="1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow("class 'User' violates implemented interface 'Named'"); + expect(() => evalCoreExpression('new User()', env)).toThrow('missing required field Named.name'); + }); + + test('rejects constructed classes with wrong implemented interface field types', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements=Named', + ' field name=id type=number value={{ 1 }}', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('expected Named.id to be string, got number'); + }); + + test('enforces inherited interface fields for implemented protocols', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Named extends=Entity', + ' field name=name type=string', + 'class name=User implements=Named', + ' field name=id type=string value="u1"', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('u1'); + expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); + }); + + test('rejects classes missing inherited implemented interface fields', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Named extends=Entity', + ' field name=name type=string', + 
'class name=User implements=Named', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('missing required field Named.id'); + }); + + test('validates getter-backed implemented interface fields', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=name type=string', + 'class name=User implements=Named', + ' field name=first type=string value="Ada"', + ' getter name=name returns=string', + ' handler', + ' return value="this.first"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); + }); + + test('enforces base class implemented protocols on derived instances', () => { + const root = parse( + [ + 'interface name=EntityLike', + ' field name=id type=string', + 'class name=Entity implements=EntityLike', + ' field name=id type=string value="base"', + 'class name=User extends=Entity', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('base'); + expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); + }); + + test('rejects derived instances when a base implemented protocol is unsatisfied', () => { + const root = parse( + [ + 'interface name=EntityLike', + ' field name=id type=string', + 'class name=Entity implements=EntityLike', + 'class name=User extends=Entity', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow( + "class 'User' violates implemented interface 'EntityLike'", + ); + }); + + test('class implements validation uses the declaration root context', () => { + 
const firstRoot = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements=Named', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const secondRoot = parse(['interface name=Named', ' field name=id type=number'].join('\n')); + const env = createCoreRuntimeEnv(); + runCoreRuntime(firstRoot, env); + runCoreRuntime(secondRoot, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('u1'); + }); + + test('base implemented protocols use the base declaration root context', () => { + const firstRoot = parse( + [ + 'interface name=EntityLike', + ' field name=id type=string', + 'class name=Entity implements=EntityLike', + ' field name=id type=string value="base"', + ].join('\n'), + ); + const secondRoot = parse( + [ + 'interface name=EntityLike', + ' field name=id type=number', + 'class name=User extends=Entity', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(firstRoot, env); + runCoreRuntime(secondRoot, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('base'); + }); + + test('runtime class protocols reject unsupported indexer interfaces', () => { + const root = parse( + [ + 'interface name=Dictionary', + ' indexer keyType=string type=string', + 'class name=User implements=Dictionary', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow( + "implements interface 'Dictionary' that is not executable as a class protocol in v1", + ); + }); + + test('malformed runtime implements lists fail instead of skipping validation', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements="Named,"', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + 
runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('implements= contains an empty reference'); + }); + + test('invalid runtime implements entries fail instead of being ignored', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements="123"', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('implements= contains an invalid reference: 123'); + }); + + test('runtime implements entries reject trailing junk', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements="Named junk"', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow( + 'implements= contains an invalid reference: Named junk', + ); + }); + + test('unknown local runtime implements targets fail instead of being ignored', () => { + const root = parse(['class name=User implements=MissingProtocol'].join('\n')); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow( + "class 'User' implements unknown interface 'MissingProtocol'", + ); + }); + + test('imported runtime implements targets are treated as external protocols', () => { + const root = parse( + [ + 'import from="./protocols" names=ExternalProtocol', + 'class name=User implements=ExternalProtocol', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('u1'); + }); + test('executes inherited fields getters methods and overrides', () => { const root = parse( [ From 
4d85443411da95b6c117ffd0249ed836a13f5f72 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 06:47:19 +0200 Subject: [PATCH 36/46] feat(core): enforce interface method protocols --- packages/core/src/codegen/type-system.ts | 25 ++ packages/core/src/core-runtime/index.ts | 233 ++++++++++++++- packages/core/src/schema.ts | 3 +- packages/core/src/semantic-validator.ts | 281 +++++++++++++++++- packages/core/tests/class-semantics.test.ts | 216 ++++++++++++++ packages/core/tests/codegen-core.test.ts | 33 ++ packages/core/tests/core-runtime.test.ts | 269 +++++++++++++++++ packages/core/tests/schema-validation.test.ts | 9 +- .../core/tests/semantic-substrate.test.ts | 8 +- 9 files changed, 1061 insertions(+), 16 deletions(-) diff --git a/packages/core/src/codegen/type-system.ts b/packages/core/src/codegen/type-system.ts index 8d812e9f..318e4f64 100644 --- a/packages/core/src/codegen/type-system.ts +++ b/packages/core/src/codegen/type-system.ts @@ -92,6 +92,14 @@ export function generateInterface(node: IRNode): string[] { const opt = fp.optional === 'true' || fp.optional === true ? '?' : ''; lines.push(` ${fieldName}${opt}: ${emitTypeAnnotation(fp.type, 'unknown', field)};`); } + for (const method of kids(node, 'method')) { + const mp = propsOf<'method'>(method); + const methodName = emitIdentifier(mp.name, 'method', method); + const generics = mp.generics ? emitTypeAnnotation(mp.generics, '', method) : ''; + const params = emitParamList(method, { stripDefaults: true }); + const returns = interfaceMethodReturnType(method, mp); + lines.push(` ${methodName}${generics}(${params}): ${returns};`); + } for (const idx of kids(node, 'indexer')) { const ip = propsOf<'indexer'>(idx); // `||` (not `??`) so an empty-string keyName also falls back to 'key'. 
@@ -105,6 +113,23 @@ export function generateInterface(node: IRNode): string[] { return lines; } +function interfaceMethodReturnType( + node: IRNode, + props: { returns?: string; async?: unknown; stream?: unknown; generator?: unknown }, +): string { + const isAsync = props.async === 'true' || props.async === true; + const isStream = props.stream === 'true' || props.stream === true; + const isGenerator = props.generator === 'true' || props.generator === true; + const returns = props.returns ? emitTypeAnnotation(props.returns, 'unknown', node) : ''; + const generatorPrefix = isAsync ? 'AsyncGenerator<' : 'Generator<'; + if (isStream) return returns.startsWith('AsyncGenerator<') ? returns : `AsyncGenerator<${returns || 'unknown'}>`; + if (isGenerator) { + if (returns.startsWith('Generator<') || returns.startsWith('AsyncGenerator<')) return returns; + return `${generatorPrefix}${returns || 'unknown'}>`; + } + return returns || 'void'; +} + // ── Discriminated Union ────────────────────────────────────────────────── export function generateUnion(node: IRNode): string[] { diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index fc6b7863..44d921cd 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -792,12 +792,21 @@ function validateImplementedClassProtocols(instance: KernInstanceValue, klass: K shape.fields.map((field) => field.name), ); const result = validateCoreShape(projection, interfaceName, root); - if (result.passed) continue; - throw new Error( - `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}':\n${result.diagnostics - .map((diagnostic) => diagnostic.message) - .join('\n')}`, - ); + if (!result.passed) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}':\n${result.diagnostics + .map((diagnostic) => diagnostic.message) + .join('\n')}`, + ); + } + const missingMethods = 
runtimeInterfaceProtocolMethods(root, interfaceName) + .filter((method) => !classHasRuntimeProtocolMethod(instance.classValue, method)) + .map((method) => method.name); + if (missingMethods.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}': missing or incompatible method(s): ${missingMethods.join(', ')}.`, + ); + } } } } @@ -816,6 +825,147 @@ function classProtocolProjection(instance: KernInstanceValue, fieldNames: readon return brandValue({ kind: 'record', entries }); } +interface RuntimeInterfaceProtocolMethod { + readonly name: string; + readonly arity: number; + readonly paramTypes: readonly string[]; + readonly async: boolean; + readonly stream: boolean; + readonly generator: boolean; + readonly returns?: string; +} + +function runtimeInterfaceProtocolMethods( + rootOrNodes: IRNode | readonly IRNode[], + interfaceName: string, +): RuntimeInterfaceProtocolMethod[] { + const interfaceByName = new Map(); + const visit = (node: IRNode): void => { + if (node.type === 'interface') { + const name = runtimeStringProp(node.props?.name); + if (name && !interfaceByName.has(name)) interfaceByName.set(name, node); + } + for (const child of node.children ?? []) visit(child); + }; + for (const node of isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]) visit(node); + + const resolve = (name: string, seen: ReadonlySet): RuntimeInterfaceProtocolMethod[] => { + if (seen.has(name)) return []; + const node = interfaceByName.get(name); + if (!node) return []; + const nextSeen = new Set(seen); + nextSeen.add(name); + const methods = new Map(); + for (const baseName of runtimeClassReferenceNames(node.props?.extends)) { + for (const method of resolve(baseName, nextSeen)) methods.set(method.name, method); + } + for (const child of node.children ?? 
[]) { + if (child.type !== 'method') continue; + const name = runtimeStringProp(child.props?.name); + if (!name) continue; + methods.set(name, { + name, + arity: runtimeParams(child).length, + paramTypes: runtimeParams(child).map((param) => param.type ?? ''), + async: runtimeBooleanProp(child.props?.async), + stream: runtimeBooleanProp(child.props?.stream), + generator: runtimeBooleanProp(child.props?.generator), + ...(runtimeStringProp(child.props?.returns) ? { returns: runtimeStringProp(child.props?.returns) } : {}), + }); + } + return [...methods.values()]; + }; + + return resolve(interfaceName, new Set()); +} + +function classHasRuntimeProtocolMethod(klass: KernClassValue, method: RuntimeInterfaceProtocolMethod): boolean { + const member = findReadableClassShapeMember(klass, method.name, false); + if (member?.kind !== 'method') return false; + if (runtimeBooleanProp(member.node.props?.private)) return false; + const params = runtimeParams(member.node); + if (params.length !== method.arity) return false; + if ( + !runtimeProtocolParamTypesCompatible( + params.map((param) => param.type ?? 
''), + method.paramTypes, + ) + ) + return false; + if (runtimeBooleanProp(member.node.props?.async) !== method.async) return false; + if (runtimeBooleanProp(member.node.props?.stream) !== method.stream) return false; + if (runtimeBooleanProp(member.node.props?.generator) !== method.generator) return false; + const returns = runtimeStringProp(member.node.props?.returns); + return runtimeProtocolReturnTypesCompatible( + returns, + { + async: runtimeBooleanProp(member.node.props?.async), + stream: runtimeBooleanProp(member.node.props?.stream), + generator: runtimeBooleanProp(member.node.props?.generator), + }, + method.returns, + method, + ); +} + +function runtimeProtocolParamTypesCompatible(actual: readonly string[], expected: readonly string[]): boolean { + return expected.every( + (type, index) => !type || normalizeRuntimeProtocolType(actual[index]) === normalizeRuntimeProtocolType(type), + ); +} + +function normalizeRuntimeProtocolType(type: string | undefined): string { + return compactRuntimeProtocolTypeWhitespace(type); +} + +function compactRuntimeProtocolTypeWhitespace(type: string | undefined): string { + let out = ''; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < (type ?? '').length; index += 1) { + const ch = (type ?? '')[index]; + if (quote !== null) { + out += ch; + if (ch === '\\' && index + 1 < (type ?? '').length) out += (type ?? 
'')[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + out += ch; + continue; + } + if (!/\s/.test(ch)) out += ch; + } + return out; +} + +function runtimeProtocolReturnTypesCompatible( + actual: string | undefined, + actualFlags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, + expected: string | undefined, + expectedFlags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, +): boolean { + return ( + normalizeRuntimeProtocolReturnType(actual, actualFlags) === + normalizeRuntimeProtocolReturnType(expected, expectedFlags) + ); +} + +function normalizeRuntimeProtocolReturnType( + returns: string | undefined, + flags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, +): string { + if (flags.stream) { + if (returns?.startsWith('AsyncGenerator<')) return returns; + return `AsyncGenerator<${returns || 'unknown'}>`; + } + if (flags.generator) { + if (returns?.startsWith('Generator<') || returns?.startsWith('AsyncGenerator<')) return returns; + return `${flags.async ? 'AsyncGenerator' : 'Generator'}<${returns || 'unknown'}>`; + } + return !returns || returns === 'void' ? 'void' : returns; +} + function classHierarchyFromBase(klass: KernClassValue): KernClassValue[] { const base = resolveBaseClass(klass); return base ? [...classHierarchyFromBase(base), klass] : [klass]; @@ -1331,6 +1481,10 @@ function runtimeStringProp(value: unknown): string | undefined { return typeof value === 'string' && value.length > 0 ? 
value : undefined; } +function runtimeBooleanProp(value: unknown): boolean { + return value === true || (typeof value === 'string' && value.trim().toLowerCase() === 'true'); +} + function runtimeClassReferenceNames(value: unknown): string[] { if (typeof value !== 'string' || !value.trim()) return []; const parts = splitRuntimeClassReferenceList(value); @@ -1389,6 +1543,28 @@ function splitRuntimeClassReferenceList(raw: string): string[] { return out; } +function runtimeAngleClosesBeforeNextTopLevelComma(raw: string, start: number): boolean { + let depth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = start; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + if (ch === '\\' && index + 1 < raw.length) index += 1; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if ((ch === ')' || ch === ']' || ch === '}') && depth > 0) depth -= 1; + else if (ch === '>' && depth === 0) return true; + else if (ch === ',' && depth === 0) return false; + } + return false; +} + function classThisEnv(klass: KernClassValue, receiver: KernInstanceValue): CoreRuntimeEnv { const env = klass.env.child(); env.define('this', receiver); @@ -1540,7 +1716,7 @@ function runtimeParams(node: IRNode): RuntimeParam[] { const raw = typeof node.props?.params === 'string' ? node.props.params : ''; if (!raw.trim()) return []; - return splitPortableExpressionList(raw, 'fn params=').map((part) => { + return splitRuntimeParamList(raw, 'fn params=').map((part) => { const defaultIndex = findRuntimeDefaultSeparator(part); const beforeDefault = defaultIndex >= 0 ? part.slice(0, defaultIndex) : part; const defaultExpr = defaultIndex >= 0 ? 
part.slice(defaultIndex + 1).trim() : undefined; @@ -1555,6 +1731,49 @@ function runtimeParams(node: IRNode): RuntimeParam[] { }); } +function splitRuntimeParamList(raw: string, propName: string): string[] { + const out: string[] = []; + let current = ''; + let depth = 0; + let angleDepth = 0; + let inDefault = false; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + current += ch; + if (ch === '\\' && index + 1 < raw.length) current += raw[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + current += ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if (ch === ')' || ch === ']' || ch === '}') depth -= 1; + else if (ch === '=' && depth === 0 && angleDepth === 0 && raw[index + 1] !== '>') inDefault = true; + else if (ch === '<' && (!inDefault || runtimeAngleClosesBeforeNextTopLevelComma(raw, index + 1))) angleDepth += 1; + else if (ch === '>' && angleDepth > 0) angleDepth -= 1; + if (depth < 0 || angleDepth < 0) throw new Error(`${propName} has unbalanced delimiters.`); + if (ch === ',' && depth === 0 && angleDepth === 0) { + const part = current.trim(); + if (part.length === 0) throw new Error(`${propName} contains an empty expression.`); + out.push(part); + current = ''; + inDefault = false; + continue; + } + current += ch; + } + if (quote !== null || depth !== 0 || angleDepth !== 0) throw new Error(`${propName} has unbalanced delimiters.`); + const tail = current.trim(); + if (tail.length === 0 && raw.trim().endsWith(',')) throw new Error(`${propName} contains an empty expression.`); + if (tail.length > 0) out.push(tail); + return out; +} + function runtimeParamDefaultExpr(node: IRNode): string | undefined { const propName = Object.hasOwn(node.props ?? {}, 'value') ? 'value' : 'default'; const rawValue = propName === 'value' ? 
node.props?.value : node.props?.default; diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 4f33dedd..01471c37 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -96,7 +96,7 @@ export const NODE_SCHEMAS: Record = { generics: { kind: 'rawExpr' }, export: { kind: 'boolean' }, }, - allowedChildren: ['field', 'indexer'], + allowedChildren: ['field', 'indexer', 'method'], }, indexer: { description: 'Index signature for an interface — [keyName: keyType]: type', @@ -219,6 +219,7 @@ export const NODE_SCHEMAS: Record = { returns: { kind: 'typeAnnotation' }, async: { kind: 'boolean' }, stream: { kind: 'boolean' }, + generator: { kind: 'boolean' }, private: { kind: 'boolean' }, static: { kind: 'boolean' }, generics: { kind: 'rawExpr' }, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index b9cef9f5..27b523ee 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -23,7 +23,6 @@ import { import { collectExternalImportSymbols, type ExternalImportSymbolTable } from './external-symbols.js'; import { importRegistryOf } from './import-metadata.js'; import { parseExpression } from './parser-expression.js'; -import { splitPortableExpressionList } from './portable-expression-list.js'; import { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; import type { IRNode } from './types.js'; import type { ValueIR } from './value-ir.js'; @@ -51,8 +50,13 @@ export interface ClassSemanticMemberFact { readonly name: string; readonly kind: ClassSemanticMemberKind; readonly static: boolean; + readonly private?: boolean; + readonly async?: boolean; + readonly stream?: boolean; + readonly generator?: boolean; readonly type?: string; readonly returns?: string; + readonly paramTypes?: readonly string[]; readonly arity: number; readonly readable: boolean; readonly writable: boolean; @@ -2699,8 +2703,13 @@ interface ClassMemberInfo { 
name: string; kind: ClassMemberKind; static: boolean; + private: boolean; + async: boolean; + stream: boolean; + generator: boolean; type?: string; returns?: string; + paramTypes: readonly string[]; arity: number; } @@ -2710,6 +2719,7 @@ interface InterfaceInfo { name: string; extendsNames: string[]; fields: InterfaceFieldInfo[]; + methods: InterfaceMethodInfo[]; } interface InterfaceFieldInfo { @@ -2718,6 +2728,16 @@ interface InterfaceFieldInfo { optional: boolean; } +interface InterfaceMethodInfo { + name: string; + returns?: string; + paramTypes: readonly string[]; + arity: number; + async: boolean; + stream: boolean; + generator: boolean; +} + interface ClassProtocolShapeContext { shapeByName: ReadonlyMap; diagnosticsByName: ReadonlyMap; @@ -2834,6 +2854,7 @@ function collectInterfaceInfos(root: IRNode, rootIndex = 0): InterfaceInfo[] { name, extendsNames: classReferenceNames(node.props?.extends, 'interface extends='), fields: collectInterfaceFields(node), + methods: collectInterfaceMethods(node), }); }); return out; @@ -2854,6 +2875,25 @@ function collectInterfaceFields(node: IRNode): InterfaceFieldInfo[] { return fields; } +function collectInterfaceMethods(node: IRNode): InterfaceMethodInfo[] { + const methods: InterfaceMethodInfo[] = []; + for (const child of node.children ?? []) { + if (child.type !== 'method') continue; + const name = stringProp(child, 'name'); + if (!name) continue; + methods.push({ + name, + ...(stringProp(child, 'returns') ? 
{ returns: stringProp(child, 'returns') } : {}), + paramTypes: memberParamTypes(child), + arity: memberArity(child), + async: isTrueFlag(child.props?.async), + stream: isTrueFlag(child.props?.stream), + generator: isTrueFlag(child.props?.generator), + }); + } + return methods; +} + function collectClassProtocolShapeContext(roots: readonly IRNode[]): ClassProtocolShapeContext { const facts = collectCoreShapeFacts(roots); const shapeByName = new Map(); @@ -2880,8 +2920,13 @@ function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { name, kind: child.type, static: isTrueFlag(child.props?.static), + private: isTrueFlag(child.props?.private), + async: isTrueFlag(child.props?.async), + stream: isTrueFlag(child.props?.stream), + generator: isTrueFlag(child.props?.generator), ...(stringProp(child, 'type') ? { type: stringProp(child, 'type') } : {}), ...(stringProp(child, 'returns') ? { returns: stringProp(child, 'returns') } : {}), + paramTypes: memberParamTypes(child), arity: memberArity(child), }); } @@ -3013,8 +3058,13 @@ function classMemberSemanticFact( name: member.name, kind: member.kind, static: member.static, + ...(member.private ? { private: true } : {}), + ...(member.async ? { async: true } : {}), + ...(member.stream ? { stream: true } : {}), + ...(member.generator ? { generator: true } : {}), ...(member.type ? { type: member.type } : {}), ...(member.returns ? { returns: member.returns } : {}), + ...(member.kind === 'method' && member.paramTypes.length > 0 ? 
{ paramTypes: member.paramTypes } : {}), arity: member.arity, readable: member.kind === 'field' || member.kind === 'getter' || member.kind === 'method', writable: member.kind === 'field' || member.kind === 'setter', @@ -3381,7 +3431,7 @@ function collectClassProtocolConformanceFacts( }); continue; } - const result = classInterfaceConformance(info, protocol, protocolShapeContext, classByName); + const result = classInterfaceConformance(info, protocol, protocolShapeContext, classByName, interfaceByName); facts.push({ className: info.name, interfaceName, @@ -3402,6 +3452,7 @@ function classInterfaceConformance( protocol: InterfaceInfo, protocolShapeContext: ClassProtocolShapeContext, classByName: ReadonlyMap, + interfaceByName: ReadonlyMap, ): ClassInterfaceConformanceResult { const shape = protocolShapeContext.shapeByName.get(protocol.name); const diagnostics = (protocolShapeContext.diagnosticsByName.get(protocol.name) ?? []).map( @@ -3431,6 +3482,7 @@ function classInterfaceConformance( const effectiveMembers = effectiveClassMemberFacts(info, classByName); const fields = shape?.fields ?? 
protocol.fields; const requiredFields = fields.filter((field) => !field.optional); + const requiredMethods = effectiveInterfaceMethods(protocol, interfaceByName); const missingMembers: string[] = []; const satisfiedMembers: string[] = []; for (const field of requiredFields) { @@ -3440,6 +3492,13 @@ function classInterfaceConformance( missingMembers.push(field.name); } } + for (const method of requiredMethods) { + if (classHasCallableInstanceMethod(effectiveMembers, method)) { + satisfiedMembers.push(method.name); + } else { + missingMembers.push(method.name); + } + } const missing = sortedUnique(missingMembers); const satisfied = sortedUnique(satisfiedMembers); return { @@ -3451,6 +3510,24 @@ function classInterfaceConformance( }; } +function effectiveInterfaceMethods( + protocol: InterfaceInfo, + interfaceByName: ReadonlyMap, + seen: ReadonlySet = new Set(), +): InterfaceMethodInfo[] { + if (seen.has(protocol.name)) return []; + const nextSeen = new Set(seen); + nextSeen.add(protocol.name); + const methods = new Map(); + for (const baseName of protocol.extendsNames) { + const base = interfaceByName.get(baseName); + if (!base) continue; + for (const method of effectiveInterfaceMethods(base, interfaceByName, nextSeen)) methods.set(method.name, method); + } + for (const method of protocol.methods) methods.set(method.name, method); + return [...methods.values()]; +} + function classHasReadableInstanceMember( members: readonly ClassSemanticMemberFact[], field: { readonly name: string; readonly type?: string }, @@ -3463,6 +3540,83 @@ function classHasReadableInstanceMember( }); } +function classHasCallableInstanceMethod( + members: readonly ClassSemanticMemberFact[], + method: InterfaceMethodInfo, +): boolean { + return members.some((member) => { + if (member.name !== method.name || member.static || member.private || member.kind !== 'method') return false; + if (member.arity !== method.arity) return false; + if (!methodParamTypesCompatible(member.paramTypes ?? 
[], method.paramTypes)) return false; + if ((member.async === true) !== method.async) return false; + if ((member.stream === true) !== method.stream) return false; + if ((member.generator === true) !== method.generator) return false; + return methodReturnTypesCompatible( + member.returns, + { + async: member.async === true, + stream: member.stream === true, + generator: member.generator === true, + }, + method.returns, + method, + ); + }); +} + +function methodParamTypesCompatible(actual: readonly string[], expected: readonly string[]): boolean { + return expected.every((type, index) => !type || normalizeProtocolType(actual[index]) === normalizeProtocolType(type)); +} + +function normalizeProtocolType(type: string | undefined): string { + return compactProtocolTypeWhitespace(type); +} + +function compactProtocolTypeWhitespace(type: string | undefined): string { + let out = ''; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < (type ?? '').length; index += 1) { + const ch = (type ?? '')[index]; + if (quote !== null) { + out += ch; + if (ch === '\\' && index + 1 < (type ?? '').length) out += (type ?? 
'')[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + out += ch; + continue; + } + if (!/\s/.test(ch)) out += ch; + } + return out; +} + +function methodReturnTypesCompatible( + actual: string | undefined, + actualFlags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, + expected: string | undefined, + expectedFlags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, +): boolean { + return normalizeMethodReturnType(actual, actualFlags) === normalizeMethodReturnType(expected, expectedFlags); +} + +function normalizeMethodReturnType( + returns: string | undefined, + flags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, +): string { + if (flags.stream) { + if (returns?.startsWith('AsyncGenerator<')) return returns; + return `AsyncGenerator<${returns || 'unknown'}>`; + } + if (flags.generator) { + if (returns?.startsWith('Generator<') || returns?.startsWith('AsyncGenerator<')) return returns; + return `${flags.async ? 'AsyncGenerator' : 'Generator'}<${returns || 'unknown'}>`; + } + return !returns || returns === 'void' ? 'void' : returns; +} + function semanticLocation(node: IRNode): ClassSemanticLocation | undefined { return node.loc ? 
{ line: node.loc.line, col: node.loc.col } : undefined; } @@ -3518,7 +3672,7 @@ function validateClassImplements( } continue; } - const conformance = classInterfaceConformance(info, protocol, protocolShapeContext, classByName); + const conformance = classInterfaceConformance(info, protocol, protocolShapeContext, classByName, interfaceByName); if (conformance.status === 'invalid-interface') { violations.push({ rule: 'class-implements-invalid-interface', @@ -4154,12 +4308,131 @@ function memberArity(node: IRNode): number { const params = node.props?.params; if (typeof params !== 'string' || !params.trim()) return 0; try { - return splitPortableExpressionList(params, `${node.type} params=`).length; + return splitSemanticParamList(params, `${node.type} params=`).length; } catch { return 0; } } +function memberParamTypes(node: IRNode): string[] { + const childParams = node.children?.filter((child) => child.type === 'param') ?? []; + if (childParams.length > 0) { + return childParams.map((param) => stringProp(param, 'type') ?? ''); + } + const params = node.props?.params; + if (typeof params !== 'string' || !params.trim()) return []; + try { + return splitSemanticParamList(params, `${node.type} params=`).map((part) => { + const typeIndex = part.indexOf(':'); + if (typeIndex < 0) return ''; + const typeAndMaybeDefault = part.slice(typeIndex + 1); + const defaultIndex = paramDefaultSeparatorIndex(typeAndMaybeDefault); + return (defaultIndex >= 0 ? 
typeAndMaybeDefault.slice(0, defaultIndex) : typeAndMaybeDefault).trim(); + }); + } catch { + return []; + } +} + +function paramDefaultSeparatorIndex(value: string): number { + let depth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < value.length; index += 1) { + const ch = value[index]; + if (quote !== null) { + if (ch === '\\' && index + 1 < value.length) index += 1; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + continue; + } + if (ch === '<' || ch === '(' || ch === '{' || ch === '[') depth += 1; + else if ((ch === '>' || ch === ')' || ch === '}' || ch === ']') && depth > 0) depth -= 1; + else if (ch === '=' && depth === 0) { + if ( + value[index + 1] === '>' || + value[index + 1] === '=' || + value[index - 1] === '=' || + value[index - 1] === '<' || + value[index - 1] === '>' || + value[index - 1] === '!' + ) { + continue; + } + return index; + } + } + return -1; +} + +function splitSemanticParamList(raw: string, propName: string): string[] { + const out: string[] = []; + let current = ''; + let depth = 0; + let angleDepth = 0; + let inDefault = false; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + current += ch; + if (ch === '\\' && index + 1 < raw.length) current += raw[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + current += ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if (ch === ')' || ch === ']' || ch === '}') depth -= 1; + else if (ch === '=' && depth === 0 && angleDepth === 0 && raw[index + 1] !== '>') inDefault = true; + else if (ch === '<' && (!inDefault || angleClosesBeforeNextTopLevelComma(raw, index + 1))) angleDepth += 1; + else if (!inDefault && ch === '>' && angleDepth > 0) angleDepth -= 1; + else if (inDefault && ch === 
'>' && angleDepth > 0) angleDepth -= 1; + if (depth < 0 || angleDepth < 0) throw new Error(`${propName} has unbalanced delimiters.`); + if (ch === ',' && depth === 0 && angleDepth === 0) { + const part = current.trim(); + if (part.length === 0) throw new Error(`${propName} contains an empty expression.`); + out.push(part); + current = ''; + inDefault = false; + continue; + } + current += ch; + } + if (quote !== null || depth !== 0 || angleDepth !== 0) throw new Error(`${propName} has unbalanced delimiters.`); + const tail = current.trim(); + if (tail.length === 0 && raw.trim().endsWith(',')) throw new Error(`${propName} contains an empty expression.`); + if (tail.length > 0) out.push(tail); + return out; +} + +function angleClosesBeforeNextTopLevelComma(raw: string, start: number): boolean { + let depth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = start; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + if (ch === '\\' && index + 1 < raw.length) index += 1; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if ((ch === ')' || ch === ']' || ch === '}') && depth > 0) depth -= 1; + else if (ch === '>' && depth === 0) return true; + else if (ch === ',' && depth === 0) return false; + } + return false; +} + function nodeBodyUsesSuper(node: IRNode): boolean { return nodeBodyExpressions(node).some((expr) => { try { diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 31e5f544..74c8dcaf 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -74,6 +74,195 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-implements-missing-member'); }); + test('accepts class implements when instance methods satisfy interface 
methods', () => { + const rules = rulesFor( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Base', + ' method name=run params="input:string" returns=number', + ' handler lang=kern', + ' return value="input.length"', + 'class name=Job extends=Base implements=Runnable', + ].join('\n'), + ); + + expect(rules).not.toContain('class-implements-missing-member'); + }); + + test('reports missing and incompatible interface methods for class implements', () => { + const violations = violationsFor( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + ' method name=stop returns=void', + 'class name=Job implements=Runnable', + ' method name=run returns=number', + ' handler lang=kern', + ' return value="1"', + ' getter name=stop returns=void', + ' handler lang=kern', + ' return value="undefined"', + ].join('\n'), + ); + + const violation = violations.find((candidate) => candidate.rule === 'class-implements-missing-member'); + expect(violation?.message).toContain('run'); + expect(violation?.message).toContain('stop'); + }); + + test('checks interface method parameter types and accepts implicit void returns', () => { + const acceptedRules = rulesFor( + [ + 'interface name=Lifecycle', + ' method name=close returns=void', + 'class name=Socket implements=Lifecycle', + ' method name=close', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + expect(acceptedRules).not.toContain('class-implements-missing-member'); + + const rejectedRules = rulesFor( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' method name=run params="input:number" returns=number', + ' handler lang=kern', + ' return value="input"', + ].join('\n'), + ); + expect(rejectedRules).toContain('class-implements-missing-member'); + }); + + test('requires stream interface methods to be implemented as stream methods', () => 
{ + const acceptedRules = rulesFor( + [ + 'interface name=Events', + ' method name=read returns=Event stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event stream=true', + ' handler lang=kern', + ' return value="undefined"', + ].join('\n'), + ); + expect(acceptedRules).not.toContain('class-implements-missing-member'); + + const rejectedRules = rulesFor( + [ + 'interface name=Events', + ' method name=read returns=Event stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event', + ' handler lang=kern', + ' return value="undefined"', + ].join('\n'), + ); + expect(rejectedRules).toContain('class-implements-missing-member'); + }); + + test('normalizes streamed method returns and generic parameter types for class implements', () => { + const streamedRules = rulesFor( + [ + 'interface name=Events', + ' method name=read returns="AsyncGenerator" stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event stream=true', + ' handler lang=kern', + ' return value="undefined"', + ].join('\n'), + ); + expect(streamedRules).not.toContain('class-implements-missing-member'); + + const genericParamRules = rulesFor( + [ + 'interface name=Sink', + ' method name=write params="item:Record" returns=void', + 'class name=BadSink implements=Sink', + ' method name=write params="item:Record" returns=void', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + expect(genericParamRules).toContain('class-implements-missing-member'); + + const literalWhitespaceRules = rulesFor( + [ + 'interface name=Sink', + ' method name=write params="item:\'a b\'" returns=void', + 'class name=BadSink implements=Sink', + ' method name=write params="item:\'ab\'" returns=void', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + expect(literalWhitespaceRules).toContain('class-implements-missing-member'); + }); + + test('rejects private protocol methods and tolerates 
whitespace/default comparison params', () => { + const privateRules = rulesFor( + [ + 'interface name=Runnable', + ' method name=run returns=number', + 'class name=Job implements=Runnable', + ' method name=run private=true returns=number', + ' handler lang=kern', + ' return value="1"', + ].join('\n'), + ); + expect(privateRules).toContain('class-implements-missing-member'); + + const whitespaceRules = rulesFor( + [ + 'interface name=Sink', + ' method name=write params="item:Record" returns=void', + 'class name=GoodSink implements=Sink', + ' method name=write params="item:Record" returns=void', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + expect(whitespaceRules).not.toContain('class-implements-missing-member'); + + const defaultComparisonRules = rulesFor( + [ + 'interface name=Calculator', + ' method name=calc params="value:number=1 < 2,unit:string" returns=number', + 'class name=DefaultCalc implements=Calculator', + ' method name=calc params="value:number=1 < 2,unit:string" returns=number', + ' handler lang=kern', + ' return value="value"', + ].join('\n'), + ); + expect(defaultComparisonRules).not.toContain('class-implements-missing-member'); + + const defaultEqualityRules = rulesFor( + [ + 'interface name=Comparator', + ' method name=cmp params="value:number=a==b,unit:string" returns=number', + 'class name=DefaultCmp implements=Comparator', + ' method name=cmp params="value:number=a==b,unit:string" returns=number', + ' handler lang=kern', + ' return value="value"', + ].join('\n'), + ); + expect(defaultEqualityRules).not.toContain('class-implements-missing-member'); + + const genericDefaultRules = rulesFor( + [ + 'interface name=Formatter', + ' method name=format params="value:Map=make>(),unit:string" returns=number', + 'class name=DefaultFormatter implements=Formatter', + ' method name=format params="value:Map=make>(),unit:string" returns=number', + ' handler lang=kern', + ' return value="1"', + ].join('\n'), + ); + 
expect(genericDefaultRules).not.toContain('class-implements-missing-member'); + }); + test('reports unknown class implements targets unless imported', () => { const localRules = rulesFor('class name=User implements=MissingProtocol'); expect(localRules).toContain('class-implements-unknown'); @@ -90,6 +279,15 @@ describe('semantic-validator — class object model', () => { expect(rules).toContain('class-implements-invalid-reference-list'); }); + test('parses generic implements references with default types containing commas', () => { + const rules = rulesFor( + ['interface name=Protocol', 'class name=User implements="Protocol>"'].join('\n'), + ); + + expect(rules).not.toContain('class-implements-invalid-reference-list'); + expect(rules).not.toContain('class-implements-unknown'); + }); + test('reports missing required readable instance members for class implements', () => { const violations = violationsFor( [ @@ -152,6 +350,24 @@ describe('semantic-validator — class object model', () => { expect(optionalityConflictRules).toContain('class-implements-invalid-interface'); }); + test('reports cyclic method protocols as invalid interfaces', () => { + const rules = rulesFor( + [ + 'interface name=A extends=B', + ' method name=a returns=void', + 'interface name=B extends=A', + ' method name=b returns=void', + 'class name=CycleImpl implements=A', + ' method name=a returns=void', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + + expect(rules).toContain('class-implements-invalid-interface'); + expect(rules).not.toContain('class-implements-missing-member'); + }); + test('reports interface indexers as unsupported class implements protocols in v1', () => { const rules = rulesFor( [ diff --git a/packages/core/tests/codegen-core.test.ts b/packages/core/tests/codegen-core.test.ts index 3cf70c23..b58771d1 100644 --- a/packages/core/tests/codegen-core.test.ts +++ b/packages/core/tests/codegen-core.test.ts @@ -562,6 +562,39 @@ describe('Core Language Codegen', () 
=> { }); }); + describe('interface', () => { + it('generates method signatures', () => { + const code = gen( + [ + 'interface name=Formatter', + ' field name=id type=string', + ' method name=format params="value:string,count:number" returns=string', + ].join('\n'), + ); + + expect(code).toContain('export interface Formatter {'); + expect(code).toContain('id: string;'); + expect(code).toContain('format(value: string, count: number): string;'); + }); + + it('strips defaults from interface method signatures', () => { + const code = gen( + ['interface name=Formatter', ' method name=format params="value:string,count:number=1" returns=string'].join( + '\n', + ), + ); + + expect(code).toContain('format(value: string, count: number): string;'); + expect(code).not.toContain('count: number = 1'); + }); + + it('generates streamed interface method signatures', () => { + const code = gen('interface name=Events\n method name=read returns=Event stream=true'); + + expect(code).toContain('read(): AsyncGenerator;'); + }); + }); + // ── Gap 1: Service (class) ── describe('service', () => { diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 10d7e6bd..fb30cd58 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -467,6 +467,275 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); }); + test('validates implemented interface methods without invoking them', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' field name=count type=number value={{ 0 }}', + ' method name=run params="input:string" returns=number', + ' handler', + ' assign target="this.count" value="this.count + 1"', + ' return value="input.length"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + 
+ expect(toHostValue(evalCoreExpression('new Job().count', env))).toBe(0); + expect(toHostValue(evalCoreExpression('new Job().run("abc")', env))).toBe(3); + }); + + test('rejects missing implemented interface methods', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' field name=id type=string value="j1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Job()', env)).toThrow('missing or incompatible method(s): run'); + }); + + test('rejects incompatible implemented interface method signatures', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' method name=run returns=number', + ' handler', + ' return value="1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Job()', env)).toThrow('missing or incompatible method(s): run'); + }); + + test('rejects implemented interface methods with incompatible parameter types', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' method name=run params="input:number" returns=number', + ' handler', + ' return value="input"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Job()', env)).toThrow('missing or incompatible method(s): run'); + }); + + test('accepts implicit void methods for explicit void interface methods', () => { + const root = parse( + [ + 'interface name=Lifecycle', + ' method name=close returns=void', + 'class name=Socket implements=Lifecycle', + ' method name=close', + ' handler', + ' do value="undefined"', + ].join('\n'), + ); + const env 
= createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Socket().close()', env))).toBeUndefined(); + }); + + test('rejects non-stream methods for stream interface methods', () => { + const root = parse( + [ + 'interface name=Events', + ' method name=read returns=Event stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event', + ' handler', + ' return value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Reader()', env)).toThrow('missing or incompatible method(s): read'); + }); + + test('normalizes streamed method returns for implemented interface methods', () => { + const root = parse( + [ + 'interface name=Events', + ' method name=read returns="AsyncGenerator" stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event stream=true', + ' handler', + ' return value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Reader()', env)).not.toThrow(); + }); + + test('rejects generic parameter type mismatches in implemented interface methods', () => { + const root = parse( + [ + 'interface name=Sink', + ' method name=write params="item:Record" returns=void', + 'class name=BadSink implements=Sink', + ' method name=write params="item:Record" returns=void', + ' handler', + ' do value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new BadSink()', env)).toThrow('missing or incompatible method(s): write'); + }); + + test('preserves quoted whitespace in implemented interface method parameter types', () => { + const root = parse( + [ + 'interface name=Sink', + ' method name=write params="item:\'a b\'" returns=void', + 'class name=BadSink implements=Sink', + ' method name=write params="item:\'ab\'" 
returns=void', + ' handler', + ' do value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new BadSink()', env)).toThrow('missing or incompatible method(s): write'); + }); + + test('rejects private methods for implemented interface methods', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run returns=number', + 'class name=Job implements=Runnable', + ' method name=run private=true returns=number', + ' handler', + ' return value="1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Job()', env)).toThrow('missing or incompatible method(s): run'); + }); + + test('normalizes whitespace in implemented interface method parameter types', () => { + const root = parse( + [ + 'interface name=Sink', + ' method name=write params="item:Record" returns=void', + 'class name=GoodSink implements=Sink', + ' method name=write params="item:Record" returns=void', + ' handler', + ' do value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new GoodSink()', env)).not.toThrow(); + }); + + test('parses default comparison expressions in implemented interface method params', () => { + const root = parse( + [ + 'interface name=Calculator', + ' method name=calc params="value:number=1 < 2,unit:string=\'m\'" returns=number', + 'class name=DefaultCalc implements=Calculator', + ' method name=calc params="value:number=1 < 2,unit:string=\'m\'" returns=number', + ' handler', + ' return value="value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new DefaultCalc().calc()', env))).toBe(true); + }); + + test('parses default equality expressions in implemented interface method params', () => { + const root = parse( + [ + 'interface 
name=Comparator', + ' method name=cmp params="value:number=1==1,unit:string=\'m\'" returns=number', + 'class name=DefaultCmp implements=Comparator', + ' method name=cmp params="value:number=1==1,unit:string=\'m\'" returns=number', + ' handler', + ' return value="value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new DefaultCmp().cmp()', env))).toBe(true); + }); + + test('parses generic default expressions in implemented interface method params', () => { + const root = parse( + [ + 'interface name=Formatter', + ' method name=format params="value:Map=make>(),unit:string" returns=number', + 'class name=DefaultFormatter implements=Formatter', + ' method name=format params="value:Map=make>(),unit:string" returns=number', + ' handler', + ' return value="1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new DefaultFormatter()', env)).not.toThrow(); + }); + + test('enforces inherited interface methods for implemented protocols', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'interface name=NamedRunnable extends=Runnable', + ' field name=name type=string', + 'class name=Job implements=NamedRunnable', + ' field name=name type=string value="job"', + ' method name=run params="input:string" returns=number', + ' handler', + ' return value="input.length"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Job().run("abcd")', env))).toBe(4); + }); + + test('parses generic implements references with default types containing commas', () => { + const root = parse( + ['interface name=Protocol', 'class name=User implements="Protocol>"'].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new 
User()', env)).not.toThrow(); + }); + test('enforces base class implemented protocols on derived instances', () => { const root = parse( [ diff --git a/packages/core/tests/schema-validation.test.ts b/packages/core/tests/schema-validation.test.ts index b259a609..40012e58 100644 --- a/packages/core/tests/schema-validation.test.ts +++ b/packages/core/tests/schema-validation.test.ts @@ -437,8 +437,8 @@ describe('Schema Validation', () => { describe('allowed children', () => { it('flags wrong child type in interface', () => { - const v = validate(['interface name=User', ' method name=foo'].join('\n')); - expect(v.some((v) => v.message.includes("does not allow child type 'method'"))).toBe(true); + const v = validate(['interface name=User', ' const name=foo value=1'].join('\n')); + expect(v.some((v) => v.message.includes("does not allow child type 'const'"))).toBe(true); }); it('allows field in interface', () => { @@ -446,6 +446,11 @@ describe('Schema Validation', () => { expect(v).toHaveLength(0); }); + it('allows method in interface', () => { + const v = validate(['interface name=User', ' method name=displayName returns=string'].join('\n')); + expect(v).toHaveLength(0); + }); + it('allows handler as universal child', () => { // handler is a universal child allowed everywhere const v = validate(['fn name=foo', ' handler <<>>'].join('\n')); diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index fc8a1649..7f29874b 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -525,6 +525,7 @@ describe('KERN semantic substrate', () => { 'import from="./protocols" names=ExternalProtocol', 'interface name=Entity', ' field name=id type=string', + ' method name=load params="id:string" returns=string', 'interface name=Named extends=Entity', ' field name=name type=string', 'interface name=BrokenProtocol extends=MissingBaseProtocol', @@ -537,6 +538,9 @@ describe('KERN 
semantic substrate', () => { ' getter name=name returns=string', ' handler lang=kern', ' return value="this.id"', + ' method name=load params="id:string" returns=string', + ' handler lang=kern', + ' return value="id"', 'class name=Broken implements=Named', ' field name=id type=string', 'class name=Invalid implements=BrokenProtocol', @@ -560,7 +564,7 @@ describe('KERN semantic substrate', () => { className: 'User', interfaceName: 'Named', status: 'satisfied', - satisfiedMembers: ['id', 'name'], + satisfiedMembers: ['id', 'load', 'name'], missingMembers: [], }), expect.objectContaining({ @@ -572,7 +576,7 @@ describe('KERN semantic substrate', () => { className: 'Broken', interfaceName: 'Named', status: 'missing-members', - missingMembers: ['name'], + missingMembers: ['load', 'name'], }), expect.objectContaining({ className: 'Invalid', From f02dd28f947ab2b6a30a88e031c0eb0d7ee01752 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 07:33:18 +0200 Subject: [PATCH 37/46] feat(core): enforce static interface member protocols --- packages/core/src/core-runtime/index.ts | 184 ++++++++++++++++-- .../core/src/core-runtime/shape-validator.ts | 4 +- packages/core/src/semantic-validator.ts | 105 ++++++++-- packages/core/tests/class-semantics.test.ts | 64 ++++++ packages/core/tests/core-runtime.test.ts | 130 +++++++++++-- .../core/tests/semantic-substrate.test.ts | 17 ++ 6 files changed, 462 insertions(+), 42 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 44d921cd..8344936c 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -320,6 +320,7 @@ function executeNode(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { const klass = makeClass(node, env); env.define(klass.name, klass); initializeClassStaticFields(klass); + validateImplementedClassStaticProtocols(klass); return { kind: 'normal', value: kUndefined() }; } case 'assign': @@ -811,6 +812,57 @@ function 
validateImplementedClassProtocols(instance: KernInstanceValue, klass: K } } +function validateImplementedClassStaticProtocols(klass: KernClassValue): void { + const root = klass.runtimeRootContext ?? klass.env.getRuntimeRootContext(); + if (!root) return; + const facts = collectCoreShapeFacts(root); + const shapeByName = new Map(facts.interfaces.map((shape) => [shape.name, shape])); + const importedProtocolNames = runtimeImportedProtocolNames(root); + for (const interfaceName of runtimeClassReferenceNames(klass.node.props?.implements)) { + const shape = shapeByName.get(interfaceName); + if (!shape) { + if (importedProtocolNames.has(interfaceName)) continue; + throw new Error(`KERN core runtime class '${klass.name}' implements unknown interface '${interfaceName}'.`); + } + if (!shape.validatorAvailable || shape.indexers.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' implements interface '${interfaceName}' that is not executable as a class protocol in v1.`, + ); + } + const staticFields = runtimeInterfaceProtocolFields(root, interfaceName, true); + if (staticFields.length > 0) { + const missingFields = staticFields.filter((field) => !classHasRuntimeProtocolField(klass, field)); + if (missingFields.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}': missing or incompatible static member(s): ${missingFields + .map((field) => field.name) + .join(', ')}.`, + ); + } + const fieldBackedFields = staticFields.filter( + (field) => findReadableClassShapeMember(klass, field.name, true)?.kind === 'field', + ); + const projection = classStaticProtocolProjection(klass, fieldBackedFields); + const result = validateProjectedProtocolFields(projection, interfaceName, fieldBackedFields, root); + if (!result.passed) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}' static field contract:\n${result.diagnostics + 
.map((diagnostic) => diagnostic.message) + .join('\n')}`, + ); + } + } + const missingMethods = runtimeInterfaceProtocolMethods(root, interfaceName, true) + .filter((method) => !classHasRuntimeProtocolMethod(klass, method, true)) + .map((method) => method.name); + if (missingMethods.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}': missing or incompatible static member(s): ${missingMethods.join(', ')}.`, + ); + } + } +} + function classProtocolProjection(instance: KernInstanceValue, fieldNames: readonly string[]): KernValue { const entries = createRecordEntries(); for (const fieldName of fieldNames) { @@ -825,6 +877,49 @@ function classProtocolProjection(instance: KernInstanceValue, fieldNames: readon return brandValue({ kind: 'record', entries }); } +function classStaticProtocolProjection( + klass: KernClassValue, + fields: readonly RuntimeInterfaceProtocolField[], +): KernValue { + const entries = createRecordEntries(); + for (const field of fields) { + const member = findReadableClassShapeMember(klass, field.name, true); + if (member?.kind !== 'field') continue; + entries[field.name] = evalClassMember(klass, field.name); + } + return brandValue({ kind: 'record', entries }); +} + +function validateProjectedProtocolFields( + projection: KernValue, + interfaceName: string, + fields: readonly RuntimeInterfaceProtocolField[], + rootOrNodes: IRNode | readonly IRNode[], +): ReturnType { + const syntheticName = `__KernStaticProtocol_${interfaceName}`; + const syntheticInterface: IRNode = { + type: 'interface', + props: { name: syntheticName }, + children: fields.map((field) => ({ + type: 'field', + props: { + name: field.name, + optional: field.optional, + ...(field.type ? { type: field.type } : {}), + }, + })), + }; + const roots = [...(isIRNodeArray(rootOrNodes) ? 
rootOrNodes : [rootOrNodes]), syntheticInterface]; + return validateCoreShape(projection, syntheticName, roots); +} + +interface RuntimeInterfaceProtocolField { + readonly name: string; + readonly type?: string; + readonly optional: boolean; + readonly static: boolean; +} + interface RuntimeInterfaceProtocolMethod { readonly name: string; readonly arity: number; @@ -832,23 +927,50 @@ interface RuntimeInterfaceProtocolMethod { readonly async: boolean; readonly stream: boolean; readonly generator: boolean; + readonly static: boolean; readonly returns?: string; } -function runtimeInterfaceProtocolMethods( +function runtimeInterfaceProtocolFields( rootOrNodes: IRNode | readonly IRNode[], interfaceName: string, -): RuntimeInterfaceProtocolMethod[] { - const interfaceByName = new Map(); - const visit = (node: IRNode): void => { - if (node.type === 'interface') { - const name = runtimeStringProp(node.props?.name); - if (name && !interfaceByName.has(name)) interfaceByName.set(name, node); + staticOnly: boolean, +): RuntimeInterfaceProtocolField[] { + const interfaceByName = runtimeInterfaceNodesByName(rootOrNodes); + const resolve = (name: string, seen: ReadonlySet): RuntimeInterfaceProtocolField[] => { + if (seen.has(name)) return []; + const node = interfaceByName.get(name); + if (!node) return []; + const nextSeen = new Set(seen); + nextSeen.add(name); + const fields = new Map(); + for (const baseName of runtimeClassReferenceNames(node.props?.extends)) { + for (const field of resolve(baseName, nextSeen)) fields.set(runtimeInterfaceMemberShapeKey(field), field); } - for (const child of node.children ?? []) visit(child); + for (const child of node.children ?? 
[]) { + if (child.type !== 'field') continue; + const name = runtimeStringProp(child.props?.name); + if (!name) continue; + const isStatic = runtimeBooleanProp(child.props?.static); + if (isStatic !== staticOnly) continue; + fields.set(runtimeInterfaceMemberShapeKey({ name, static: isStatic }), { + name, + optional: runtimeBooleanProp(child.props?.optional), + static: isStatic, + ...(runtimeStringProp(child.props?.type) ? { type: runtimeStringProp(child.props?.type) } : {}), + }); + } + return [...fields.values()]; }; - for (const node of isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]) visit(node); + return resolve(interfaceName, new Set()); +} +function runtimeInterfaceProtocolMethods( + rootOrNodes: IRNode | readonly IRNode[], + interfaceName: string, + staticOnly = false, +): RuntimeInterfaceProtocolMethod[] { + const interfaceByName = runtimeInterfaceNodesByName(rootOrNodes); const resolve = (name: string, seen: ReadonlySet): RuntimeInterfaceProtocolMethod[] => { if (seen.has(name)) return []; const node = interfaceByName.get(name); @@ -857,19 +979,22 @@ function runtimeInterfaceProtocolMethods( nextSeen.add(name); const methods = new Map(); for (const baseName of runtimeClassReferenceNames(node.props?.extends)) { - for (const method of resolve(baseName, nextSeen)) methods.set(method.name, method); + for (const method of resolve(baseName, nextSeen)) methods.set(runtimeInterfaceMemberShapeKey(method), method); } for (const child of node.children ?? []) { if (child.type !== 'method') continue; const name = runtimeStringProp(child.props?.name); if (!name) continue; - methods.set(name, { + const isStatic = runtimeBooleanProp(child.props?.static); + if (isStatic !== staticOnly) continue; + methods.set(runtimeInterfaceMemberShapeKey({ name, static: isStatic }), { name, arity: runtimeParams(child).length, paramTypes: runtimeParams(child).map((param) => param.type ?? 
''), async: runtimeBooleanProp(child.props?.async), stream: runtimeBooleanProp(child.props?.stream), generator: runtimeBooleanProp(child.props?.generator), + static: isStatic, ...(runtimeStringProp(child.props?.returns) ? { returns: runtimeStringProp(child.props?.returns) } : {}), }); } @@ -879,8 +1004,29 @@ function runtimeInterfaceProtocolMethods( return resolve(interfaceName, new Set()); } -function classHasRuntimeProtocolMethod(klass: KernClassValue, method: RuntimeInterfaceProtocolMethod): boolean { - const member = findReadableClassShapeMember(klass, method.name, false); +function runtimeInterfaceNodesByName(rootOrNodes: IRNode | readonly IRNode[]): Map { + const interfaceByName = new Map(); + const visit = (node: IRNode): void => { + if (node.type === 'interface') { + const name = runtimeStringProp(node.props?.name); + if (name && !interfaceByName.has(name)) interfaceByName.set(name, node); + } + for (const child of node.children ?? []) visit(child); + }; + for (const node of isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]) visit(node); + return interfaceByName; +} + +function runtimeInterfaceMemberShapeKey(member: { readonly name: string; readonly static: boolean }): string { + return `${member.static ? 
'static' : 'instance'}:${member.name}`; +} + +function classHasRuntimeProtocolMethod( + klass: KernClassValue, + method: RuntimeInterfaceProtocolMethod, + staticOnly = false, +): boolean { + const member = findReadableClassShapeMember(klass, method.name, staticOnly); if (member?.kind !== 'method') return false; if (runtimeBooleanProp(member.node.props?.private)) return false; const params = runtimeParams(member.node); @@ -908,6 +1054,18 @@ function classHasRuntimeProtocolMethod(klass: KernClassValue, method: RuntimeInt ); } +function classHasRuntimeProtocolField(klass: KernClassValue, field: RuntimeInterfaceProtocolField): boolean { + const member = findReadableClassShapeMember(klass, field.name, true); + if (!member) return field.optional; + if (member.kind !== 'field' && member.kind !== 'getter') return false; + if (runtimeBooleanProp(member.node.props?.private)) return false; + const actualType = + member.kind === 'getter' + ? runtimeStringProp(member.node.props?.returns) + : runtimeStringProp(member.node.props?.type); + return !field.type || normalizeRuntimeProtocolType(actualType) === normalizeRuntimeProtocolType(field.type); +} + function runtimeProtocolParamTypesCompatible(actual: readonly string[], expected: readonly string[]): boolean { return expected.every( (type, index) => !type || normalizeRuntimeProtocolType(actual[index]) === normalizeRuntimeProtocolType(type), diff --git a/packages/core/src/core-runtime/shape-validator.ts b/packages/core/src/core-runtime/shape-validator.ts index 2d5618e8..88d35d35 100644 --- a/packages/core/src/core-runtime/shape-validator.ts +++ b/packages/core/src/core-runtime/shape-validator.ts @@ -204,7 +204,9 @@ function collectShapeRegistry(rootOrNodes: IRNode | readonly IRNode[]): ShapeReg const shape: ShapeInterface = { name, extendsNames: splitExtends(node.props?.extends), - fields: (node.children ?? []).filter((child) => child.type === 'field').map((field) => shapeField(field)), + fields: (node.children ?? 
[]) + .filter((child) => child.type === 'field' && !trueFlag(child.props?.static)) + .map((field) => shapeField(field)), indexers: (node.children ?? []) .filter((child) => child.type === 'indexer') .map((indexer) => shapeIndexer(indexer)), diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 27b523ee..bd45943d 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -141,6 +141,8 @@ export interface ClassSemanticProtocolConformanceFact { readonly status: ClassSemanticProtocolStatus; readonly missingMembers: readonly string[]; readonly satisfiedMembers: readonly string[]; + readonly missingStaticMembers: readonly string[]; + readonly satisfiedStaticMembers: readonly string[]; readonly diagnostics?: readonly string[]; readonly unsupportedReasons?: readonly string[]; readonly loc?: ClassSemanticLocation; @@ -2726,6 +2728,7 @@ interface InterfaceFieldInfo { name: string; type?: string; optional: boolean; + static: boolean; } interface InterfaceMethodInfo { @@ -2736,6 +2739,7 @@ interface InterfaceMethodInfo { async: boolean; stream: boolean; generator: boolean; + static: boolean; } interface ClassProtocolShapeContext { @@ -2747,6 +2751,8 @@ interface ClassInterfaceConformanceResult { status: Exclude; missingMembers: string[]; satisfiedMembers: string[]; + missingStaticMembers: string[]; + satisfiedStaticMembers: string[]; diagnostics: string[]; unsupportedReasons: string[]; } @@ -2870,6 +2876,7 @@ function collectInterfaceFields(node: IRNode): InterfaceFieldInfo[] { name, ...(stringProp(child, 'type') ? 
{ type: stringProp(child, 'type') } : {}), optional: isTrueFlag(child.props?.optional), + static: isTrueFlag(child.props?.static), }); } return fields; @@ -2889,6 +2896,7 @@ function collectInterfaceMethods(node: IRNode): InterfaceMethodInfo[] { async: isTrueFlag(child.props?.async), stream: isTrueFlag(child.props?.stream), generator: isTrueFlag(child.props?.generator), + static: isTrueFlag(child.props?.static), }); } return methods; @@ -3427,6 +3435,8 @@ function collectClassProtocolConformanceFacts( status: visible ? 'external' : 'unknown-interface', missingMembers: [], satisfiedMembers: [], + missingStaticMembers: [], + satisfiedStaticMembers: [], ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), }); continue; @@ -3438,6 +3448,8 @@ function collectClassProtocolConformanceFacts( status: result.status, missingMembers: result.missingMembers, satisfiedMembers: result.satisfiedMembers, + missingStaticMembers: result.missingStaticMembers, + satisfiedStaticMembers: result.satisfiedStaticMembers, ...(result.diagnostics.length > 0 ? { diagnostics: result.diagnostics } : {}), ...(result.unsupportedReasons.length > 0 ? { unsupportedReasons: result.unsupportedReasons } : {}), ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), @@ -3463,6 +3475,8 @@ function classInterfaceConformance( status: 'invalid-interface', missingMembers: [], satisfiedMembers: [], + missingStaticMembers: [], + satisfiedStaticMembers: [], diagnostics: sortedUnique(diagnostics), unsupportedReasons: [], }; @@ -3472,6 +3486,8 @@ function classInterfaceConformance( status: 'unsupported-protocol', missingMembers: [], satisfiedMembers: [], + missingStaticMembers: [], + satisfiedStaticMembers: [], diagnostics: [], unsupportedReasons: sortedUnique([ ...shape.unsupportedReasons, @@ -3480,36 +3496,79 @@ function classInterfaceConformance( }; } const effectiveMembers = effectiveClassMemberFacts(info, classByName); - const fields = shape?.fields ?? 
protocol.fields; - const requiredFields = fields.filter((field) => !field.optional); + const fields = effectiveInterfaceFields(protocol, interfaceByName); + const requiredFields = fields.filter((field) => !field.optional && !field.static); + const requiredStaticFields = fields.filter((field) => !field.optional && field.static); const requiredMethods = effectiveInterfaceMethods(protocol, interfaceByName); + const requiredInstanceMethods = requiredMethods.filter((method) => !method.static); + const requiredStaticMethods = requiredMethods.filter((method) => method.static); const missingMembers: string[] = []; const satisfiedMembers: string[] = []; + const missingStaticMembers: string[] = []; + const satisfiedStaticMembers: string[] = []; for (const field of requiredFields) { - if (classHasReadableInstanceMember(effectiveMembers, field)) { + if (classHasReadableMember(effectiveMembers, field, false)) { satisfiedMembers.push(field.name); } else { missingMembers.push(field.name); } } - for (const method of requiredMethods) { - if (classHasCallableInstanceMethod(effectiveMembers, method)) { + for (const field of requiredStaticFields) { + if (classHasReadableMember(effectiveMembers, field, true)) { + satisfiedStaticMembers.push(field.name); + } else { + missingStaticMembers.push(field.name); + } + } + for (const method of requiredInstanceMethods) { + if (classHasCallableMethod(effectiveMembers, method, false)) { satisfiedMembers.push(method.name); } else { missingMembers.push(method.name); } } + for (const method of requiredStaticMethods) { + if (classHasCallableMethod(effectiveMembers, method, true)) { + satisfiedStaticMembers.push(method.name); + } else { + missingStaticMembers.push(method.name); + } + } const missing = sortedUnique(missingMembers); const satisfied = sortedUnique(satisfiedMembers); + const missingStatic = sortedUnique(missingStaticMembers); + const satisfiedStatic = sortedUnique(satisfiedStaticMembers); return { - status: missing.length > 0 ? 
'missing-members' : 'satisfied', + status: missing.length > 0 || missingStatic.length > 0 ? 'missing-members' : 'satisfied', missingMembers: missing, satisfiedMembers: satisfied, + missingStaticMembers: missingStatic, + satisfiedStaticMembers: satisfiedStatic, diagnostics: [], unsupportedReasons: [], }; } +function effectiveInterfaceFields( + protocol: InterfaceInfo, + interfaceByName: ReadonlyMap, + seen: ReadonlySet = new Set(), +): InterfaceFieldInfo[] { + if (seen.has(protocol.name)) return []; + const nextSeen = new Set(seen); + nextSeen.add(protocol.name); + const fields = new Map(); + for (const baseName of protocol.extendsNames) { + const base = interfaceByName.get(baseName); + if (!base) continue; + for (const field of effectiveInterfaceFields(base, interfaceByName, nextSeen)) { + fields.set(interfaceMemberShapeKey(field), field); + } + } + for (const field of protocol.fields) fields.set(interfaceMemberShapeKey(field), field); + return [...fields.values()]; +} + function effectiveInterfaceMethods( protocol: InterfaceInfo, interfaceByName: ReadonlyMap, @@ -3522,30 +3581,40 @@ function effectiveInterfaceMethods( for (const baseName of protocol.extendsNames) { const base = interfaceByName.get(baseName); if (!base) continue; - for (const method of effectiveInterfaceMethods(base, interfaceByName, nextSeen)) methods.set(method.name, method); + for (const method of effectiveInterfaceMethods(base, interfaceByName, nextSeen)) { + methods.set(interfaceMemberShapeKey(method), method); + } } - for (const method of protocol.methods) methods.set(method.name, method); + for (const method of protocol.methods) methods.set(interfaceMemberShapeKey(method), method); return [...methods.values()]; } -function classHasReadableInstanceMember( +function interfaceMemberShapeKey(member: { readonly name: string; readonly static: boolean }): string { + return `${member.static ? 
'static' : 'instance'}:${member.name}`; +} + +function classHasReadableMember( members: readonly ClassSemanticMemberFact[], field: { readonly name: string; readonly type?: string }, + staticOnly: boolean, ): boolean { return members.some((member) => { - if (member.name !== field.name || member.static) return false; + if (member.name !== field.name || member.static !== staticOnly || member.private) return false; if (member.kind !== 'field' && member.kind !== 'getter') return false; const actualType = member.kind === 'getter' ? member.returns : member.type; return !field.type || actualType === field.type; }); } -function classHasCallableInstanceMethod( +function classHasCallableMethod( members: readonly ClassSemanticMemberFact[], method: InterfaceMethodInfo, + staticOnly: boolean, ): boolean { return members.some((member) => { - if (member.name !== method.name || member.static || member.private || member.kind !== 'method') return false; + if (member.name !== method.name || member.static !== staticOnly || member.private || member.kind !== 'method') { + return false; + } if (member.arity !== method.arity) return false; if (!methodParamTypesCompatible(member.paramTypes ?? [], method.paramTypes)) return false; if ((member.async === true) !== method.async) return false; @@ -3693,11 +3762,19 @@ function validateClassImplements( }); continue; } - if (conformance.missingMembers.length === 0) continue; + if (conformance.missingMembers.length === 0 && conformance.missingStaticMembers.length === 0) continue; + const missingParts = [ + ...(conformance.missingMembers.length > 0 + ? [`instance member(s): ${conformance.missingMembers.join(', ')}`] + : []), + ...(conformance.missingStaticMembers.length > 0 + ? [`static member(s): ${conformance.missingStaticMembers.join(', ')}`] + : []), + ]; violations.push({ rule: 'class-implements-missing-member', nodeType: 'class', - message: `Class '${info.name}' does not satisfy interface '${interfaceName}'. 
Missing readable instance member(s): ${conformance.missingMembers.join(', ')}.`, + message: `Class '${info.name}' does not satisfy interface '${interfaceName}'. Missing readable ${missingParts.join('; ')}.`, line: info.node.loc?.line, col: info.node.loc?.col, }); diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 74c8dcaf..81e27cce 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -74,6 +74,20 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-implements-missing-member'); }); + test('rejects private instance fields as protocol members', () => { + const violations = violationsFor( + [ + 'interface name=Named', + ' field name=name type=string', + 'class name=User implements=Named', + ' field name=name type=string private=true', + ].join('\n'), + ); + + const violation = violations.find((candidate) => candidate.rule === 'class-implements-missing-member'); + expect(violation?.message).toContain('instance member(s): name'); + }); + test('accepts class implements when instance methods satisfy interface methods', () => { const rules = rulesFor( [ @@ -90,6 +104,56 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-implements-missing-member'); }); + test('accepts static fields and inherited static methods for class implements', () => { + const rules = rulesFor( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + ' method name=create params="id:string" returns=string static=true', + 'class name=Base', + ' method name=create params="id:string" returns=string static=true', + ' handler lang=kern', + ' return value="id"', + 'class name=UserFactory extends=Base implements=Factory', + ' field name=kind type=string static=true', + ].join('\n'), + ); + + expect(rules).not.toContain('class-implements-missing-member'); + }); + + test('rejects 
private static fields as protocol members', () => { + const violations = violationsFor( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=UserFactory implements=Factory', + ' field name=kind type=string static=true private=true', + ].join('\n'), + ); + + const violation = violations.find((candidate) => candidate.rule === 'class-implements-missing-member'); + expect(violation?.message).toContain('static member(s): kind'); + }); + + test('rejects static protocol members satisfied only by instance members', () => { + const violations = violationsFor( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + ' method name=create params="id:string" returns=string static=true', + 'class name=Confused implements=Factory', + ' field name=kind type=string', + ' method name=create params="id:string" returns=string', + ' handler lang=kern', + ' return value="id"', + ].join('\n'), + ); + + const violation = violations.find((candidate) => candidate.rule === 'class-implements-missing-member'); + expect(violation?.message).toContain('static member(s): create, kind'); + }); + test('reports missing and incompatible interface methods for class implements', () => { const violations = violationsFor( [ diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index fb30cd58..918e6e87 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -467,6 +467,117 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); }); + test('validates static implemented interface fields at class definition', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=UserFactory implements=Factory', + ' field name=kind type=string static=true value="user"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + 
expect(() => runCoreRuntime(root, env)).not.toThrow(); + expect(toHostValue(evalCoreExpression('UserFactory.kind', env))).toBe('user'); + }); + + test('accepts missing optional static implemented interface fields', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true optional=true', + 'class name=UserFactory implements=Factory', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + expect(() => runCoreRuntime(root, env)).not.toThrow(); + }); + + test('rejects private static implemented interface fields', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=UserFactory implements=Factory', + ' field name=kind type=string static=true private=true value="user"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + expect(() => runCoreRuntime(root, env)).toThrow('missing or incompatible static member(s): kind'); + }); + + test('rejects static implemented interface field type mismatches', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=BadFactory implements=Factory', + ' field name=kind type=number static=true value=1', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + expect(() => runCoreRuntime(root, env)).toThrow('missing or incompatible static member(s): kind'); + }); + + test('rejects static implemented interface members satisfied only by instance members', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + ' method name=create params="id:string" returns=string static=true', + 'class name=Confused implements=Factory', + ' field name=kind type=string value="user"', + ' method name=create params="id:string" returns=string', + ' handler', + ' return value="id"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + expect(() => runCoreRuntime(root, env)).toThrow('missing or 
incompatible static member(s): kind'); + }); + + test('does not invoke static getters while validating implemented interface fields', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=UserFactory implements=Factory', + ' getter name=kind returns=string static=true', + ' handler', + ' return value="Later.kind"', + 'class name=Later', + ' field name=kind type=string static=true value="user"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('UserFactory.kind', env))).toBe('user'); + }); + + test('validates inherited static methods for implemented interfaces', () => { + const root = parse( + [ + 'interface name=Factory', + ' method name=create params="id:string" returns=string static=true', + 'class name=BaseFactory', + ' method name=create params="id:string" returns=string static=true', + ' handler', + ' return value="id"', + 'class name=UserFactory extends=BaseFactory implements=Factory', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('UserFactory.create("u1")', env))).toBe('u1'); + }); + test('validates implemented interface methods without invoking them', () => { const root = parse( [ @@ -823,9 +934,8 @@ describe('KERN core runtime statements', () => { ].join('\n'), ); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow( + expect(() => runCoreRuntime(root, env)).toThrow( "implements interface 'Dictionary' that is not executable as a class protocol in v1", ); }); @@ -840,9 +950,8 @@ describe('KERN core runtime statements', () => { ].join('\n'), ); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow('implements= contains an empty reference'); + expect(() => runCoreRuntime(root, 
env)).toThrow('implements= contains an empty reference'); }); test('invalid runtime implements entries fail instead of being ignored', () => { @@ -855,9 +964,8 @@ describe('KERN core runtime statements', () => { ].join('\n'), ); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow('implements= contains an invalid reference: 123'); + expect(() => runCoreRuntime(root, env)).toThrow('implements= contains an invalid reference: 123'); }); test('runtime implements entries reject trailing junk', () => { @@ -870,21 +978,15 @@ describe('KERN core runtime statements', () => { ].join('\n'), ); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow( - 'implements= contains an invalid reference: Named junk', - ); + expect(() => runCoreRuntime(root, env)).toThrow('implements= contains an invalid reference: Named junk'); }); test('unknown local runtime implements targets fail instead of being ignored', () => { const root = parse(['class name=User implements=MissingProtocol'].join('\n')); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow( - "class 'User' implements unknown interface 'MissingProtocol'", - ); + expect(() => runCoreRuntime(root, env)).toThrow("class 'User' implements unknown interface 'MissingProtocol'"); }); test('imported runtime implements targets are treated as external protocols', () => { diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 7f29874b..435da00c 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -532,8 +532,15 @@ describe('KERN semantic substrate', () => { ' field name=id type=string', 'interface name=DictionaryProtocol', ' indexer keyName=key keyType=string type=number', + 'interface 
name=FactoryProtocol', + ' field name=kind type=string static=true', + ' method name=create params="id:string" returns=string static=true', 'class name=Base', ' field name=id type=string', + 'class name=BaseFactory', + ' method name=create params="id:string" returns=string static=true', + ' handler lang=kern', + ' return value="id"', 'class name=User extends=Base implements="Named,ExternalProtocol,MissingProtocol"', ' getter name=name returns=string', ' handler lang=kern', @@ -546,6 +553,8 @@ describe('KERN semantic substrate', () => { 'class name=Invalid implements=BrokenProtocol', ' field name=id type=string', 'class name=Dictionary implements=DictionaryProtocol', + 'class name=FactoryImpl extends=BaseFactory implements=FactoryProtocol', + ' field name=kind type=string static=true', ].join('\n'), ), ); @@ -555,6 +564,7 @@ describe('KERN semantic substrate', () => { { from: 'User', to: 'Named', relation: 'implements', resolved: true, external: false }, { from: 'User', to: 'ExternalProtocol', relation: 'implements', resolved: true, external: true }, { from: 'User', to: 'MissingProtocol', relation: 'implements', resolved: false, external: false }, + { from: 'FactoryImpl', to: 'FactoryProtocol', relation: 'implements', resolved: true, external: false }, ]), ); expect(facts.unresolvedImplements).toEqual(['MissingProtocol']); @@ -590,6 +600,13 @@ describe('KERN semantic substrate', () => { status: 'unsupported-protocol', unsupportedReasons: ['indexer'], }), + expect.objectContaining({ + className: 'FactoryImpl', + interfaceName: 'FactoryProtocol', + status: 'satisfied', + satisfiedStaticMembers: ['create', 'kind'], + missingStaticMembers: [], + }), ]), ); }); From 863f9b85f8d2eb6c7924139f2e2e5fd1f23b3d71 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 07:33:18 +0200 Subject: [PATCH 38/46] fix(core): pin core contract element type to satisfy strict tsc --- packages/core/src/semantic-substrate.ts | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 
deletions(-) diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index dd0a9e6f..891cd89a 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -6,7 +6,12 @@ import { type PortableLogicSupport, type PortableLogicTarget, } from './codegen/portable-logic-primitives.js'; -import { CORE_TYPE_CONTRACTS, type CoreOperationReturns, contractToGraphEdges } from './core-contracts/index.js'; +import { + CORE_TYPE_CONTRACTS, + type CoreOperationReturns, + type CoreTypeContract, + contractToGraphEdges, +} from './core-contracts/index.js'; import { type CoreShapeFacts, collectCoreShapeFacts } from './core-runtime/shape-validator.js'; import type { NodeContract } from './ir/semantics/index.js'; import { snapshotRegistry } from './ir/semantics/index.js'; @@ -138,7 +143,10 @@ export interface BuildKernSemanticSubstrateOptions { export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { const ragFacts = options.documentRag ? collectRagSemanticFacts(options.documentRag) : undefined; - const coreTypes = Object.values(CORE_TYPE_CONTRACTS.types).map((contract) => ({ + // The registry guarantees every value is a CoreTypeContract; pin the element type so + // Object.values does not widen to unknown/any under stricter tsconfig settings (ts18046). + const coreContracts = Object.values(CORE_TYPE_CONTRACTS.types) as readonly CoreTypeContract[]; + const coreTypes = coreContracts.map((contract) => ({ id: `core.type.${contract.name}`, name: contract.name, kind: contract.kind, @@ -160,7 +168,7 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp generatedBy: 'kern-semantic-substrate', source: options.source ?? 
'codegen-from-ts', coreTypes, - coreGraphEdges: Object.values(CORE_TYPE_CONTRACTS.types).flatMap((contract) => contractToGraphEdges(contract)), + coreGraphEdges: coreContracts.flatMap((contract) => contractToGraphEdges(contract)), portablePrimitives: PORTABLE_LOGIC_PRIMITIVE_IDS.map((id) => { const primitive = PORTABLE_LOGIC_PRIMITIVES[id]; return { From 04a1e06b11b382fec9dbf8d9f56127c5f04266c8 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 08:10:30 +0200 Subject: [PATCH 39/46] fix(core): clear kern-guard findings in class semantic validator --- packages/core/src/semantic-validator.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index bd45943d..53c8669f 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -3130,10 +3130,9 @@ function fieldInitializerNames(info: ClassInfo): string[] { function constructorThisAssignmentNames(info: ClassInfo): string[] { if (info.constructors.length === 0) return []; - const constructorAssignments: string[][] = []; - for (const ctor of info.constructors) { - constructorAssignments.push([...definiteThisAssignmentsInStatements(constructorBodyStatements(ctor))]); - } + const constructorAssignments = info.constructors.map((ctor) => [ + ...definiteThisAssignmentsInStatements(constructorBodyStatements(ctor)), + ]); const [first = [], ...rest] = constructorAssignments; return sortedUnique([...rest.reduce((common, names) => setIntersection(common, new Set(names)), new Set(first))]); } @@ -3277,7 +3276,9 @@ function superCallCountInNode(node: IRNode): number { if (!text) continue; try { count += valueIRSuperConstructorCallCount(parseExpression(text)); - } catch {} + } catch { + // Unparseable expression text contributes no super() calls. 
+ } } return 'continue'; }); From 750fd932f8b6efed66f99e530335e127a0fa7a74 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 09:50:10 +0200 Subject: [PATCH 40/46] feat(python): lower KERN classes to pure Python --- packages/python/src/codegen-body-python.ts | 18 +++ packages/python/src/codegen-python.ts | 3 + packages/python/src/generators/data.ts | 126 ++++++++++++++++++++- packages/python/src/targets/python.ts | 12 ++ 4 files changed, 155 insertions(+), 4 deletions(-) diff --git a/packages/python/src/codegen-body-python.ts b/packages/python/src/codegen-body-python.ts index 4fb213fa..4266a8ac 100644 --- a/packages/python/src/codegen-body-python.ts +++ b/packages/python/src/codegen-body-python.ts @@ -74,6 +74,15 @@ export interface BodyEmitOptions { * the KERN-form `userId` resolves to the snake_cased Python parameter. * Identifiers not in the map pass through unchanged. */ symbolMap?: Record; + /** When true, the handler is a class member body: identifier `super` + * lowers to Python `super()` (so `super.m()` -> `super().m()`) and a + * direct `super(...)` call lowers to `super().__init__(...)`. Paired with + * a `symbolMap` entry `this -> self` by the class generator. */ + inClassBody?: boolean; + /** When true, the handler is specifically a constructor body, so a direct + * `super(...)` call lowers to `super().__init__(...)`. Outside a constructor + * `super(...)` is not a parent-constructor call and is left untouched. */ + inConstructor?: boolean; /** Slice 4a review fix (Gemini #5) — how to lower the `?` propagation * hoist's err-branch return: * - 'value' (default for `fn`): `return __k_tN` so the caller sees @@ -137,6 +146,8 @@ interface BodyEmitContext { * `each` pair-mode). Consumer emits each entry at module scope. 
*/ helpers: Set; symbolMap: Record; + inClassBody: boolean; + inConstructor: boolean; shadowedSymbols: Set; localScopes: Array>; regexScopes: Array | null>>; @@ -171,6 +182,8 @@ function freshCtx(options?: BodyEmitOptions): BodyEmitContext { imports: new Set(), helpers: new Set(), symbolMap: options?.symbolMap ?? {}, + inClassBody: options?.inClassBody ?? false, + inConstructor: options?.inConstructor ?? false, shadowedSymbols: new Set(), localScopes: [], regexScopes: [], @@ -1695,6 +1708,7 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { // Python-form `user_id`. Identifiers not in the map (locals, globals, // module names) pass through unchanged. if (ctx.shadowedSymbols.has(node.name)) return node.name; + if (ctx.inClassBody && node.name === 'super') return 'super()'; return ctx.symbolMap[node.name] ?? node.name; } case 'member': @@ -2101,6 +2115,10 @@ function lowerChain(node: ChainNode, ctx: BodyEmitContext): GuardedExpr { if (regex !== null) return { guard: null, expr: regex }; const stdlib = applyStdlibLoweringPython(node, ctx); if (stdlib !== null) return { guard: null, expr: stdlib }; + if (ctx.inConstructor && node.callee.kind === 'ident' && node.callee.name === 'super') { + const superArgs = node.args.map((arg) => emitPyExprCtx(arg, ctx)).join(', '); + return { guard: null, expr: `super().__init__(${superArgs})` }; + } if (node.callee.kind === 'ident' && node.callee.name === 'String') { if (node.args.length !== 1) { throw new Error('String() portable coercion expects exactly one argument on Python target.'); diff --git a/packages/python/src/codegen-python.ts b/packages/python/src/codegen-python.ts index a3950b84..0fd5a782 100644 --- a/packages/python/src/codegen-python.ts +++ b/packages/python/src/codegen-python.ts @@ -27,6 +27,7 @@ import { // Data layer generators (model, repository, cache, dependency, service, union) import { generatePythonCache, + generatePythonClass, generatePythonDependency, generatePythonModel, 
generatePythonRepository, @@ -180,6 +181,8 @@ export function generatePythonCoreNode(node: IRNode, options: PythonCodegenOptio return generatePythonDependency(node); case 'service': return generatePythonService(node); + case 'class': + return generatePythonClass(node); case 'union': return generatePythonUnion(node); // Backend infrastructure diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index cf88b89d..516d9d86 100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -22,7 +22,10 @@ import { mapTsTypeToPython, toSnakeCase } from '../type-map.js'; * * When the handler is legacy raw, returns `{ code: handlerCode(method), * imports: empty }`. */ -function methodBodyCodePython(method: IRNode): { code: string; imports: Set; helpers: Set } { +function methodBodyCodePython( + method: IRNode, + opts?: { classBody?: boolean; isConstructor?: boolean }, +): { code: string; imports: Set; helpers: Set } { const handler = getFirstChild(method, 'handler'); if (!handler || getProps(handler).lang !== 'kern') { return { code: handlerCode(method), imports: new Set(), helpers: new Set() }; @@ -54,7 +57,14 @@ function methodBodyCodePython(method: IRNode): { code: string; imports: Set`self`, `super.m()`->`super().m()`, `new X()`->`X()`) is the next +// sub-problem the differential class fixtures will drive. +export function generatePythonClass(node: IRNode): string[] { + const props = p(node); + const name = emitIdentifier(props.name as string, 'UnknownClass', node); + const baseRaw = typeof props.extends === 'string' ? (props.extends as string) : ''; + const base = baseRaw ? emitIdentifier(baseRaw, 'object', node) : ''; + const header = base ? 
`class ${name}(${base}):` : `class ${name}:`; + + const isStatic = (n: IRNode): boolean => { + const np = p(n); + return np.static === 'true' || np.static === true; + }; + + const fields = kids(node, 'field'); + const staticFields = fields.filter(isStatic); + const methods = kids(node, 'method'); + const getters = kids(node, 'getter'); + const setters = kids(node, 'setter'); + const ctor = firstChild(node, 'constructor'); + + const body: string[] = []; + + // Static fields -> class-level attributes. + for (const f of staticFields) { + const fp = p(f); + const fname = toSnakeCase((fp.name as string) || 'field'); + const ftype = fp.type ? mapTsTypeToPython(fp.type as string) : 'Any'; + const raw = typeof fp.value === 'string' ? (fp.value as string).replace(/\bnew\s+/g, '') : undefined; + const value = raw !== undefined ? formatPythonDefault(raw, (fp.type as string) || '') : 'None'; + body.push(` ${fname}: ${ftype} = ${value}`); + } + if (staticFields.length > 0) body.push(''); + + // Constructor -> __init__. + if (ctor) { + body.push(` def __init__(${buildPythonParamList(ctor, { selfPrefix: true })}):`); + body.push(...methodBodyLinesPython(ctor, { classBody: true, isConstructor: true })); + body.push(''); + } + + // Methods (instance + static). + for (const m of methods) { + const mp = p(m); + const mname = toSnakeCase((mp.name as string) || 'method'); + const asyncKw = mp.async === 'true' || mp.async === true ? 'async ' : ''; + const returns = mp.returns ? ` -> ${mapTsTypeToPython(mp.returns as string)}` : ''; + if (isStatic(m)) { + body.push(' @staticmethod'); + body.push(` ${asyncKw}def ${mname}(${buildPythonParamList(m, { selfPrefix: false })})${returns}:`); + } else { + body.push(` ${asyncKw}def ${mname}(${buildPythonParamList(m, { selfPrefix: true })})${returns}:`); + } + body.push(...methodBodyLinesPython(m, { classBody: !isStatic(m) })); + body.push(''); + } + + // Getters -> @property. 
Static accessors need a metaclass/classmethod-property + // and are a follow-up; skip them with a marker rather than emit broken code. + const instanceGetterNames = new Set(); + for (const g of getters) { + const gp = p(g); + const gname = toSnakeCase((gp.name as string) || 'prop'); + if (isStatic(g)) { + body.push(` # static getter '${gname}' is not yet supported on the Python target`); + continue; + } + instanceGetterNames.add(gname); + const returns = gp.returns ? ` -> ${mapTsTypeToPython(gp.returns as string)}` : ''; + body.push(' @property'); + body.push(` def ${gname}(self)${returns}:`); + body.push(...methodBodyLinesPython(g, { classBody: true })); + body.push(''); + } + // Setters -> @.setter. Python requires a property to exist before its + // `.setter`; KERN allows setter-only properties, so synthesize a getter when + // none was declared (write-only -> returns None, matching a TS getter-less read). + for (const s of setters) { + const sp = p(s); + const sname = toSnakeCase((sp.name as string) || 'prop'); + if (isStatic(s)) { + body.push(` # static setter '${sname}' is not yet supported on the Python target`); + continue; + } + if (!instanceGetterNames.has(sname)) { + body.push(' @property'); + body.push(` def ${sname}(self): # write-only property (no getter declared in KERN)`); + body.push(' return None'); + body.push(''); + instanceGetterNames.add(sname); + } + body.push(` @${sname}.setter`); + body.push(` def ${sname}(${buildPythonParamList(s, { selfPrefix: true })}):`); + body.push(...methodBodyLinesPython(s, { classBody: true })); + body.push(''); + } + + if (body.length === 0) body.push(' pass'); + + return [header, ...body]; +} + // ── Union (Pydantic Discriminated Union) ──────────────────────────────── // union name=ContentSegment discriminant=type // variant name=prose diff --git a/packages/python/src/targets/python.ts b/packages/python/src/targets/python.ts index 35d1593a..be642a7a 100644 --- a/packages/python/src/targets/python.ts +++ 
b/packages/python/src/targets/python.ts @@ -5,6 +5,7 @@ import { emitModels } from '../core/emit-models.js'; import { collectFenceDiagnostics } from '../core/fence-diagnostics.js'; import { emitPureHandlers } from '../core/handlers/index.js'; import { findServerNode } from '../fastapi-utils.js'; +import { generatePythonClass } from '../generators/data.js'; /** * The PyDotDict / _DotList shim, emitted at the top of every `--emit=backend` @@ -132,6 +133,11 @@ export function transpilePython(root: IRNode, config?: ResolvedKernConfig): Tran target: 'python', }); + // 3b. Class declarations -> pure Python classes. Additive: files without + // `class` nodes (e.g. the models-only byte-invariance corpus) are untouched. + const classNodes = (root.children ?? []).filter((child) => child.type === 'class'); + const classesCode = classNodes.map((node) => generatePythonClass(node).join('\n')).join('\n\n'); + const lines: string[] = []; // Sort and print imports @@ -166,6 +172,12 @@ export function transpilePython(root: IRNode, config?: ResolvedKernConfig): Tran lines.push(modelsCode); } + // Class definitions (pure Python — not FastAPI/Pydantic). + if (classesCode.trim().length > 0) { + lines.push(''); + lines.push(classesCode); + } + // Pure handlers (additive) if (handlersCode) { lines.push(''); From 377a8ee4096a463b75e3b8a54e3c0a0a4f0f83d0 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 09:50:10 +0200 Subject: [PATCH 41/46] test(python): lock single-source class codegen --- packages/python/tests/class-python.test.ts | 121 +++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 packages/python/tests/class-python.test.ts diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts new file mode 100644 index 00000000..53ce5d1e --- /dev/null +++ b/packages/python/tests/class-python.test.ts @@ -0,0 +1,121 @@ +/** Single-source class slice — Python target. 
+ * + * KERN `class` nodes lower to pure Python (NOT FastAPI/Pydantic) via + * `generatePythonClass`. Class member bodies translate through the shared + * Python body emitter with `inClassBody`/`inConstructor`: + * - `this` -> `self` (symbol map) + * - `super(args)` -> `super().__init__(args)` (constructor only) + * - `super.m()` / `super.x` -> `super().m()` / `super().x` (any member) + * + * Behaviour locked here was driven by an Agon review of the slice + * (setter-only synthesis + static-accessor skip closed two blocking findings). + */ + +import type { IRNode } from '@kernlang/core'; +import { generatePythonClass } from '../src/generators/data.js'; + +function handler(children: IRNode[]): IRNode { + return { type: 'handler', props: { lang: 'kern' }, children }; +} +function param(name: string, type?: string): IRNode { + return { type: 'param', props: type ? { name, type } : { name }, children: [] }; +} + +describe('Python class codegen (single-source class slice)', () => { + test('emits a pure-Python class: __init__, this->self, instance method, getter', () => { + const animal: IRNode = { + type: 'class', + props: { name: 'Animal' }, + children: [ + { + type: 'constructor', + props: {}, + children: [ + param('name', 'string'), + param('legs', 'number'), + handler([ + { type: 'assign', props: { target: 'this.name', value: 'name' }, children: [] }, + { type: 'assign', props: { target: 'this.legs', value: 'legs' }, children: [] }, + ]), + ], + }, + { + type: 'getter', + props: { name: 'legCount', returns: 'number' }, + children: [handler([{ type: 'return', props: { value: 'this.legs' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(animal).join('\n'); + expect(code).toContain('class Animal:'); + expect(code).toContain('def __init__(self, name: str, legs: float):'); + expect(code).toContain('self.name = name'); + expect(code).toContain('@property'); + expect(code).toContain('def leg_count(self) -> float:'); + expect(code).toContain('return 
self.legs'); + expect(code).not.toContain('this.'); // no JS-ism leaks + }); + + test('inheritance: super(...) -> super().__init__ in constructor, super.m() -> super().m()', () => { + const dog: IRNode = { + type: 'class', + props: { name: 'Dog', extends: 'Animal' }, + children: [ + { + type: 'constructor', + props: {}, + children: [ + param('name', 'string'), + handler([{ type: 'do', props: { value: 'super(name, 4)' }, children: [] }]), + ], + }, + { + type: 'method', + props: { name: 'summary', returns: 'string' }, + children: [handler([{ type: 'return', props: { value: '`${super.describe()}`' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(dog).join('\n'); + expect(code).toContain('class Dog(Animal):'); + expect(code).toContain('super().__init__(name, 4)'); + expect(code).toContain('super().describe()'); + }); + + test('setter-only property synthesizes a write-only getter (valid Python, no NameError)', () => { + const box: IRNode = { + type: 'class', + props: { name: 'Box' }, + children: [ + { + type: 'setter', + props: { name: 'items' }, + children: [ + param('next', 'object[]'), + handler([{ type: 'assign', props: { target: 'this.store', value: 'next' }, children: [] }]), + ], + }, + ], + }; + const code = generatePythonClass(box).join('\n'); + expect(code).toContain('def items(self):'); // synthesized getter precedes the setter + expect(code).toContain('@items.setter'); + }); + + test('static accessors are skipped (not emitted as broken instance @property)', () => { + const reg: IRNode = { + type: 'class', + props: { name: 'Reg' }, + children: [ + { + type: 'getter', + props: { name: 'label', static: 'true', returns: 'string' }, + children: [handler([{ type: 'return', props: { value: '"x"' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(reg).join('\n'); + expect(code).not.toContain('def label(self)'); + expect(code).toContain("static getter 'label'"); + }); +}); From a2459ec09d294c55b89476d4b781aa2066b0bb7f Mon 
Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 10:21:35 +0200 Subject: [PATCH 42/46] feat(python): per-instance field defaults and static field values --- packages/python/src/generators/data.ts | 59 +++++++++++++++++++++++--- packages/python/src/targets/python.ts | 2 +- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index 516d9d86..d620922f 100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -119,6 +119,25 @@ export function formatPythonDefault(value: string, kernType: string): string { return trimmed; } +/** Lower a field's default to a Python expression, or undefined when none. + * A `value={{ }}` block parses to `{ __expr: true, code: '' }`; + * a bare `default=...` is a raw string. `new X(...)` -> `X(...)`; literals go + * through formatPythonDefault (true/false/null/number/string handling). */ +function fieldDefaultPython(field: IRNode): string | undefined { + const fp = p(field); + const v = fp.value as unknown; + let code: string | undefined; + if (v && typeof v === 'object' && (v as { __expr?: boolean }).__expr) { + code = (v as { code?: string }).code; + } else if (typeof v === 'string') { + code = v; + } else if (typeof fp.default === 'string') { + code = fp.default as string; + } + if (code === undefined) return undefined; + return formatPythonDefault(code.replace(/\bnew\s+/g, ''), (fp.type as string) || ''); +} + // SQLModel column override: pydantic validator types -> plain DB types for column declarations const SQLMODEL_COLUMN_OVERRIDE: Record = { Email: 'str', @@ -494,21 +513,49 @@ export function generatePythonClass(node: IRNode): string[] { const body: string[] = []; - // Static fields -> class-level attributes. + // Static fields -> class-level attributes (shared across instances, like TS statics). 
for (const f of staticFields) { const fp = p(f); const fname = toSnakeCase((fp.name as string) || 'field'); const ftype = fp.type ? mapTsTypeToPython(fp.type as string) : 'Any'; - const raw = typeof fp.value === 'string' ? (fp.value as string).replace(/\bnew\s+/g, '') : undefined; - const value = raw !== undefined ? formatPythonDefault(raw, (fp.type as string) || '') : 'None'; - body.push(` ${fname}: ${ftype} = ${value}`); + body.push(` ${fname}: ${ftype} = ${fieldDefaultPython(f) ?? 'None'}`); } if (staticFields.length > 0) body.push(''); - // Constructor -> __init__. + // Constructor -> __init__. Instance-field defaults are emitted INSIDE __init__ + // (never as class-level attributes) so each instance gets a fresh value — + // matching TS per-instance field initialization and avoiding Python's + // shared-mutable-default trap (a class-level `items = []` would be shared by + // every instance). Defaults precede the constructor body, which may reassign + // them (TS field-init-then-constructor order). + const instanceDefaults = fields.filter((f) => !isStatic(f) && fieldDefaultPython(f) !== undefined); + const defaultLines = instanceDefaults.map( + (f) => ` self.${toSnakeCase((p(f).name as string) || 'field')} = ${fieldDefaultPython(f)}`, + ); if (ctor) { body.push(` def __init__(${buildPythonParamList(ctor, { selfPrefix: true })}):`); - body.push(...methodBodyLinesPython(ctor, { classBody: true, isConstructor: true })); + const ctorLines = methodBodyLinesPython(ctor, { classBody: true, isConstructor: true }); + // Field initializers run AFTER super().__init__() (TS field-init-after-super + // order), so inject defaults right after the super call when present, else at + // the top of the constructor body. 
+ const superIdx = ctorLines.findIndex((line) => line.includes('super().__init__')); + if (superIdx >= 0) { + body.push(...ctorLines.slice(0, superIdx + 1), ...defaultLines, ...ctorLines.slice(superIdx + 1)); + } else { + body.push(...defaultLines, ...ctorLines); + } + body.push(''); + } else if (instanceDefaults.length > 0) { + // No explicit constructor. A derived class still forwards to its base + // initializer (TS subclasses without a constructor auto-forward args), then + // applies its own field defaults. + if (base) { + body.push(' def __init__(self, *args, **kwargs):'); + body.push(' super().__init__(*args, **kwargs)'); + } else { + body.push(' def __init__(self):'); + } + body.push(...defaultLines); body.push(''); } diff --git a/packages/python/src/targets/python.ts b/packages/python/src/targets/python.ts index be642a7a..ab921d9b 100644 --- a/packages/python/src/targets/python.ts +++ b/packages/python/src/targets/python.ts @@ -135,7 +135,7 @@ export function transpilePython(root: IRNode, config?: ResolvedKernConfig): Tran // 3b. Class declarations -> pure Python classes. Additive: files without // `class` nodes (e.g. the models-only byte-invariance corpus) are untouched. - const classNodes = (root.children ?? []).filter((child) => child.type === 'class'); + const classNodes = root.type === 'class' ? [root] : (root.children ?? 
[]).filter((child) => child.type === 'class'); const classesCode = classNodes.map((node) => generatePythonClass(node).join('\n')).join('\n\n'); const lines: string[] = []; From c0e7fe6675afe80d16bf4c6dc9fcc357df3b57ac Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 10:21:35 +0200 Subject: [PATCH 43/46] test(python): cover field defaults and super-ordering --- packages/python/tests/class-python.test.ts | 83 ++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts index 53ce5d1e..c3cc0b3b 100644 --- a/packages/python/tests/class-python.test.ts +++ b/packages/python/tests/class-python.test.ts @@ -118,4 +118,87 @@ describe('Python class codegen (single-source class slice)', () => { expect(code).not.toContain('def label(self)'); expect(code).toContain("static getter 'label'"); }); + + test('instance-field defaults emit in __init__, never as a shared class attr', () => { + const bag: IRNode = { + type: 'class', + props: { name: 'Bag' }, + children: [ + { + type: 'field', + props: { name: 'items', type: 'object[]', value: { __expr: true, code: '[]' } }, + children: [], + }, + { + type: 'field', + props: { name: 'tag', type: 'string', value: { __expr: true, code: '"empty"' } }, + children: [], + }, + ], + }; + const code = generatePythonClass(bag).join('\n'); + expect(code).toContain('def __init__(self):'); + expect(code).toContain('self.items = []'); + expect(code).toContain('self.tag = "empty"'); + // Shared-mutable-default trap: instance fields must NOT become class-level attrs. 
+ expect(code).not.toMatch(/^ {4}items\s*[:=]/m); + }); + + test('static field values are extracted from value={{...}} (not None)', () => { + const reg: IRNode = { + type: 'class', + props: { name: 'Reg' }, + children: [ + { + type: 'field', + props: { name: 'kind', type: 'string', static: 'true', value: { __expr: true, code: '"audited"' } }, + children: [], + }, + ], + }; + const code = generatePythonClass(reg).join('\n'); + expect(code).toContain('kind: str = "audited"'); + expect(code).not.toContain('kind: str = None'); + }); + + test('derived class without a constructor forwards to base init, then applies defaults', () => { + const dog: IRNode = { + type: 'class', + props: { name: 'Dog', extends: 'Animal' }, + children: [ + { + type: 'field', + props: { name: 'tricks', type: 'object[]', value: { __expr: true, code: '[]' } }, + children: [], + }, + ], + }; + const code = generatePythonClass(dog).join('\n'); + expect(code).toContain('def __init__(self, *args, **kwargs):'); + expect(code).toContain('super().__init__(*args, **kwargs)'); + expect(code).toContain('self.tricks = []'); + expect(code.indexOf('super().__init__')).toBeLessThan(code.indexOf('self.tricks = []')); + }); + + test('field defaults run AFTER super() inside an explicit derived constructor', () => { + const dog: IRNode = { + type: 'class', + props: { name: 'Dog', extends: 'Animal' }, + children: [ + { + type: 'field', + props: { name: 'tricks', type: 'object[]', value: { __expr: true, code: '[]' } }, + children: [], + }, + { + type: 'constructor', + props: {}, + children: [param('name', 'string'), handler([{ type: 'do', props: { value: 'super(name)' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(dog).join('\n'); + expect(code).toContain('super().__init__(name)'); + expect(code.indexOf('super().__init__(name)')).toBeLessThan(code.indexOf('self.tricks = []')); + }); }); From fcd4b7b0d4507f1b6d4fd54a04429dbf616fd93d Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 
10:32:19 +0200 Subject: [PATCH 44/46] test(conformance): CI-enforce class TS<->Python parity --- package.json | 7 +- scripts/class-conformance.mjs | 190 ++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+), 3 deletions(-) create mode 100644 scripts/class-conformance.mjs diff --git a/package.json b/package.json index f8f118e4..1952fa1a 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "private": true, "type": "module", "packageManager": "pnpm@10.32.1", - "description": "KERN — backend structure and portable route logic for TypeScript/Express and Python/FastAPI parity.", + "description": "KERN \u2014 backend structure and portable route logic for TypeScript/Express and Python/FastAPI parity.", "author": "cukas", "repository": { "type": "git", @@ -18,7 +18,7 @@ "test:non-semantics": "pnpm -r --filter '!kern-monorepo' --filter '!@kernlang/review-python' test --testPathIgnorePatterns=ir-semantics && pnpm test:prepush && pnpm check:rule-coverage", "check:rule-coverage": "node ./scripts/check-rule-coverage.mjs", "check:python-codegen": "pnpm --filter @kernlang/core --filter @kernlang/python build && node ./scripts/lift-rate-python.mjs --check", - "check:conformance": "pnpm --filter @kernlang/core --filter @kernlang/python --filter @kernlang/express build && node ./scripts/conformance.mjs", + "check:conformance": "pnpm --filter @kernlang/core --filter @kernlang/python --filter @kernlang/express build && node ./scripts/conformance.mjs && node ./scripts/class-conformance.mjs", "docs:contracts": "pnpm --filter @kernlang/core build && node ./scripts/generate-ir-semantics-docs.mjs --format=markdown --out=-", "docs:contracts:json": "pnpm --filter @kernlang/core build && node ./scripts/generate-ir-semantics-docs.mjs --format=json --out=generated/contracts/registry.json", "docs:contracts:check": "pnpm --filter @kernlang/core build && node ./scripts/check-contract-docs.mjs", @@ -32,7 +32,8 @@ "lint:fix": "biome check --fix", "format": "biome format 
--write", "prepush": "node ./scripts/pre-push.mjs", - "prepare": "node ./scripts/install-git-hooks.mjs" + "prepare": "node ./scripts/install-git-hooks.mjs", + "check:class-conformance": "pnpm --filter @kernlang/core --filter @kernlang/python build && node ./scripts/class-conformance.mjs" }, "pnpm": { "onlyBuiltDependencies": [ diff --git a/scripts/class-conformance.mjs b/scripts/class-conformance.mjs new file mode 100644 index 00000000..e8a2e7de --- /dev/null +++ b/scripts/class-conformance.mjs @@ -0,0 +1,190 @@ +/** + * Class differential conformance — KERN single-source class parity. + * + * Each fixture is a self-contained KERN module: a class (or class hierarchy) + * plus a zero-arg `fn probe` that exercises it. The module is compiled through + * BOTH codegen paths (core -> TypeScript, python -> pure Python), each driver + * calls `probe()` and prints its JSON-normalized return, and we assert + * ts == python == expected. This proves class behavior is identical across + * targets BY CONSTRUCTION (both derive from one definition), not by hand-diffing + * two emitters. + * + * Scope: portable probes only (number/string ops). List mutation needs a + * portable list-append lowering and is exercised separately (unit tests prove + * the instance-field-default isolation directly). 
+ * + * Run: node scripts/class-conformance.mjs (or via `pnpm check:class-conformance`) + */ + +import { execFileSync } from 'node:child_process'; +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const REPO = dirname(dirname(fileURLToPath(import.meta.url))); +const { parse, generateCoreNode } = await import(join(REPO, 'packages/core/dist/index.js')); +const { generatePythonCoreNode } = await import(join(REPO, 'packages/python/dist/codegen-python.js')); +const tsCompiler = await import('typescript'); + +const FIXTURES = [ + { + name: 'construction + fields + method', + kern: `class name=Point export=true + field name=x type=number + field name=y type=number + constructor + param name=x type=number + param name=y type=number + handler + assign target="this.x" value="x" + assign target="this.y" value="y" + method name=sum returns=number + handler + return value="this.x + this.y" +fn name=probe returns=number + handler + return value="new Point(3, 4).sum()"`, + expected: 7, + }, + { + name: 'single inheritance + super constructor + super method', + kern: `class name=Animal export=true + field name=name type=string + constructor + param name=name type=string + handler + assign target="this.name" value="name" + method name=describe returns=string + handler + return value="\`\${this.name} is an animal\`" +class name=Dog extends=Animal export=true + constructor + param name=name type=string + handler + do value="super(name)" + method name=describe returns=string + handler + return value="\`\${super.describe()} (a dog)\`" +fn name=probe returns=string + handler + return value="new Dog(\\"Rex\\").describe()"`, + expected: 'Rex is an animal (a dog)', + }, + { + name: 'instance getter', + kern: `class name=Person export=true + field name=first type=string + field name=last type=string + constructor + param name=first type=string + param 
name=last type=string + handler + assign target="this.first" value="first" + assign target="this.last" value="last" + getter name=full returns=string + handler + return value="\`\${this.first} \${this.last}\`" +fn name=probe returns=string + handler + return value="new Person(\\"Ada\\", \\"Lovelace\\").full"`, + expected: 'Ada Lovelace', + }, + { + name: 'static method', + kern: `class name=MathBox export=true + method name=double static=true returns=number + param name=n type=number + handler + return value="n * 2" +fn name=probe returns=number + handler + return value="MathBox.double(21)"`, + expected: 42, + }, + { + name: 'instance field default (read, no constructor)', + kern: `class name=Config export=true + field name=mode type=string value={{ "dev" }} +fn name=probe returns=string + handler + return value="new Config().mode"`, + expected: 'dev', + }, + { + name: 'getter + setter + field default round-trip', + kern: `class name=Cell export=true + field name=v type=number value={{ 0 }} + getter name=value returns=number + handler + return value="this.v" + setter name=value + param name=next type=number + handler + assign target="this.v" value="next" +fn name=probe returns=number + handler + let name=c value="new Cell()" + assign target="c.value" value="9" + return value="c.value"`, + expected: 9, + }, +]; + +const canon = (v) => JSON.stringify(v); + +const dir = mkdtempSync(join(tmpdir(), 'kern-class-conf-')); +process.on('exit', () => { + try { + rmSync(dir, { recursive: true, force: true }); + } catch { + // best-effort tmp cleanup — never fail the run on it + } +}); + +let pass = 0; +const failures = []; + +for (let i = 0; i < FIXTURES.length; i++) { + const fx = FIXTURES[i]; + try { + const root = parse(fx.kern); + // A single top-level decl parses as the node itself; multiple decls wrap in a root. + const topNodes = root.type === 'class' || root.type === 'fn' ? [root] : (root.children ?? 
[]); + + // TypeScript module + const tsSource = `${topNodes.map((n) => generateCoreNode(n).join('\n')).join('\n\n')}\nconsole.log(JSON.stringify(probe()));`; + const tsFile = join(dir, `mod-${i}.mjs`); + writeFileSync( + tsFile, + tsCompiler.transpileModule(tsSource, { + compilerOptions: { module: tsCompiler.ModuleKind.ESNext, target: tsCompiler.ScriptTarget.ES2022 }, + }).outputText, + ); + + // Python module + const pySource = `import json\n${topNodes.map((n) => generatePythonCoreNode(n).join('\n')).join('\n\n')}\nprint(json.dumps(probe()))`; + const pyFile = join(dir, `mod-${i}.py`); + writeFileSync(pyFile, pySource); + + const opts = { encoding: 'utf8', timeout: 10_000 }; + const tsOut = JSON.parse(execFileSync('node', [tsFile], opts).trim()); + const pyOut = JSON.parse(execFileSync('python3', [pyFile], opts).trim()); + + if (canon(tsOut) === canon(fx.expected) && canon(pyOut) === canon(fx.expected)) { + pass++; + } else { + failures.push({ name: fx.name, expected: fx.expected, ts: tsOut, py: pyOut }); + } + } catch (err) { + failures.push({ name: fx.name, error: err?.stderr?.toString?.() || err?.message || String(err) }); + } +} + +console.log(`Class conformance: ${pass}/${FIXTURES.length} fixtures passed (ts == python == expected)`); +for (const f of failures) { + if (f.error) console.error(` FAIL ${f.name}: ${f.error}`); + else console.error(` FAIL ${f.name}: expected ${canon(f.expected)} | ts ${canon(f.ts)} | py ${canon(f.py)}`); +} +if (failures.length > 0) process.exit(1); +console.log('All passed.'); From bba9db8105a5f7e2abb881a0a36a9c20131e14eb Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 11:13:47 +0200 Subject: [PATCH 45/46] feat(python): static accessors via per-class metaclass (with chaining) --- packages/python/src/generators/data.ts | 70 ++++++++++++++++++++------ 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index d620922f..242da074 
100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -24,7 +24,7 @@ import { mapTsTypeToPython, toSnakeCase } from '../type-map.js'; * imports: empty }`. */ function methodBodyCodePython( method: IRNode, - opts?: { classBody?: boolean; isConstructor?: boolean }, + opts?: { classBody?: boolean; isConstructor?: boolean; staticReceiver?: boolean }, ): { code: string; imports: Set; helpers: Set } { const handler = getFirstChild(method, 'handler'); if (!handler || getProps(handler).lang !== 'kern') { @@ -59,7 +59,8 @@ function methodBodyCodePython( } // Class member bodies: `this` resolves to `self`, and `super(...)`/`super.x` // lower to `super().__init__(...)`/`super().x` via the inClassBody flag. - if (opts?.classBody) symbolMap.this = 'self'; + // In a static accessor (metaclass property) body `this` is the class -> `cls`. + if (opts?.classBody) symbolMap.this = opts?.staticReceiver ? 'cls' : 'self'; const { code, imports, helpers } = emitNativeKernBodyPythonWithImports(handler, { symbolMap, inClassBody: opts?.classBody ?? false, @@ -74,7 +75,10 @@ function methodBodyCodePython( * scope absorbs them, and Python caches modules after first import. * Returns the indented lines (4-space prefix) ready to push into the * enclosing class definition. Empty body yields a single `pass`. */ -function methodBodyLinesPython(method: IRNode, opts?: { classBody?: boolean; isConstructor?: boolean }): string[] { +function methodBodyLinesPython( + method: IRNode, + opts?: { classBody?: boolean; isConstructor?: boolean; staticReceiver?: boolean }, +): string[] { const { code, imports, helpers } = methodBodyCodePython(method, opts); const lines: string[] = []; for (const mod of [...imports].sort()) { @@ -497,7 +501,6 @@ export function generatePythonClass(node: IRNode): string[] { const name = emitIdentifier(props.name as string, 'UnknownClass', node); const baseRaw = typeof props.extends === 'string' ? 
(props.extends as string) : ''; const base = baseRaw ? emitIdentifier(baseRaw, 'object', node) : ''; - const header = base ? `class ${name}(${base}):` : `class ${name}:`; const isStatic = (n: IRNode): boolean => { const np = p(n); @@ -511,6 +514,49 @@ export function generatePythonClass(node: IRNode): string[] { const setters = kids(node, 'setter'); const ctor = firstChild(node, 'constructor'); + // Static accessors (static get/set) lower to a per-class metaclass: both + // `Box.label` reads and `Box.label = x` writes dispatch through the metaclass + // @property/.setter (a plain descriptor would be shadowed on assignment). The + // static backing field stays a class attribute. The metaclass extends + // `type()` so that when the base ALSO has static accessors the derived + // metaclass subclasses the base metaclass (no `metaclass conflict`, and the + // base's static accessors are inherited); when the base has none, `type()` + // is just `type`. + const staticGetters = getters.filter(isStatic); + const staticSetters = setters.filter(isStatic); + const metaName = `_${name}Meta`; + const metaLines: string[] = []; + if (staticGetters.length + staticSetters.length > 0) { + metaLines.push(`class ${metaName}(${base ? `type(${base})` : 'type'}):`); + const metaGetterNames = new Set(); + for (const g of staticGetters) { + const gp = p(g); + const gname = toSnakeCase((gp.name as string) || 'prop'); + const returns = gp.returns ? 
` -> ${mapTsTypeToPython(gp.returns as string)}` : ''; + metaGetterNames.add(gname); + metaLines.push(' @property'); + metaLines.push(` def ${gname}(cls)${returns}:`); + metaLines.push(...methodBodyLinesPython(g, { classBody: true, staticReceiver: true })); + metaLines.push(''); + } + for (const s of staticSetters) { + const sname = toSnakeCase((p(s).name as string) || 'prop'); + if (!metaGetterNames.has(sname)) { + metaLines.push(' @property'); + metaLines.push(` def ${sname}(cls): # write-only static property`); + metaLines.push(' return None'); + metaLines.push(''); + metaGetterNames.add(sname); + } + metaLines.push(` @${sname}.setter`); + metaLines.push(` def ${sname}(cls, ${buildPythonParamList(s, { selfPrefix: false })}):`); + metaLines.push(...methodBodyLinesPython(s, { classBody: true, staticReceiver: true })); + metaLines.push(''); + } + } + const baseParts = [base, metaLines.length > 0 ? `metaclass=${metaName}` : ''].filter(Boolean); + const header = baseParts.length > 0 ? `class ${name}(${baseParts.join(', ')}):` : `class ${name}:`; + const body: string[] = []; // Static fields -> class-level attributes (shared across instances, like TS statics). @@ -575,16 +621,12 @@ export function generatePythonClass(node: IRNode): string[] { body.push(''); } - // Getters -> @property. Static accessors need a metaclass/classmethod-property - // and are a follow-up; skip them with a marker rather than emit broken code. + // Getters -> @property. Static getters were already emitted on the metaclass. const instanceGetterNames = new Set(); for (const g of getters) { + if (isStatic(g)) continue; const gp = p(g); const gname = toSnakeCase((gp.name as string) || 'prop'); - if (isStatic(g)) { - body.push(` # static getter '${gname}' is not yet supported on the Python target`); - continue; - } instanceGetterNames.add(gname); const returns = gp.returns ? 
` -> ${mapTsTypeToPython(gp.returns as string)}` : ''; body.push(' @property'); @@ -596,12 +638,9 @@ export function generatePythonClass(node: IRNode): string[] { // `.setter`; KERN allows setter-only properties, so synthesize a getter when // none was declared (write-only -> returns None, matching a TS getter-less read). for (const s of setters) { + if (isStatic(s)) continue; // static setters were already emitted on the metaclass const sp = p(s); const sname = toSnakeCase((sp.name as string) || 'prop'); - if (isStatic(s)) { - body.push(` # static setter '${sname}' is not yet supported on the Python target`); - continue; - } if (!instanceGetterNames.has(sname)) { body.push(' @property'); body.push(` def ${sname}(self): # write-only property (no getter declared in KERN)`); @@ -617,7 +656,8 @@ export function generatePythonClass(node: IRNode): string[] { if (body.length === 0) body.push(' pass'); - return [header, ...body]; + // Metaclass (if any) must be defined before the class that references it. + return metaLines.length > 0 ? 
[...metaLines, header, ...body] : [header, ...body]; } // ── Union (Pydantic Discriminated Union) ──────────────────────────────── From 9f8615d32166453c5016b043f8224091c3fbdbe3 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 11:13:47 +0200 Subject: [PATCH 46/46] test(python): static-accessor metaclass + inheritance conformance --- packages/python/tests/class-python.test.ts | 18 +++++++-- scripts/class-conformance.mjs | 43 ++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts index c3cc0b3b..9b54e0e3 100644 --- a/packages/python/tests/class-python.test.ts +++ b/packages/python/tests/class-python.test.ts @@ -102,7 +102,7 @@ describe('Python class codegen (single-source class slice)', () => { expect(code).toContain('@items.setter'); }); - test('static accessors are skipped (not emitted as broken instance @property)', () => { + test('static accessors lower to a per-class metaclass property (this -> cls)', () => { const reg: IRNode = { type: 'class', props: { name: 'Reg' }, @@ -110,13 +110,25 @@ describe('Python class codegen (single-source class slice)', () => { { type: 'getter', props: { name: 'label', static: 'true', returns: 'string' }, - children: [handler([{ type: 'return', props: { value: '"x"' }, children: [] }])], + children: [handler([{ type: 'return', props: { value: 'this.store' }, children: [] }])], + }, + { + type: 'setter', + props: { name: 'label', static: 'true' }, + children: [ + param('v', 'string'), + handler([{ type: 'assign', props: { target: 'this.store', value: 'v' }, children: [] }]), + ], }, ], }; const code = generatePythonClass(reg).join('\n'); + expect(code).toContain('class _RegMeta(type):'); + expect(code).toContain('class Reg(metaclass=_RegMeta):'); + expect(code).toContain('def label(cls) -> str:'); + expect(code).toContain('return cls.store'); // this -> cls inside a static accessor + 
expect(code).toContain('@label.setter'); expect(code).not.toContain('def label(self)'); - expect(code).toContain("static getter 'label'"); }); test('instance-field defaults emit in __init__, never as a shared class attr', () => { diff --git a/scripts/class-conformance.mjs b/scripts/class-conformance.mjs index e8a2e7de..36e93643 100644 --- a/scripts/class-conformance.mjs +++ b/scripts/class-conformance.mjs @@ -129,6 +129,49 @@ fn name=probe returns=number return value="c.value"`, expected: 9, }, + { + name: 'static accessor read + write round-trip', + kern: `class name=Counter export=true + field name=_count type=number static=true value={{ 0 }} + getter name=count static=true returns=number + handler + return value="this._count" + setter name=count static=true + param name=v type=number + handler + assign target="this._count" value="v" +fn name=probe returns=number + handler + assign target="Counter.count" value="Counter.count + 5" + assign target="Counter.count" value="Counter.count + 5" + return value="Counter.count"`, + expected: 10, + }, + { + name: 'inherited + overridden static accessor (metaclass chaining)', + kern: `class name=Base export=true + field name=_val type=number static=true value={{ 0 }} + getter name=val static=true returns=number + handler + return value="this._val" + setter name=val static=true + param name=v type=number + handler + assign target="this._val" value="v" +class name=Derived extends=Base export=true + getter name=val static=true returns=number + handler + return value="this._val * 2" + setter name=val static=true + param name=v type=number + handler + assign target="this._val" value="v + 1" +fn name=probe returns=number + handler + assign target="Derived.val" value="5" + return value="Derived.val"`, + expected: 12, + }, ]; const canon = (v) => JSON.stringify(v);