From be94b7e3ad656e9de84ce8ee55f68d7809540372 Mon Sep 17 00:00:00 2001 From: Nico Date: Sat, 6 Jun 2026 17:21:07 +0200 Subject: [PATCH 01/63] Feat/portable logic foundation (#386) * Add portable expression logic foundation * Format portable expression foundation changes --- generated/contracts/registry.json | 17 ++ packages/core/src/codegen/body-ts.ts | 69 +++++++++ .../src/codegen/portable-logic-primitives.ts | 46 +++++- packages/core/src/index.ts | 4 + .../core/src/ir/semantics/expression-v1.ts | 145 ++++++++++++++++++ .../core/src/ir/semantics/portable-scalar.ts | 26 ++++ .../core/src/ir/semantics/register-all.ts | 2 + packages/core/src/ir/semantics/ts-leg.ts | 7 +- packages/core/src/node-props.ts | 6 + packages/core/src/parser-core.ts | 4 + .../src/parser-validate-body-statements.ts | 3 +- packages/core/src/schema.ts | 23 +++ packages/core/src/spec.ts | 1 + .../tests/ir-semantics-expression-v1.test.ts | 116 ++++++++++++++ packages/core/tests/native-handlers.test.ts | 104 +++++++++++++ .../tests/portable-logic-primitives.test.ts | 20 ++- packages/python/src/codegen-body-python.ts | 94 +++++++++++- packages/python/src/core/expr/helpers.ts | 2 + .../python/src/ir-semantics/python-leg.ts | 7 +- .../tests/ir-semantics-python-leg.test.ts | 29 ++++ .../tests/native-handlers-python.test.ts | 113 ++++++++++++++ .../native-handlers-slice2-python.test.ts | 2 +- scripts/conformance.mjs | 17 +- 23 files changed, 841 insertions(+), 16 deletions(-) create mode 100644 packages/core/src/ir/semantics/expression-v1.ts create mode 100644 packages/core/tests/ir-semantics-expression-v1.test.ts diff --git a/generated/contracts/registry.json b/generated/contracts/registry.json index 77e33c04..67b4095c 100644 --- a/generated/contracts/registry.json +++ b/generated/contracts/registry.json @@ -100,6 +100,23 @@ } ] }, + { + "nodeType": "expression-v1", + "forbiddenRewrites": [], + "fixtureCount": 11, + "fixtureSamples": [ + { + "description": "expression-v1: number scalar", + 
"expectedCompletionKind": "normal", + "expectedEventCount": 1 + }, + { + "description": "expression-v1: truthiness basic", + "expectedCompletionKind": "normal", + "expectedEventCount": 1 + } + ] + }, { "nodeType": "fmt", "forbiddenRewrites": [ diff --git a/packages/core/src/codegen/body-ts.ts b/packages/core/src/codegen/body-ts.ts index 46f985b1..3be84e3f 100644 --- a/packages/core/src/codegen/body-ts.ts +++ b/packages/core/src/codegen/body-ts.ts @@ -51,6 +51,7 @@ import type { ExprObject, IRNode } from '../types.js'; import type { ValueIR } from '../value-ir.js'; import { emitFmtTemplate, emitIdentifier, emitTypeAnnotation } from './emitters.js'; import { emitStringKeyArray, parseKeys } from './ground-layer.js'; +import { emitParamList } from './type-system.js'; /** Slice 3e — caller-provided options, parity with the Python body emitter. * `symbolMap` is currently unused on the TS target; reserved for future @@ -154,6 +155,7 @@ export function emitNativeKernBodyTSWithImports(handlerNode: IRNode, options?: B * try/each) emit multiple lines and never receive the slot. 
*/ const TRAILING_COMMENT_TYPES = new Set([ 'let', + 'expression-v1', 'assign', 'fmt', 'clamp', @@ -190,6 +192,10 @@ function emitChildrenTS( for (const line of emitSetTS(child, ctx)) lines.push(`${indent}${line}`); } else if (child.type === 'let') { for (const line of emitLetTS(child, ctx)) lines.push(`${indent}${line}`); + } else if (child.type === 'expression-v1') { + for (const line of emitExpressionV1TS(child, ctx)) lines.push(`${indent}${line}`); + } else if (child.type === 'fn') { + for (const line of emitFnTS(child, ctx, indent)) lines.push(line); } else if (child.type === 'assign') { for (const line of emitAssignTS(child, ctx)) lines.push(`${indent}${line}`); } else if (child.type === 'destructure') { @@ -1312,3 +1318,66 @@ function emitFmtTS(node: IRNode, ctx: BodyEmitContext): string[] { if (ctx.traceHooks?.letAssign) lines.push(letAssignTraceTS(name)); return lines; } + +function emitExpressionV1TS(node: IRNode, ctx: BodyEmitContext): string[] { + const props = (node.props ?? {}) as Record; + const name = String(props.name ?? ''); + if (!name) throw new Error('body-statement `expression-v1` requires `name=`.'); + const typeAnn = props.type ? `: ${emitTypeAnnotation(String(props.type), 'unknown', node)}` : ''; + const rawExpr = props.expr; + const exprSource = unwrapBodyExpr(rawExpr); + if (exprSource === undefined || exprSource === '') { + throw new Error('body-statement `expression-v1` requires `expr=`.'); + } + const exprIR = parseExpression(exprSource); + declareLocalBinding(ctx, name, 'const'); + const lines = [`const ${name}${typeAnn} = ${emitExpression(exprIR)};`]; + if (ctx.traceHooks?.letAssign) lines.push(letAssignTraceTS(name)); + return lines; +} + +function emitFnTS(node: IRNode, ctx: BodyEmitContext, indent: string): string[] { + const props = (node.props ?? {}) as Record; + const name = String(props.name ?? 
''); + if (!name) throw new Error('body-statement `fn` requires `name=`.'); + declareLocalBinding(ctx, name, 'const'); + + const isAsync = props.async === 'true' || props.async === true; + const asyncKw = isAsync ? 'async ' : ''; + const returns = props.returns ? emitTypeAnnotation(String(props.returns), 'unknown', node) : ''; + const returnType = returns && isAsync && !/^Promise\s*` : returns; + const retClause = returnType ? `: ${returnType}` : ''; + if (props.params && node.children?.some((c) => c.type === 'param')) { + throw new Error('body-statement `fn` cannot mix legacy `params=` with structured `param` children.'); + } + const paramList = emitParamList(node); + + const lines: string[] = []; + lines.push(`${indent}${asyncKw}function ${name}(${paramList})${retClause} {`); + + const handlerNode = node.children?.find((c) => c.type === 'handler'); + const bodyNodes = handlerNode ? (handlerNode.children ?? []) : (node.children ?? []); + const stmtNodes = bodyNodes.filter((c) => c.type !== 'param' && c.type !== 'decorator'); + + for (const sl of emitChildrenTS(stmtNodes, ctx, indent + INDENT_STEP, paramBindingsFromSignature(paramList))) { + lines.push(sl); + } + lines.push(`${indent}}`); + return lines; +} + +function paramBindingsFromSignature(paramList: string): Array<[string, 'const']> { + if (!paramList.trim()) return []; + return splitBodyExpressionList(paramList, 'fn params=') + .map( + (part) => + part + .split('=')[0] + ?.split(':')[0] + ?.trim() + .replace(/^\.\.\./, '') + .replace(/\?$/, '') ?? 
'', + ) + .filter((name) => /^[A-Za-z_$][\w$]*$/.test(name)) + .map((name) => [name, 'const']); +} diff --git a/packages/core/src/codegen/portable-logic-primitives.ts b/packages/core/src/codegen/portable-logic-primitives.ts index 96890a46..cde1a919 100644 --- a/packages/core/src/codegen/portable-logic-primitives.ts +++ b/packages/core/src/codegen/portable-logic-primitives.ts @@ -30,6 +30,7 @@ export type PortableLogicPrimitiveId = | 'collection.indexBy' | 'collection.countBy' | 'logic.firstTruthy' + | 'logic.coalesce' | 'time.epochMs' | 'logic.not' | 'number.clamp' @@ -42,9 +43,12 @@ export type PortableLogicPrimitiveId = | 'string.trim' | 'string.split' | 'string.replaceFirst' - | 'string.replaceAll'; + | 'string.replaceAll' + | 'logic.firstDefined' + | 'string.coerce'; export type PortableLogicTarget = 'ts' | 'python' | 'go'; export type PortableLogicSupport = 'stable' | 'preview' | 'unsupported'; +export type GoPortableLogicSupport = 'preview' | 'unsupported'; export type PortableLogicPurity = 'pure' | 'reads-time'; export type PortableLogicIntent = 'semantic-gap' | 'host-pattern' | 'language-operator'; @@ -56,7 +60,11 @@ export interface PortableLogicPrimitive { hostPatterns: readonly string[]; portabilityNotes: readonly string[]; operatorRationale?: string; - targets: Record; + targets: { + ts: PortableLogicSupport; + python: PortableLogicSupport; + go: GoPortableLogicSupport; + }; } export const PORTABLE_LOGIC_PRIMITIVES = { @@ -277,6 +285,16 @@ export const PORTABLE_LOGIC_PRIMITIVES = { operatorRationale: 'KERN firstTruthy names this common fallback operator chain as portable intent.', targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, }, + 'logic.coalesce': { + id: 'logic.coalesce', + description: 'Ordered nullish fallback selection that preserves false, zero, and empty string.', + purity: 'pure', + intent: 'language-operator', + hostPatterns: ['a ?? b ?? 
c'], + portabilityNotes: ['Uses null/None-only fallback; undefined is normalized to null only at target boundaries.'], + operatorRationale: 'KERN coalesce names the portable nullish fallback operator chain for body and route lowering.', + targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, + }, 'time.epochMs': { id: 'time.epochMs', description: 'Epoch-milliseconds extraction from a date/time value, e.g. JS new Date(x).getTime().', @@ -399,6 +417,29 @@ export const PORTABLE_LOGIC_PRIMITIVES = { portabilityNotes: ['Replacement callbacks, regex searches, and substitution-token replacements are excluded.'], targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, }, + 'logic.firstDefined': { + id: 'logic.firstDefined', + description: 'First defined (non-null/non-undefined) value selection.', + purity: 'pure', + intent: 'language-operator', + hostPatterns: ['a ?? b'], + portabilityNotes: ['Returns the first value that is not null or undefined.'], + operatorRationale: 'Names the nullish coalescing fallback intent.', + targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, + }, + 'string.coerce': { + id: 'string.coerce', + description: 'Portable scalar-to-string coercion for null, booleans, strings, and numbers.', + purity: 'pure', + intent: 'language-operator', + hostPatterns: ['String(value)', '_kern_fmt(value)'], + portabilityNotes: [ + 'Null becomes "null", booleans use lowercase spelling, strings pass through, and numbers use JS decimal text.', + ], + operatorRationale: + 'String coercion is a host operator in TS/Python; KERN documents the expression-v1 subset explicitly.', + targets: { ts: 'stable', python: 'stable', go: 'unsupported' }, + }, } as const satisfies Record; export function validatePortableLogicPrimitiveRegistry( @@ -410,6 +451,7 @@ export function validatePortableLogicPrimitiveRegistry( } const idSegments = id.split('.').map((segment) => segment.toLowerCase()); if ( + id !== 'logic.coalesce' && idSegments.some((segment) => 
segment === 'nullish' || segment === 'coalesce' || segment === 'nullishcoalesce') ) { throw new Error(`Portable logic primitive '${id}' duplicates existing language nullish/coalesce syntax.`); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 730cf7c3..d6b7b160 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -201,6 +201,10 @@ export { // TS → .kern importer export type { ImportResult } from './importer.js'; export { escapeKernString, importTypeScript } from './importer.js'; +export { + expressionV1Contract, + registerExpressionV1Contract, +} from './ir/semantics/expression-v1.js'; export type { LowerTarget } from './ir/semantics/fixture-lowering.js'; export { lowerFixtureForTarget, serializeValue } from './ir/semantics/fixture-lowering.js'; // IR runtime semantics — executable contracts + differential harness. diff --git a/packages/core/src/ir/semantics/expression-v1.ts b/packages/core/src/ir/semantics/expression-v1.ts new file mode 100644 index 00000000..166439d1 --- /dev/null +++ b/packages/core/src/ir/semantics/expression-v1.ts @@ -0,0 +1,145 @@ +/** + * `expression-v1` runtime semantics. + */ + +import { parseExpression } from '../../parser-expression.js'; +import { type IRNode, isExprObject } from '../../types.js'; +import { type NodeContract, type NodeFixture, registerContract, type SemanticEnv } from './index.js'; +import { evalPortableValue, isPortableBindingName } from './portable-scalar.js'; +import type { Trace } from './trace.js'; + +interface ExpressionV1Props { + name?: string; + expr?: unknown; +} + +function asExpressionV1Props(ir: IRNode): ExpressionV1Props { + return (ir.props ?? 
{}) as ExpressionV1Props; +} + +function expressionSource(expr: unknown): string | undefined { + if (expr === undefined || expr === null) return undefined; + if (isExprObject(expr)) return expr.code; + return String(expr); +} + +function expressionV1Preconditions(ir: IRNode, env: SemanticEnv): boolean { + const props = asExpressionV1Props(ir); + if (!isPortableBindingName(props.name)) return false; + if (env.bindings.has(props.name)) return false; + const expr = expressionSource(props.expr); + if (!Object.hasOwn(ir.props ?? {}, 'expr') || expr === undefined || expr === '') return false; + try { + evalPortableValue(parseExpression(expr), env); + return true; + } catch { + return false; + } +} + +function expressionV1Effects(ir: IRNode, env: SemanticEnv): Trace { + const props = asExpressionV1Props(ir); + const name = props.name as string; + const expr = expressionSource(props.expr); + if (expr === undefined || expr === '') { + throw new Error('expression-v1: missing expr'); + } + const value = evalPortableValue(parseExpression(expr), env); + env.bindings.set(name, value); + return { events: [{ op: 'assign', target: name, value }], completion: { kind: 'normal' } }; +} + +function expressionV1Completion() { + return { kind: 'normal' as const }; +} + +const FIXTURES: readonly NodeFixture[] = Object.freeze([ + { + description: 'expression-v1: number scalar', + ir: { type: 'expression-v1', props: { name: 'n', expr: '42' } }, + expected: { events: [{ op: 'assign', target: 'n', value: 42 }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: string scalar', + ir: { type: 'expression-v1', props: { name: 's', expr: '"hello"' } }, + expected: { events: [{ op: 'assign', target: 's', value: 'hello' }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: boolean scalar', + ir: { type: 'expression-v1', props: { name: 'b', expr: 'true' } }, + expected: { events: [{ op: 'assign', target: 'b', value: true }], completion: { kind: 'normal' 
} }, + }, + { + description: 'expression-v1: null scalar', + ir: { type: 'expression-v1', props: { name: 'nl', expr: 'null' } }, + expected: { events: [{ op: 'assign', target: 'nl', value: null }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: equality', + ir: { type: 'expression-v1', props: { name: 'eq', expr: 'x === y' } }, + env: { + bindings: new Map([ + ['x', 1], + ['y', 1], + ]), + }, + expected: { events: [{ op: 'assign', target: 'eq', value: true }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: truthiness basic', + ir: { type: 'expression-v1', props: { name: 'truth', expr: '!x' } }, + env: { bindings: new Map([['x', '']]) }, + expected: { events: [{ op: 'assign', target: 'truth', value: true }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: template literal string coercion', + ir: { type: 'expression-v1', props: { name: 'res', expr: '`n=${n}`' } }, + env: { bindings: new Map([['n', 100]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: 'n=100' }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: String coercion constructor call', + ir: { type: 'expression-v1', props: { name: 'res', expr: 'String(n)' } }, + env: { bindings: new Map([['n', 100]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: '100' }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: String coercion canonicalizes null', + ir: { type: 'expression-v1', props: { name: 'res', expr: 'String(n)' } }, + env: { bindings: new Map([['n', null]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: 'null' }], completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: String coercion canonicalizes boolean', + ir: { type: 'expression-v1', props: { name: 'res', expr: 'String(flag)' } }, + env: { bindings: new Map([['flag', false]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: 'false' }], 
completion: { kind: 'normal' } }, + }, + { + description: 'expression-v1: ExprObject expression prop', + ir: { type: 'expression-v1', props: { name: 'res', expr: { __expr: true, code: 'n + 1' } } }, + env: { bindings: new Map([['n', 41]]) }, + expected: { events: [{ op: 'assign', target: 'res', value: 42 }], completion: { kind: 'normal' } }, + }, +]); + +export const expressionV1Contract: NodeContract = { + nodeType: 'expression-v1', + preconditions: expressionV1Preconditions, + effects: expressionV1Effects, + completion: expressionV1Completion, + forbiddenRewrites: [], + fixtures: FIXTURES, +}; + +let registered = false; + +export function registerExpressionV1Contract(): void { + if (registered) return; + registerContract(expressionV1Contract); + registered = true; +} + +export function _resetExpressionV1ContractForTest(): void { + registered = false; +} diff --git a/packages/core/src/ir/semantics/portable-scalar.ts b/packages/core/src/ir/semantics/portable-scalar.ts index 7696c14d..83b099dd 100644 --- a/packages/core/src/ir/semantics/portable-scalar.ts +++ b/packages/core/src/ir/semantics/portable-scalar.ts @@ -128,11 +128,37 @@ export function evalPortableValue(node: ValueIR, env: SemanticEnv): PortableScal case 'typeAssert': case 'nonNull': return evalPortableValue(node.expression, env); + case 'tmplLit': { + let result = ''; + for (let i = 0; i < node.quasis.length; i++) { + result += node.quasis[i]; + if (i < node.expressions.length) { + const val = evalPortableValue(node.expressions[i], env); + result += coerceToString(val); + } + } + return result; + } + case 'call': { + if (node.callee.kind === 'ident' && node.callee.name === 'String') { + if (node.args.length !== 1) { + throw new Error('portable: String() expects exactly 1 argument'); + } + const val = evalPortableValue(node.args[0], env); + return coerceToString(val); + } + throw new Error(`portable: unsupported call to "${node.callee.kind === 'ident' ? 
node.callee.name : 'unknown'}"`); + } default: throw new Error(`portable: expression kind "${node.kind}" is outside the portable scalar domain`); } } +export function coerceToString(val: PortableScalar): string { + if (val === null) return 'null'; + return String(val); +} + export function evalPortableBinary(node: Extract, env: SemanticEnv): PortableScalar { if (node.op === '&&') { const left = evalPortableValue(node.left, env); diff --git a/packages/core/src/ir/semantics/register-all.ts b/packages/core/src/ir/semantics/register-all.ts index 657f3a7a..73885f7e 100644 --- a/packages/core/src/ir/semantics/register-all.ts +++ b/packages/core/src/ir/semantics/register-all.ts @@ -16,6 +16,7 @@ import { registerAssignContract } from './assign.js'; import { registerBranchContract } from './branch.js'; import { registerEachContract } from './each.js'; +import { registerExpressionV1Contract } from './expression-v1.js'; import { registerFmtContract } from './fmt.js'; import { registerForContract } from './for.js'; import { registerIfContract } from './if.js'; @@ -37,4 +38,5 @@ export function registerAllContracts(): void { registerFmtContract(); registerWhileContract(); registerTryContract(); + registerExpressionV1Contract(); } diff --git a/packages/core/src/ir/semantics/ts-leg.ts b/packages/core/src/ir/semantics/ts-leg.ts index 3fe3f5fc..c1ae122d 100644 --- a/packages/core/src/ir/semantics/ts-leg.ts +++ b/packages/core/src/ir/semantics/ts-leg.ts @@ -175,17 +175,20 @@ export async function runTsEmitterLeg(fixture: FixtureForLeg, env: SemanticEnv): } function shouldTraceLetAssign(ir: IRNode): boolean { - // `let` (declaration), `assign` (reassignment), and `fmt` (formatted binding) - // observe their binding write through the same `{op:"assign"}` trace hook. + // `let` (declaration), `expression-v1`, `assign` (reassignment), and `fmt` + // (formatted binding) observe their binding write through the same + // `{op:"assign"}` trace hook. 
// `while` fixtures opt in too: their counter setup/advance (let + assign in // body) must emit the same assign events the reference produces. const contract = ir.props?.__semanticContract; const t = ir.type; return ( t === 'let' || + t === 'expression-v1' || t === 'assign' || t === 'fmt' || contract === 'let' || + contract === 'expression-v1' || contract === 'assign' || contract === 'fmt' || contract === 'while' diff --git a/packages/core/src/node-props.ts b/packages/core/src/node-props.ts index ab8f7059..aab6a40a 100644 --- a/packages/core/src/node-props.ts +++ b/packages/core/src/node-props.ts @@ -72,6 +72,11 @@ export interface LetProps extends BaseProps { kind?: string; } +export interface ExpressionV1Props extends BaseProps { + expr?: string | ExprObject; + type?: string; +} + export interface IndexerProps extends BaseProps { keyName?: string; keyType?: string; @@ -850,6 +855,7 @@ export interface NodePropsMap { use: UseProps; from: FromProps; let: LetProps; + 'expression-v1': ExpressionV1Props; indexer: IndexerProps; overload: OverloadProps; service: ServiceProps; diff --git a/packages/core/src/parser-core.ts b/packages/core/src/parser-core.ts index 2ebe93d5..f4bae5a5 100644 --- a/packages/core/src/parser-core.ts +++ b/packages/core/src/parser-core.ts @@ -854,7 +854,9 @@ function isNativeBodyStatementChild(node: IRNode): boolean { case 'cell': case 'set': case 'comment': + case 'fn': case 'let': + case 'expression-v1': case 'assign': case 'destructure': case 'do': @@ -893,7 +895,9 @@ function isNativeBodyStatementChild(node: IRNode): boolean { function isKernHandlerBodySignal(node: IRNode): boolean { switch (node.type) { case 'cell': + case 'fn': case 'let': + case 'expression-v1': case 'assign': case 'destructure': case 'do': diff --git a/packages/core/src/parser-validate-body-statements.ts b/packages/core/src/parser-validate-body-statements.ts index fc115cdb..8286950f 100644 --- a/packages/core/src/parser-validate-body-statements.ts +++ 
b/packages/core/src/parser-validate-body-statements.ts @@ -1,6 +1,6 @@ /** @internal Native KERN body-statement context validator — slice 5b-pre. * - * Body-statement nodes (`assign`, `return`, `throw`, `do`, `continue`, `break`, `while`, `for`, `with`, + * Body-statement nodes (`expression-v1`, `assign`, `return`, `throw`, `do`, `continue`, `break`, `while`, `for`, `with`, * body-form `if`/`else`, body-form `try`) are valid only inside a * `handler lang="kern"` scope (or nested inside another body-statement * under such a handler). Without this rule, the parser silently accepts @@ -213,6 +213,7 @@ function isBodyStatementMisplaced(node: IRNode, ctx: WalkContext): boolean { // lower recursively. Every other non-native context stays rejected. return !ctx.inPortableRoute; case 'cell': + case 'expression-v1': case 'return': case 'throw': case 'continue': diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index c0890594..e10b6ae4 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -601,6 +601,15 @@ export const NODE_SCHEMAS: Record = { effects: { kind: 'string' }, }, }, + 'expression-v1': { + description: 'Expression v1 evaluation node for TS/Python parity', + example: 'expression-v1 name=res expr="a === b"', + props: { + name: { required: true, kind: 'identifier' }, + expr: { required: true, kind: 'rawExpr' }, + type: { kind: 'typeAnnotation' }, + }, + }, fmt: { description: 'Formatted string — declarative template literal. The `template` body is emitted verbatim between backticks, so `${expr}` placeholders interpolate normally. 
Three positional modes: (1) binding form `fmt name=X template=...` emits `const X = \\`...\\`;` at the current scope; (2) return form `fmt return=true template=...` emits `return \\`...\\`;` inside a `fn` body (name must be omitted); (3) inline-JSX form `fmt template=...` (no name, no return=true) appears as a direct child of `render`/`group` and emits `{\\`...\\`}` as a JSX expression — use this to replace handler-wrapped `{\\`${x} files\\`}` text inside composed renders.', @@ -654,7 +663,9 @@ export const NODE_SCHEMAS: Record = { 'catch', 'finally', 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -701,7 +712,9 @@ export const NODE_SCHEMAS: Record = { allowedChildren: [ 'handler', 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -734,7 +747,9 @@ export const NODE_SCHEMAS: Record = { props: {}, allowedChildren: [ 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'do', 'fmt', @@ -1868,7 +1883,9 @@ export const NODE_SCHEMAS: Record = { 'cell', 'set', 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -1968,7 +1985,9 @@ export const NODE_SCHEMAS: Record = { }, allowedChildren: [ 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -2007,7 +2026,9 @@ export const NODE_SCHEMAS: Record = { }, allowedChildren: [ 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', @@ -2048,7 +2069,9 @@ export const NODE_SCHEMAS: Record = { }, allowedChildren: [ 'comment', + 'fn', 'let', + 'expression-v1', 'assign', 'destructure', 'do', diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index 46e2c875..fc777c31 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -344,6 +344,7 @@ export const NODE_TYPES = [ 'description', 'sampling', 'elicitation', + 'expression-v1', ] as const; export type IRNodeType = (typeof NODE_TYPES)[number]; diff --git a/packages/core/tests/ir-semantics-expression-v1.test.ts 
b/packages/core/tests/ir-semantics-expression-v1.test.ts new file mode 100644 index 00000000..88a1a485 --- /dev/null +++ b/packages/core/tests/ir-semantics-expression-v1.test.ts @@ -0,0 +1,116 @@ +/** + * Executable semantic contract for body-statement `expression-v1`. + * + * This contract pins the initial portable scalar expression subset used by + * TS/Python parity: null/bool/string/number scalars, scalar equality, + * truthiness, and KERN-canonical string coercion. + */ + +import { + CONTRACT_REGISTRY, + makeEnv, + ReferenceRunnerError, + referenceRun, + runDifferential, + type Verdict, +} from '../src/index.js'; +import { + _resetExpressionV1ContractForTest, + expressionV1Contract, + registerExpressionV1Contract, +} from '../src/ir/semantics/expression-v1.js'; +import { _resetPrimitivesForTest, registerPrimitives } from '../src/ir/semantics/primitives.js'; +import type { IRNode } from '../src/types.js'; + +beforeEach(() => { + CONTRACT_REGISTRY.clear(); + _resetExpressionV1ContractForTest(); + _resetPrimitivesForTest(); + registerPrimitives(); + registerExpressionV1Contract(); +}); + +afterEach(() => { + CONTRACT_REGISTRY.clear(); + _resetExpressionV1ContractForTest(); + _resetPrimitivesForTest(); +}); + +describe('expression-v1 contract — positive fixtures', () => { + it('exposes scalar, equality, truthiness, and string coercion coverage', () => { + expect(expressionV1Contract.fixtures.length).toBeGreaterThanOrEqual(10); + expect(expressionV1Contract.fixtures.map((f) => f.description)).toEqual( + expect.arrayContaining([ + expect.stringContaining('number scalar'), + expect.stringContaining('string scalar'), + expect.stringContaining('boolean scalar'), + expect.stringContaining('null scalar'), + expect.stringContaining('equality'), + expect.stringContaining('truthiness'), + expect.stringContaining('template literal string coercion'), + expect.stringContaining('canonicalizes null'), + expect.stringContaining('canonicalizes boolean'), + 
expect.stringContaining('ExprObject expression prop'), + ]), + ); + }); + + it.each( + expressionV1Contract.fixtures.map((f) => [f.description, f] as const), + )('reference fixture: %s', async (_desc, fixture) => { + const result = await runDifferential(fixture, { skipTs: true, skipPython: true }); + if (result.verdict !== 'pass') { + throw new Error( + `verdict=${result.verdict}\nfixture=${fixture.description}\nreference=${JSON.stringify( + result.reference, + null, + 2, + )}`, + ); + } + expect(result.verdict).toBe('pass'); + }); + + it.each( + expressionV1Contract.fixtures.map((f) => [f.description, f] as const), + )('TS differential fixture: %s', async (_desc, fixture) => { + const result = await runDifferential(fixture, { skipPython: true }); + if (result.verdict !== 'pass') { + throw new Error( + `verdict=${result.verdict}\n` + + `fixture=${fixture.description}\n` + + `reference=${JSON.stringify(result.reference, null, 2)}\n` + + `ts=${JSON.stringify(result.ts, null, 2)}\n` + + `legError=${JSON.stringify(result.legError, null, 2)}`, + ); + } + expect(result.verdict).toBe('pass'); + }); +}); + +describe('expression-v1 contract — preconditions reject out-of-domain IR', () => { + function mustReject(ir: IRNode, label: string, bindings: Map = new Map()): void { + expect(() => referenceRun(ir, makeEnv({ bindings }))).toThrow(ReferenceRunnerError); + expect(label.length).toBeGreaterThan(0); + } + + it('rejects missing expr', () => { + mustReject({ type: 'expression-v1', props: { name: 'x' } }, 'missing expr'); + }); + + it('rejects empty ExprObject expr', () => { + mustReject({ type: 'expression-v1', props: { name: 'x', expr: { __expr: true, code: '' } } }, 'empty expr object'); + }); + + it('rejects non-portable object literals', () => { + mustReject({ type: 'expression-v1', props: { name: 'x', expr: '{ a: 1 }' } }, 'object literal'); + }); + + it('rejects builtin-shadowing names', () => { + mustReject({ type: 'expression-v1', props: { name: 'print', expr: '"x"' } 
}, 'builtin'); + }); + + it('rejects redeclaring a current binding', () => { + mustReject({ type: 'expression-v1', props: { name: 'x', expr: '1' } }, 'redeclaration', new Map([['x', 0]])); + }); +}); diff --git a/packages/core/tests/native-handlers.test.ts b/packages/core/tests/native-handlers.test.ts index 9c7786f6..a66148cc 100644 --- a/packages/core/tests/native-handlers.test.ts +++ b/packages/core/tests/native-handlers.test.ts @@ -376,6 +376,110 @@ describe('emitNativeKernBodyTS — slice 1 statements', () => { }); }); +describe('emitNativeKernBodyTS — expression-v1 and nested fn statements', () => { + test('expression-v1 emits a typed scalar binding', () => { + const handler = makeHandler([ + { type: 'expression-v1', props: { name: 'label', type: 'string', expr: 'String(value)' } }, + { type: 'return', props: { value: 'label' } }, + ]); + expect(emitNativeKernBodyTS(handler)).toBe(['const label: string = String(value);', 'return label;'].join('\n')); + }); + + test('expression-v1 accepts ExprObject expr props', () => { + const handler = makeHandler([ + { type: 'expression-v1', props: { name: 'total', expr: { __expr: true, code: 'amount + 1' } } }, + { type: 'return', props: { value: 'total' } }, + ]); + expect(emitNativeKernBodyTS(handler)).toBe(['const total = amount + 1;', 'return total;'].join('\n')); + }); + + test('infers kern handler language from expression-v1 child', () => { + const doc = parseDocument( + [ + 'fn name=label returns=string', + ' handler', + ' expression-v1 name=label expr="String(value)"', + ' return value=label', + ].join('\n'), + ); + const handler = doc.children?.[0]?.children?.find((child) => child.type === 'handler'); + expect(handler?.props?.lang).toBe('kern'); + }); + + test('nested fn supports legacy params and returns inside body emit', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'add', params: 'a:number,b:number', returns: 'number' }, + children: [ + { type: 'handler', props: { lang: 'kern' }, 
children: [{ type: 'return', props: { value: 'a + b' } }] }, + ], + }, + { type: 'return', props: { value: 'add(2, 3)' } }, + ]); + expect(emitNativeKernBodyTS(handler)).toBe( + ['function add(a: number, b: number): number {', ' return a + b;', '}', 'return add(2, 3);'].join('\n'), + ); + }); + + test('nested fn supports structured param children', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'add', returns: 'number' }, + children: [ + { type: 'param', props: { name: 'a', type: 'number' } }, + { type: 'param', props: { name: 'b', type: 'number' } }, + { type: 'handler', props: { lang: 'kern' }, children: [{ type: 'return', props: { value: 'a + b' } }] }, + ], + }, + { type: 'return', props: { value: 'add(2, 3)' } }, + ]); + expect(emitNativeKernBodyTS(handler)).toContain('function add(a: number, b: number): number {'); + }); + + test('nested async fn preserves await expressions in body emit', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'loadTotal', params: 'amount:number', returns: 'number', async: 'true' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [ + { type: 'let', props: { name: 'loaded', value: 'await load(amount)' } }, + { type: 'return', props: { value: 'loaded + 5' } }, + ], + }, + ], + }, + ]); + expect(emitNativeKernBodyTS(handler)).toBe( + [ + 'async function loadTotal(amount: number): Promise {', + ' const loaded = await load(amount);', + ' return loaded + 5;', + '}', + ].join('\n'), + ); + }); + + test('nested fn rejects mixed legacy and structured params', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'mixed', params: 'a:number' }, + children: [ + { type: 'param', props: { name: 'b', type: 'number' } }, + { type: 'handler', props: { lang: 'kern' }, children: [] }, + ], + }, + ]); + expect(() => emitNativeKernBodyTS(handler)).toThrow(/cannot mix legacy `params=`/); + }); +}); + describe('emitNativeKernBodyTS — destructure body 
statement', () => { test('emits object destructuring inside native body', () => { const handler = makeHandler([ diff --git a/packages/core/tests/portable-logic-primitives.test.ts b/packages/core/tests/portable-logic-primitives.test.ts index 48cc7962..0e102c53 100644 --- a/packages/core/tests/portable-logic-primitives.test.ts +++ b/packages/core/tests/portable-logic-primitives.test.ts @@ -36,6 +36,7 @@ describe('portable logic primitive registry', () => { 'collection.indexBy', 'collection.countBy', 'logic.firstTruthy', + 'logic.coalesce', 'time.epochMs', 'logic.not', 'number.clamp', @@ -49,6 +50,8 @@ describe('portable logic primitive registry', () => { 'string.split', 'string.replaceFirst', 'string.replaceAll', + 'logic.firstDefined', + 'string.coerce', ]); }); @@ -75,6 +78,7 @@ describe('portable logic primitive registry', () => { expect(portableLogicSupportForTarget('collection.indexBy', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('collection.countBy', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('logic.firstTruthy', 'python')).toBe('stable'); + expect(portableLogicSupportForTarget('logic.coalesce', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('time.epochMs', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('logic.not', 'python')).toBe('stable'); expect(portableLogicSupportForTarget('number.clamp', 'python')).toBe('stable'); @@ -358,6 +362,7 @@ describe('portable logic primitive registry', () => { test('string parity slice has matching target support', () => { const stringPrimitives: PortableLogicPrimitiveId[] = [ + 'string.coerce', 'string.trim', 'string.split', 'string.replaceFirst', @@ -386,6 +391,8 @@ describe('portable logic primitive registry', () => { const firstTruthy = lookupPortableLogicPrimitive('logic.firstTruthy'); expect(firstTruthy?.hostPatterns).toContain('a || b || c'); expect(firstTruthy?.portabilityNotes.join(' ')).toContain('empty collections are target-specific'); + 
expect(lookupPortableLogicPrimitive('logic.coalesce')?.hostPatterns).toContain('a ?? b ?? c'); + expect(lookupPortableLogicPrimitive('logic.coalesce')?.portabilityNotes.join(' ')).toContain('null/None-only'); expect(lookupPortableLogicPrimitive('number.clamp')?.hostPatterns).toContain('Math.max(lo, Math.min(hi, value))'); expect(lookupPortableLogicPrimitive('number.clamp')?.intent).toBe('semantic-gap'); expect(lookupPortableLogicPrimitive('object.keys')?.hostPatterns).toContain('Object.keys(obj)'); @@ -401,6 +408,7 @@ describe('portable logic primitive registry', () => { expect(lookupPortableLogicPrimitive('string.replaceAll')?.hostPatterns).toContain( 'value.replaceAll(search, replacement)', ); + expect(lookupPortableLogicPrimitive('string.coerce')?.hostPatterns).toContain('String(value)'); expect(lookupPortableLogicPrimitive('host.randomThing')).toBeNull(); }); @@ -439,17 +447,17 @@ describe('portable logic primitive registry', () => { expect(() => validatePortableLogicPrimitiveRegistry({ - 'string.coalesceAtStart': { + 'string.coalesce': { ...valid!, - id: 'string.coalesceAtStart' as PortableLogicPrimitiveId, + id: 'string.coalesce' as PortableLogicPrimitiveId, }, }), - ).not.toThrow(); + ).toThrow(/duplicates existing language nullish\/coalesce syntax/); }); - test('does not register a named nullish/coalesce primitive', () => { - // The language already has `??`; this guards against adding a duplicate registry API by accident. 
- expect(PORTABLE_LOGIC_PRIMITIVE_IDS.some((id) => id.includes('nullish') || id.includes('coalesce'))).toBe(false); + test('registers exactly the portable coalesce primitive, not stray nullish aliases', () => { + expect(PORTABLE_LOGIC_PRIMITIVE_IDS.filter((id) => id.includes('coalesce'))).toEqual(['logic.coalesce']); expect(lookupPortableLogicPrimitive('logic.nullishCoalesce')).toBeNull(); + expect(lookupPortableLogicPrimitive('logic.coalesce')).not.toBeNull(); }); }); diff --git a/packages/python/src/codegen-body-python.ts b/packages/python/src/codegen-body-python.ts index 05a4d784..1875165c 100644 --- a/packages/python/src/codegen-body-python.ts +++ b/packages/python/src/codegen-body-python.ts @@ -55,12 +55,14 @@ import { parseKeys, suggestStdlibMethod, } from '@kernlang/core'; +import { buildPythonParamList } from './codegen-helpers.js'; import { KERN_FMT_HELPER_PY, KERN_I32_HELPER_PY, KERN_PAIR_HELPERS_PY, KERN_TMOD_HELPER_PY, } from './core/expr/index.js'; +import { mapTsTypeToPython } from './type-map.js'; /** Slice 3e — caller-provided options for the Python body emitter. * Currently only `symbolMap`; future slices may add diagnostics, source-map @@ -158,6 +160,7 @@ interface BodyEmitContext { /** Depth of nested `finally` blocks. Propagation from finally would * override pending control flow, so it gets a finally-specific error. */ finallyDepth: number; + standaloneExpression: boolean; } const INDENT_STEP = ' '; @@ -176,6 +179,7 @@ function freshCtx(options?: BodyEmitOptions): BodyEmitContext { usedPropagation: false, tryDepth: 0, finallyDepth: 0, + standaloneExpression: false, traceHooks: options?.traceHooks, }; } @@ -288,6 +292,7 @@ export function emitNativeKernBodyPythonWithImports(handlerNode: IRNode, options * `trailingComment=` prop. Mirrors the TS emitter's set. 
*/ const TRAILING_COMMENT_TYPES = new Set([ 'let', + 'expression-v1', 'assign', 'fmt', 'clamp', @@ -335,6 +340,10 @@ function emitChildrenPy( for (const line of emitSetPy(child, ctx)) lines.push(`${indent}${line}`); } else if (child.type === 'let') { for (const line of emitLetPy(child, ctx)) lines.push(`${indent}${line}`); + } else if (child.type === 'expression-v1') { + for (const line of emitExpressionV1Py(child, ctx)) lines.push(`${indent}${line}`); + } else if (child.type === 'fn') { + for (const line of emitFnPy(child, ctx, indent)) lines.push(line); } else if (child.type === 'assign') { for (const line of emitAssignPy(child, ctx)) lines.push(`${indent}${line}`); } else if (child.type === 'destructure') { @@ -1646,7 +1655,9 @@ const NON_EXCEPTION_LITERAL_KINDS: ReadonlySet = new Set([ * `emitPyExprCtx` which threads the live ctx (and therefore the live * imports set) end-to-end. */ export function emitPyExpression(node: ValueIR, options?: BodyEmitOptions): string { - return emitPyExprCtx(node, freshCtx(options)); + const ctx = freshCtx(options); + ctx.standaloneExpression = true; + return emitPyExprCtx(node, ctx); } function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { @@ -2089,6 +2100,15 @@ function lowerChain(node: ChainNode, ctx: BodyEmitContext): GuardedExpr { if (regex !== null) return { guard: null, expr: regex }; const stdlib = applyStdlibLoweringPython(node, ctx); if (stdlib !== null) return { guard: null, expr: stdlib }; + if (node.callee.kind === 'ident' && node.callee.name === 'String') { + if (node.args.length !== 1) { + throw new Error('String() portable coercion expects exactly one argument on Python target.'); + } + const arg = emitPyExprCtx(node.args[0], ctx); + if (ctx.standaloneExpression) return { guard: null, expr: inlineKernFmtPy(arg) }; + ctx.helpers.add(KERN_FMT_HELPER_PY); + return { guard: null, expr: `_kern_fmt(${arg})` }; + } const callee = node.callee; const inner: GuardedExpr = callee.kind === 'member' || 
callee.kind === 'call' || callee.kind === 'index' @@ -2501,3 +2521,75 @@ export function registerHelpers(node: ValueIR, ctx: BodyEmitContext) { break; } } + +function emitExpressionV1Py(node: IRNode, ctx: BodyEmitContext): string[] { + const props = (node.props ?? {}) as Record; + const userName = String(props.name ?? ''); + if (!userName) throw new Error('body-statement `expression-v1` requires `name=`.'); + const rawExpr = props.expr; + const exprSource = unwrapBodyExpr(rawExpr); + if (exprSource === undefined || exprSource === '') { + throw new Error('body-statement `expression-v1` requires `expr=`.'); + } + const exprIR = parseExpression(exprSource); + declareLocalBinding(ctx, userName, 'const'); + const name = maybeRenameOnShadow(ctx, userName); + setRegexBinding(ctx, userName, exprIR.kind === 'regexLit' ? exprIR : null); + const lines = [`${name} = ${emitPyExprCtx(exprIR, ctx)}`]; + if (ctx.traceHooks?.letAssign) lines.push(letAssignTracePy(name)); + return lines; +} + +function emitFnPy(node: IRNode, ctx: BodyEmitContext, indent: string): string[] { + const props = (node.props ?? {}) as Record; + const userName = String(props.name ?? ''); + if (!userName) throw new Error('body-statement `fn` requires `name=`.'); + declareLocalBinding(ctx, userName, 'const'); + const name = maybeRenameOnShadow(ctx, userName); + + const isAsync = props.async === 'true' || props.async === true; + const asyncKw = isAsync ? 'async ' : ''; + if (props.params && node.children?.some((c) => c.type === 'param')) { + throw new Error('body-statement `fn` cannot mix legacy `params=` with structured `param` children.'); + } + const paramList = buildPythonParamList(node); + + const returns = props.returns ? String(props.returns) : ''; + const retClause = returns ? 
` -> ${mapTsTypeToPython(returns)}` : ''; + + const lines: string[] = []; + lines.push(`${indent}${asyncKw}def ${name}(${paramList})${retClause}:`); + + const handlerNode = node.children?.find((c) => c.type === 'handler'); + const bodyNodes = handlerNode ? (handlerNode.children ?? []) : (node.children ?? []); + const stmtNodes = bodyNodes.filter((c) => c.type !== 'param' && c.type !== 'decorator'); + + const inner = emitChildrenPy(stmtNodes, ctx, indent + INDENT_STEP, paramBindingsFromPythonSignature(paramList)); + if (inner.length === 0) { + lines.push(`${indent}${INDENT_STEP}pass`); + } else { + for (const sl of inner) { + lines.push(sl); + } + } + return lines; +} + +function paramBindingsFromPythonSignature(paramList: string): Array<[string, 'const']> { + if (!paramList.trim()) return []; + return splitBodyExpressionList(paramList, 'fn params=') + .map((part) => part.split('=')[0]?.split(':')[0]?.trim().replace(/^\*+/, '') ?? '') + .filter((name) => /^[A-Za-z_]\w*$/.test(name)) + .map((name) => [name, 'const']); +} + +function inlineKernFmtPy(expr: string): string { + return [ + '(lambda __k_v: ', + "('true' if __k_v else 'false') if isinstance(__k_v, bool) else ", + "'null' if __k_v is None else ", + 'str(int(__k_v)) if isinstance(__k_v, float) and __k_v.is_integer() else ', + 'str(__k_v))', + `(${expr})`, + ].join(''); +} diff --git a/packages/python/src/core/expr/helpers.ts b/packages/python/src/core/expr/helpers.ts index 069e832a..4f5b4534 100644 --- a/packages/python/src/core/expr/helpers.ts +++ b/packages/python/src/core/expr/helpers.ts @@ -17,6 +17,8 @@ export const KERN_FMT_HELPER_PY = [ " return 'true' if __k_v else 'false'", ' if __k_v is None:', " return 'null'", + ' if isinstance(__k_v, float) and __k_v.is_integer():', + ' return str(int(__k_v))', ' return str(__k_v)', ].join('\n'); diff --git a/packages/python/src/ir-semantics/python-leg.ts b/packages/python/src/ir-semantics/python-leg.ts index c00142db..094506aa 100644 --- 
a/packages/python/src/ir-semantics/python-leg.ts +++ b/packages/python/src/ir-semantics/python-leg.ts @@ -326,17 +326,20 @@ export async function runPythonEmitterLeg(fixture: NodeFixture, env: SemanticEnv } function shouldTraceLetAssign(ir: NodeFixture['ir']): boolean { - // `let` (declaration), `assign` (reassignment), and `fmt` (formatted binding) - // observe their binding write through the same `{op:"assign"}` trace hook. + // `let` (declaration), `expression-v1`, `assign` (reassignment), and `fmt` + // (formatted binding) observe their binding write through the same + // `{op:"assign"}` trace hook. // `while` fixtures opt in too: their counter setup/advance (let + assign in // body) must emit the same assign events the reference produces. const contract = ir.props?.__semanticContract; const t = ir.type; return ( t === 'let' || + t === 'expression-v1' || t === 'assign' || t === 'fmt' || contract === 'let' || + contract === 'expression-v1' || contract === 'assign' || contract === 'fmt' || contract === 'while' diff --git a/packages/python/tests/ir-semantics-python-leg.test.ts b/packages/python/tests/ir-semantics-python-leg.test.ts index a0b3e128..8de85197 100644 --- a/packages/python/tests/ir-semantics-python-leg.test.ts +++ b/packages/python/tests/ir-semantics-python-leg.test.ts @@ -25,6 +25,11 @@ import { registerBranchContract, } from '../../core/src/ir/semantics/branch.js'; import { _resetEachContractForTest, eachContract, registerEachContract } from '../../core/src/ir/semantics/each.js'; +import { + _resetExpressionV1ContractForTest, + expressionV1Contract, + registerExpressionV1Contract, +} from '../../core/src/ir/semantics/expression-v1.js'; import { _resetFmtContractForTest, fmtContract, registerFmtContract } from '../../core/src/ir/semantics/fmt.js'; import { _resetForContractForTest, forContract, registerForContract } from '../../core/src/ir/semantics/for.js'; import { _resetIfContractForTest, ifContract, registerIfContract } from 
'../../core/src/ir/semantics/if.js'; @@ -54,6 +59,7 @@ beforeEach(() => { CONTRACT_REGISTRY.clear(); _resetBranchContractForTest(); _resetEachContractForTest(); + _resetExpressionV1ContractForTest(); _resetIfContractForTest(); _resetForContractForTest(); _resetLambdaContractForTest(); @@ -65,6 +71,7 @@ beforeEach(() => { _resetPrimitivesForTest(); registerPrimitives(); registerEachContract(); + registerExpressionV1Contract(); registerBranchContract(); registerIfContract(); registerForContract(); @@ -80,6 +87,7 @@ afterEach(() => { CONTRACT_REGISTRY.clear(); _resetBranchContractForTest(); _resetEachContractForTest(); + _resetExpressionV1ContractForTest(); _resetIfContractForTest(); _resetForContractForTest(); _resetLambdaContractForTest(); @@ -91,6 +99,27 @@ afterEach(() => { _resetPrimitivesForTest(); }); +describeIfPython('Python emitter leg — expression-v1 fixtures (three-way differential)', () => { + it.each(expressionV1Contract.fixtures.map((f) => [f.description, f] as const))( + 'fixture: %s', + async (_desc, fixture) => { + const result = await runDifferential(fixture, { pythonLeg: runPythonEmitterLeg }); + if (result.verdict !== 'pass') { + throw new Error( + `verdict=${result.verdict}\n` + + `fixture=${fixture.description}\n` + + `reference=${JSON.stringify(result.reference, null, 2)}\n` + + `ts=${JSON.stringify(result.ts, null, 2)}\n` + + `python=${JSON.stringify(result.python, null, 2)}\n` + + `legError=${JSON.stringify(result.legError, null, 2)}`, + ); + } + expect(result.verdict).toBe('pass'); + }, + 15_000, + ); +}); + /** * PR-4 — Python emitter normalises pair-mode iteration via runtime helpers * `_kern_pairs` (sync) and `_kern_async_pairs` (async). 
This closes the diff --git a/packages/python/tests/native-handlers-python.test.ts b/packages/python/tests/native-handlers-python.test.ts index 7cd8695d..21f42353 100644 --- a/packages/python/tests/native-handlers-python.test.ts +++ b/packages/python/tests/native-handlers-python.test.ts @@ -117,6 +117,119 @@ describe('emitPyExpression — slice 1 lowering rules', () => { }); }); +describe('emitNativeKernBodyPython — expression-v1 and nested fn statements', () => { + test('expression-v1 emits a scalar binding through Python expression lowering', () => { + const handler = makeHandler([ + { type: 'expression-v1', props: { name: 'label', expr: 'String(value)' } }, + { type: 'return', props: { value: 'label' } }, + ]); + expect(emitNativeKernBodyPython(handler)).toBe( + [ + 'def _kern_fmt(__k_v):', + ' if isinstance(__k_v, bool):', + " return 'true' if __k_v else 'false'", + ' if __k_v is None:', + " return 'null'", + ' if isinstance(__k_v, float) and __k_v.is_integer():', + ' return str(int(__k_v))', + ' return str(__k_v)', + '', + 'label = _kern_fmt(value)', + 'return label', + ].join('\n'), + ); + }); + + test('nested fn supports legacy params and returns inside body emit', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'add', params: 'a:number,b:number', returns: 'number' }, + children: [ + { type: 'handler', props: { lang: 'kern' }, children: [{ type: 'return', props: { value: 'a + b' } }] }, + ], + }, + { type: 'return', props: { value: 'add(2, 3)' } }, + ]); + expect(emitNativeKernBodyPython(handler)).toBe( + ['def add(a: float, b: float) -> float:', ' return a + b', 'return add(2, 3)'].join('\n'), + ); + }); + + test('nested fn supports structured param children', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'add', returns: 'number' }, + children: [ + { type: 'param', props: { name: 'a', type: 'number' } }, + { type: 'param', props: { name: 'b', type: 'number' } }, + { type: 'handler', props: { lang: 'kern' 
}, children: [{ type: 'return', props: { value: 'a + b' } }] }, + ], + }, + { type: 'return', props: { value: 'add(2, 3)' } }, + ]); + expect(emitNativeKernBodyPython(handler)).toContain('def add(a: float, b: float) -> float:'); + }); + + test('nested async fn preserves await expressions in body emit', () => { + const handler = makeHandler([ + { + type: 'fn', + props: { name: 'loadTotal', params: 'amount:number', returns: 'number', async: 'true' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [ + { type: 'let', props: { name: 'loaded', value: 'await load(amount)' } }, + { type: 'return', props: { value: 'loaded + 5' } }, + ], + }, + ], + }, + ]); + expect(emitNativeKernBodyPython(handler)).toBe( + ['async def loadTotal(amount: float) -> float:', ' loaded = await load(amount)', ' return loaded + 5'].join( + '\n', + ), + ); + }); + + test('String() portable coercion requires exactly one arg', () => { + expect(() => emitPyExpression(parseExpression('String()'))).toThrow(/expects exactly one argument/); + expect(() => emitPyExpression(parseExpression('String(a, b)'))).toThrow(/expects exactly one argument/); + }); + + test('standalone String(value) lowering is self-contained', () => { + expect(emitPyExpression(parseExpression('String(value)'))).toBe( + "(lambda __k_v: ('true' if __k_v else 'false') if isinstance(__k_v, bool) else 'null' if __k_v is None else str(int(__k_v)) if isinstance(__k_v, float) and __k_v.is_integer() else str(__k_v))(value)", + ); + }); + + test('expression-v1 accepts ExprObject expr props', () => { + const handler = makeHandler([ + { type: 'expression-v1', props: { name: 'total', expr: { __expr: true, code: 'amount + 1' } } }, + { type: 'return', props: { value: 'total' } }, + ]); + expect(emitNativeKernBodyPython(handler)).toBe(['total = amount + 1', 'return total'].join('\n')); + }); + + test('nested fn rejects mixed legacy and structured params', () => { + const handler = makeHandler([ + { + type: 'fn', + 
props: { name: 'mixed', params: 'a:number' }, + children: [ + { type: 'param', props: { name: 'b', type: 'number' } }, + { type: 'handler', props: { lang: 'kern' }, children: [] }, + ], + }, + ]); + expect(() => emitNativeKernBodyPython(handler)).toThrow(/cannot mix legacy `params=`/); + }); +}); + describe('emitNativeKernBodyPython — slice 1 statements', () => { test('let with simple call', () => { const h = makeHandler([{ type: 'let', props: { name: 'x', value: 'foo()' } }]); diff --git a/packages/python/tests/native-handlers-slice2-python.test.ts b/packages/python/tests/native-handlers-slice2-python.test.ts index bf183a4d..e69cb204 100644 --- a/packages/python/tests/native-handlers-slice2-python.test.ts +++ b/packages/python/tests/native-handlers-slice2-python.test.ts @@ -105,7 +105,7 @@ describe('emitPyExpression — arithmetic + comparison + unary', () => { expect(emitPyExpression(parseExpression('a instanceof B && c'))).toBe('isinstance(a, B) and c'); // The dominant idiom — mirrors the TS-side round-trip in core/expression.test.ts. expect(emitPyExpression(parseExpression('err instanceof Error ? 
err.message : String(err)'))).toBe( - 'err.message if (isinstance(err, Error)) else String(err)', + "err.message if (isinstance(err, Error)) else (lambda __k_v: ('true' if __k_v else 'false') if isinstance(__k_v, bool) else 'null' if __k_v is None else str(int(__k_v)) if isinstance(__k_v, float) and __k_v.is_integer() else str(__k_v))(err)", ); }); diff --git a/scripts/conformance.mjs b/scripts/conformance.mjs index ca5fec85..6b870b71 100644 --- a/scripts/conformance.mjs +++ b/scripts/conformance.mjs @@ -479,6 +479,20 @@ const FIXTURES = [ ], body: `firstDefined name=winner values="missingA, missingB, 'fallback'"\nreturn value="winner"`, expected: 'fallback' }, + { kind: 'stmt', name: 'stmt: expression-v1 string coercion canonicalizes bool and null', + params: [ + { name: 'flag', type: 'boolean', value: false }, + { name: 'missing', type: 'any', value: null }, + ], + body: `expression-v1 name=flagText expr="String(flag)"\nexpression-v1 name=nullText expr="String(missing)"\nreturn value="{ flagText: flagText, nullText: nullText }"`, + expected: { flagText: 'false', nullText: 'null' } }, + { kind: 'stmt', name: 'stmt: nested fn with let and return executes inside body', + params: [ + { name: 'left', type: 'number', value: 2 }, + { name: 'right', type: 'number', value: 3 }, + ], + body: `fn name=add params="a:number,b:number" returns=number\n handler\n let name=sum value="a + b"\n return value="sum"\nreturn value="add(left, right)"`, + expected: 5 }, { kind: 'stmt', name: 'stmt: while loop accumulates (mutable kind=let)', params: [{ name: 'n', type: 'number', value: 5 }, { name: 'min', type: 'number', value: 0 }], body: `let name=total value="0" kind=let\nlet name=i value="0" kind=let\nwhile cond="i < n"\n assign target="total" value="total + i"\n assign target="i" value="i + 1"\nreturn value="{ total: total }"`, @@ -1394,7 +1408,8 @@ for (const fx of FIXTURES) { compilerOptions: { module: tsCompiler.ModuleKind.ESNext, target: tsCompiler.ScriptTarget.ES2022 }, 
}).outputText, ); - writeFileSync(pyFile, `import json\n${[...(pyEmit.imports ?? [])].join('\n')}\ndef __h(${names.join(', ')}):\n${pyEmit.code.split('\n').map((l) => ` ${l}`).join('\n')}\nprint(json.dumps(__h(${fx.params.map((p) => pyVal(p.value)).join(', ')}), default=str, allow_nan=False))`); + const pyHelpers = [...(pyEmit.helpers ?? [])].join('\n\n'); + writeFileSync(pyFile, `import json\n${[...(pyEmit.imports ?? [])].join('\n')}\n${pyHelpers}\ndef __h(${names.join(', ')}):\n${pyEmit.code.split('\n').map((l) => ` ${l}`).join('\n')}\nprint(json.dumps(__h(${fx.params.map((p) => pyVal(p.value)).join(', ')}), default=str, allow_nan=False))`); const stmtOpts = { encoding: 'utf8', timeout: 10_000 }; const tsOut = execFileSync('node', [tsFile], stmtOpts).trim(); const pyOut = execFileSync('python3', [pyFile], stmtOpts).trim(); From 4d00fa5060ed60574a5a0622a58e351fb8f979df Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 7 Jun 2026 08:15:20 +0200 Subject: [PATCH 02/63] build(deps): bump the minor-and-patch group with 5 updates (#388) Bumps the minor-and-patch group with 5 updates: | Package | From | To | | --- | --- | --- | | [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) | `25.9.1` | `25.9.2` | | [next](https://github.com/vercel/next.js) | `16.2.6` | `16.2.7` | | [react](https://github.com/facebook/react/tree/HEAD/packages/react) | `19.2.6` | `19.2.7` | | [@types/react](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/react) | `19.2.15` | `19.2.17` | | [react-dom](https://github.com/facebook/react/tree/HEAD/packages/react-dom) | `19.2.6` | `19.2.7` | Updates `@types/node` from 25.9.1 to 25.9.2 - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node) Updates `next` from 16.2.6 to 16.2.7 - [Release 
notes](https://github.com/vercel/next.js/releases) - [Changelog](https://github.com/vercel/next.js/blob/canary/release.js) - [Commits](https://github.com/vercel/next.js/compare/v16.2.6...v16.2.7) Updates `react` from 19.2.6 to 19.2.7 - [Release notes](https://github.com/facebook/react/releases) - [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md) - [Commits](https://github.com/facebook/react/commits/v19.2.7/packages/react) Updates `@types/react` from 19.2.15 to 19.2.17 - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/react) Updates `react-dom` from 19.2.6 to 19.2.7 - [Release notes](https://github.com/facebook/react/releases) - [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md) - [Commits](https://github.com/facebook/react/commits/v19.2.7/packages/react-dom) Updates `@types/react` from 19.2.15 to 19.2.17 - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/react) --- updated-dependencies: - dependency-name: "@types/node" dependency-version: 25.9.2 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: minor-and-patch - dependency-name: next dependency-version: 16.2.7 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: minor-and-patch - dependency-name: react dependency-version: 19.2.7 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: minor-and-patch - dependency-name: "@types/react" dependency-version: 19.2.17 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: minor-and-patch - dependency-name: react-dom dependency-version: 19.2.7 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: minor-and-patch - 
dependency-name: "@types/react" dependency-version: 19.2.17 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: minor-and-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- package.json | 2 +- packages/playground/package.json | 10 +- packages/terminal/package.json | 4 +- pnpm-lock.yaml | 216 +++++++++++++++---------------- 4 files changed, 116 insertions(+), 116 deletions(-) diff --git a/package.json b/package.json index efa4fbec..f8f118e4 100644 --- a/package.json +++ b/package.json @@ -54,7 +54,7 @@ }, "devDependencies": { "@biomejs/biome": "^2.4.16", - "@types/node": "^25.9.1", + "@types/node": "^25.9.2", "typescript": "^6.0.3" } } diff --git a/packages/playground/package.json b/packages/playground/package.json index 79223169..8226570c 100644 --- a/packages/playground/package.json +++ b/packages/playground/package.json @@ -20,13 +20,13 @@ "@kernlang/vue": "workspace:*", "@monaco-editor/react": "^4.6.0", "monaco-editor": "^0.55.1", - "next": "^16.2.6", - "react": "^19.2.6", - "react-dom": "^19.2.6" + "next": "^16.2.7", + "react": "^19.2.7", + "react-dom": "^19.2.7" }, "devDependencies": { - "@types/node": "^25.9.1", - "@types/react": "^19.2.15", + "@types/node": "^25.9.2", + "@types/react": "^19.2.17", "@types/react-dom": "^19.0.0", "typescript": "^6.0.3" } diff --git a/packages/terminal/package.json b/packages/terminal/package.json index f6e76229..8e312eaf 100644 --- a/packages/terminal/package.json +++ b/packages/terminal/package.json @@ -46,9 +46,9 @@ } }, "devDependencies": { - "@types/react": "19.2.15", + "@types/react": "19.2.17", "@inkjs/ui": "2.0.0", "ink": "7.0.5", - "react": "19.2.6" + "react": "19.2.7" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 20f98d52..3bbfa045 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -22,8 +22,8 @@ importers: specifier: ^2.4.16 version: 2.4.16 '@types/node': - specifier: ^25.9.1 - version: 
25.9.1 + specifier: ^25.9.2 + version: 25.9.2 typescript: specifier: ^6.0.3 version: 6.0.3 @@ -251,29 +251,29 @@ importers: version: link:../vue '@monaco-editor/react': specifier: ^4.6.0 - version: 4.7.0(monaco-editor@0.55.1)(react-dom@19.2.6(react@19.2.6))(react@19.2.6) + version: 4.7.0(monaco-editor@0.55.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7) monaco-editor: specifier: ^0.55.1 version: 0.55.1 next: - specifier: ^16.2.6 - version: 16.2.6(react-dom@19.2.6(react@19.2.6))(react@19.2.6) + specifier: ^16.2.7 + version: 16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7) react: - specifier: ^19.2.6 - version: 19.2.6 + specifier: ^19.2.7 + version: 19.2.7 react-dom: - specifier: ^19.2.6 - version: 19.2.6(react@19.2.6) + specifier: ^19.2.7 + version: 19.2.7(react@19.2.7) devDependencies: '@types/node': - specifier: ^25.9.1 - version: 25.9.1 + specifier: ^25.9.2 + version: 25.9.2 '@types/react': - specifier: ^19.2.15 - version: 19.2.15 + specifier: ^19.2.17 + version: 19.2.17 '@types/react-dom': specifier: ^19.0.0 - version: 19.2.3(@types/react@19.2.15) + version: 19.2.3(@types/react@19.2.17) typescript: specifier: ^6.0.3 version: 6.0.3 @@ -349,16 +349,16 @@ importers: devDependencies: '@inkjs/ui': specifier: 2.0.0 - version: 2.0.0(ink@7.0.5(@types/react@19.2.15)(react@19.2.6)) + version: 2.0.0(ink@7.0.5(@types/react@19.2.17)(react@19.2.7)) '@types/react': - specifier: 19.2.15 - version: 19.2.15 + specifier: 19.2.17 + version: 19.2.17 ink: specifier: 7.0.5 - version: 7.0.5(@types/react@19.2.15)(react@19.2.6) + version: 7.0.5(@types/react@19.2.17)(react@19.2.7) react: - specifier: 19.2.6 - version: 19.2.6 + specifier: 19.2.7 + version: 19.2.7 packages/test: dependencies: @@ -627,57 +627,57 @@ packages: react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 react-dom: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 - '@next/env@16.2.6': - resolution: {integrity: sha512-gd8HoHN4ufj73WmR3JmVolrpJR47ILK6LouP5xElPglaVxir6e1a7VzvTvDWkOoPXT9rkkTzyCxBu4yeZfZwcw==} + '@next/env@16.2.7': 
+ resolution: {integrity: sha512-tMJizPlj6ZYpBMMdK8S0LJufrP4QTdR6pcv9KQ/bVETPAmg0j1mlHE9G2c38UyGHxoBapgwuj7XjbGJ2RcDFOg==} - '@next/swc-darwin-arm64@16.2.6': - resolution: {integrity: sha512-ZJGkkcNfYgrrMkqOdZ7zoLa1TOy0qpcMfk/z4Mh/FKUz40gVO+HNQWqmLxf67Z5WB64DRp0dhEbyHfel+6sJUg==} + '@next/swc-darwin-arm64@16.2.7': + resolution: {integrity: sha512-vm1EDI/pVaBNNiychmxk3fft+OhQPVD9cIM/tReLZIQ3TfQ4kqI9DwKk00dzuS1ulC7icbrzCFrmRRlk9PfNdw==} engines: {node: '>= 10'} cpu: [arm64] os: [darwin] - '@next/swc-darwin-x64@16.2.6': - resolution: {integrity: sha512-v/YLBHIY132Ced3puBJ7YJKw1lqsCrgcNo2aRJlCEyQrrCeRJlvGlnmxhPxNQI3KE3N1DN5r9TPNPvka3nq5RQ==} + '@next/swc-darwin-x64@16.2.7': + resolution: {integrity: sha512-O3IRSv1ZBL1zs0WrIgefTEcTKFVn+ryxBNe54erJ6KsD+2f/Mmt7g2jOYh8PSBdUwPtKQJuCsTMlZ7tIu2AcsQ==} engines: {node: '>= 10'} cpu: [x64] os: [darwin] - '@next/swc-linux-arm64-gnu@16.2.6': - resolution: {integrity: sha512-RPOvqlYBbcQjkz9VQQDZ2T2bARIjXZV1KFlt+V2Mr6SW/e4I9fcKsaA0hdyf2FHoTlsV2xnBd5Y912rP/1Ce6w==} + '@next/swc-linux-arm64-gnu@16.2.7': + resolution: {integrity: sha512-Re6PZtjBDd0aMU+VcZcC/PrIvj4WhrjDYtMhhCVQamWN4L90EVP0pcEOBQD25prSlw7OzNw5QpHLWMilRLsRNw==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] libc: [glibc] - '@next/swc-linux-arm64-musl@16.2.6': - resolution: {integrity: sha512-URUTu1+dMkxJsPFgm+OeEvq9wf5sujw0EvgYy80TDGHTSLTnIHeqb0Eu8A3sC95IRgjejQL+kC4mw+4yPxiAXA==} + '@next/swc-linux-arm64-musl@16.2.7': + resolution: {integrity: sha512-qyogG9QtBzWxgJfeGBvOEHI3851gTfCF3wLZ5RDLTBJGAmE9p1qDwKCOdrBrvBzRvYDT+gUDp72pzlSEfAXgNA==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] libc: [musl] - '@next/swc-linux-x64-gnu@16.2.6': - resolution: {integrity: sha512-DOj182mPV8G3UkrayLoREM5YEYI+Dk5wv7Ox9xl1fFibAELEsFD0lDPfHIeILlutMMfdyhlzYPELG3peuKaurw==} + '@next/swc-linux-x64-gnu@16.2.7': + resolution: {integrity: sha512-Vhe4ZDuBpmMogrGi5D4R2Kq4JAQlj6+wvgaFYy31zfES0zPmt6TLA+cuYpM/OLrPZjo2MYQTHVqNUSCR6+fDZQ==} engines: {node: '>= 10'} cpu: [x64] os: [linux] libc: 
[glibc] - '@next/swc-linux-x64-musl@16.2.6': - resolution: {integrity: sha512-HKQ5SP/V/ub73UvF7n/zeJlxk2kLmtL7Wzrg4WfmkjmNos5onJ2tKu7yZOPdL18A6Svfn3max29ym+ry7NkK4g==} + '@next/swc-linux-x64-musl@16.2.7': + resolution: {integrity: sha512-srvian89JahFLw1YLBEuhvPJ0DO5lpUeJQMXy4xYo7g628ZlNgXdNkqoxSAv9OYrBfByh6vxISMwW/mRbzCY+g==} engines: {node: '>= 10'} cpu: [x64] os: [linux] libc: [musl] - '@next/swc-win32-arm64-msvc@16.2.6': - resolution: {integrity: sha512-LZXpTlPyS5v7HhSmnvsLGP3iIYgYOBnc8r8ArlT55sGHV89bR2HlDdBjWQ+PY6SJMmk8TuVGFuxalnP3k/0Dwg==} + '@next/swc-win32-arm64-msvc@16.2.7': + resolution: {integrity: sha512-GX3wvLpULFuRFJzwHaKfm7QZJ18F4ZSuxlPJ96BoBglCzBmdSjyeBKF+ZhWhvL/ckxNfLnNa7bsObO2ipYpszw==} engines: {node: '>= 10'} cpu: [arm64] os: [win32] - '@next/swc-win32-x64-msvc@16.2.6': - resolution: {integrity: sha512-F0+4i0h9J6C4eE3EAPWsoCk7UW/dbzOjyzxY0qnDUOYFu6FFmdZ6l97/XdV3/Nz3VYyO7UWjyEJUXkGqcoXfMA==} + '@next/swc-win32-x64-msvc@16.2.7': + resolution: {integrity: sha512-J4WlM72NMk076Qsg0jTdK3SNXatlSdnjW7L7oNGLst1tAGjHrJh/FYi+pw9wyIjEtGRKDNzD0zuiY16oWYWVaw==} engines: {node: '>= 10'} cpu: [x64] os: [win32] @@ -703,8 +703,8 @@ packages: '@types/http-errors@2.0.5': resolution: {integrity: sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==} - '@types/node@25.9.1': - resolution: {integrity: sha512-xfrlY7UD5rMJk3ZVJP8BNzS28J36YJg+xp+LPXV1TdWxr8uMH5A860QNxYDGQe/ylDSgjxE52Q9VnO7p75tJxg==} + '@types/node@25.9.2': + resolution: {integrity: sha512-G05zqtJhcDLb8uslf5EjCxXg9G1KQxiV8OS0R26IC//Eoyitzqe8z37I7cqvnZlrlSfgocQRfSn/AHBZJJFyGw==} '@types/qs@6.15.0': resolution: {integrity: sha512-JawvT8iBVWpzTrz3EGw9BTQFg3BQNmwERdKE22vlTxawwtbyUSlMppvZYKLZzB5zgACXdXxbD3m1bXaMqP/9ow==} @@ -717,8 +717,8 @@ packages: peerDependencies: '@types/react': ^19.2.0 - '@types/react@19.2.15': - resolution: {integrity: sha512-eRwcGNHve+E8qtEQSSRl6urh+rFop4v8gm6O8rGv25CodbvFdLjA1vVQ1KkiFE0w0UPOnb8tDiFKL5lp0rtY5Q==} + '@types/react@19.2.17': + 
resolution: {integrity: sha512-MXfmqaVPEVgkBT/aY0aGCkRWWtByiYQXo3xdQ8r5RzuFrPiRn8Gar2tQdXSUQ2GKV3bkXckek89V8wQBY2Q/Aw==} '@types/send@1.2.1': resolution: {integrity: sha512-arsCikDvlU99zl1g69TcAB3mzZPpxgw0UQnaHeC1Nwb015xp8bknZv5rIfri9xTOcMuaVgvabfIRA7PSZVuZIQ==} @@ -764,8 +764,8 @@ packages: resolution: {integrity: sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} engines: {node: 18 || 20 || >=22} - baseline-browser-mapping@2.10.29: - resolution: {integrity: sha512-Asa2krT+XTPZINCS+2QcyS8WTkObE77RwkydwF7h6DmnKqbvlalz93m/dnphUyCa6SWSP51VgtEUf2FN+gelFQ==} + baseline-browser-mapping@2.10.34: + resolution: {integrity: sha512-IMDedajPifLnHNY0X9n8hKxRTQ6/eTHwr5bDo04WnuqxyKw6LYtQywCuuqPZwhl3aBXMvQpJov42GLCwRRdQzw==} engines: {node: '>=6.0.0'} hasBin: true @@ -789,8 +789,8 @@ packages: resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==} engines: {node: '>= 0.4'} - caniuse-lite@1.0.30001792: - resolution: {integrity: sha512-hVLMUZFgR4JJ6ACt1uEESvQN1/dBVqPAKY0hgrV70eN3391K6juAfTjKZLKvOMsx8PxA7gsY1/tLMMTcfFLLpw==} + caniuse-lite@1.0.30001797: + resolution: {integrity: sha512-l8xKG+gwAIExZGl9FrF7KUwuOmk6wbEPC9Xoy/RtnWv1XG0Q4LFlagaLpUv3Kiza3W/wm27zy0yWJEieYKAP6w==} chalk@5.6.2: resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} @@ -1121,8 +1121,8 @@ packages: resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==} engines: {node: '>= 0.6'} - next@16.2.6: - resolution: {integrity: sha512-qOVgKJg1+At15NpeUP+eJgCHvTCgXsogweq87Ri/Ix7PkqQHg4sdaXmSFqKlgaIXE4kW0g25LE68W87UANlHtw==} + next@16.2.7: + resolution: {integrity: sha512-eMJxgjRzBaj3olkP4cBamHDXL79A8FC6u1GcsO1D1Tsx8bw/LLXUJCaoajVxtnhD3A1IJqIT8IcRJjgBIPJq4w==} engines: {node: '>=20.9.0'} hasBin: true peerDependencies: @@ -1218,10 +1218,10 @@ packages: resolution: {integrity: 
sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==} engines: {node: '>= 0.10'} - react-dom@19.2.6: - resolution: {integrity: sha512-0prMI+hvBbPjsWnxDLxlCGyM8PN6UuWjEUCYmZhO67xIV9Xasa/r/vDnq+Xyq4Lo27g8QSbO5YzARu0D1Sps3g==} + react-dom@19.2.7: + resolution: {integrity: sha512-t0BRVXvbiE/o20Hfw669rLbMCDWtYZLvmJigy2f0MxsXF+71pxhR3xOkspmsO8h3ZlNzyibAmtCa3l4lYKk6gQ==} peerDependencies: - react: ^19.2.6 + react: ^19.2.7 react-reconciler@0.33.0: resolution: {integrity: sha512-KetWRytFv1epdpJc3J4G75I4WrplZE5jOL7Yq0p34+OVOKF4Se7WrdIdVC45XsSSmUTlht2FM/fM1FZb1mfQeA==} @@ -1229,8 +1229,8 @@ packages: peerDependencies: react: ^19.2.0 - react@19.2.6: - resolution: {integrity: sha512-sfWGGfavi0xr8Pg0sVsyHMAOziVYKgPLNrS7ig+ivMNb3wbCBw3KxtflsGBAwD3gYQlE/AEZsTLgToRrSCjb0Q==} + react@19.2.7: + resolution: {integrity: sha512-HNe9WslTbXmFK8o8cmwgAeJFSBvt1bPdHCVKtaaV+WlAN36mpT4hcRpwbf3fY56ar2oIXzsBpOAiIRHAdY0OlQ==} engines: {node: '>=0.10.0'} readdirp@5.0.0: @@ -1255,8 +1255,8 @@ packages: scheduler@0.27.0: resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==} - semver@7.8.0: - resolution: {integrity: sha512-AcM7dV/5ul4EekoQ29Agm5vri8JNqRyj39o0qpX6vDF2GZrtutZl5RwgD1XnZjiTAfncsJhMI48QQH3sN87YNA==} + semver@7.8.2: + resolution: {integrity: sha512-c8jsqUZm3omBOI66G90z1Dyw5z622G8oLG+omfsHBJf3CWQTlOcwOjvOG6wtiNfW6anKm/eA39LMwMtMez2TiQ==} engines: {node: '>=10'} hasBin: true @@ -1591,13 +1591,13 @@ snapshots: '@img/sharp-win32-x64@0.34.5': optional: true - '@inkjs/ui@2.0.0(ink@7.0.5(@types/react@19.2.15)(react@19.2.6))': + '@inkjs/ui@2.0.0(ink@7.0.5(@types/react@19.2.17)(react@19.2.7))': dependencies: chalk: 5.6.2 cli-spinners: 3.4.0 deepmerge: 4.3.1 figures: 6.1.0 - ink: 7.0.5(@types/react@19.2.15)(react@19.2.6) + ink: 7.0.5(@types/react@19.2.17)(react@19.2.7) '@modelcontextprotocol/sdk@1.29.0(zod@4.4.3)': dependencies: @@ -1625,37 +1625,37 @@ snapshots: dependencies: state-local: 
1.0.7 - '@monaco-editor/react@4.7.0(monaco-editor@0.55.1)(react-dom@19.2.6(react@19.2.6))(react@19.2.6)': + '@monaco-editor/react@4.7.0(monaco-editor@0.55.1)(react-dom@19.2.7(react@19.2.7))(react@19.2.7)': dependencies: '@monaco-editor/loader': 1.7.0 monaco-editor: 0.55.1 - react: 19.2.6 - react-dom: 19.2.6(react@19.2.6) + react: 19.2.7 + react-dom: 19.2.7(react@19.2.7) - '@next/env@16.2.6': {} + '@next/env@16.2.7': {} - '@next/swc-darwin-arm64@16.2.6': + '@next/swc-darwin-arm64@16.2.7': optional: true - '@next/swc-darwin-x64@16.2.6': + '@next/swc-darwin-x64@16.2.7': optional: true - '@next/swc-linux-arm64-gnu@16.2.6': + '@next/swc-linux-arm64-gnu@16.2.7': optional: true - '@next/swc-linux-arm64-musl@16.2.6': + '@next/swc-linux-arm64-musl@16.2.7': optional: true - '@next/swc-linux-x64-gnu@16.2.6': + '@next/swc-linux-x64-gnu@16.2.7': optional: true - '@next/swc-linux-x64-musl@16.2.6': + '@next/swc-linux-x64-musl@16.2.7': optional: true - '@next/swc-win32-arm64-msvc@16.2.6': + '@next/swc-win32-arm64-msvc@16.2.7': optional: true - '@next/swc-win32-x64-msvc@16.2.6': + '@next/swc-win32-x64-msvc@16.2.7': optional: true '@swc/helpers@0.5.15': @@ -1671,15 +1671,15 @@ snapshots: '@types/body-parser@1.19.6': dependencies: '@types/connect': 3.4.38 - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/connect@3.4.38': dependencies: - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/express-serve-static-core@5.1.1': dependencies: - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/qs': 6.15.0 '@types/range-parser': 1.2.7 '@types/send': 1.2.1 @@ -1692,7 +1692,7 @@ snapshots: '@types/http-errors@2.0.5': {} - '@types/node@25.9.1': + '@types/node@25.9.2': dependencies: undici-types: 7.24.6 @@ -1700,22 +1700,22 @@ snapshots: '@types/range-parser@1.2.7': {} - '@types/react-dom@19.2.3(@types/react@19.2.15)': + '@types/react-dom@19.2.3(@types/react@19.2.17)': dependencies: - '@types/react': 19.2.15 + '@types/react': 19.2.17 - '@types/react@19.2.15': + '@types/react@19.2.17': 
dependencies: csstype: 3.2.3 '@types/send@1.2.1': dependencies: - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/serve-static@2.2.0': dependencies: '@types/http-errors': 2.0.5 - '@types/node': 25.9.1 + '@types/node': 25.9.2 '@types/trusted-types@2.0.7': optional: true @@ -1748,7 +1748,7 @@ snapshots: balanced-match@4.0.4: {} - baseline-browser-mapping@2.10.29: {} + baseline-browser-mapping@2.10.34: {} body-parser@2.2.2: dependencies: @@ -1780,7 +1780,7 @@ snapshots: call-bind-apply-helpers: 1.0.2 get-intrinsic: 1.3.0 - caniuse-lite@1.0.30001792: {} + caniuse-lite@1.0.30001797: {} chalk@5.6.2: {} @@ -1994,7 +1994,7 @@ snapshots: inherits@2.0.4: {} - ink@7.0.5(@types/react@19.2.15)(react@19.2.6): + ink@7.0.5(@types/react@19.2.17)(react@19.2.7): dependencies: '@alcalzone/ansi-tokenize': 0.3.0 ansi-escapes: 7.3.0 @@ -2009,8 +2009,8 @@ snapshots: indent-string: 5.0.0 is-in-ci: 2.0.0 patch-console: 2.0.0 - react: 19.2.6 - react-reconciler: 0.33.0(react@19.2.6) + react: 19.2.7 + react-reconciler: 0.33.0(react@19.2.7) scheduler: 0.27.0 signal-exit: 3.0.7 slice-ansi: 9.0.0 @@ -2023,7 +2023,7 @@ snapshots: ws: 8.21.0 yoga-layout: 3.2.1 optionalDependencies: - '@types/react': 19.2.15 + '@types/react': 19.2.17 transitivePeerDependencies: - bufferutil - utf-8-validate @@ -2085,25 +2085,25 @@ snapshots: negotiator@1.0.0: {} - next@16.2.6(react-dom@19.2.6(react@19.2.6))(react@19.2.6): + next@16.2.7(react-dom@19.2.7(react@19.2.7))(react@19.2.7): dependencies: - '@next/env': 16.2.6 + '@next/env': 16.2.7 '@swc/helpers': 0.5.15 - baseline-browser-mapping: 2.10.29 - caniuse-lite: 1.0.30001792 + baseline-browser-mapping: 2.10.34 + caniuse-lite: 1.0.30001797 postcss: 8.5.15 - react: 19.2.6 - react-dom: 19.2.6(react@19.2.6) - styled-jsx: 5.1.6(react@19.2.6) + react: 19.2.7 + react-dom: 19.2.7(react@19.2.7) + styled-jsx: 5.1.6(react@19.2.7) optionalDependencies: - '@next/swc-darwin-arm64': 16.2.6 - '@next/swc-darwin-x64': 16.2.6 - '@next/swc-linux-arm64-gnu': 16.2.6 - 
'@next/swc-linux-arm64-musl': 16.2.6 - '@next/swc-linux-x64-gnu': 16.2.6 - '@next/swc-linux-x64-musl': 16.2.6 - '@next/swc-win32-arm64-msvc': 16.2.6 - '@next/swc-win32-x64-msvc': 16.2.6 + '@next/swc-darwin-arm64': 16.2.7 + '@next/swc-darwin-x64': 16.2.7 + '@next/swc-linux-arm64-gnu': 16.2.7 + '@next/swc-linux-arm64-musl': 16.2.7 + '@next/swc-linux-x64-gnu': 16.2.7 + '@next/swc-linux-x64-musl': 16.2.7 + '@next/swc-win32-arm64-msvc': 16.2.7 + '@next/swc-win32-x64-msvc': 16.2.7 sharp: 0.34.5 transitivePeerDependencies: - '@babel/core' @@ -2169,17 +2169,17 @@ snapshots: iconv-lite: 0.7.2 unpipe: 1.0.0 - react-dom@19.2.6(react@19.2.6): + react-dom@19.2.7(react@19.2.7): dependencies: - react: 19.2.6 + react: 19.2.7 scheduler: 0.27.0 - react-reconciler@0.33.0(react@19.2.6): + react-reconciler@0.33.0(react@19.2.7): dependencies: - react: 19.2.6 + react: 19.2.7 scheduler: 0.27.0 - react@19.2.6: {} + react@19.2.7: {} readdirp@5.0.0: {} @@ -2204,7 +2204,7 @@ snapshots: scheduler@0.27.0: {} - semver@7.8.0: + semver@7.8.2: optional: true send@1.2.1: @@ -2238,7 +2238,7 @@ snapshots: dependencies: '@img/colour': 1.1.0 detect-libc: 2.1.2 - semver: 7.8.0 + semver: 7.8.2 optionalDependencies: '@img/sharp-darwin-arm64': 0.34.5 '@img/sharp-darwin-x64': 0.34.5 @@ -2326,10 +2326,10 @@ snapshots: dependencies: ansi-regex: 6.2.2 - styled-jsx@5.1.6(react@19.2.6): + styled-jsx@5.1.6(react@19.2.7): dependencies: client-only: 0.0.1 - react: 19.2.6 + react: 19.2.7 tagged-tag@1.0.0: {} From a5c1bf1e1c715aedac9e2783b37c6c40661883ed Mon Sep 17 00:00:00 2001 From: Nico Date: Sun, 7 Jun 2026 09:03:25 +0200 Subject: [PATCH 03/63] Feat/portable logic foundation (#389) * Add portable expression logic foundation * Format portable expression foundation changes From e5cbb349385e778f3d07dc6ad7acaff26999ba31 Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 09:05:20 +0200 Subject: [PATCH 04/63] Fix guard typecheck findings --- packages/core/src/ir/semantics/expression-v1.ts | 13 +++++++++++-- 
packages/python/src/codegen-body-python.ts | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/core/src/ir/semantics/expression-v1.ts b/packages/core/src/ir/semantics/expression-v1.ts index 166439d1..699facd7 100644 --- a/packages/core/src/ir/semantics/expression-v1.ts +++ b/packages/core/src/ir/semantics/expression-v1.ts @@ -3,7 +3,7 @@ */ import { parseExpression } from '../../parser-expression.js'; -import { type IRNode, isExprObject } from '../../types.js'; +import type { IRNode } from '../../types.js'; import { type NodeContract, type NodeFixture, registerContract, type SemanticEnv } from './index.js'; import { evalPortableValue, isPortableBindingName } from './portable-scalar.js'; import type { Trace } from './trace.js'; @@ -17,9 +17,18 @@ function asExpressionV1Props(ir: IRNode): ExpressionV1Props { return (ir.props ?? {}) as ExpressionV1Props; } +function hasExpressionCode(expr: unknown): expr is { __expr: true; code: string } { + return ( + typeof expr === 'object' && + expr !== null && + (expr as { __expr?: unknown }).__expr === true && + typeof (expr as { code?: unknown }).code === 'string' + ); +} + function expressionSource(expr: unknown): string | undefined { if (expr === undefined || expr === null) return undefined; - if (isExprObject(expr)) return expr.code; + if (hasExpressionCode(expr)) return expr.code; return String(expr); } diff --git a/packages/python/src/codegen-body-python.ts b/packages/python/src/codegen-body-python.ts index 1875165c..4fb213fa 100644 --- a/packages/python/src/codegen-body-python.ts +++ b/packages/python/src/codegen-body-python.ts @@ -1877,6 +1877,7 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { `Mid-expression \`${node.op}\` is rejected — bind the call to a \`let\` first, then use the bound name.`, ); } + throw new Error(`emitPyExpression: unsupported expression kind '${(node as { kind?: string }).kind ?? 
'unknown'}'.`); } function emitPyTypeof(argument: ValueIR, ctx: BodyEmitContext): string { From 270d442e63c406dcbb988b87fa69b42b374cdf5a Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 12:11:13 +0200 Subject: [PATCH 05/63] Add KERN core runtime foundation --- packages/core/src/core-runtime/index.ts | 747 +++++++++++++++++++++++ packages/core/src/index.ts | 25 + packages/core/tests/core-runtime.test.ts | 329 ++++++++++ 3 files changed, 1101 insertions(+) create mode 100644 packages/core/src/core-runtime/index.ts create mode 100644 packages/core/tests/core-runtime.test.ts diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts new file mode 100644 index 00000000..3498569b --- /dev/null +++ b/packages/core/src/core-runtime/index.ts @@ -0,0 +1,747 @@ +import { parseExpression } from '../parser-expression.js'; +import { splitPortableExpressionList } from '../portable-expression-list.js'; +import type { IRNode } from '../types.js'; +import type { ValueIR } from '../value-ir.js'; + +const KERN_VALUE_BRAND: unique symbol = Symbol('KERN core runtime value'); +const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; + +export type KernValue = + | { kind: 'null' } + | { kind: 'undefined' } + | { kind: 'boolean'; value: boolean } + | { kind: 'number'; value: number } + | { kind: 'string'; value: string } + | { kind: 'array'; items: KernValue[] } + | { kind: 'record'; entries: Record } + | KernFunctionValue + | KernBuiltinValue; + +export interface KernFunctionValue { + kind: 'function'; + name?: string; + params: RuntimeParam[]; + body: IRNode[]; + env: CoreRuntimeEnv; +} + +export interface KernBuiltinValue { + kind: 'builtin'; + name: string; + call: (args: KernValue[]) => KernValue; +} + +export interface RuntimeParam { + name: string; + type?: string; + defaultExpr?: string; +} + +export type CoreCompletion = { kind: 'normal'; value: KernValue } | { kind: 'return'; value: KernValue }; + +export interface CoreRuntimeResult { + completion: 
CoreCompletion; + env: CoreRuntimeEnv; +} + +export interface CreateCoreRuntimeEnvOptions { + globals?: Record; + parent?: CoreRuntimeEnv; +} + +export class CoreRuntimeEnv { + private readonly bindings = new Map(); + + constructor(readonly parent?: CoreRuntimeEnv) {} + + define(name: string, value: KernValue): KernValue { + if (this.bindings.has(name)) throw new Error(`KERN core runtime binding already defined: ${name}`); + this.bindings.set(name, value); + return value; + } + + lookup(name: string): KernValue { + if (this.bindings.has(name)) return this.bindings.get(name) ?? kUndefined(); + if (this.parent) return this.parent.lookup(name); + throw new Error(`KERN core runtime binding not found: ${name}`); + } + + has(name: string): boolean { + return this.bindings.has(name) || (this.parent?.has(name) ?? false); + } + + child(): CoreRuntimeEnv { + return new CoreRuntimeEnv(this); + } +} + +export const kNull = (): KernValue => brandValue({ kind: 'null' }); +export const kUndefined = (): KernValue => brandValue({ kind: 'undefined' }); +export const kBoolean = (value: boolean): KernValue => brandValue({ kind: 'boolean', value }); +export const kNumber = (value: number): KernValue => { + if (!Number.isFinite(value)) throw new Error('KERN core runtime number must be finite.'); + return brandValue({ kind: 'number', value }); +}; +export const kString = (value: string): KernValue => brandValue({ kind: 'string', value }); + +export function createCoreRuntimeEnv(options: CreateCoreRuntimeEnvOptions = {}): CoreRuntimeEnv { + const env = new CoreRuntimeEnv(options.parent); + for (const [name, value] of Object.entries(options.globals ?? 
{})) env.define(name, fromHostValue(value)); + installPortableBuiltins(env); + return env; +} + +function installPortableBuiltins(env: CoreRuntimeEnv): void { + for (const builtin of [ + { + kind: 'builtin' as const, + name: 'String', + call: (args: KernValue[]) => { + if (args.length !== 1) throw new Error('KERN core runtime String() expects exactly one argument.'); + return kString(kernStringCoerce(args[0])); + }, + }, + ]) { + if (!env.has(builtin.name)) env.define(builtin.name, brandValue(builtin)); + } +} + +export function fromHostValue(value: unknown): KernValue { + if (isKernValue(value)) return value; + if (value === null) return kNull(); + if (value === undefined) return kUndefined(); + if (typeof value === 'boolean') return kBoolean(value); + if (typeof value === 'number') return kNumber(value); + if (typeof value === 'string') return kString(value); + if (Array.isArray(value)) return brandValue({ kind: 'array', items: Array.from(value, fromHostValue) }); + if (isPlainRecord(value)) { + const entries = createRecordEntries(); + for (const [key, entry] of Object.entries(value)) entries[key] = fromHostValue(entry); + return brandValue({ + kind: 'record', + entries, + }); + } + throw new Error(`Unsupported host value for KERN core runtime: ${typeof value}`); +} + +export function toHostValue(value: KernValue | undefined): unknown { + if (value === undefined) return undefined; + switch (value.kind) { + case 'null': + return null; + case 'undefined': + return undefined; + case 'boolean': + case 'number': + case 'string': + return value.value; + case 'array': + return value.items.map(toHostValue); + case 'record': + return Object.fromEntries(Object.entries(value.entries).map(([key, entry]) => [key, toHostValue(entry)])); + case 'function': + case 'builtin': + return `[KERN ${value.kind}${value.name ? 
` ${value.name}` : ''}]`; + } +} + +export function kernTruthy(value: KernValue): boolean { + switch (value.kind) { + case 'null': + case 'undefined': + return false; + case 'boolean': + return value.value; + case 'number': + return value.value !== 0; + case 'string': + return value.value.length > 0; + case 'array': + case 'record': + case 'function': + case 'builtin': + return true; + } +} + +export function evalCoreExpression(expr: string | ValueIR, env: CoreRuntimeEnv = createCoreRuntimeEnv()): KernValue { + installPortableBuiltins(env); + const valueIR = typeof expr === 'string' ? parseExpression(expr) : expr; + return evalValueIR(valueIR, env); +} + +export function runCoreRuntime( + nodeOrNodes: IRNode | readonly IRNode[], + env = createCoreRuntimeEnv(), +): CoreRuntimeResult { + const nodes: readonly IRNode[] = isIRNodeArray(nodeOrNodes) ? nodeOrNodes : runtimeChildren(nodeOrNodes); + return { completion: executeSequence(nodes, env), env }; +} + +export function callCoreFunction( + fnNode: IRNode, + args: KernValue[], + env = createCoreRuntimeEnv(), +): { value: KernValue; env: CoreRuntimeEnv } { + if (fnNode.type !== 'fn') throw new Error('KERN core runtime callCoreFunction expects an fn node.'); + const fn = makeFunction(fnNode, env); + return callFunctionValue(fn, args); +} + +function executeSequence(nodes: readonly IRNode[], env: CoreRuntimeEnv): CoreCompletion { + for (let i = 0; i < nodes.length; i += 1) { + const node = nodes[i]; + if (node.type === 'else') throw new Error('KERN core runtime `else` must immediately follow an `if`.'); + if (node.type === 'if') { + const completion = executeIf(node, nodes[i + 1], env); + if (nodes[i + 1]?.type === 'else') i += 1; + if (completion.kind !== 'normal') return completion; + continue; + } + const completion = executeNode(node, env); + if (completion.kind !== 'normal') return completion; + } + return { kind: 'normal', value: kUndefined() }; +} + +function executeNode(node: IRNode, env: CoreRuntimeEnv): 
CoreCompletion { + switch (node.type) { + case 'handler': + case '__block': + return executeSequence(node.children ?? [], env); + case 'let': + case 'expression-v1': { + const name = requiredString(node.props?.name, `${node.type} name=`); + const rawExpr = node.type === 'let' ? node.props?.value : node.props?.expr; + env.define(name, evalCoreExpression(unwrapExpr(rawExpr, `${node.type} expression`), env)); + return { kind: 'normal', value: kUndefined() }; + } + case 'return': { + if (node.props && Object.hasOwn(node.props, 'value')) { + return { kind: 'return', value: evalCoreExpression(unwrapExpr(node.props.value, 'return value='), env) }; + } + return { kind: 'return', value: kUndefined() }; + } + case 'fn': { + const fn = makeFunction(node, env); + env.define(requiredString(node.props?.name, 'fn name='), fn); + return { kind: 'normal', value: kUndefined() }; + } + case 'coalesce': + case 'firstDefined': + return executeCoalesce(node, env); + case 'firstTruthy': + return executeFirstTruthy(node, env); + default: + throw new Error(`KERN core runtime unsupported node type: ${node.type}`); + } +} + +function executeIf(node: IRNode, maybeElse: IRNode | undefined, env: CoreRuntimeEnv): CoreCompletion { + const cond = evalCoreExpression(unwrapExpr(node.props?.cond, 'if cond='), env); + if (kernTruthy(cond)) return executeSequence(node.children ?? [], env.child()); + if (maybeElse?.type === 'else') return executeSequence(maybeElse.children ?? 
[], env.child()); + return { kind: 'normal', value: kUndefined() }; +} + +function executeCoalesce(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { + const name = requiredString(node.props?.name, `${node.type} name=`); + const values = splitPortableExpressionList( + requiredString(node.props?.values, `${node.type} values=`), + `${node.type} values=`, + ); + if (values.length < 2) throw new Error(`KERN core runtime ${node.type} requires at least two values.`); + let winner = kUndefined(); + for (const value of values) { + const candidate = evalCoreExpression(value, env); + if (!isNullish(candidate)) { + winner = candidate; + break; + } + } + env.define(name, winner); + return { kind: 'normal', value: kUndefined() }; +} + +function executeFirstTruthy(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { + const name = requiredString(node.props?.name, 'firstTruthy name='); + const values = splitPortableExpressionList( + requiredString(node.props?.values, 'firstTruthy values='), + 'firstTruthy values=', + ); + if (values.length < 2) throw new Error('KERN core runtime firstTruthy requires at least two values.'); + let winner = kUndefined(); + for (const value of values) { + const candidate = evalCoreExpression(value, env); + if (kernTruthy(candidate)) { + winner = candidate; + break; + } + } + env.define(name, winner); + return { kind: 'normal', value: kUndefined() }; +} + +function evalValueIR(node: ValueIR, env: CoreRuntimeEnv): KernValue { + switch (node.kind) { + case 'numLit': + if (node.bigint) throw new Error('KERN core runtime bigint literals are not supported yet.'); + return kNumber(node.value); + case 'strLit': + return kString(node.value); + case 'boolLit': + return kBoolean(node.value); + case 'nullLit': + return kNull(); + case 'undefLit': + return kUndefined(); + case 'ident': + return env.lookup(node.name); + case 'tmplLit': + return kString( + node.quasis.reduce((out, quasi, index) => { + const expr = + index < node.expressions.length ? 
kernStringCoerce(evalValueIR(node.expressions[index], env)) : ''; + return out + quasi + expr; + }, ''), + ); + case 'arrayLit': + return brandValue({ kind: 'array', items: node.items.map((item) => evalValueIR(item, env)) }); + case 'objectLit': + return evalObjectLiteral(node, env); + case 'unary': + return evalUnary(node, env); + case 'binary': + return evalBinary(node, env); + case 'conditional': + return kernTruthy(evalValueIR(node.test, env)) + ? evalValueIR(node.consequent, env) + : evalValueIR(node.alternate, env); + case 'typeAssert': + case 'nonNull': + return evalValueIR(node.expression, env); + case 'member': + return evalMember(node, env); + case 'index': + return evalIndex(node, env); + case 'call': + return evalCall(node, env); + case 'lambda': + throw new Error('KERN core runtime lambda expressions are not supported in the first runtime slice.'); + default: + throw new Error(`KERN core runtime unsupported expression kind: ${node.kind}`); + } +} + +function evalObjectLiteral(node: Extract, env: CoreRuntimeEnv): KernValue { + const entries = createRecordEntries(); + for (const entry of node.entries) { + if (isObjectSpreadEntry(entry)) { + const spread = evalValueIR(entry.argument, env); + if (spread.kind !== 'record') throw new Error('KERN core runtime object spread requires a record.'); + for (const [key, value] of Object.entries(spread.entries)) entries[key] = value; + } else { + entries[entry.key] = evalValueIR(entry.value, env); + } + } + return brandValue({ kind: 'record', entries }); +} + +function evalUnary(node: Extract, env: CoreRuntimeEnv): KernValue { + const arg = evalValueIR(node.argument, env); + if (node.op === '!') return kBoolean(!kernTruthy(arg)); + if (node.op === '-' || node.op === '+') { + if (arg.kind !== 'number') throw new Error(`KERN core runtime unary ${node.op} requires a number.`); + return kNumber(node.op === '-' ? 
-arg.value : arg.value); + } + throw new Error(`KERN core runtime unsupported unary operator: ${node.op}`); +} + +function evalBinary(node: Extract, env: CoreRuntimeEnv): KernValue { + if (node.op === '&&') { + const left = evalValueIR(node.left, env); + return kernTruthy(left) ? evalValueIR(node.right, env) : left; + } + if (node.op === '||') { + const left = evalValueIR(node.left, env); + return kernTruthy(left) ? left : evalValueIR(node.right, env); + } + if (node.op === '??') { + const left = evalValueIR(node.left, env); + return isNullish(left) ? evalValueIR(node.right, env) : left; + } + + const left = evalValueIR(node.left, env); + const right = evalValueIR(node.right, env); + switch (node.op) { + case '+': + if (left.kind === 'number' && right.kind === 'number') return kNumber(left.value + right.value); + if (left.kind === 'string' && right.kind === 'string') return kString(left.value + right.value); + throw new Error('KERN core runtime + requires two numbers or two strings.'); + case '-': + case '*': + case '/': + case '%': + return evalNumberBinary(node.op, left, right); + case '===': + case '==': + return kBoolean(kernEquals(left, right)); + case '!==': + case '!=': + return kBoolean(!kernEquals(left, right)); + case '<': + case '<=': + case '>': + case '>=': + return evalOrderedComparison(node.op, left, right); + default: + throw new Error(`KERN core runtime unsupported binary operator: ${node.op}`); + } +} + +function evalNumberBinary(op: string, left: KernValue, right: KernValue): KernValue { + if (left.kind !== 'number' || right.kind !== 'number') { + throw new Error(`KERN core runtime ${op} requires two numbers.`); + } + if (op === '-') return kNumber(left.value - right.value); + if (op === '*') return kNumber(left.value * right.value); + if (right.value === 0 && (op === '/' || op === '%')) throw new Error(`KERN core runtime ${op} division by zero.`); + if (op === '/') return kNumber(left.value / right.value); + return kNumber(left.value % 
right.value); +} + +function evalOrderedComparison(op: string, left: KernValue, right: KernValue): KernValue { + if (!((left.kind === 'number' && right.kind === 'number') || (left.kind === 'string' && right.kind === 'string'))) { + throw new Error(`KERN core runtime ${op} requires same-kind number or string operands.`); + } + if (op === '<') return kBoolean(left.value < right.value); + if (op === '<=') return kBoolean(left.value <= right.value); + if (op === '>') return kBoolean(left.value > right.value); + return kBoolean(left.value >= right.value); +} + +function evalMember(node: Extract, env: CoreRuntimeEnv): KernValue { + const object = evalValueIR(node.object, env); + if (isNullish(object)) { + if (node.optional) return kUndefined(); + throw new Error(`KERN core runtime cannot read .${node.property} from ${object.kind}.`); + } + if (object.kind === 'record') { + return Object.hasOwn(object.entries, node.property) ? object.entries[node.property] : kUndefined(); + } + if (object.kind === 'array' && node.property === 'length') return kNumber(object.items.length); + if (object.kind === 'string' && node.property === 'length') return kNumber(object.value.length); + return kUndefined(); +} + +function evalIndex(node: Extract, env: CoreRuntimeEnv): KernValue { + const object = evalValueIR(node.object, env); + if (isNullish(object)) { + if (node.optional) return kUndefined(); + throw new Error(`KERN core runtime cannot index ${object.kind}.`); + } + const index = evalValueIR(node.index, env); + if (object.kind === 'array') { + if (index.kind !== 'number') throw new Error('KERN core runtime array index must be a number.'); + return object.items[index.value] ?? 
kUndefined(); + } + if (object.kind === 'record' || object.kind === 'string') { + if (index.kind !== 'string' && index.kind !== 'number') { + throw new Error('KERN core runtime record/string index must be a string or number.'); + } + const key = String(index.value); + if (object.kind === 'record') return Object.hasOwn(object.entries, key) ? object.entries[key] : kUndefined(); + const charIndex = + index.kind === 'number' ? index.value : INTEGER_INDEX_RE.test(index.value) ? Number(index.value) : NaN; + return Number.isInteger(charIndex) && charIndex >= 0 && charIndex < object.value.length + ? kString(object.value[charIndex] ?? '') + : kUndefined(); + } + return kUndefined(); +} + +function evalCall(node: Extract, env: CoreRuntimeEnv): KernValue { + const callee = evalValueIR(node.callee, env); + if (isNullish(callee)) { + if (node.optional) return kUndefined(); + throw new Error(`KERN core runtime cannot call ${callee.kind}.`); + } + const args = node.args.map((arg) => evalValueIR(arg, env)); + if (callee.kind === 'builtin') return callee.call(args); + if (callee.kind === 'function') return callFunctionValue(callee, args).value; + throw new Error(`KERN core runtime cannot call ${callee.kind}.`); +} + +function makeFunction(node: IRNode, env: CoreRuntimeEnv): KernFunctionValue { + return brandValue({ + kind: 'function', + name: requiredString(node.props?.name, 'fn name='), + params: runtimeParams(node), + body: runtimeFunctionBody(node), + env, + }); +} + +function callFunctionValue( + fn: KernFunctionValue, + args: readonly KernValue[], +): { value: KernValue; env: CoreRuntimeEnv } { + const callEnv = fn.env.child(); + fn.params.forEach((param, index) => { + const provided = args[index]; + const value = + provided === undefined || (provided.kind === 'undefined' && param.defaultExpr) + ? param.defaultExpr + ? 
evalCoreExpression(param.defaultExpr, callEnv) + : kUndefined() + : provided; + callEnv.define(param.name, value); + }); + const completion = executeSequence(fn.body, callEnv); + return { value: completion.value, env: callEnv }; +} + +function runtimeFunctionBody(node: IRNode): IRNode[] { + const handler = node.children?.find((child) => child.type === 'handler'); + const body = handler ? (handler.children ?? []) : (node.children ?? []); + return body.filter((child) => child.type !== 'param' && child.type !== 'decorator'); +} + +function runtimeChildren(node: IRNode): IRNode[] { + if (node.type === 'handler' || node.type === '__block') return node.children ?? []; + return [node]; +} + +function runtimeParams(node: IRNode): RuntimeParam[] { + const childParams = + node.children + ?.filter((child) => child.type === 'param') + .map((child) => ({ + name: requiredString(child.props?.name, 'param name='), + type: typeof child.props?.type === 'string' ? child.props.type : undefined, + defaultExpr: runtimeParamDefaultExpr(child), + })) ?? []; + if (childParams.length > 0) return childParams; + + const raw = typeof node.props?.params === 'string' ? node.props.params : ''; + if (!raw.trim()) return []; + return splitPortableExpressionList(raw, 'fn params=').map((part) => { + const defaultIndex = findRuntimeDefaultSeparator(part); + const beforeDefault = defaultIndex >= 0 ? part.slice(0, defaultIndex) : part; + const defaultExpr = defaultIndex >= 0 ? part.slice(defaultIndex + 1).trim() : undefined; + const typeIndex = beforeDefault.indexOf(':'); + const name = typeIndex >= 0 ? beforeDefault.slice(0, typeIndex) : beforeDefault; + const type = typeIndex >= 0 ? beforeDefault.slice(typeIndex + 1) : ''; + return { + name: requiredString(name.trim(), 'param name='), + type: type.trim() || undefined, + defaultExpr: defaultExpr || undefined, + }; + }); +} + +function runtimeParamDefaultExpr(node: IRNode): string | undefined { + const propName = Object.hasOwn(node.props ?? 
{}, 'value') ? 'value' : 'default'; + const rawValue = propName === 'value' ? node.props?.value : node.props?.default; + if (rawValue === undefined || rawValue === null) return undefined; + if (typeof rawValue === 'string' && (node.__quotedProps ?? []).includes(propName)) return JSON.stringify(rawValue); + return unwrapExpr(rawValue, 'param value='); +} + +function unwrapExpr(value: unknown, label: string): string { + if (typeof value === 'string') return value; + if (isExprObject(value)) return value.code; + if (value === undefined || value === null) throw new Error(`KERN core runtime missing ${label}.`); + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + throw new Error(`KERN core runtime ${label} must be a string expression.`); +} + +function requiredString(value: unknown, label: string): string { + if (typeof value !== 'string' || value.length === 0) throw new Error(`KERN core runtime requires ${label}.`); + return value; +} + +function kernStringCoerce(value: KernValue): string { + if (value.kind === 'null') return 'null'; + if (value.kind === 'undefined') return 'undefined'; + if (value.kind === 'boolean') return value.value ? 
'true' : 'false'; + if (value.kind === 'number') return String(value.value); + if (value.kind === 'string') return value.value; + return String(toHostValue(value)); +} + +function kernEquals(left: KernValue, right: KernValue): boolean { + if (left.kind !== right.kind) return false; + switch (left.kind) { + case 'null': + case 'undefined': + return true; + case 'boolean': + return left.value === (right as Extract).value; + case 'number': + return left.value === (right as Extract).value; + case 'string': + return left.value === (right as Extract).value; + case 'array': { + const rightArray = right as Extract; + return ( + left.items.length === rightArray.items.length && + left.items.every((item, i) => kernEquals(item, rightArray.items[i])) + ); + } + case 'record': { + const rightRecord = right as Extract; + const leftKeys = Object.keys(left.entries); + const rightKeys = Object.keys(rightRecord.entries); + return ( + leftKeys.length === rightKeys.length && + leftKeys.every( + (key) => Object.hasOwn(rightRecord.entries, key) && kernEquals(left.entries[key], rightRecord.entries[key]), + ) + ); + } + case 'function': + case 'builtin': + return left === right; + } +} + +function isNullish(value: KernValue): boolean { + return value.kind === 'null' || value.kind === 'undefined'; +} + +function isKernValue(value: unknown): value is KernValue { + if ( + !isPlainRecord(value) || + (value as { [KERN_VALUE_BRAND]?: true })[KERN_VALUE_BRAND] !== true || + typeof value.kind !== 'string' + ) { + return false; + } + switch (value.kind) { + case 'null': + case 'undefined': + return hasOnlyKeys(value, ['kind']); + case 'boolean': + return hasOnlyKeys(value, ['kind', 'value']) && typeof value.value === 'boolean'; + case 'number': + return hasOnlyKeys(value, ['kind', 'value']) && typeof value.value === 'number' && Number.isFinite(value.value); + case 'string': + return hasOnlyKeys(value, ['kind', 'value']) && typeof value.value === 'string'; + case 'array': + return ( + 
hasOnlyKeys(value, ['kind', 'items']) && + Array.isArray(value.items) && + !hasArrayHoles(value.items) && + value.items.every(isKernValue) + ); + case 'record': + return ( + hasOnlyKeys(value, ['kind', 'entries']) && + isPlainRecord(value.entries) && + Object.values(value.entries).every(isKernValue) + ); + case 'function': + return ( + hasOnlyKeys(value, ['kind', 'params', 'body', 'env'], ['name']) && + (value.name === undefined || typeof value.name === 'string') && + Array.isArray(value.params) && + Array.isArray(value.body) && + value.env instanceof CoreRuntimeEnv + ); + case 'builtin': + return ( + hasOnlyKeys(value, ['kind', 'name', 'call']) && + typeof value.name === 'string' && + typeof value.call === 'function' + ); + default: + return false; + } +} + +function brandValue(value: T): T { + Object.defineProperty(value, KERN_VALUE_BRAND, { value: true }); + return value; +} + +function hasArrayHoles(value: readonly unknown[]): boolean { + for (let i = 0; i < value.length; i += 1) { + if (!Object.hasOwn(value, i)) return true; + } + return false; +} + +function hasOnlyKeys( + value: Record, + required: readonly string[], + optional: readonly string[] = [], +): boolean { + const allowed = new Set([...required, ...optional]); + const keys = Object.keys(value); + return required.every((key) => Object.hasOwn(value, key)) && keys.every((key) => allowed.has(key)); +} + +function createRecordEntries(): Record { + return Object.create(null) as Record; +} + +function isPlainRecord(value: unknown): value is Record { + if (typeof value !== 'object' || value === null) return false; + const proto = Object.getPrototypeOf(value); + return proto === Object.prototype || proto === null; +} + +function findRuntimeDefaultSeparator(value: string): number { + let depth = 0; + let quote: '"' | "'" | '`' | '' = ''; + let escaped = false; + for (let i = 0; i < value.length; i += 1) { + const ch = value[i]; + if (quote) { + if (escaped) { + escaped = false; + } else if (ch === '\\') { + 
escaped = true; + } else if (ch === quote) { + quote = ''; + } + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + continue; + } + if (ch === '<' || ch === '(' || ch === '{' || ch === '[') depth += 1; + else if ((ch === '>' || ch === ')' || ch === '}' || ch === ']') && depth > 0) depth -= 1; + else if (ch === '=' && depth === 0) { + if ( + value[i + 1] === '>' || + value[i + 1] === '=' || + value[i - 1] === '=' || + value[i - 1] === '<' || + value[i - 1] === '>' || + value[i - 1] === '!' + ) { + continue; + } + return i; + } + } + return -1; +} + +function isExprObject(value: unknown): value is { __expr: true; code: string } { + return isPlainRecord(value) && value.__expr === true && typeof value.code === 'string'; +} + +function isIRNodeArray(value: IRNode | readonly IRNode[]): value is readonly IRNode[] { + return Array.isArray(value); +} + +function isObjectSpreadEntry( + entry: Extract['entries'][number], +): entry is { kind: 'spread'; argument: ValueIR } { + return 'kind' in entry && entry.kind === 'spread'; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index d6b7b160..6cf5b14c 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -140,6 +140,31 @@ export { VALID_STRUCTURES, VALID_TARGETS, } from './config.js'; +export type { + CoreCompletion, + CoreRuntimeResult, + CreateCoreRuntimeEnvOptions, + KernBuiltinValue, + KernFunctionValue, + KernValue, + RuntimeParam, +} from './core-runtime/index.js'; +// KERN Core Runtime +export { + CoreRuntimeEnv, + callCoreFunction, + createCoreRuntimeEnv, + evalCoreExpression, + fromHostValue, + kBoolean, + kernTruthy, + kNull, + kNumber, + kString, + kUndefined, + runCoreRuntime, + toHostValue, +} from './core-runtime/index.js'; export type { CoverageGap } from './coverage-gap.js'; // Coverage gap emitter (v3) export { collectCoverageGaps, readCoverageGaps, writeCoverageGaps } from './coverage-gap.js'; diff --git 
a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts new file mode 100644 index 00000000..6ded098a --- /dev/null +++ b/packages/core/tests/core-runtime.test.ts @@ -0,0 +1,329 @@ +import { + CoreRuntimeEnv, + callCoreFunction, + createCoreRuntimeEnv, + evalCoreExpression, + fromHostValue, + kBoolean, + kernTruthy, + kNull, + kNumber, + kString, + kUndefined, + runCoreRuntime, + toHostValue, +} from '../src/index.js'; +import type { IRNode } from '../src/types.js'; + +function handler(children: IRNode[]): IRNode { + return { type: 'handler', props: { lang: 'kern' }, children }; +} + +describe('KERN core runtime values and expressions', () => { + test('truthiness is owned by KERN values', () => { + expect(kernTruthy(kNull())).toBe(false); + expect(kernTruthy(kUndefined())).toBe(false); + expect(kernTruthy(kBoolean(false))).toBe(false); + expect(kernTruthy(kNumber(0))).toBe(false); + expect(kernTruthy(kString(''))).toBe(false); + expect(kernTruthy(kString('x'))).toBe(true); + }); + + test('String(value) uses KERN coercion, not host spelling', () => { + const env = createCoreRuntimeEnv({ + globals: { + n: 12, + none: null, + yes: true, + no: false, + }, + }); + expect(toHostValue(evalCoreExpression('String(n)', env))).toBe('12'); + expect(toHostValue(evalCoreExpression('String(none)', env))).toBe('null'); + expect(toHostValue(evalCoreExpression('String(yes)', env))).toBe('true'); + expect(toHostValue(evalCoreExpression('String(no)', env))).toBe('false'); + }); + + test('null and undefined are distinct but both nullish', () => { + expect(toHostValue(kNull())).toBeNull(); + expect(toHostValue(kUndefined())).toBeUndefined(); + const env = createCoreRuntimeEnv({ globals: { a: undefined, b: null, c: 5 } }); + expect(toHostValue(evalCoreExpression('a ?? c', env))).toBe(5); + expect(toHostValue(evalCoreExpression('b ?? 
c', env))).toBe(5); + }); + + test('plain host records with kind fields are not mistaken for KERN values', () => { + const value = fromHostValue({ kind: 'trap', label: 'Trap' }); + expect(toHostValue(value)).toEqual({ kind: 'trap', label: 'Trap' }); + expect(toHostValue(fromHostValue({ kind: 'null', label: 'Trap' }))).toEqual({ kind: 'null', label: 'Trap' }); + expect(toHostValue(fromHostValue({ kind: 'string', value: 'x', label: 'Trap' }))).toEqual({ + kind: 'string', + label: 'Trap', + value: 'x', + }); + expect(toHostValue(fromHostValue({ kind: 'string', value: 'door' }))).toEqual({ kind: 'string', value: 'door' }); + }); + + test('record maps use own properties only', () => { + const value = fromHostValue({ a: 1 }); + if (value.kind !== 'record') throw new Error('expected record value'); + expect(Object.getPrototypeOf(value.entries)).toBeNull(); + const env = createCoreRuntimeEnv({ globals: { record: value } }); + expect(toHostValue(evalCoreExpression('record.a', env))).toBe(1); + expect(toHostValue(evalCoreExpression('record.toString', env))).toBeUndefined(); + }); + + test('sparse host arrays become dense KERN arrays with undefined entries', () => { + const host = [] as unknown[]; + host[1] = 'set'; + expect(toHostValue(fromHostValue(host))).toEqual([undefined, 'set']); + }); + + test('caller-created envs still get portable builtins for expression evaluation', () => { + const env = new CoreRuntimeEnv(); + env.define('flag', kBoolean(false)); + expect(toHostValue(evalCoreExpression('String(flag)', env))).toBe('false'); + }); + + test('structural equality preserves undefined/null distinctions in arrays and records', () => { + const env = createCoreRuntimeEnv({ + globals: { + xs: [undefined], + ys: [null], + a: { value: undefined }, + b: {}, + }, + }); + expect(toHostValue(evalCoreExpression('xs === ys', env))).toBe(false); + expect(toHostValue(evalCoreExpression('a === b', env))).toBe(false); + }); + + test('string index misses return KERN undefined', () => { + 
const env = createCoreRuntimeEnv({ globals: { label: 'ab' } }); + expect(toHostValue(evalCoreExpression('label[1]', env))).toBe('b'); + expect(toHostValue(evalCoreExpression('label["1"]', env))).toBe('b'); + expect(toHostValue(evalCoreExpression('label[4]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('label[""]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('label["1.0"]', env))).toBeUndefined(); + }); + + test('optional index skips unresolved index expressions for nullish objects', () => { + const env = createCoreRuntimeEnv({ globals: { maybe: null } }); + expect(toHostValue(evalCoreExpression('maybe?.[missingName]', env))).toBeUndefined(); + }); + + test('optional calls skip unresolved argument expressions for nullish callees', () => { + const env = createCoreRuntimeEnv({ globals: { maybeFn: null } }); + expect(toHostValue(evalCoreExpression('maybeFn?.(missingName)', env))).toBeUndefined(); + }); + + test('division by zero fails with a KERN runtime diagnostic', () => { + const env = createCoreRuntimeEnv(); + expect(() => evalCoreExpression('4 / 0', env)).toThrow(/division by zero/); + expect(() => evalCoreExpression('4 % 0', env)).toThrow(/division by zero/); + }); +}); + +describe('KERN core runtime statements', () => { + test('runs let, expression-v1, and return', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'count', value: '41' } }, + { type: 'expression-v1', props: { name: 'label', expr: '`n=${count + 1}`' } }, + { type: 'return', props: { value: 'label' } }, + ]), + ); + expect(result.completion.kind).toBe('return'); + expect(toHostValue(result.completion.value)).toBe('n=42'); + }); + + test('if/else executes only the selected branch and block-local lets do not leak', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'x', value: '1' } }, + { type: 'if', props: { cond: 'false' }, children: [{ type: 'let', props: { name: 'x', value: '2' } }] }, + { 
type: 'else', children: [{ type: 'let', props: { name: 'y', value: '3' } }] }, + { type: 'return', props: { value: 'x' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toBe(1); + expect(() => result.env.lookup('y')).toThrow(/not found/); + }); + + test('coalesce and firstDefined preserve falsy defined values', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'missing', value: 'undefined' } }, + { type: 'let', props: { name: 'zero', value: '0' } }, + { type: 'let', props: { name: 'flag', value: 'false' } }, + { type: 'let', props: { name: 'empty', value: '""' } }, + { type: 'coalesce', props: { name: 'a', values: "missing, zero, 'fallback'" } }, + { type: 'firstDefined', props: { name: 'b', values: "missing, flag, 'fallback'" } }, + { type: 'coalesce', props: { name: 'c', values: "missing, empty, 'fallback'" } }, + { type: 'return', props: { value: '{ a: a, b: b, c: c }' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toEqual({ a: 0, b: false, c: '' }); + }); + + test('coalesce and firstTruthy short-circuit later expressions', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'present', value: '"ok"' } }, + { type: 'coalesce', props: { name: 'a', values: 'present, missingName' } }, + { type: 'firstTruthy', props: { name: 'b', values: 'present, alsoMissing' } }, + { type: 'return', props: { value: '{ a: a, b: b }' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toEqual({ a: 'ok', b: 'ok' }); + }); +}); + +describe('KERN core runtime functions', () => { + test('nested fn captures the lexical environment and returns through its own frame', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'base', value: '10' } }, + { + type: 'fn', + props: { name: 'addBase', params: 'amount:number', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'amount + 
base' } }], + }, + ], + }, + { type: 'let', props: { name: 'total', value: 'addBase(5)' } }, + { type: 'return', props: { value: 'total' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toBe(15); + }); + + test('function params shadow outer bindings without mutating them', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'x', value: '1' } }, + { + type: 'fn', + props: { name: 'echo', params: 'x:number', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'x' } }], + }, + ], + }, + { type: 'let', props: { name: 'inner', value: 'echo(7)' } }, + { type: 'return', props: { value: '{ outer: x, inner: inner }' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toEqual({ outer: 1, inner: 7 }); + }); + + test('function parameter defaults evaluate in the call frame', () => { + const result = runCoreRuntime( + handler([ + { type: 'let', props: { name: 'base', value: '5' } }, + { + type: 'fn', + props: { name: 'fill', params: 'x:number=base + 2,y:number=x + 3', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'y' } }], + }, + ], + }, + { type: 'return', props: { value: 'fill()' } }, + ]), + ); + expect(toHostValue(result.completion.value)).toBe(10); + }); + + test('explicit KERN undefined triggers function parameter defaults', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'fallback', params: 'value:number=3', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'value' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, [kUndefined()]); + expect(toHostValue(result.value)).toBe(3); + }); + + test('legacy parameter parsing preserves colons inside type text', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'readA', params: 
'obj:{a:number,b:string}={ a: 1, b: "x" }', returns: 'number' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: 'obj.a' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, []); + expect(toHostValue(result.value)).toBe(1); + }); + + test('structured param child defaults are supported', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'greet', returns: 'string' }, + children: [ + { type: 'param', props: { name: 'name', type: 'string', value: 'world' }, __quotedProps: ['value'] }, + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: '`hi ${name}`' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, []); + expect(toHostValue(result.value)).toBe('hi world'); + }); + + test('structured default prop quoting is supported', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'greet', returns: 'string' }, + children: [ + { type: 'param', props: { name: 'name', type: 'string', default: 'world' }, __quotedProps: ['default'] }, + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: '`hi ${name}`' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, []); + expect(toHostValue(result.value)).toBe('hi world'); + }); + + test('callCoreFunction executes a top-level fn with host args', () => { + const fnNode: IRNode = { + type: 'fn', + props: { name: 'label', params: 'value:number', returns: 'string' }, + children: [ + { + type: 'handler', + props: { lang: 'kern' }, + children: [{ type: 'return', props: { value: '`v=${value}`' } }], + }, + ], + }; + const result = callCoreFunction(fnNode, [fromHostValue(9)]); + expect(toHostValue(result.value)).toBe('v=9'); + }); +}); From 4a8107aa0e8534cad53f06fdf008315a13e3541d Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 18:30:04 +0200 Subject: [PATCH 06/63] feat(core): add kern object runtime foundation --- 
packages/core/package.json | 4 + packages/core/src/core-contracts/boolean.ts | 111 +++ packages/core/src/core-contracts/function.ts | 8 + packages/core/src/core-contracts/index.ts | 57 ++ packages/core/src/core-contracts/list.ts | 54 ++ packages/core/src/core-contracts/nullish.ts | 15 + packages/core/src/core-contracts/number.ts | 215 ++++++ packages/core/src/core-contracts/record.ts | 32 + packages/core/src/core-contracts/schema.ts | 191 +++++ packages/core/src/core-contracts/semantics.ts | 251 +++++++ packages/core/src/core-contracts/string.ts | 337 +++++++++ .../core/src/core-runtime/contract-adapter.ts | 93 +++ packages/core/src/core-runtime/index.ts | 659 +++++++++++++++++- packages/core/src/core-runtime/value-brand.ts | 6 + packages/core/src/index.ts | 40 ++ packages/core/tests/core-contracts.test.ts | 341 +++++++++ packages/core/tests/core-runtime.test.ts | 306 ++++++++ 17 files changed, 2691 insertions(+), 29 deletions(-) create mode 100644 packages/core/src/core-contracts/boolean.ts create mode 100644 packages/core/src/core-contracts/function.ts create mode 100644 packages/core/src/core-contracts/index.ts create mode 100644 packages/core/src/core-contracts/list.ts create mode 100644 packages/core/src/core-contracts/nullish.ts create mode 100644 packages/core/src/core-contracts/number.ts create mode 100644 packages/core/src/core-contracts/record.ts create mode 100644 packages/core/src/core-contracts/schema.ts create mode 100644 packages/core/src/core-contracts/semantics.ts create mode 100644 packages/core/src/core-contracts/string.ts create mode 100644 packages/core/src/core-runtime/contract-adapter.ts create mode 100644 packages/core/src/core-runtime/value-brand.ts create mode 100644 packages/core/tests/core-contracts.test.ts diff --git a/packages/core/package.json b/packages/core/package.json index 9d166f08..ea8eec4f 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -30,6 +30,10 @@ "types": "./dist/config.d.ts", "default": 
"./dist/config.js" }, + "./core-contracts": { + "types": "./dist/core-contracts/index.d.ts", + "default": "./dist/core-contracts/index.js" + }, "./parser": { "types": "./dist/parser.d.ts", "default": "./dist/parser.js" diff --git a/packages/core/src/core-contracts/boolean.ts b/packages/core/src/core-contracts/boolean.ts new file mode 100644 index 00000000..25c8b8ff --- /dev/null +++ b/packages/core/src/core-contracts/boolean.ts @@ -0,0 +1,111 @@ +import type { CoreTypeContract } from './schema.js'; + +export const BOOLEAN_CONTRACT = { + name: 'Boolean', + kind: 'primitive', + strict: true, + operations: [ + { + id: 'Boolean.not', + kind: 'method', + args: ['Boolean'], + returns: 'Boolean', + lowers: { + kern: 'Boolean.not($0)', + ts: '__kernBooleanNot($0)', + python: '__kern_boolean_not($0)', + }, + fixtures: [ + { args: [true], returns: false }, + { args: [false], returns: true }, + ], + review: { + summary: 'Strict boolean negation.', + graph: ['Boolean', 'portable'], + }, + }, + { + id: 'Boolean.and', + kind: 'method', + args: ['Boolean', 'Boolean'], + returns: 'Boolean', + lowers: { + kern: 'Boolean.and($0, $1)', + ts: '__kernBooleanAnd($0, $1)', + python: '__kern_boolean_and($0, $1)', + }, + fixtures: [ + { args: [true, true], returns: true }, + { args: [true, false], returns: false }, + { args: [false, true], returns: false }, + { args: [true, 'true'], throws: { code: 'strict-type', message: 'Boolean.and expects Boolean, Boolean.' } }, + { args: [true, 1], throws: { code: 'strict-type', message: 'Boolean.and expects Boolean, Boolean.' 
} }, + ], + review: { + summary: 'Strict boolean conjunction; both operands must be Boolean.', + graph: ['Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Boolean.or', + kind: 'method', + args: ['Boolean', 'Boolean'], + returns: 'Boolean', + lowers: { + kern: 'Boolean.or($0, $1)', + ts: '__kernBooleanOr($0, $1)', + python: '__kern_boolean_or($0, $1)', + }, + fixtures: [ + { args: [false, false], returns: false }, + { args: [true, false], returns: true }, + { args: [false, true], returns: true }, + { args: [false, 'false'], throws: { code: 'strict-type', message: 'Boolean.or expects Boolean, Boolean.' } }, + ], + review: { + summary: 'Strict boolean disjunction; both operands must be Boolean.', + graph: ['Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Boolean.equals', + kind: 'method', + args: ['Boolean', 'Boolean'], + returns: 'Boolean', + lowers: { + kern: 'Boolean.equals($0, $1)', + ts: '__kernBooleanEquals($0, $1)', + python: '__kern_boolean_equals($0, $1)', + }, + fixtures: [ + { args: [true, true], returns: true }, + { args: [true, false], returns: false }, + { args: [true, 'true'], throws: { code: 'strict-type', message: 'Boolean.equals expects Boolean, Boolean.' } }, + { args: [true, 1], throws: { code: 'strict-type', message: 'Boolean.equals expects Boolean, Boolean.' 
} }, + ], + review: { + summary: 'Strict boolean equality; cross-type equality is a type error in schema v1.', + graph: ['Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Boolean.toString', + kind: 'coercion', + args: ['Boolean'], + returns: 'String', + lowers: { + kern: 'Boolean.toString($0)', + ts: '__kernBooleanToString($0)', + python: '__kern_boolean_to_string($0)', + }, + fixtures: [ + { args: [true], returns: 'true' }, + { args: [false], returns: 'false' }, + ], + review: { + summary: 'Portable Boolean to String coercion using KERN lowercase boolean spelling.', + graph: ['Boolean', 'String', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/function.ts b/packages/core/src/core-contracts/function.ts new file mode 100644 index 00000000..d289ad28 --- /dev/null +++ b/packages/core/src/core-contracts/function.ts @@ -0,0 +1,8 @@ +import type { CoreTypeContract } from './schema.js'; + +export const FUNCTION_CONTRACT = { + name: 'Function', + kind: 'callable', + strict: true, + operations: [], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/index.ts b/packages/core/src/core-contracts/index.ts new file mode 100644 index 00000000..24de1296 --- /dev/null +++ b/packages/core/src/core-contracts/index.ts @@ -0,0 +1,57 @@ +import { BOOLEAN_CONTRACT } from './boolean.js'; +import { FUNCTION_CONTRACT } from './function.js'; +import { LIST_CONTRACT } from './list.js'; +import { NULL_CONTRACT, UNDEFINED_CONTRACT } from './nullish.js'; +import { NUMBER_CONTRACT } from './number.js'; +import { RECORD_CONTRACT } from './record.js'; +import type { CoreTypeContractRegistry } from './schema.js'; +import { STRING_CONTRACT } from './string.js'; + +export { BOOLEAN_CONTRACT } from './boolean.js'; +export { FUNCTION_CONTRACT } from './function.js'; +export { LIST_CONTRACT } from './list.js'; +export { NULL_CONTRACT, UNDEFINED_CONTRACT } from './nullish.js'; +export { NUMBER_CONTRACT 
} from './number.js'; +export { RECORD_CONTRACT } from './record.js'; +export type { + CoreFixture, + CoreFixtureError, + CoreFixtureValue, + CoreGraphEdge, + CoreLowerings, + CoreOperation, + CoreOperationKind, + CoreOperationReturns, + CoreTypeContract, + CoreTypeContractRegistry, + CoreTypeKind, + CoreTypeName, +} from './schema.js'; +export { + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + CORE_TYPE_NAMES, + contractToGraphEdges, + isCoreFixtureFunction, + isCoreFixtureUndefined, +} from './schema.js'; +export { + CoreContractEvaluationError, + coreFixtureValueType, + evaluateCoreContractOperation, +} from './semantics.js'; +export { STRING_CONTRACT } from './string.js'; + +export const CORE_TYPE_CONTRACTS = { + schemaVersion: 1, + types: { + String: STRING_CONTRACT, + Boolean: BOOLEAN_CONTRACT, + Number: NUMBER_CONTRACT, + List: LIST_CONTRACT, + Record: RECORD_CONTRACT, + Function: FUNCTION_CONTRACT, + Null: NULL_CONTRACT, + Undefined: UNDEFINED_CONTRACT, + }, +} as const satisfies CoreTypeContractRegistry; diff --git a/packages/core/src/core-contracts/list.ts b/packages/core/src/core-contracts/list.ts new file mode 100644 index 00000000..4ea5e38d --- /dev/null +++ b/packages/core/src/core-contracts/list.ts @@ -0,0 +1,54 @@ +import { CORE_FIXTURE_UNDEFINED, type CoreTypeContract } from './schema.js'; + +export const LIST_CONTRACT = { + name: 'List', + kind: 'collection', + strict: true, + operations: [ + { + id: 'List.length', + kind: 'property', + args: ['List'], + returns: 'Number', + lowers: { + kern: 'List.length($0)', + ts: '__kernListLength($0)', + python: '__kern_list_length($0)', + }, + fixtures: [ + { args: [[]], returns: 0 }, + { args: [[1, 2, 3]], returns: 3 }, + { args: ['not-list'], throws: { code: 'strict-type', message: 'List.length expects List.' 
} }, + ], + review: { + summary: 'Strict list cardinality.', + graph: ['List', 'Number', 'strict', 'portable'], + }, + }, + { + id: 'List.index', + kind: 'method', + args: ['List', 'Number'], + returns: ['String', 'Boolean', 'Number', 'List', 'Record', 'Function', 'Null', 'Undefined'], + lowers: { + kern: 'List.index($0, $1)', + ts: '__kernListIndex($0, $1)', + python: '__kern_list_index($0, $1)', + }, + fixtures: [ + { args: [[10, 20, 30], 0], returns: 10 }, + { args: [[10, 20, 30], 2], returns: 30 }, + { args: [[10, 20, 30], 3], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[10, 20, 30], -1], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[10, 20, 30], 1.5], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[10, CORE_FIXTURE_UNDEFINED, 30], 1], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[], 0], returns: CORE_FIXTURE_UNDEFINED }, + { args: [[10], '0'], throws: { code: 'strict-type', message: 'List.index expects List, Number.' } }, + ], + review: { + summary: 'Strict list index by numeric offset; misses return Undefined.', + graph: ['List', 'Number', 'Undefined', 'strict', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/nullish.ts b/packages/core/src/core-contracts/nullish.ts new file mode 100644 index 00000000..8b59b36b --- /dev/null +++ b/packages/core/src/core-contracts/nullish.ts @@ -0,0 +1,15 @@ +import type { CoreTypeContract } from './schema.js'; + +export const NULL_CONTRACT = { + name: 'Null', + kind: 'nullish', + strict: true, + operations: [], +} as const satisfies CoreTypeContract; + +export const UNDEFINED_CONTRACT = { + name: 'Undefined', + kind: 'nullish', + strict: true, + operations: [], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/number.ts b/packages/core/src/core-contracts/number.ts new file mode 100644 index 00000000..33db742a --- /dev/null +++ b/packages/core/src/core-contracts/number.ts @@ -0,0 +1,215 @@ +import type { CoreTypeContract } 
from './schema.js'; + +export const NUMBER_CONTRACT = { + name: 'Number', + kind: 'primitive', + strict: true, + operations: [ + { + id: 'Number.negate', + kind: 'method', + args: ['Number'], + returns: 'Number', + lowers: { + kern: 'Number.negate($0)', + ts: '__kernNumberNegate($0)', + python: '__kern_number_negate($0)', + }, + fixtures: [ + { args: [3], returns: -3 }, + { args: [-3], returns: 3 }, + { args: ['3'], throws: { code: 'strict-type', message: 'Number.negate expects Number.' } }, + ], + review: { + summary: 'Strict numeric negation over finite KERN Numbers.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.add', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.add($0, $1)', + ts: '__kernNumberAdd($0, $1)', + python: '__kern_number_add($0, $1)', + }, + fixtures: [ + { args: [2, 3], returns: 5 }, + { args: [-2, 3], returns: 1 }, + { args: [2, '3'], throws: { code: 'strict-type', message: 'Number.add expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric addition over finite KERN Numbers.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.subtract', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.subtract($0, $1)', + ts: '__kernNumberSubtract($0, $1)', + python: '__kern_number_subtract($0, $1)', + }, + fixtures: [ + { args: [5, 3], returns: 2 }, + { args: [3, 5], returns: -2 }, + { args: [5, false], throws: { code: 'strict-type', message: 'Number.subtract expects Number, Number.' 
} }, + ], + review: { + summary: 'Strict numeric subtraction over finite KERN Numbers.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.multiply', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.multiply($0, $1)', + ts: '__kernNumberMultiply($0, $1)', + python: '__kern_number_multiply($0, $1)', + }, + fixtures: [ + { args: [3, 4], returns: 12 }, + { args: [-3, 4], returns: -12 }, + { args: [3, null], throws: { code: 'strict-type', message: 'Number.multiply expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric multiplication over finite KERN Numbers.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.divide', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.divide($0, $1)', + ts: '__kernNumberDivide($0, $1)', + python: '__kern_number_divide($0, $1)', + }, + fixtures: [ + { args: [6, 2], returns: 3 }, + { args: [5, 2], returns: 2.5 }, + { args: [1, 0], throws: { code: 'division-by-zero', message: 'Number.divide division by zero.' } }, + { args: [6, '2'], throws: { code: 'strict-type', message: 'Number.divide expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric division over finite KERN Numbers; zero divisor is a contract error.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.remainder', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Number', + lowers: { + kern: 'Number.remainder($0, $1)', + ts: '__kernNumberRemainder($0, $1)', + python: '__kern_number_remainder($0, $1)', + }, + fixtures: [ + { args: [5, 2], returns: 1 }, + { args: [-5, 2], returns: -1 }, + { args: [5, -2], returns: 1 }, + { args: [1, 0], throws: { code: 'division-by-zero', message: 'Number.remainder division by zero.' } }, + { args: [5, '2'], throws: { code: 'strict-type', message: 'Number.remainder expects Number, Number.' 
} }, + ], + review: { + summary: 'Strict numeric remainder using KERN dividend-sign semantics; zero divisor is a contract error.', + graph: ['Number', 'strict', 'portable'], + }, + }, + { + id: 'Number.lessThan', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Boolean', + lowers: { + kern: 'Number.lessThan($0, $1)', + ts: '__kernNumberLessThan($0, $1)', + python: '__kern_number_less_than($0, $1)', + }, + fixtures: [ + { args: [2, 3], returns: true }, + { args: [3, 2], returns: false }, + { args: [2, '3'], throws: { code: 'strict-type', message: 'Number.lessThan expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric less-than comparison.', + graph: ['Number', 'Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Number.lessThanOrEqual', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Boolean', + lowers: { + kern: 'Number.lessThanOrEqual($0, $1)', + ts: '__kernNumberLessThanOrEqual($0, $1)', + python: '__kern_number_less_than_or_equal($0, $1)', + }, + fixtures: [ + { args: [2, 2], returns: true }, + { args: [3, 2], returns: false }, + { args: [2, null], throws: { code: 'strict-type', message: 'Number.lessThanOrEqual expects Number, Number.' } }, + ], + review: { + summary: 'Strict numeric less-than-or-equal comparison.', + graph: ['Number', 'Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Number.greaterThan', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Boolean', + lowers: { + kern: 'Number.greaterThan($0, $1)', + ts: '__kernNumberGreaterThan($0, $1)', + python: '__kern_number_greater_than($0, $1)', + }, + fixtures: [ + { args: [3, 2], returns: true }, + { args: [2, 3], returns: false }, + { args: [3, true], throws: { code: 'strict-type', message: 'Number.greaterThan expects Number, Number.' 
} }, + ], + review: { + summary: 'Strict numeric greater-than comparison.', + graph: ['Number', 'Boolean', 'strict', 'portable'], + }, + }, + { + id: 'Number.greaterThanOrEqual', + kind: 'method', + args: ['Number', 'Number'], + returns: 'Boolean', + lowers: { + kern: 'Number.greaterThanOrEqual($0, $1)', + ts: '__kernNumberGreaterThanOrEqual($0, $1)', + python: '__kern_number_greater_than_or_equal($0, $1)', + }, + fixtures: [ + { args: [3, 3], returns: true }, + { args: [2, 3], returns: false }, + { + args: [3, '3'], + throws: { code: 'strict-type', message: 'Number.greaterThanOrEqual expects Number, Number.' }, + }, + ], + review: { + summary: 'Strict numeric greater-than-or-equal comparison.', + graph: ['Number', 'Boolean', 'strict', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/record.ts b/packages/core/src/core-contracts/record.ts new file mode 100644 index 00000000..d250f23f --- /dev/null +++ b/packages/core/src/core-contracts/record.ts @@ -0,0 +1,32 @@ +import { CORE_FIXTURE_UNDEFINED, type CoreTypeContract } from './schema.js'; + +export const RECORD_CONTRACT = { + name: 'Record', + kind: 'record', + strict: true, + operations: [ + { + id: 'Record.get', + kind: 'method', + args: ['Record', 'String'], + returns: ['String', 'Boolean', 'Number', 'List', 'Record', 'Function', 'Null', 'Undefined'], + lowers: { + kern: 'Record.get($0, $1)', + ts: '__kernRecordGet($0, $1)', + python: '__kern_record_get($0, $1)', + }, + fixtures: [ + { args: [{ x: 1 }, 'x'], returns: 1 }, + { args: [{ x: 1 }, 'y'], returns: CORE_FIXTURE_UNDEFINED }, + { args: [{ x: CORE_FIXTURE_UNDEFINED }, 'x'], returns: CORE_FIXTURE_UNDEFINED }, + { args: [{}, 'toString'], returns: CORE_FIXTURE_UNDEFINED }, + { args: [{}, ''], returns: CORE_FIXTURE_UNDEFINED }, + { args: [{ x: 1 }, 0], throws: { code: 'strict-type', message: 'Record.get expects Record, String.' 
} }, + ], + review: { + summary: 'Strict own-key record lookup; missing keys return Undefined.', + graph: ['Record', 'String', 'Undefined', 'strict', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-contracts/schema.ts b/packages/core/src/core-contracts/schema.ts new file mode 100644 index 00000000..9436c7d8 --- /dev/null +++ b/packages/core/src/core-contracts/schema.ts @@ -0,0 +1,191 @@ +export const CORE_TYPE_NAMES = [ + 'String', + 'Boolean', + 'Number', + 'List', + 'Record', + 'Function', + 'Null', + 'Undefined', +] as const; + +export type CoreTypeName = (typeof CORE_TYPE_NAMES)[number]; + +export type CoreTypeKind = 'primitive' | 'collection' | 'callable' | 'record' | 'nullish'; + +export type CoreOperationKind = 'method' | 'property' | 'operator' | 'constructor' | 'coercion'; + +export type CoreLowerings = { + /** + * Target lowerings are semantic helper calls, not raw host snippets. + * Each helper must enforce the same strict argument and return contract as + * evaluateCoreContractOperation before using host operations internally. + */ + readonly kern?: string; + readonly ts?: string; + readonly python?: string; +}; + +// Fixture data reserves this exact record shape for Undefined so JSON fixtures +// can distinguish null from undefined without overloading result records. 
+export const CORE_FIXTURE_UNDEFINED = { __kernFixture: 'Undefined' } as const; +export const CORE_FIXTURE_FUNCTION = { __kernFixture: 'Function' } as const; + +export function isCoreFixtureUndefined(value: unknown): value is typeof CORE_FIXTURE_UNDEFINED { + return ( + typeof value === 'object' && + value !== null && + !Array.isArray(value) && + Object.keys(value).length === 1 && + (value as { readonly __kernFixture?: unknown }).__kernFixture === 'Undefined' + ); +} + +export function isCoreFixtureFunction(value: unknown): value is typeof CORE_FIXTURE_FUNCTION { + return ( + typeof value === 'object' && + value !== null && + !Array.isArray(value) && + Object.keys(value).length === 1 && + (value as { readonly __kernFixture?: unknown }).__kernFixture === 'Function' + ); +} + +export type CoreFixtureValue = + | string + | number + | boolean + | null + | typeof CORE_FIXTURE_UNDEFINED + | typeof CORE_FIXTURE_FUNCTION + | readonly CoreFixtureValue[] + | { readonly [key: string]: CoreFixtureValue }; + +export type CoreFixtureError = { + readonly code: 'strict-type' | 'division-by-zero'; + readonly message: string; +}; + +export type CoreFixture = + | { + readonly args: readonly CoreFixtureValue[]; + readonly returns: CoreFixtureValue; + } + | { + readonly args: readonly CoreFixtureValue[]; + readonly throws: CoreFixtureError; + }; + +export type CoreOperationReturns = CoreTypeName | readonly CoreTypeName[]; + +export type CoreOperation = { + readonly id: string; + readonly kind: CoreOperationKind; + readonly args: readonly CoreTypeName[]; + readonly returns: CoreOperationReturns; + readonly lowers?: CoreLowerings; + readonly fixtures: readonly CoreFixture[]; + readonly review: { + readonly summary: string; + readonly graph: readonly string[]; + }; +}; + +export type CoreTypeContract = { + readonly name: CoreTypeName; + readonly kind: CoreTypeKind; + readonly strict: true; + readonly operations: readonly CoreOperation[]; +}; + +export type CoreTypeContractRegistry = { + 
readonly schemaVersion: 1; + readonly types: { readonly [Name in CoreTypeName]: CoreTypeContract & { readonly name: Name } }; +}; + +export type CoreGraphEdge = { + readonly from: string; + readonly relation: string; + readonly to: string; + readonly operation?: string; + readonly index?: number; +}; + +export function contractToGraphEdges(contract: CoreTypeContract): CoreGraphEdge[] { + const edges: CoreGraphEdge[] = []; + + for (const operation of contract.operations) { + if (!operation.id.startsWith(`${contract.name}.`)) { + throw new Error(`Core operation id ${operation.id} must be prefixed with ${contract.name}.`); + } + const methodName = operation.id.slice(contract.name.length + 1); + const explicitArgs = operation.args.slice(1).join(', '); + const operationRelation = `${methodName}(${explicitArgs})`; + + edges.push({ + from: contract.name, + relation: operationRelation, + to: formatReturnTypes(operation.returns), + operation: operation.id, + }); + + operation.args.forEach((arg, index) => { + edges.push({ + from: operation.id, + relation: 'accepts', + to: arg, + operation: operation.id, + index, + }); + }); + + for (const returnType of returnTypeNames(operation.returns)) { + edges.push({ + from: operation.id, + relation: 'returns', + to: returnType, + operation: operation.id, + }); + } + + for (const target of ['kern', 'ts', 'python'] as const) { + const lowering = operation.lowers?.[target]; + if (!lowering) continue; + edges.push({ + from: operation.id, + relation: `lowers.${target}`, + to: lowering, + operation: operation.id, + }); + } + + operation.fixtures.forEach((_, index) => { + edges.push({ + from: operation.id, + relation: 'fixture', + to: `${operation.id}.fixture.${index}`, + operation: operation.id, + index, + }); + }); + + for (const tag of operation.review.graph) { + edges.push({ + from: operation.id, + relation: 'tagged', + to: tag, + operation: operation.id, + }); + } + } + + return edges; +} + +function returnTypeNames(returns: 
CoreOperationReturns): readonly CoreTypeName[] { + return typeof returns === 'string' ? [returns] : returns; +} + +function formatReturnTypes(returns: CoreOperationReturns): string { + return returnTypeNames(returns).join(' | '); +} diff --git a/packages/core/src/core-contracts/semantics.ts b/packages/core/src/core-contracts/semantics.ts new file mode 100644 index 00000000..4041189b --- /dev/null +++ b/packages/core/src/core-contracts/semantics.ts @@ -0,0 +1,251 @@ +import { + CORE_FIXTURE_UNDEFINED, + type CoreFixtureValue, + type CoreTypeName, + isCoreFixtureFunction, + isCoreFixtureUndefined, +} from './schema.js'; + +export class CoreContractEvaluationError extends Error { + constructor( + readonly code: 'strict-type' | 'division-by-zero' | 'unsupported-operation', + message: string, + ) { + super(message); + this.name = 'CoreContractEvaluationError'; + } +} + +export function coreFixtureValueType(value: CoreFixtureValue): CoreTypeName { + if (typeof value === 'string') return 'String'; + if (typeof value === 'boolean') return 'Boolean'; + if (typeof value === 'number') return 'Number'; + if (value === null) return 'Null'; + if (isCoreFixtureUndefined(value)) return 'Undefined'; + if (isCoreFixtureFunction(value)) return 'Function'; + if (Array.isArray(value)) return 'List'; + return 'Record'; +} + +export function evaluateCoreContractOperation( + operationId: string, + args: readonly CoreFixtureValue[], +): CoreFixtureValue { + switch (operationId) { + case 'Boolean.not': { + const [value] = expectCoreTypes(operationId, args, ['Boolean']); + return !value; + } + case 'Boolean.and': { + const [left, right] = expectCoreTypes(operationId, args, ['Boolean', 'Boolean']); + return left && right; + } + case 'Boolean.or': { + const [left, right] = expectCoreTypes(operationId, args, ['Boolean', 'Boolean']); + return left || right; + } + case 'Boolean.equals': { + const [left, right] = expectCoreTypes(operationId, args, ['Boolean', 'Boolean']); + return left === right; 
+ } + case 'Boolean.toString': { + const [value] = expectCoreTypes(operationId, args, ['Boolean']); + return value ? 'true' : 'false'; + } + case 'String.length': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return stringCodePoints(value).length; + } + case 'String.index': { + const [value, index] = expectCoreTypes(operationId, args, ['String', 'Number']); + if (!Number.isInteger(index) || index < 0) return CORE_FIXTURE_UNDEFINED; + const chars = stringCodePoints(value); + return index < chars.length ? (chars[index] ?? '') : CORE_FIXTURE_UNDEFINED; + } + case 'String.includes': { + const [value, search] = expectCoreTypes(operationId, args, ['String', 'String']); + return value.includes(search); + } + case 'String.startsWith': { + const [value, search] = expectCoreTypes(operationId, args, ['String', 'String']); + return value.startsWith(search); + } + case 'String.endsWith': { + const [value, search] = expectCoreTypes(operationId, args, ['String', 'String']); + return value.endsWith(search); + } + case 'String.slice': { + const [value, start, end] = expectCoreTypes(operationId, args, ['String', 'Number', 'Number']); + return stringCodePoints(value).slice(truncateOffset(start), truncateOffset(end)).join(''); + } + case 'String.trim': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return value.trim(); + } + case 'String.lower': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return value.toLowerCase(); + } + case 'String.upper': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return value.toUpperCase(); + } + case 'String.concat': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return left + right; + } + case 'String.equals': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return left === right; + } + case 'String.lessThan': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 
'String']); + return compareStrings(left, right) < 0; + } + case 'String.lessThanOrEqual': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return compareStrings(left, right) <= 0; + } + case 'String.greaterThan': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return compareStrings(left, right) > 0; + } + case 'String.greaterThanOrEqual': { + const [left, right] = expectCoreTypes(operationId, args, ['String', 'String']); + return compareStrings(left, right) >= 0; + } + case 'String.toString': { + const [value] = expectCoreTypes(operationId, args, ['String']); + return value; + } + case 'Number.negate': { + const [value] = expectCoreTypes(operationId, args, ['Number']); + return finiteNumberResult(operationId, -value); + } + case 'Number.add': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return finiteNumberResult(operationId, left + right); + } + case 'Number.subtract': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return finiteNumberResult(operationId, left - right); + } + case 'Number.multiply': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return finiteNumberResult(operationId, left * right); + } + case 'Number.divide': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + if (right === 0) throw new CoreContractEvaluationError('division-by-zero', 'Number.divide division by zero.'); + return finiteNumberResult(operationId, left / right); + } + case 'Number.remainder': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + if (right === 0) throw new CoreContractEvaluationError('division-by-zero', 'Number.remainder division by zero.'); + return finiteNumberResult(operationId, left % right); + } + case 'Number.lessThan': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return 
left < right; + } + case 'Number.lessThanOrEqual': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return left <= right; + } + case 'Number.greaterThan': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return left > right; + } + case 'Number.greaterThanOrEqual': { + const [left, right] = expectCoreTypes(operationId, args, ['Number', 'Number']); + return left >= right; + } + case 'List.length': { + const [value] = expectCoreTypes(operationId, args, ['List']); + return value.length; + } + case 'List.index': { + const [value, index] = expectCoreTypes(operationId, args, ['List', 'Number']); + if (!Number.isInteger(index) || index < 0) return CORE_FIXTURE_UNDEFINED; + return index < value.length && Object.hasOwn(value, index) + ? (value[index] as CoreFixtureValue) + : CORE_FIXTURE_UNDEFINED; + } + case 'Record.get': { + const [value, key] = expectCoreTypes(operationId, args, ['Record', 'String']); + return Object.hasOwn(value, key) ? 
(value[key] as CoreFixtureValue) : CORE_FIXTURE_UNDEFINED; + } + default: + throw new CoreContractEvaluationError( + 'unsupported-operation', + `Unsupported core contract operation: ${operationId}`, + ); + } +} + +function expectCoreTypes( + operationId: string, + args: readonly CoreFixtureValue[], + types: T, +): CoreTypeTuple { + if (args.length !== types.length) throw strictTypeError(operationId, types); + for (let index = 0; index < types.length; index += 1) { + if (!Object.hasOwn(args, index)) throw strictTypeError(operationId, types); + const arg = args[index] as CoreFixtureValue; + if (coreFixtureValueType(arg) !== types[index]) throw strictTypeError(operationId, types); + if (types[index] === 'Number' && (typeof arg !== 'number' || !Number.isFinite(arg))) { + throw strictTypeError(operationId, types); + } + } + return args as CoreTypeTuple; +} + +function strictTypeError(operationId: string, types: readonly CoreTypeName[]): CoreContractEvaluationError { + return new CoreContractEvaluationError('strict-type', `${operationId} expects ${types.join(', ')}.`); +} + +function stringCodePoints(value: string): string[] { + return Array.from(value); +} + +function compareStrings(left: string, right: string): number { + const leftPoints = Array.from(left, (char) => char.codePointAt(0) ?? 0); + const rightPoints = Array.from(right, (char) => char.codePointAt(0) ?? 0); + const length = Math.min(leftPoints.length, rightPoints.length); + for (let index = 0; index < length; index += 1) { + const delta = (leftPoints[index] ?? 0) - (rightPoints[index] ?? 
0); + if (delta !== 0) return delta; + } + return leftPoints.length - rightPoints.length; +} + +function truncateOffset(value: number): number { + return Math.trunc(value); +} + +function finiteNumberResult(operationId: string, value: number): number { + if (!Number.isFinite(value)) { + throw new CoreContractEvaluationError('strict-type', `${operationId} result must be finite.`); + } + return value; +} + +type CoreTypeTuple = { + readonly [Index in keyof T]: T[Index] extends 'Boolean' + ? boolean + : T[Index] extends 'Number' + ? number + : T[Index] extends 'String' + ? string + : T[Index] extends 'List' + ? readonly CoreFixtureValue[] + : T[Index] extends 'Record' + ? { readonly [key: string]: CoreFixtureValue } + : T[Index] extends 'Null' + ? null + : T[Index] extends 'Undefined' + ? typeof CORE_FIXTURE_UNDEFINED + : T[Index] extends 'Function' + ? never + : CoreFixtureValue; +}; diff --git a/packages/core/src/core-contracts/string.ts b/packages/core/src/core-contracts/string.ts new file mode 100644 index 00000000..1a67f6ee --- /dev/null +++ b/packages/core/src/core-contracts/string.ts @@ -0,0 +1,337 @@ +import { CORE_FIXTURE_UNDEFINED, type CoreTypeContract } from './schema.js'; + +export const STRING_CONTRACT = { + name: 'String', + kind: 'primitive', + strict: true, + operations: [ + { + id: 'String.length', + kind: 'property', + args: ['String'], + returns: 'Number', + lowers: { + kern: 'String.length($0)', + ts: '__kernStringLength($0)', + python: '__kern_string_length($0)', + }, + fixtures: [ + { args: [''], returns: 0 }, + { args: ['kern'], returns: 4 }, + { args: ['𐐷'], returns: 1 }, + { args: ['e\u0301'], returns: 2 }, + ], + review: { + summary: 'KERN string length counts Unicode code points with no normalization.', + graph: ['String', 'Number', 'portable', 'unicode-code-point'], + }, + }, + { + id: 'String.index', + kind: 'method', + args: ['String', 'Number'], + returns: ['String', 'Undefined'], + lowers: { + kern: 'String.index($0, $1)', + ts: 
'__kernStringIndex($0, $1)', + python: '__kern_string_index($0, $1)', + }, + fixtures: [ + { args: ['abc', 1], returns: 'b' }, + { args: ['a𐐷b', 1], returns: '𐐷' }, + { args: ['abc', 4], returns: CORE_FIXTURE_UNDEFINED }, + { args: ['abc', -1], returns: CORE_FIXTURE_UNDEFINED }, + { args: ['abc', 1.2], returns: CORE_FIXTURE_UNDEFINED }, + { args: ['abc', '1'], throws: { code: 'strict-type', message: 'String.index expects String, Number.' } }, + ], + review: { + summary: + 'Strict string indexing over Unicode code-point offsets; negative, fractional, and out-of-range misses return Undefined.', + graph: ['String', 'Undefined', 'strict', 'portable', 'unicode-code-point'], + }, + }, + { + id: 'String.includes', + kind: 'method', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.includes($0, $1)', + ts: '__kernStringIncludes($0, $1)', + python: '__kern_string_includes($0, $1)', + }, + fixtures: [ + { args: ['abc', 'b'], returns: true }, + { args: ['abc', 'x'], returns: false }, + ], + review: { + summary: 'Strict string containment.', + graph: ['String', 'Boolean', 'portable'], + }, + }, + { + id: 'String.startsWith', + kind: 'method', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.startsWith($0, $1)', + ts: '__kernStringStartsWith($0, $1)', + python: '__kern_string_starts_with($0, $1)', + }, + fixtures: [ + { args: ['kern', 'ke'], returns: true }, + { args: ['kern', 'rn'], returns: false }, + ], + review: { + summary: 'Strict string prefix test.', + graph: ['String', 'Boolean', 'portable'], + }, + }, + { + id: 'String.endsWith', + kind: 'method', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.endsWith($0, $1)', + ts: '__kernStringEndsWith($0, $1)', + python: '__kern_string_ends_with($0, $1)', + }, + fixtures: [ + { args: ['kern', 'rn'], returns: true }, + { args: ['kern', 'ke'], returns: false }, + ], + review: { + summary: 'Strict string suffix test.', + graph: ['String', 
'Boolean', 'portable'], + }, + }, + { + id: 'String.slice', + kind: 'method', + args: ['String', 'Number', 'Number'], + returns: 'String', + lowers: { + kern: 'String.slice($0, $1, $2)', + ts: '__kernStringSlice($0, $1, $2)', + python: '__kern_string_slice($0, $1, $2)', + }, + fixtures: [ + { args: ['abcdef', 1, 4], returns: 'bcd' }, + { args: ['abcdef', 1.9, 4.2], returns: 'bcd' }, + { args: ['abc', 0, 2], returns: 'ab' }, + { args: ['abcdef', -3, -1], returns: 'de' }, + { args: ['abcdef', -20, 2], returns: 'ab' }, + { args: ['abcdef', 4, 2], returns: '' }, + { args: ['a𐐷b', 1, 2], returns: '𐐷' }, + { args: ['e\u0301x', 0, 2], returns: 'e\u0301' }, + { + args: ['abc', '0', 2], + throws: { code: 'strict-type', message: 'String.slice expects String, Number, Number.' }, + }, + { + args: ['abc', 1], + throws: { code: 'strict-type', message: 'String.slice expects String, Number, Number.' }, + }, + ], + review: { + summary: + 'Strict string slice over Unicode code-point offsets with explicit start and end offsets; negative offsets count from the end.', + graph: ['String', 'Number', 'strict', 'portable', 'unicode-code-point'], + }, + }, + { + id: 'String.trim', + kind: 'method', + args: ['String'], + returns: 'String', + lowers: { + kern: 'String.trim($0)', + ts: '__kernStringTrim($0)', + python: '__kern_string_trim($0)', + }, + fixtures: [ + { args: [' kern '], returns: 'kern' }, + { args: ['\ncore\t'], returns: 'core' }, + ], + review: { + summary: 'Portable surrounding whitespace trim.', + graph: ['String', 'portable'], + }, + }, + { + id: 'String.lower', + kind: 'method', + args: ['String'], + returns: 'String', + lowers: { + kern: 'String.lower($0)', + ts: '__kernStringLower($0)', + python: '__kern_string_lower($0)', + }, + fixtures: [ + { args: ['KERN'], returns: 'kern' }, + { args: ['Core'], returns: 'core' }, + ], + review: { + summary: 'Portable lowercase conversion.', + graph: ['String', 'portable'], + }, + }, + { + id: 'String.upper', + kind: 'method', + args: 
['String'], + returns: 'String', + lowers: { + kern: 'String.upper($0)', + ts: '__kernStringUpper($0)', + python: '__kern_string_upper($0)', + }, + fixtures: [ + { args: ['kern'], returns: 'KERN' }, + { args: ['Core'], returns: 'CORE' }, + ], + review: { + summary: 'Portable uppercase conversion.', + graph: ['String', 'portable'], + }, + }, + { + id: 'String.concat', + kind: 'method', + args: ['String', 'String'], + returns: 'String', + lowers: { + kern: 'String.concat($0, $1)', + ts: '__kernStringConcat($0, $1)', + python: '__kern_string_concat($0, $1)', + }, + fixtures: [ + { args: ['kern', 'lang'], returns: 'kernlang' }, + { args: ['count:', 2], throws: { code: 'strict-type', message: 'String.concat expects String, String.' } }, + ], + review: { + summary: 'Strict string concatenation; both operands must be String.', + graph: ['String', 'strict', 'portable'], + }, + }, + { + id: 'String.equals', + kind: 'method', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.equals($0, $1)', + ts: '__kernStringEquals($0, $1)', + python: '__kern_string_equals($0, $1)', + }, + fixtures: [ + { args: ['kern', 'kern'], returns: true }, + { args: ['kern', 'core'], returns: false }, + { args: ['kern', true], throws: { code: 'strict-type', message: 'String.equals expects String, String.' } }, + ], + review: { + summary: 'Strict string equality; cross-type equality is a type error in schema v1.', + graph: ['String', 'Boolean', 'strict', 'portable'], + }, + }, + { + id: 'String.lessThan', + kind: 'operator', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.lessThan($0, $1)', + ts: '__kernStringLessThan($0, $1)', + python: '__kern_string_less_than($0, $1)', + }, + fixtures: [ + { args: ['abc', 'abd'], returns: true }, + { args: ['abc', 'abc'], returns: false }, + { args: ['abc', true], throws: { code: 'strict-type', message: 'String.lessThan expects String, String.' 
} }, + ], + review: { + summary: 'Strict string less-than comparison using KERN portable code-point ordering.', + graph: ['String', 'Boolean', 'strict', 'portable', 'unicode-code-point-order'], + }, + }, + { + id: 'String.lessThanOrEqual', + kind: 'operator', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.lessThanOrEqual($0, $1)', + ts: '__kernStringLessThanOrEqual($0, $1)', + python: '__kern_string_less_than_or_equal($0, $1)', + }, + fixtures: [ + { args: ['abc', 'abc'], returns: true }, + { args: ['abd', 'abc'], returns: false }, + ], + review: { + summary: 'Strict string less-than-or-equal comparison using KERN portable code-point ordering.', + graph: ['String', 'Boolean', 'strict', 'portable', 'unicode-code-point-order'], + }, + }, + { + id: 'String.greaterThan', + kind: 'operator', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.greaterThan($0, $1)', + ts: '__kernStringGreaterThan($0, $1)', + python: '__kern_string_greater_than($0, $1)', + }, + fixtures: [ + { args: ['abd', 'abc'], returns: true }, + { args: ['abc', 'abc'], returns: false }, + ], + review: { + summary: 'Strict string greater-than comparison using KERN portable code-point ordering.', + graph: ['String', 'Boolean', 'strict', 'portable', 'unicode-code-point-order'], + }, + }, + { + id: 'String.greaterThanOrEqual', + kind: 'operator', + args: ['String', 'String'], + returns: 'Boolean', + lowers: { + kern: 'String.greaterThanOrEqual($0, $1)', + ts: '__kernStringGreaterThanOrEqual($0, $1)', + python: '__kern_string_greater_than_or_equal($0, $1)', + }, + fixtures: [ + { args: ['abc', 'abc'], returns: true }, + { args: ['abc', 'abd'], returns: false }, + ], + review: { + summary: 'Strict string greater-than-or-equal comparison using KERN portable code-point ordering.', + graph: ['String', 'Boolean', 'strict', 'portable', 'unicode-code-point-order'], + }, + }, + { + id: 'String.toString', + kind: 'coercion', + args: ['String'], + returns: 
'String', + lowers: { + kern: 'String.toString($0)', + ts: '__kernStringToString($0)', + python: '__kern_string_to_string($0)', + }, + fixtures: [ + { args: ['kern'], returns: 'kern' }, + { args: [''], returns: '' }, + ], + review: { + summary: 'String identity coercion.', + graph: ['String', 'portable'], + }, + }, + ], +} as const satisfies CoreTypeContract; diff --git a/packages/core/src/core-runtime/contract-adapter.ts b/packages/core/src/core-runtime/contract-adapter.ts new file mode 100644 index 00000000..226c6343 --- /dev/null +++ b/packages/core/src/core-runtime/contract-adapter.ts @@ -0,0 +1,93 @@ +import { + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + type CoreFixtureValue, + isCoreFixtureFunction, + isCoreFixtureUndefined, +} from '../core-contracts/index.js'; +import type { KernValue } from './index.js'; +import { brandValue } from './value-brand.js'; + +export class CoreRuntimeContractAdapterError extends Error { + constructor(message: string) { + super(message); + this.name = 'CoreRuntimeContractAdapterError'; + } +} + +export function kernValueToCoreFixtureValue(value: KernValue): CoreFixtureValue { + switch (value.kind) { + case 'null': + return null; + case 'undefined': + return CORE_FIXTURE_UNDEFINED; + case 'boolean': + case 'number': + case 'string': + return value.value; + case 'array': + return value.items.map(kernValueToCoreFixtureValue); + case 'record': + if (isReservedFixtureSentinelRecord(value.entries)) { + throw new CoreRuntimeContractAdapterError( + 'KERN record value uses reserved core fixture sentinel shape: __kernFixture.', + ); + } + return Object.fromEntries( + Object.entries(value.entries).map(([key, entry]) => [key, kernValueToCoreFixtureValue(entry)]), + ); + case 'function': + case 'builtin': + case 'class': + case 'bound-method': + case 'super': + return CORE_FIXTURE_FUNCTION; + case 'instance': + if (isReservedFixtureSentinelRecord(value.fields)) { + throw new CoreRuntimeContractAdapterError( + 'KERN instance value 
uses reserved core fixture sentinel shape: __kernFixture.', + ); + } + return Object.fromEntries( + Object.entries(value.fields).map(([key, entry]) => [key, kernValueToCoreFixtureValue(entry)]), + ); + } +} + +export function coreFixtureValueToKernValue(value: CoreFixtureValue): KernValue { + if (value === null) return brandValue({ kind: 'null' }); + if (isCoreFixtureUndefined(value)) return brandValue({ kind: 'undefined' }); + if (isCoreFixtureFunction(value)) { + throw new CoreRuntimeContractAdapterError( + 'Core Function fixture references cannot be materialized as runtime code.', + ); + } + switch (typeof value) { + case 'boolean': + return brandValue({ kind: 'boolean', value }); + case 'number': + return brandValue({ kind: 'number', value }); + case 'string': + return brandValue({ kind: 'string', value }); + case 'object': { + if (Array.isArray(value)) { + return brandValue({ kind: 'array', items: value.map(coreFixtureValueToKernValue) }); + } + const entries = Object.create(null) as Record; + for (const [key, entry] of Object.entries(value)) entries[key] = coreFixtureValueToKernValue(entry); + return brandValue({ kind: 'record', entries }); + } + } +} + +export function roundTripKernContractDataValue(value: KernValue): KernValue { + return coreFixtureValueToKernValue(kernValueToCoreFixtureValue(value)); +} + +function isReservedFixtureSentinelRecord(entries: Record): boolean { + return ( + Object.keys(entries).length === 1 && + entries.__kernFixture?.kind === 'string' && + (entries.__kernFixture.value === 'Undefined' || entries.__kernFixture.value === 'Function') + ); +} diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 3498569b..afc7f74d 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -1,9 +1,20 @@ +import { + CORE_TYPE_CONTRACTS, + CoreContractEvaluationError, + type CoreFixtureValue, + evaluateCoreContractOperation, +} from '../core-contracts/index.js'; 
import { parseExpression } from '../parser-expression.js'; import { splitPortableExpressionList } from '../portable-expression-list.js'; import type { IRNode } from '../types.js'; import type { ValueIR } from '../value-ir.js'; +import { + CoreRuntimeContractAdapterError, + coreFixtureValueToKernValue, + kernValueToCoreFixtureValue, +} from './contract-adapter.js'; +import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; -const KERN_VALUE_BRAND: unique symbol = Symbol('KERN core runtime value'); const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; export type KernValue = @@ -15,7 +26,11 @@ export type KernValue = | { kind: 'array'; items: KernValue[] } | { kind: 'record'; entries: Record } | KernFunctionValue - | KernBuiltinValue; + | KernBuiltinValue + | KernClassValue + | KernInstanceValue + | KernBoundMethodValue + | KernSuperValue; export interface KernFunctionValue { kind: 'function'; @@ -31,6 +46,35 @@ export interface KernBuiltinValue { call: (args: KernValue[]) => KernValue; } +export interface KernClassValue { + kind: 'class'; + name: string; + node: IRNode; + env: CoreRuntimeEnv; +} + +export interface KernInstanceValue { + kind: 'instance'; + classValue: KernClassValue; + fields: Record; + initializedClasses: Set; +} + +export interface KernBoundMethodValue { + kind: 'bound-method'; + name: string; + receiver: KernInstanceValue; + methodNode: IRNode; + ownerClass: KernClassValue; +} + +export interface KernSuperValue { + kind: 'super'; + receiver: KernInstanceValue; + ownerClass: KernClassValue; + mode: 'constructor' | 'method'; +} + export interface RuntimeParam { name: string; type?: string; @@ -60,6 +104,15 @@ export class CoreRuntimeEnv { return value; } + assign(name: string, value: KernValue): KernValue { + if (this.bindings.has(name)) { + this.bindings.set(name, value); + return value; + } + if (this.parent) return this.parent.assign(name, value); + throw new Error(`KERN core runtime binding not found: ${name}`); + } + lookup(name: string): 
KernValue { if (this.bindings.has(name)) return this.bindings.get(name) ?? kUndefined(); if (this.parent) return this.parent.lookup(name); @@ -140,9 +193,15 @@ export function toHostValue(value: KernValue | undefined): unknown { return value.items.map(toHostValue); case 'record': return Object.fromEntries(Object.entries(value.entries).map(([key, entry]) => [key, toHostValue(entry)])); + case 'instance': + return Object.fromEntries(Object.entries(value.fields).map(([key, entry]) => [key, toHostValue(entry)])); case 'function': case 'builtin': + case 'class': + case 'bound-method': return `[KERN ${value.kind}${value.name ? ` ${value.name}` : ''}]`; + case 'super': + return `[KERN super ${value.ownerClass.name}]`; } } @@ -161,6 +220,10 @@ export function kernTruthy(value: KernValue): boolean { case 'record': case 'function': case 'builtin': + case 'class': + case 'instance': + case 'bound-method': + case 'super': return true; } } @@ -228,6 +291,18 @@ function executeNode(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { env.define(requiredString(node.props?.name, 'fn name='), fn); return { kind: 'normal', value: kUndefined() }; } + case 'class': { + const klass = makeClass(node, env); + env.define(klass.name, klass); + return { kind: 'normal', value: kUndefined() }; + } + case 'assign': + executeAssign(node, env); + return { kind: 'normal', value: kUndefined() }; + case 'do': { + evalCoreExpression(unwrapExpr(node.props?.value, 'do value='), env); + return { kind: 'normal', value: kUndefined() }; + } case 'coalesce': case 'firstDefined': return executeCoalesce(node, env); @@ -329,6 +404,8 @@ function evalValueIR(node: ValueIR, env: CoreRuntimeEnv): KernValue { return evalCall(node, env); case 'lambda': throw new Error('KERN core runtime lambda expressions are not supported in the first runtime slice.'); + case 'new': + return evalNew(node, env); default: throw new Error(`KERN core runtime unsupported expression kind: ${node.kind}`); } @@ -350,10 +427,13 @@ function 
evalObjectLiteral(node: Extract, env: C function evalUnary(node: Extract, env: CoreRuntimeEnv): KernValue { const arg = evalValueIR(node.argument, env); - if (node.op === '!') return kBoolean(!kernTruthy(arg)); + if (node.op === '!') { + if (arg.kind !== 'boolean') throw new Error('KERN core runtime unary ! requires a boolean.'); + return dispatchCoreContractOperation('Boolean.not', [arg.value]); + } if (node.op === '-' || node.op === '+') { if (arg.kind !== 'number') throw new Error(`KERN core runtime unary ${node.op} requires a number.`); - return kNumber(node.op === '-' ? -arg.value : arg.value); + return node.op === '-' ? dispatchCoreContractOperation('Number.negate', [arg.value]) : arg; } throw new Error(`KERN core runtime unsupported unary operator: ${node.op}`); } @@ -376,8 +456,12 @@ function evalBinary(node: Extract, env: CoreRuntime const right = evalValueIR(node.right, env); switch (node.op) { case '+': - if (left.kind === 'number' && right.kind === 'number') return kNumber(left.value + right.value); - if (left.kind === 'string' && right.kind === 'string') return kString(left.value + right.value); + if (left.kind === 'number' && right.kind === 'number') { + return dispatchCoreContractOperation('Number.add', [left.value, right.value]); + } + if (left.kind === 'string' && right.kind === 'string') { + return dispatchCoreContractOperation('String.concat', [left.value, right.value]); + } throw new Error('KERN core runtime + requires two numbers or two strings.'); case '-': case '*': @@ -404,21 +488,34 @@ function evalNumberBinary(op: string, left: KernValue, right: KernValue): KernVa if (left.kind !== 'number' || right.kind !== 'number') { throw new Error(`KERN core runtime ${op} requires two numbers.`); } - if (op === '-') return kNumber(left.value - right.value); - if (op === '*') return kNumber(left.value * right.value); - if (right.value === 0 && (op === '/' || op === '%')) throw new Error(`KERN core runtime ${op} division by zero.`); - if (op === '/') 
return kNumber(left.value / right.value); - return kNumber(left.value % right.value); + switch (op) { + case '-': + return dispatchCoreContractOperation('Number.subtract', [left.value, right.value]); + case '*': + return dispatchCoreContractOperation('Number.multiply', [left.value, right.value]); + case '/': + return dispatchCoreContractOperation('Number.divide', [left.value, right.value]); + case '%': + return dispatchCoreContractOperation('Number.remainder', [left.value, right.value]); + default: + throw new Error(`KERN core runtime unsupported numeric operator: ${op}`); + } } function evalOrderedComparison(op: string, left: KernValue, right: KernValue): KernValue { if (!((left.kind === 'number' && right.kind === 'number') || (left.kind === 'string' && right.kind === 'string'))) { throw new Error(`KERN core runtime ${op} requires same-kind number or string operands.`); } - if (op === '<') return kBoolean(left.value < right.value); - if (op === '<=') return kBoolean(left.value <= right.value); - if (op === '>') return kBoolean(left.value > right.value); - return kBoolean(left.value >= right.value); + if (left.kind === 'number' && right.kind === 'number') { + if (op === '<') return dispatchCoreContractOperation('Number.lessThan', [left.value, right.value]); + if (op === '<=') return dispatchCoreContractOperation('Number.lessThanOrEqual', [left.value, right.value]); + if (op === '>') return dispatchCoreContractOperation('Number.greaterThan', [left.value, right.value]); + return dispatchCoreContractOperation('Number.greaterThanOrEqual', [left.value, right.value]); + } + if (op === '<') return dispatchCoreContractOperation('String.lessThan', [left.value, right.value]); + if (op === '<=') return dispatchCoreContractOperation('String.lessThanOrEqual', [left.value, right.value]); + if (op === '>') return dispatchCoreContractOperation('String.greaterThan', [left.value, right.value]); + return dispatchCoreContractOperation('String.greaterThanOrEqual', [left.value, 
right.value]); } function evalMember(node: Extract, env: CoreRuntimeEnv): KernValue { @@ -428,10 +525,16 @@ function evalMember(node: Extract, env: CoreRuntime throw new Error(`KERN core runtime cannot read .${node.property} from ${object.kind}.`); } if (object.kind === 'record') { - return Object.hasOwn(object.entries, node.property) ? object.entries[node.property] : kUndefined(); + return evalRecordGet(object, node.property); } - if (object.kind === 'array' && node.property === 'length') return kNumber(object.items.length); - if (object.kind === 'string' && node.property === 'length') return kNumber(object.value.length); + if (object.kind === 'instance') return evalInstanceMember(object, node.property); + if (object.kind === 'super') return evalSuperMember(object, node.property); + if (object.kind === 'class') return evalClassMember(object, node.property); + if (object.kind === 'array' && node.property === 'length') { + return kNumber(object.items.length); + } + if (object.kind === 'string') return evalStringMember(object, node.property); + if (object.kind === 'boolean') return evalBooleanMember(object, node.property); return kUndefined(); } @@ -444,23 +547,115 @@ function evalIndex(node: Extract, env: CoreRuntimeEn const index = evalValueIR(node.index, env); if (object.kind === 'array') { if (index.kind !== 'number') throw new Error('KERN core runtime array index must be a number.'); - return object.items[index.value] ?? kUndefined(); + return evalListIndex(object, index.value); } if (object.kind === 'record' || object.kind === 'string') { if (index.kind !== 'string' && index.kind !== 'number') { throw new Error('KERN core runtime record/string index must be a string or number.'); } const key = String(index.value); - if (object.kind === 'record') return Object.hasOwn(object.entries, key) ? object.entries[key] : kUndefined(); + if (object.kind === 'record') return evalRecordGet(object, key); const charIndex = index.kind === 'number' ? 
index.value : INTEGER_INDEX_RE.test(index.value) ? Number(index.value) : NaN; - return Number.isInteger(charIndex) && charIndex >= 0 && charIndex < object.value.length - ? kString(object.value[charIndex] ?? '') - : kUndefined(); + if (!Number.isFinite(charIndex) && index.kind !== 'number') return kUndefined(); + return dispatchCoreContractOperation('String.index', [object.value, charIndex]); } return kUndefined(); } +function evalStringMember(object: Extract, property: string): KernValue { + if (property === 'length') return dispatchCoreContractOperation('String.length', [object.value]); + const operation = stringMemberOperation(property); + if (!operation) return kUndefined(); + return boundCoreContractOperation(`String.${operation}`, [object.value]); +} + +function evalBooleanMember(object: Extract, property: string): KernValue { + const operation = booleanMemberOperation(property); + if (!operation) return kUndefined(); + return boundCoreContractOperation(`Boolean.${operation}`, [object.value]); +} + +function stringMemberOperation(property: string): string | undefined { + switch (property) { + case 'includes': + case 'index': + case 'startsWith': + case 'endsWith': + case 'slice': + case 'trim': + case 'lower': + case 'upper': + case 'concat': + case 'equals': + case 'toString': + return property; + default: + return undefined; + } +} + +function booleanMemberOperation(property: string): string | undefined { + switch (property) { + case 'not': + case 'and': + case 'or': + case 'equals': + case 'toString': + return property; + default: + return undefined; + } +} + +function boundCoreContractOperation(operationId: string, receiverArgs: readonly CoreFixtureValue[]): KernValue { + return brandValue({ + kind: 'builtin', + name: operationId, + call: (args: KernValue[]) => { + try { + return dispatchCoreContractOperation(operationId, [...receiverArgs, ...args.map(kernValueToCoreFixtureValue)]); + } catch (error) { + if (error instanceof 
CoreRuntimeContractAdapterError) { + throw new CoreContractEvaluationError('strict-type', coreOperationStrictTypeMessage(operationId)); + } + throw error; + } + }, + }); +} + +function dispatchCoreContractOperation(operationId: string, args: readonly CoreFixtureValue[]): KernValue { + return coreFixtureValueToKernValue(evaluateCoreContractOperation(operationId, args)); +} + +function evalListIndex(object: Extract, index: number): KernValue { + if (!Number.isFinite(index) || !Number.isInteger(index) || index < 0 || index >= object.items.length) { + return kUndefined(); + } + return object.items[index] ?? kUndefined(); +} + +function evalRecordGet(object: Extract, key: string): KernValue { + if (!Object.hasOwn(object.entries, key)) + return dispatchCoreContractOperation('Record.get', [recordShapeFixture(object), key]); + return object.entries[key] ?? kUndefined(); +} + +function recordShapeFixture(object: Extract): Record { + const shape = Object.create(null) as Record; + for (const key of Object.keys(object.entries)) shape[key] = null; + return shape; +} + +function coreOperationStrictTypeMessage(operationId: string): string { + for (const contract of Object.values(CORE_TYPE_CONTRACTS.types)) { + const operation = contract.operations.find((operation) => operation.id === operationId); + if (operation) return `${operationId} expects ${operation.args.join(', ')}.`; + } + return `${operationId} received an unsupported runtime value.`; +} + function evalCall(node: Extract, env: CoreRuntimeEnv): KernValue { const callee = evalValueIR(node.callee, env); if (isNullish(callee)) { @@ -470,9 +665,250 @@ function evalCall(node: Extract, env: CoreRuntimeEnv) const args = node.args.map((arg) => evalValueIR(arg, env)); if (callee.kind === 'builtin') return callee.call(args); if (callee.kind === 'function') return callFunctionValue(callee, args).value; + if (callee.kind === 'class') return constructClassValue(callee, args); + if (callee.kind === 'bound-method') return 
callBoundMethodValue(callee, args).value; + if (callee.kind === 'super') return callSuperConstructor(callee, args); throw new Error(`KERN core runtime cannot call ${callee.kind}.`); } +function evalNew(node: Extract, env: CoreRuntimeEnv): KernValue { + if (node.argument.kind === 'member') { + return evalValueIR({ ...node.argument, object: { kind: 'new', argument: node.argument.object } as ValueIR }, env); + } + if (node.argument.kind === 'index') { + return evalValueIR({ ...node.argument, object: { kind: 'new', argument: node.argument.object } as ValueIR }, env); + } + if ( + node.argument.kind === 'call' && + (node.argument.callee.kind === 'member' || node.argument.callee.kind === 'index') + ) { + return evalValueIR( + { + ...node.argument, + callee: { + ...node.argument.callee, + object: { kind: 'new', argument: node.argument.callee.object } as ValueIR, + }, + }, + env, + ); + } + if (node.argument.kind !== 'call') throw new Error('KERN core runtime new expects a constructor call.'); + const callee = evalValueIR(node.argument.callee, env); + if (callee.kind !== 'class') throw new Error('KERN core runtime new expects a class value.'); + return constructClassValue( + callee, + node.argument.args.map((arg) => evalValueIR(arg, env)), + ); +} + +function makeClass(node: IRNode, env: CoreRuntimeEnv): KernClassValue { + if (node.type !== 'class') throw new Error('KERN core runtime makeClass expects a class node.'); + return brandValue({ + kind: 'class', + name: requiredString(node.props?.name, 'class name='), + node, + env, + }); +} + +function constructClassValue(klass: KernClassValue, args: readonly KernValue[]): KernInstanceValue { + const instance = brandValue({ + kind: 'instance' as const, + classValue: klass, + fields: createRecordEntries(), + initializedClasses: new Set(), + }); + initializeClassLayer(instance, klass, args, true); + return instance; +} + +function initializeClassLayer( + instance: KernInstanceValue, + klass: KernClassValue, + args: readonly 
KernValue[], + receivesConstructorArgs: boolean, +): void { + if (instance.initializedClasses.has(klass.name)) { + throw new Error(`KERN core runtime class already initialized: ${klass.name}`); + } + const base = resolveBaseClass(klass); + const ctor = firstRuntimeChild(klass.node, 'constructor'); + const ctorCallsSuper = Boolean(base && ctor && constructorCallsSuper(ctor)); + if (base && !ctorCallsSuper) initializeClassLayer(instance, base, [], false); + if (!ctorCallsSuper) initializeClassFields(instance, klass); + if (!ctor) { + if (receivesConstructorArgs && args.length > 0) { + throw new Error(`KERN core runtime class ${klass.name} has no constructor.`); + } + instance.initializedClasses.add(klass.name); + return; + } + callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; + if (base && ctorCallsSuper && !instance.initializedClasses.has(base.name)) { + throw new Error(`KERN core runtime constructor ${klass.name} must call super(...).`); + } + instance.initializedClasses.add(klass.name); +} + +function initializeClassFields(instance: KernInstanceValue, klass: KernClassValue): void { + for (const field of runtimeChildNodes(klass.node, 'field')) { + const name = requiredString(field.props?.name, 'field name='); + if (field.props?.static === true || field.props?.static === 'true') continue; + const value = + Object.hasOwn(field.props ?? {}, 'value') || Object.hasOwn(field.props ?? {}, 'default') + ? evalCoreExpression(runtimeFieldInitializerExpr(field), classThisEnv(klass, instance)) + : kUndefined(); + instance.fields[name] = value; + } +} + +function runtimeFieldInitializerExpr(node: IRNode): string { + const propName = Object.hasOwn(node.props ?? {}, 'value') ? 'value' : 'default'; + const rawValue = propName === 'value' ? node.props?.value : node.props?.default; + if (typeof rawValue === 'string' && (node.__quotedProps ?? 
[]).includes(propName)) return JSON.stringify(rawValue); + return unwrapExpr(rawValue, 'field value='); +} + +function evalInstanceMember(object: KernInstanceValue, property: string): KernValue { + if (Object.hasOwn(object.fields, property)) return object.fields[property] ?? kUndefined(); + const getter = findClassMember(object.classValue, 'getter', property); + if (getter) return callClassMemberBody(getter.node, getter.owner, object, []).value; + const method = findClassMember(object.classValue, 'method', property); + if (method) { + return brandValue({ + kind: 'bound-method', + name: `${object.classValue.name}.${property}`, + receiver: object, + methodNode: method.node, + ownerClass: method.owner, + }); + } + return kUndefined(); +} + +function evalSuperMember(object: KernSuperValue, property: string): KernValue { + const base = resolveBaseClass(object.ownerClass); + if (!base) return kUndefined(); + const getter = findClassMember(base, 'getter', property); + if (getter) return callClassMemberBody(getter.node, getter.owner, object.receiver, []).value; + const method = findClassMember(base, 'method', property); + if (method) { + return brandValue({ + kind: 'bound-method', + name: `${base.name}.${property}`, + receiver: object.receiver, + methodNode: method.node, + ownerClass: method.owner, + }); + } + if (Object.hasOwn(object.receiver.fields, property)) return object.receiver.fields[property] ?? 
kUndefined(); + return kUndefined(); +} + +function evalClassMember(object: KernClassValue, property: string): KernValue { + const method = findClassMember(object, 'method', property, true); + if (method) { + return brandValue({ + kind: 'builtin', + name: `${object.name}.${property}`, + call: (args) => callClassMemberBody(method.node, method.owner, undefined, args).value, + }); + } + return kUndefined(); +} + +function callBoundMethodValue( + method: KernBoundMethodValue, + args: readonly KernValue[], +): { value: KernValue; env: CoreRuntimeEnv } { + return callClassMemberBody(method.methodNode, method.ownerClass, method.receiver, args); +} + +function callSuperConstructor(value: KernSuperValue, args: readonly KernValue[]): KernValue { + if (value.mode !== 'constructor') { + throw new Error('KERN core runtime super(...) is only valid inside a constructor.'); + } + const base = resolveBaseClass(value.ownerClass); + if (!base) throw new Error(`KERN core runtime class ${value.ownerClass.name} has no base class.`); + initializeClassLayer(value.receiver, base, args, true); + initializeClassFields(value.receiver, value.ownerClass); + return value.receiver; +} + +function callClassMemberBody( + memberNode: IRNode, + ownerClass: KernClassValue, + receiver: KernInstanceValue | undefined, + args: readonly KernValue[], +): { value: KernValue; env: CoreRuntimeEnv } { + const callEnv = ownerClass.env.child(); + if (receiver) { + callEnv.define('this', receiver); + if (resolveBaseClass(ownerClass)) { + callEnv.define( + 'super', + brandValue({ + kind: 'super', + receiver, + ownerClass, + mode: memberNode.type === 'constructor' ? 'constructor' : 'method', + }), + ); + } + } + const params = runtimeParams(memberNode); + validateRuntimeArgs(`${ownerClass.name}.${memberNode.type}`, params, args); + params.forEach((param, index) => { + const provided = args[index]; + const value = + provided === undefined || (provided.kind === 'undefined' && param.defaultExpr) + ? 
param.defaultExpr + ? evalCoreExpression(param.defaultExpr, callEnv) + : kUndefined() + : provided; + callEnv.define(param.name, value); + }); + const completion = executeSequence(runtimeFunctionBody(memberNode), callEnv); + return { value: completion.value, env: callEnv }; +} + +function findClassMember( + klass: KernClassValue, + type: 'method' | 'getter', + name: string, + staticOnly = false, +): { node: IRNode; owner: KernClassValue } | undefined { + for (const child of klass.node.children ?? []) { + if (child.type !== type || child.props?.name !== name) continue; + const isStatic = child.props?.static === true || child.props?.static === 'true'; + if (staticOnly !== isStatic) continue; + return { node: child, owner: klass }; + } + const base = resolveBaseClass(klass); + return base ? findClassMember(base, type, name, staticOnly) : undefined; +} + +function resolveBaseClass(klass: KernClassValue): KernClassValue | undefined { + const baseName = classBaseName(klass.node.props?.extends); + if (!baseName) return undefined; + const base = klass.env.lookup(baseName); + if (base.kind !== 'class') throw new Error(`KERN core runtime base class is not a class: ${baseName}`); + return base; +} + +function classBaseName(value: unknown): string | undefined { + if (typeof value !== 'string' || !value.trim()) return undefined; + const match = /^([A-Za-z_$][\w$]*)/.exec(value.trim()); + return match?.[1]; +} + +function classThisEnv(klass: KernClassValue, receiver: KernInstanceValue): CoreRuntimeEnv { + const env = klass.env.child(); + env.define('this', receiver); + return env; +} + function makeFunction(node: IRNode, env: CoreRuntimeEnv): KernFunctionValue { return brandValue({ kind: 'function', @@ -488,6 +924,7 @@ function callFunctionValue( args: readonly KernValue[], ): { value: KernValue; env: CoreRuntimeEnv } { const callEnv = fn.env.child(); + validateRuntimeArgs(fn.name ?? 
'anonymous function', fn.params, args); fn.params.forEach((param, index) => { const provided = args[index]; const value = @@ -502,14 +939,144 @@ function callFunctionValue( return { value: completion.value, env: callEnv }; } +function validateRuntimeArgs(label: string, params: readonly RuntimeParam[], args: readonly KernValue[]): void { + if (args.length > params.length) { + throw new Error(`KERN core runtime ${label} received too many arguments.`); + } + params.forEach((param, index) => { + if (index >= args.length && !param.defaultExpr) { + throw new Error(`KERN core runtime ${label} missing required argument: ${param.name}.`); + } + }); +} + +function executeAssign(node: IRNode, env: CoreRuntimeEnv): void { + const target = requiredString(node.props?.target, 'assign target='); + if (Object.hasOwn(node.props ?? {}, 'op') && node.props?.op !== '=') { + throw new Error('KERN core runtime assign supports only direct assignment in this slice.'); + } + const value = evalCoreExpression(unwrapExpr(node.props?.value, 'assign value='), env); + assignRuntimeTarget(target, value, env); +} + +function assignRuntimeTarget(target: string, value: KernValue, env: CoreRuntimeEnv): void { + const parsed = parseExpression(target); + if (parsed.kind === 'ident') { + env.assign(parsed.name, value); + return; + } + if (parsed.kind === 'member') { + const object = evalValueIR(parsed.object, env); + if (object.kind === 'instance') { + object.fields[parsed.property] = value; + return; + } + if (object.kind === 'record') { + object.entries[parsed.property] = value; + return; + } + throw new Error(`KERN core runtime cannot assign member on ${object.kind}.`); + } + if (parsed.kind === 'index') { + const object = evalValueIR(parsed.object, env); + const index = evalValueIR(parsed.index, env); + if (object.kind === 'array') { + if (index.kind !== 'number' || !Number.isInteger(index.value) || index.value < 0) { + throw new Error('KERN core runtime array assignment index must be a non-negative 
integer.'); + } + object.items[index.value] = value; + return; + } + if (object.kind === 'record') { + if (index.kind !== 'string') throw new Error('KERN core runtime record assignment key must be a string.'); + object.entries[index.value] = value; + return; + } + throw new Error(`KERN core runtime cannot assign index on ${object.kind}.`); + } + throw new Error('KERN core runtime assign target must be an identifier, member, or index expression.'); +} + function runtimeFunctionBody(node: IRNode): IRNode[] { const handler = node.children?.find((child) => child.type === 'handler'); const body = handler ? (handler.children ?? []) : (node.children ?? []); return body.filter((child) => child.type !== 'param' && child.type !== 'decorator'); } +function firstRuntimeChild(node: IRNode, type: string): IRNode | undefined { + return node.children?.find((child) => child.type === type); +} + +function runtimeChildNodes(node: IRNode, type: string): IRNode[] { + return node.children?.filter((child) => child.type === type) ?? []; +} + +function constructorCallsSuper(node: IRNode): boolean { + return runtimeFunctionBody(node).some(statementCallsSuper); +} + +function statementCallsSuper(node: IRNode): boolean { + const rawValue = node.type === 'do' ? node.props?.value : undefined; + if (rawValue !== undefined && expressionCallsSuper(rawValue)) return true; + return (node.children ?? 
[]).some(statementCallsSuper); +} + +function expressionCallsSuper(value: unknown): boolean { + try { + return valueIRCallsSuper(parseExpression(unwrapExpr(value, 'super expression'))); + } catch { + return false; + } +} + +function valueIRCallsSuper(value: ValueIR): boolean { + switch (value.kind) { + case 'call': + return ( + (value.callee.kind === 'ident' && value.callee.name === 'super') || + valueIRCallsSuper(value.callee) || + value.args.some(valueIRCallsSuper) + ); + case 'member': + return valueIRCallsSuper(value.object); + case 'index': + return valueIRCallsSuper(value.object) || valueIRCallsSuper(value.index); + case 'tmplLit': + return value.expressions.some(valueIRCallsSuper); + case 'arrayLit': + return value.items.some(valueIRCallsSuper); + case 'objectLit': + return value.entries.some((entry) => + 'kind' in entry ? valueIRCallsSuper(entry.argument) : valueIRCallsSuper(entry.value), + ); + case 'unary': + case 'await': + case 'new': + case 'spread': + case 'propagate': + return valueIRCallsSuper(value.argument); + case 'typeAssert': + case 'nonNull': + return valueIRCallsSuper(value.expression); + case 'binary': + return valueIRCallsSuper(value.left) || valueIRCallsSuper(value.right); + case 'conditional': + return valueIRCallsSuper(value.test) || valueIRCallsSuper(value.consequent) || valueIRCallsSuper(value.alternate); + case 'lambda': + return valueIRCallsSuper(value.body); + case 'numLit': + case 'strLit': + case 'boolLit': + case 'nullLit': + case 'undefLit': + case 'regexLit': + case 'ident': + return false; + } +} + function runtimeChildren(node: IRNode): IRNode[] { - if (node.type === 'handler' || node.type === '__block') return node.children ?? []; + if (node.type === 'document' || node.type === 'handler' || node.type === '__block') return node.children ?? 
[]; return [node]; } @@ -603,6 +1170,10 @@ function kernEquals(left: KernValue, right: KernValue): boolean { } case 'function': case 'builtin': + case 'class': + case 'instance': + case 'bound-method': + case 'super': return left === right; } } @@ -656,16 +1227,46 @@ function isKernValue(value: unknown): value is KernValue { typeof value.name === 'string' && typeof value.call === 'function' ); + case 'class': + return ( + hasOnlyKeys(value, ['kind', 'name', 'node', 'env']) && + typeof value.name === 'string' && + isPlainRecord(value.node) && + value.env instanceof CoreRuntimeEnv + ); + case 'instance': + return ( + hasOnlyKeys(value, ['kind', 'classValue', 'fields', 'initializedClasses']) && + isKernValue(value.classValue) && + value.classValue.kind === 'class' && + isPlainRecord(value.fields) && + Object.values(value.fields).every(isKernValue) && + value.initializedClasses instanceof Set + ); + case 'bound-method': + return ( + hasOnlyKeys(value, ['kind', 'name', 'receiver', 'methodNode', 'ownerClass']) && + typeof value.name === 'string' && + isKernValue(value.receiver) && + value.receiver.kind === 'instance' && + isPlainRecord(value.methodNode) && + isKernValue(value.ownerClass) && + value.ownerClass.kind === 'class' + ); + case 'super': + return ( + hasOnlyKeys(value, ['kind', 'receiver', 'ownerClass', 'mode']) && + isKernValue(value.receiver) && + value.receiver.kind === 'instance' && + isKernValue(value.ownerClass) && + value.ownerClass.kind === 'class' && + (value.mode === 'constructor' || value.mode === 'method') + ); default: return false; } } -function brandValue(value: T): T { - Object.defineProperty(value, KERN_VALUE_BRAND, { value: true }); - return value; -} - function hasArrayHoles(value: readonly unknown[]): boolean { for (let i = 0; i < value.length; i += 1) { if (!Object.hasOwn(value, i)) return true; diff --git a/packages/core/src/core-runtime/value-brand.ts b/packages/core/src/core-runtime/value-brand.ts new file mode 100644 index 
00000000..cc0f1a76 --- /dev/null +++ b/packages/core/src/core-runtime/value-brand.ts @@ -0,0 +1,6 @@ +export const KERN_VALUE_BRAND: unique symbol = Symbol('KERN core runtime value'); + +export function brandValue(value: T): T { + Object.defineProperty(value, KERN_VALUE_BRAND, { value: true }); + return value; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 6cf5b14c..6ff3e81d 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -140,6 +140,46 @@ export { VALID_STRUCTURES, VALID_TARGETS, } from './config.js'; +export type { + CoreFixture, + CoreFixtureError, + CoreFixtureValue, + CoreGraphEdge, + CoreLowerings, + CoreOperation, + CoreOperationKind, + CoreOperationReturns, + CoreTypeContract, + CoreTypeContractRegistry, + CoreTypeKind, + CoreTypeName, +} from './core-contracts/index.js'; +export { + BOOLEAN_CONTRACT, + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + CORE_TYPE_CONTRACTS, + CORE_TYPE_NAMES, + CoreContractEvaluationError, + contractToGraphEdges, + coreFixtureValueType, + evaluateCoreContractOperation, + FUNCTION_CONTRACT, + isCoreFixtureFunction, + isCoreFixtureUndefined, + LIST_CONTRACT, + NULL_CONTRACT, + NUMBER_CONTRACT, + RECORD_CONTRACT, + STRING_CONTRACT, + UNDEFINED_CONTRACT, +} from './core-contracts/index.js'; +export { + CoreRuntimeContractAdapterError, + coreFixtureValueToKernValue, + kernValueToCoreFixtureValue, + roundTripKernContractDataValue, +} from './core-runtime/contract-adapter.js'; export type { CoreCompletion, CoreRuntimeResult, diff --git a/packages/core/tests/core-contracts.test.ts b/packages/core/tests/core-contracts.test.ts new file mode 100644 index 00000000..c3455cbd --- /dev/null +++ b/packages/core/tests/core-contracts.test.ts @@ -0,0 +1,341 @@ +import type { CoreFixtureValue, CoreOperation } from '../src/core-contracts/index.js'; +import { + BOOLEAN_CONTRACT, + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + CORE_TYPE_CONTRACTS, + CORE_TYPE_NAMES, + 
CoreContractEvaluationError, + contractToGraphEdges, + coreFixtureValueType, + evaluateCoreContractOperation, + LIST_CONTRACT, + NUMBER_CONTRACT, + RECORD_CONTRACT, + STRING_CONTRACT, +} from '../src/core-contracts/index.js'; + +describe('core type contracts registry', () => { + it('exposes schemaVersion 1 and all builtin core types', () => { + expect(CORE_TYPE_CONTRACTS.schemaVersion).toBe(1); + expect(Object.keys(CORE_TYPE_CONTRACTS.types)).toEqual([...CORE_TYPE_NAMES]); + + for (const name of CORE_TYPE_NAMES) { + const contract = CORE_TYPE_CONTRACTS.types[name]; + expect(contract.name).toBe(name); + expect(contract.strict).toBe(true); + expect(Array.isArray(contract.operations)).toBe(true); + } + }); + + it('exposes behavior contracts for VM-backed core values', () => { + expect(BOOLEAN_CONTRACT.operations.map((operation) => operation.id)).toEqual([ + 'Boolean.not', + 'Boolean.and', + 'Boolean.or', + 'Boolean.equals', + 'Boolean.toString', + ]); + expect(STRING_CONTRACT.operations.map((operation) => operation.id)).toEqual([ + 'String.length', + 'String.index', + 'String.includes', + 'String.startsWith', + 'String.endsWith', + 'String.slice', + 'String.trim', + 'String.lower', + 'String.upper', + 'String.concat', + 'String.equals', + 'String.lessThan', + 'String.lessThanOrEqual', + 'String.greaterThan', + 'String.greaterThanOrEqual', + 'String.toString', + ]); + expect(NUMBER_CONTRACT.operations.map((operation) => operation.id)).toEqual([ + 'Number.negate', + 'Number.add', + 'Number.subtract', + 'Number.multiply', + 'Number.divide', + 'Number.remainder', + 'Number.lessThan', + 'Number.lessThanOrEqual', + 'Number.greaterThan', + 'Number.greaterThanOrEqual', + ]); + expect(LIST_CONTRACT.operations.map((operation) => operation.id)).toEqual(['List.length', 'List.index']); + expect(RECORD_CONTRACT.operations.map((operation) => operation.id)).toEqual(['Record.get']); + + expect(CORE_TYPE_CONTRACTS.types.Function.operations).toHaveLength(0); + 
expect(CORE_TYPE_CONTRACTS.types.Null.operations).toHaveLength(0); + expect(CORE_TYPE_CONTRACTS.types.Undefined.operations).toHaveLength(0); + }); + + it('uses registry-level schemaVersion instead of operation version suffixes', () => { + for (const contract of Object.values(CORE_TYPE_CONTRACTS.types)) { + for (const operation of contract.operations) { + expect(operation.id).not.toMatch(/@v\d+/); + } + } + }); + + it('keeps every operation graphable and fixture-backed', () => { + for (const contract of [BOOLEAN_CONTRACT, STRING_CONTRACT, NUMBER_CONTRACT, LIST_CONTRACT, RECORD_CONTRACT]) { + for (const operation of contract.operations) { + expect(operation.args[0]).toBe(contract.name); + expect(operation.fixtures.length).toBeGreaterThan(0); + expect(operation.review.summary.length).toBeGreaterThan(0); + expect(operation.review.graph).toContain(contract.name); + expect(operation.lowers?.kern).toBeTruthy(); + expect(operation.lowers?.ts).toBeTruthy(); + expect(operation.lowers?.python).toBeTruthy(); + } + } + }); +}); + +describe('core type contract fixtures', () => { + it('evaluates Boolean operation fixtures including strict type errors', () => { + for (const operation of BOOLEAN_CONTRACT.operations) { + expectOperationFixtures(operation); + } + }); + + it('evaluates String operation fixtures including strict type errors', () => { + for (const operation of STRING_CONTRACT.operations) { + expectOperationFixtures(operation); + } + }); + + it('evaluates Number, List, and Record operation fixtures including strict errors', () => { + for (const operation of [ + ...NUMBER_CONTRACT.operations, + ...LIST_CONTRACT.operations, + ...RECORD_CONTRACT.operations, + ]) { + expectOperationFixtures(operation); + } + }); + + it('pins explicitly rejected strict signatures', () => { + expectErrorFixture('Boolean.and', [true, 'true']); + expectErrorFixture('String.concat', ['count:', 2]); + expectErrorFixture('String.equals', ['kern', true]); + expectErrorFixture('Number.add', [2, 
'3']); + expectErrorFixture('List.index', [[10], '0']); + expectErrorFixture('Record.get', [{ x: 1 }, 0]); + }); + + it('classifies all schema-level fixture value kinds for future contracts', () => { + expect(coreFixtureValueType(null)).toBe('Null'); + expect(coreFixtureValueType(CORE_FIXTURE_UNDEFINED)).toBe('Undefined'); + expect(coreFixtureValueType(CORE_FIXTURE_FUNCTION)).toBe('Function'); + expect(coreFixtureValueType(JSON.parse(JSON.stringify(CORE_FIXTURE_UNDEFINED)))).toBe('Undefined'); + expect(coreFixtureValueType(JSON.parse(JSON.stringify(CORE_FIXTURE_FUNCTION)))).toBe('Function'); + expect(coreFixtureValueType(['x'])).toBe('List'); + expect(coreFixtureValueType({ key: 'value' })).toBe('Record'); + expect(coreFixtureValueType({ kind: 'Undefined' })).toBe('Record'); + expect(coreFixtureValueType({ error: 'strict-type', message: 'valid record value' })).toBe('Record'); + }); + + it('uses unambiguous fixture result keys instead of overloading record-shaped values', () => { + for (const operation of [...BOOLEAN_CONTRACT.operations, ...STRING_CONTRACT.operations]) { + for (const fixture of operation.fixtures) { + expect(Array.isArray(fixture)).toBe(false); + expect('args' in fixture).toBe(true); + expect('returns' in fixture !== 'throws' in fixture).toBe(true); + } + } + }); + + it('pins KERN-owned Unicode code-point string semantics', () => { + expect(evaluateCoreContractOperation('String.length', ['𐐷'])).toBe(1); + expect(evaluateCoreContractOperation('String.length', ['e\u0301'])).toBe(2); + expect(evaluateCoreContractOperation('String.index', ['a𐐷b', 1])).toBe('𐐷'); + expect(evaluateCoreContractOperation('String.index', ['a𐐷b', 3])).toEqual(CORE_FIXTURE_UNDEFINED); + expect(evaluateCoreContractOperation('String.slice', ['a𐐷b', 1, 2])).toBe('𐐷'); + expect(evaluateCoreContractOperation('String.slice', ['e\u0301x', 0, 2])).toBe('e\u0301'); + expect(evaluateCoreContractOperation('String.lessThan', ['a', '𐐷'])).toBe(true); + 
expect(evaluateCoreContractOperation('String.greaterThan', ['𐐷', 'z'])).toBe(true); + }); + + it('rejects non-finite Number values without storing them in exported fixture data', () => { + expect(() => evaluateCoreContractOperation('String.slice', ['abc', Number.POSITIVE_INFINITY, 2])).toThrow( + 'String.slice expects String, Number, Number.', + ); + expect(() => evaluateCoreContractOperation('Number.add', [Number.NaN, 1])).toThrow( + 'Number.add expects Number, Number.', + ); + expect(() => evaluateCoreContractOperation('Number.add', [1e308, 1e308])).toThrow( + 'Number.add result must be finite.', + ); + }); + + it('pins KERN Number and collection semantics', () => { + expect(evaluateCoreContractOperation('Number.divide', [5, 2])).toBe(2.5); + expect(() => evaluateCoreContractOperation('Number.divide', [1, 0])).toThrow('Number.divide division by zero.'); + expect(() => evaluateCoreContractOperation('Number.remainder', [1, 0])).toThrow( + 'Number.remainder division by zero.', + ); + expect(evaluateCoreContractOperation('Number.remainder', [-5, 2])).toBe(-1); + expect(evaluateCoreContractOperation('Number.remainder', [5, -2])).toBe(1); + expect(evaluateCoreContractOperation('List.length', [[1, 2, 3]])).toBe(3); + expect(evaluateCoreContractOperation('List.index', [[null], 0])).toBeNull(); + expect(evaluateCoreContractOperation('List.index', [[10, 20], 2])).toEqual(CORE_FIXTURE_UNDEFINED); + expect(evaluateCoreContractOperation('List.index', [[10, 20], -1])).toEqual(CORE_FIXTURE_UNDEFINED); + expect(evaluateCoreContractOperation('Record.get', [{ x: 1 }, 'x'])).toBe(1); + expect(evaluateCoreContractOperation('Record.get', [{ x: null }, 'x'])).toBeNull(); + expect(evaluateCoreContractOperation('Record.get', [{}, 'toString'])).toEqual(CORE_FIXTURE_UNDEFINED); + }); +}); + +describe('core type contract graph extraction', () => { + it('derives type, lowering, fixture, and tag edges for String.includes', () => { + const edges = contractToGraphEdges(STRING_CONTRACT); + + 
expect( + hasEdge(edges, { + from: 'String', + relation: 'includes(String)', + to: 'Boolean', + operation: 'String.includes', + }), + ).toBe(true); + expect( + hasEdge(edges, { + from: 'String.includes', + relation: 'lowers.ts', + to: '__kernStringIncludes($0, $1)', + operation: 'String.includes', + }), + ).toBe(true); + expect( + hasEdge(edges, { + from: 'String.includes', + relation: 'lowers.python', + to: '__kern_string_includes($0, $1)', + operation: 'String.includes', + }), + ).toBe(true); + expect( + hasEdge(edges, { + from: 'String.includes', + relation: 'fixture', + to: 'String.includes.fixture.0', + operation: 'String.includes', + index: 0, + }), + ).toBe(true); + }); + + it('derives a Boolean.not operation edge', () => { + expect( + hasEdge(contractToGraphEdges(BOOLEAN_CONTRACT), { + from: 'Boolean', + relation: 'not()', + to: 'Boolean', + operation: 'Boolean.not', + }), + ).toBe(true); + }); + + it('rejects operation ids that do not match the owning contract name', () => { + expect(() => + contractToGraphEdges({ + ...STRING_CONTRACT, + operations: [{ ...STRING_CONTRACT.operations[0], id: 'Boolean.length' }], + }), + ).toThrow('must be prefixed with String'); + }); +}); + +function expectOperationFixtures(operation: CoreOperation): void { + for (const fixture of operation.fixtures) { + if ('throws' in fixture) { + expect(() => evaluateCoreContractOperation(operation.id, fixture.args)).toThrow(fixture.throws.message); + try { + evaluateCoreContractOperation(operation.id, fixture.args); + } catch (error) { + expect(error).toBeInstanceOf(CoreContractEvaluationError); + expect((error as CoreContractEvaluationError).code).toBe(fixture.throws.code); + } + } else { + expect(evaluateCoreContractOperation(operation.id, fixture.args)).toEqual(fixture.returns); + } + } +} + +function expectErrorFixture(operationId: string, expectedArgs: readonly CoreFixtureValue[]): void { + const operation = [ + ...BOOLEAN_CONTRACT.operations, + ...STRING_CONTRACT.operations, + 
...NUMBER_CONTRACT.operations, + ...LIST_CONTRACT.operations, + ...RECORD_CONTRACT.operations, + ].find((operation) => operation.id === operationId); + if (!operation) throw new Error(`Missing operation ${operationId}`); + expect( + operation.fixtures.some( + (fixture) => + sameFixtureValueList(fixture.args, expectedArgs) && + 'throws' in fixture && + fixture.throws.code === 'strict-type', + ), + ).toBe(true); +} + +function sameFixtureValue(left: CoreFixtureValue, right: CoreFixtureValue): boolean { + const leftKind = coreFixtureValueType(left); + if (leftKind !== coreFixtureValueType(right)) return false; + if (leftKind === 'Null' || leftKind === 'Undefined') return true; + if (leftKind === 'List') { + const leftArray = left as readonly CoreFixtureValue[]; + const rightArray = right as readonly CoreFixtureValue[]; + return ( + leftArray.length === rightArray.length && + leftArray.every((item, index) => sameFixtureValue(item, rightArray[index])) + ); + } + if (leftKind === 'Record') { + const leftRecord = left as { readonly [key: string]: CoreFixtureValue }; + const rightRecord = right as { readonly [key: string]: CoreFixtureValue }; + const leftKeys = Object.keys(leftRecord).sort(); + const rightKeys = Object.keys(rightRecord).sort(); + return ( + sameStringList(leftKeys, rightKeys) && + leftKeys.every((key) => sameFixtureValue(leftRecord[key], rightRecord[key])) + ); + } + return left === right; +} + +function sameFixtureValueList(left: readonly CoreFixtureValue[], right: readonly CoreFixtureValue[]): boolean { + return left.length === right.length && left.every((item, index) => sameFixtureValue(item, right[index])); +} + +function sameStringList(left: readonly string[], right: readonly string[]): boolean { + return left.length === right.length && left.every((item, index) => item === right[index]); +} + +function hasEdge( + edges: ReturnType, + expected: { + readonly from: string; + readonly relation: string; + readonly to: string; + readonly operation?: string; 
+ readonly index?: number; + }, +): boolean { + return edges.some( + (edge) => + edge.from === expected.from && + edge.relation === expected.relation && + edge.to === expected.to && + edge.operation === expected.operation && + edge.index === expected.index, + ); +} diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 6ded098a..92bf9739 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -1,18 +1,25 @@ import { + CORE_FIXTURE_FUNCTION, + CORE_FIXTURE_UNDEFINED, + CoreRuntimeContractAdapterError, CoreRuntimeEnv, callCoreFunction, + coreFixtureValueToKernValue, createCoreRuntimeEnv, evalCoreExpression, fromHostValue, kBoolean, kernTruthy, + kernValueToCoreFixtureValue, kNull, kNumber, kString, kUndefined, + roundTripKernContractDataValue, runCoreRuntime, toHostValue, } from '../src/index.js'; +import { parse } from '../src/parser.js'; import type { IRNode } from '../src/types.js'; function handler(children: IRNode[]): IRNode { @@ -107,6 +114,92 @@ describe('KERN core runtime values and expressions', () => { expect(toHostValue(evalCoreExpression('label["1.0"]', env))).toBeUndefined(); }); + test('string length and index use KERN code-point semantics in the VM', () => { + const env = createCoreRuntimeEnv({ globals: { label: 'a𐐷b', combo: 'e\u0301x' } }); + expect(toHostValue(evalCoreExpression('label.length', env))).toBe(3); + expect(toHostValue(evalCoreExpression('label[1]', env))).toBe('𐐷'); + expect(toHostValue(evalCoreExpression('combo.length', env))).toBe(3); + expect(toHostValue(evalCoreExpression('combo[1]', env))).toBe('\u0301'); + }); + + test('string methods dispatch through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv({ globals: { label: 'a𐐷b', word: ' KERN ' } }); + expect(toHostValue(evalCoreExpression('label.slice(1, 2)', env))).toBe('𐐷'); + expect(toHostValue(evalCoreExpression('label.index(1)', env))).toBe('𐐷'); + 
expect(toHostValue(evalCoreExpression('label.index(3)', env))).toBeUndefined(); + expect(() => evalCoreExpression('label.slice(1)', env)).toThrow('String.slice expects String, Number, Number.'); + expect(toHostValue(evalCoreExpression('label.includes("𐐷")', env))).toBe(true); + expect(toHostValue(evalCoreExpression('label.startsWith("a")', env))).toBe(true); + expect(toHostValue(evalCoreExpression('label.endsWith("b")', env))).toBe(true); + expect(toHostValue(evalCoreExpression('word.trim().lower()', env))).toBe('kern'); + expect(toHostValue(evalCoreExpression('word.trim().upper()', env))).toBe('KERN'); + expect(toHostValue(evalCoreExpression('label.concat("!")', env))).toBe('a𐐷b!'); + expect(toHostValue(evalCoreExpression('label.equals("a𐐷b")', env))).toBe(true); + }); + + test('string and boolean contract methods reject cross-type operands in the VM', () => { + const env = createCoreRuntimeEnv({ globals: { label: 'count:', flag: true } }); + expect(() => evalCoreExpression('label.concat(2)', env)).toThrow('String.concat expects String, String.'); + expect(() => evalCoreExpression('label.concat(String)', env)).toThrow('String.concat expects String, String.'); + expect(() => evalCoreExpression('label.equals(true)', env)).toThrow('String.equals expects String, String.'); + expect(() => evalCoreExpression('flag.and("true")', env)).toThrow('Boolean.and expects Boolean, Boolean.'); + expect(() => evalCoreExpression('flag.equals(1)', env)).toThrow('Boolean.equals expects Boolean, Boolean.'); + expect(toHostValue(evalCoreExpression('flag.not()', env))).toBe(false); + expect(toHostValue(evalCoreExpression('flag.and(false)', env))).toBe(false); + expect(toHostValue(evalCoreExpression('flag.or(false)', env))).toBe(true); + expect(toHostValue(evalCoreExpression('flag.toString()', env))).toBe('true'); + }); + + test('number operators dispatch through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv(); + expect(toHostValue(evalCoreExpression('2 + 3', 
env))).toBe(5); + expect(toHostValue(evalCoreExpression('5 - 3', env))).toBe(2); + expect(toHostValue(evalCoreExpression('3 * 4', env))).toBe(12); + expect(toHostValue(evalCoreExpression('5 / 2', env))).toBe(2.5); + expect(toHostValue(evalCoreExpression('-3', env))).toBe(-3); + expect(toHostValue(evalCoreExpression('-5 % 2', env))).toBe(-1); + expect(toHostValue(evalCoreExpression('5 % -2', env))).toBe(1); + expect(toHostValue(evalCoreExpression('2 < 3', env))).toBe(true); + expect(toHostValue(evalCoreExpression('3 <= 2', env))).toBe(false); + expect(toHostValue(evalCoreExpression('3 > 2', env))).toBe(true); + expect(toHostValue(evalCoreExpression('2 >= 3', env))).toBe(false); + expect(() => evalCoreExpression('1 / 0', env)).toThrow('Number.divide division by zero.'); + expect(() => evalCoreExpression('1 % 0', env)).toThrow('Number.remainder division by zero.'); + }); + + test('string ordered comparisons dispatch through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv(); + expect(toHostValue(evalCoreExpression('"abc" < "abd"', env))).toBe(true); + expect(toHostValue(evalCoreExpression('"abc" <= "abc"', env))).toBe(true); + expect(toHostValue(evalCoreExpression('"abd" > "abc"', env))).toBe(true); + expect(toHostValue(evalCoreExpression('"abc" >= "abd"', env))).toBe(false); + expect(toHostValue(evalCoreExpression('"𐐷" > "z"', env))).toBe(true); + }); + + test('unary boolean not dispatches through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv(); + expect(toHostValue(evalCoreExpression('!true', env))).toBe(false); + expect(toHostValue(evalCoreExpression('!false', env))).toBe(true); + expect(() => evalCoreExpression('!5', env)).toThrow('KERN core runtime unary ! 
requires a boolean.'); + }); + + test('list and record reads dispatch through KERN core contracts in the VM', () => { + const env = createCoreRuntimeEnv({ + globals: { xs: [10, undefined, 30], user: { name: 'Ada' }, sentinel: { __kernFixture: 'Undefined' } }, + }); + expect(toHostValue(evalCoreExpression('xs.length', env))).toBe(3); + expect(toHostValue(evalCoreExpression('xs[0]', env))).toBe(10); + expect(toHostValue(evalCoreExpression('xs[1]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('xs[-1]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('xs[1.5]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('user.name', env))).toBe('Ada'); + expect(toHostValue(evalCoreExpression('user["missing"]', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('user.toString', env))).toBeUndefined(); + expect(toHostValue(evalCoreExpression('sentinel.__kernFixture', env))).toBe('Undefined'); + expect(toHostValue(evalCoreExpression('[String].length', env))).toBe(1); + expect(toHostValue(evalCoreExpression('[String][0]', env))).toBe('[KERN builtin String]'); + }); + test('optional index skips unresolved index expressions for nullish objects', () => { const env = createCoreRuntimeEnv({ globals: { maybe: null } }); expect(toHostValue(evalCoreExpression('maybe?.[missingName]', env))).toBeUndefined(); @@ -124,6 +217,61 @@ describe('KERN core runtime values and expressions', () => { }); }); +describe('KERN core runtime contract adapter', () => { + test('round-trips supported KERN values through core contract fixture values', () => { + const value = fromHostValue({ + text: 'a𐐷b', + flag: true, + count: 3, + none: null, + missing: undefined, + list: [false, 'x'], + sentinelLikeRecord: { kind: 'Undefined' }, + }); + + const roundTripped = toHostValue(roundTripKernContractDataValue(value)) as Record; + const { missing: roundTrippedMissing, ...roundTrippedWithoutMissing } = roundTripped; + 
expect(roundTrippedWithoutMissing).toEqual({ + text: 'a𐐷b', + flag: true, + count: 3, + none: null, + list: [false, 'x'], + sentinelLikeRecord: { kind: 'Undefined' }, + }); + expect(Object.hasOwn(roundTripped, 'missing')).toBe(true); + expect(roundTrippedMissing).toBeUndefined(); + }); + + test('keeps Undefined fixture encoding stable across JSON round trips', () => { + const encoded = kernValueToCoreFixtureValue(kUndefined()); + expect(encoded).toEqual(CORE_FIXTURE_UNDEFINED); + expect(toHostValue(coreFixtureValueToKernValue(JSON.parse(JSON.stringify(encoded))))).toBeUndefined(); + }); + + test('rejects runtime records that use the reserved Undefined fixture sentinel shape', () => { + expect(() => kernValueToCoreFixtureValue(fromHostValue({ __kernFixture: 'Undefined' }))).toThrow( + 'reserved core fixture sentinel shape', + ); + }); + + test('rejects runtime instances that use reserved fixture sentinel field shape', () => { + const root = parse(['class name=Trap', ' field name=__kernFixture type=string value="Function"'].join('\n')); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => kernValueToCoreFixtureValue(evalCoreExpression('new Trap()', env))).toThrow( + 'reserved core fixture sentinel shape', + ); + }); + + test('represents runtime-only callable values as opaque Function fixture references', () => { + const env = createCoreRuntimeEnv(); + expect(kernValueToCoreFixtureValue(env.lookup('String'))).toEqual(CORE_FIXTURE_FUNCTION); + expect(() => coreFixtureValueToKernValue(CORE_FIXTURE_FUNCTION)).toThrow(CoreRuntimeContractAdapterError); + }); +}); + describe('KERN core runtime statements', () => { test('runs let, expression-v1, and return', () => { const result = runCoreRuntime( @@ -177,6 +325,164 @@ describe('KERN core runtime statements', () => { ); expect(toHostValue(result.completion.value)).toEqual({ a: 'ok', b: 'ok' }); }); + + test('executes user-defined classes with fields constructors methods and getters', () => { + 
const root = parse( + [ + 'class name=Counter', + ' field name=count type=number value={{ 0 }}', + ' constructor', + ' param name=initial type=number value={{ 0 }}', + ' handler', + ' assign target="this.count" value="initial"', + ' method name=inc returns=number', + ' param name=step type=number value={{ 1 }}', + ' handler', + ' assign target="this.count" value="this.count + step"', + ' return value="this.count"', + ' getter name=label returns=string', + ' handler', + ' return value="`count=${this.count}`"', + 'fn name=make returns=number', + ' handler', + ' let name=c value="new Counter(4)"', + ' do value="c.inc(2)"', + ' return value="c.count"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Counter(3).count', env))).toBe(3); + expect(toHostValue(evalCoreExpression('new Counter(3).inc()', env))).toBe(4); + expect(toHostValue(evalCoreExpression('new Counter(3).label', env))).toBe('count=3'); + expect(toHostValue(evalCoreExpression('make()', env))).toBe(6); + }); + + test('executes inherited fields getters methods and overrides', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="base"', + ' method name=kind returns=string', + ' handler', + ' return value="\'entity\'"', + ' getter name=summary returns=string', + ' handler', + ' return value="`${this.kind()}:${this.id}`"', + 'class name=User extends=Entity', + ' field name=name type=string value="Ada"', + ' method name=kind returns=string', + ' handler', + ' return value="`user/${super.kind()}`"', + ' method name=label returns=string', + ' handler', + ' return value="`${this.summary}:${this.name}`"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('base'); + expect(toHostValue(evalCoreExpression('new User().kind()', env))).toBe('user/entity'); + 
expect(toHostValue(evalCoreExpression('new User().summary', env))).toBe('user/entity:base'); + expect(toHostValue(evalCoreExpression('new User().label()', env))).toBe('user/entity:base:Ada'); + }); + + test('executes derived constructors with super constructor arguments', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="unset"', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' field name=name type=string value="unset"', + ' constructor', + ' param name=id type=string', + ' param name=name type=string', + ' handler', + ' do value="super(id)"', + ' assign target="this.name" value="name"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User("u1", "Ada").id', env))).toBe('u1'); + expect(toHostValue(evalCoreExpression('new User("u1", "Ada").name', env))).toBe('Ada'); + }); + + test('initializes derived fields after super constructor state', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="unset"', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' field name=copy type=string value={{ this.id }}', + ' constructor', + ' param name=id type=string', + ' handler', + ' do value="super(id)"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User("u1").copy', env))).toBe('u1'); + }); + + test('rejects missing and extra runtime arguments strictly', () => { + const root = parse( + [ + 'class name=Box', + ' constructor', + ' param name=value type=number', + ' handler', + ' assign target="this.value" value="value"', + 'fn name=need returns=number', + ' param name=value type=number', + ' handler', + ' return value="value"', + 
].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('need()', env)).toThrow('missing required argument: value'); + expect(() => evalCoreExpression('need(1, 2)', env)).toThrow('received too many arguments'); + expect(() => evalCoreExpression('new Box()', env)).toThrow('missing required argument: value'); + expect(() => evalCoreExpression('new Box(1, 2)', env)).toThrow('received too many arguments'); + }); + + test('detects nested constructor super calls structurally', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="unset"', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' if cond=true', + ' do value="super(id)"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User("u1").id', env))).toBe('u1'); + }); }); describe('KERN core runtime functions', () => { From 2011262bb827670b956f30697d0e8f375a86f46f Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 22:14:12 +0200 Subject: [PATCH 07/63] feat(core): add semantic substrate for review --- packages/core/src/core-runtime/index.ts | 1 + packages/core/src/index.ts | 17 ++ packages/core/src/semantic-substrate.ts | 231 ++++++++++++++++++ .../core/tests/semantic-substrate.test.ts | 111 +++++++++ .../src/rules/suggest-kern-primitive.ts | 26 +- .../rules-suggest-kern-primitive.test.ts | 1 + 6 files changed, 368 insertions(+), 19 deletions(-) create mode 100644 packages/core/src/semantic-substrate.ts create mode 100644 packages/core/tests/semantic-substrate.test.ts diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index afc7f74d..b50efa91 100644 --- a/packages/core/src/core-runtime/index.ts +++ 
b/packages/core/src/core-runtime/index.ts @@ -1073,6 +1073,7 @@ function valueIRCallsSuper(value: ValueIR): boolean { case 'ident': return false; } + return false; } function runtimeChildren(node: IRNode): IRNode[] { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 6ff3e81d..5602ae6c 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -436,6 +436,23 @@ export { formatScanSummary, generateConfigSource, scanProject } from './scanner. export type { KernSchemaJSON, NodeSchema, PropKind, PropSchema, SchemaViolation } from './schema.js'; // Schema validation + export export { exportSchemaJSON, NODE_SCHEMAS, validateSchema } from './schema.js'; +export type { + BuildKernSemanticSubstrateOptions, + KernSemanticCoreOperation, + KernSemanticCoreType, + KernSemanticIrContract, + KernSemanticPrimitive, + KernSemanticStdlibOperation, + KernSemanticSubstrate, + KernSemanticSubstrateSource, + KernSemanticSubstrateTarget, + KernSemanticSupport, +} from './semantic-substrate.js'; +export { + buildKernSemanticSubstrate, + lookupSemanticPrimitive, + semanticPrimitiveSupportSummary, +} from './semantic-substrate.js'; // Semantic validation export type { SemanticViolation } from './semantic-validator.js'; export { validateSemantics } from './semantic-validator.js'; diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts new file mode 100644 index 00000000..a310e9da --- /dev/null +++ b/packages/core/src/semantic-substrate.ts @@ -0,0 +1,231 @@ +import { KERN_STDLIB } from './codegen/kern-stdlib.js'; +import { + PORTABLE_LOGIC_PRIMITIVE_IDS, + PORTABLE_LOGIC_PRIMITIVES, + type PortableLogicPrimitiveId, + type PortableLogicSupport, + type PortableLogicTarget, +} from './codegen/portable-logic-primitives.js'; +import { CORE_TYPE_CONTRACTS, type CoreOperationReturns, contractToGraphEdges } from './core-contracts/index.js'; +import type { NodeContract } from './ir/semantics/index.js'; +import { 
snapshotRegistry } from './ir/semantics/index.js'; + +export type KernSemanticSubstrateSource = 'codegen-from-ts' | 'native-kern'; +export type KernSemanticSubstrateTarget = PortableLogicTarget; + +export interface KernSemanticSupport { + readonly ts: PortableLogicSupport; + readonly python: PortableLogicSupport; + readonly go: PortableLogicSupport; +} + +export interface KernSemanticCoreOperation { + readonly id: string; + readonly kind: string; + readonly args: readonly string[]; + readonly returns: readonly string[]; + readonly lowerings: Readonly>; + readonly fixtureCount: number; + readonly reviewSummary: string; + readonly reviewTags: readonly string[]; +} + +export interface KernSemanticCoreType { + readonly id: string; + readonly name: string; + readonly kind: string; + readonly strict: true; + readonly operations: readonly KernSemanticCoreOperation[]; +} + +export interface KernSemanticPrimitive { + readonly id: PortableLogicPrimitiveId; + readonly kernName: string; + readonly domain: string; + readonly description: string; + readonly intent: string; + readonly purity: string; + readonly hostPatterns: readonly string[]; + readonly portabilityNotes: readonly string[]; + readonly support: KernSemanticSupport; +} + +export interface KernSemanticStdlibOperation { + readonly id: string; + readonly module: string; + readonly method: string; + readonly arity: number; + readonly support: KernSemanticSupport; +} + +export interface KernSemanticIrContract { + readonly nodeType: string; + readonly forbiddenRewrites: readonly string[]; + readonly fixtureCount: number; +} + +export interface KernSemanticSubstrate { + readonly schemaVersion: 1; + readonly generatedBy: 'kern-semantic-substrate'; + readonly source: KernSemanticSubstrateSource; + readonly coreTypes: readonly KernSemanticCoreType[]; + readonly coreGraphEdges: readonly { + readonly from: string; + readonly relation: string; + readonly to: string; + readonly operation?: string; + readonly index?: number; + 
}[]; + readonly portablePrimitives: readonly KernSemanticPrimitive[]; + readonly stdlibOperations: readonly KernSemanticStdlibOperation[]; + readonly irContracts: readonly KernSemanticIrContract[]; +} + +export interface BuildKernSemanticSubstrateOptions { + readonly source?: KernSemanticSubstrateSource; + readonly irContracts?: ReadonlyMap; +} + +export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { + const coreTypes = Object.values(CORE_TYPE_CONTRACTS.types).map((contract) => ({ + id: `core.type.${contract.name}`, + name: contract.name, + kind: contract.kind, + strict: contract.strict, + operations: contract.operations.map((operation) => ({ + id: operation.id, + kind: operation.kind, + args: [...operation.args], + returns: normalizeReturns(operation.returns), + lowerings: operation.lowers ? { ...operation.lowers } : {}, + fixtureCount: operation.fixtures.length, + reviewSummary: operation.review.summary, + reviewTags: [...operation.review.graph], + })), + })); + + return { + schemaVersion: 1, + generatedBy: 'kern-semantic-substrate', + source: options.source ?? 'codegen-from-ts', + coreTypes, + coreGraphEdges: Object.values(CORE_TYPE_CONTRACTS.types).flatMap((contract) => contractToGraphEdges(contract)), + portablePrimitives: PORTABLE_LOGIC_PRIMITIVE_IDS.map((id) => { + const primitive = PORTABLE_LOGIC_PRIMITIVES[id]; + return { + id, + kernName: kernPrimitiveName(id), + domain: id.split('.')[0], + description: primitive.description, + intent: primitive.intent, + purity: primitive.purity, + hostPatterns: [...primitive.hostPatterns], + portabilityNotes: [...primitive.portabilityNotes], + support: { ...primitive.targets }, + }; + }), + stdlibOperations: Object.entries(KERN_STDLIB).flatMap(([module, entries]) => + Object.entries(entries).map(([method, entry]) => ({ + id: `stdlib.${module}.${method}`, + module, + method, + arity: entry.arity, + support: { + ts: entry.ts ? 
'stable' : 'unsupported', + python: entry.py ? 'stable' : 'unsupported', + go: 'unsupported', + }, + })), + ), + irContracts: options.irContracts + ? snapshotRegistry(options.irContracts).contracts.map((contract) => ({ + nodeType: contract.nodeType, + forbiddenRewrites: [...contract.forbiddenRewrites], + fixtureCount: contract.fixtureCount, + })) + : [], + }; +} + +export function lookupSemanticPrimitive( + substrate: KernSemanticSubstrate, + id: PortableLogicPrimitiveId, +): KernSemanticPrimitive { + const primitive = substrate.portablePrimitives.find((candidate) => candidate.id === id); + if (!primitive) { + throw new Error(`KERN semantic substrate missing portable primitive '${id}'.`); + } + return primitive; +} + +export function semanticPrimitiveSupportSummary( + primitive: KernSemanticPrimitive, + targets: readonly KernSemanticSubstrateTarget[], +): string { + const bySupport: Record = { + preview: [], + stable: [], + unsupported: [], + }; + for (const target of targets) { + bySupport[primitive.support[target] ?? 'unsupported'].push(target); + } + + const parts: string[] = []; + for (const support of ['stable', 'preview', 'unsupported'] satisfies PortableLogicSupport[]) { + const targetNames = bySupport[support]; + if (targetNames.length > 0) parts.push(`${support}: ${targetNames.join(', ')}`); + } + return parts.join('; '); +} + +function normalizeReturns(returns: CoreOperationReturns): readonly string[] { + return typeof returns === 'string' ? 
[returns] : [...returns]; +} + +const KERN_PRIMITIVE_NAMES = { + 'collection.has': 'includes', + 'collection.count': 'count', + 'collection.filter': 'filter', + 'collection.compact': 'compact', + 'collection.pluck': 'pluck', + 'collection.take': 'take', + 'collection.drop': 'drop', + 'collection.slice': 'slice', + 'collection.reverse': 'reverse', + 'collection.at': 'at', + 'collection.join': 'join', + 'collection.concat': 'concat', + 'collection.includes': 'includes', + 'collection.indexOf': 'indexOf', + 'collection.lastIndexOf': 'lastIndexOf', + 'collection.sort': 'sort', + 'collection.uniqueBy': 'uniqueBy', + 'collection.groupBy': 'groupBy', + 'collection.partition': 'partition', + 'collection.indexBy': 'indexBy', + 'collection.countBy': 'countBy', + 'logic.firstTruthy': 'firstTruthy', + 'logic.coalesce': 'coalesce', + 'time.epochMs': 'epochMs', + 'logic.not': 'not', + 'number.clamp': 'clamp', + 'object.keys': 'objectKeys', + 'object.values': 'objectValues', + 'object.entries': 'objectEntries', + 'object.merge': 'objectMerge', + 'object.omit': 'objectOmit', + 'object.pick': 'objectPick', + 'string.trim': 'trim', + 'string.split': 'split', + 'string.replaceFirst': 'replaceFirst', + 'string.replaceAll': 'replaceAll', + 'logic.firstDefined': 'firstDefined', + 'string.coerce': 'string', +} as const satisfies Record; + +function kernPrimitiveName(id: PortableLogicPrimitiveId): string { + const name = KERN_PRIMITIVE_NAMES[id]; + if (!name) throw new Error(`KERN semantic substrate missing KERN primitive name for '${id}'.`); + return name; +} diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts new file mode 100644 index 00000000..a7e44065 --- /dev/null +++ b/packages/core/tests/semantic-substrate.test.ts @@ -0,0 +1,111 @@ +import { + buildKernSemanticSubstrate, + lookupSemanticPrimitive, + makeEnv, + type NodeContract, + semanticPrimitiveSupportSummary, +} from '../src/index.js'; + +describe('KERN semantic 
substrate', () => { + test('exports core runtime contracts as reviewable semantic operations', () => { + const substrate = buildKernSemanticSubstrate(); + + expect(substrate.schemaVersion).toBe(1); + expect(substrate.generatedBy).toBe('kern-semantic-substrate'); + expect(substrate.source).toBe('codegen-from-ts'); + + const numberType = substrate.coreTypes.find((type) => type.name === 'Number'); + expect(numberType?.strict).toBe(true); + expect(numberType?.operations.map((operation) => operation.id)).toContain('Number.divide'); + + const divide = numberType?.operations.find((operation) => operation.id === 'Number.divide'); + expect(divide?.args).toEqual(['Number', 'Number']); + expect(divide?.returns).toEqual(['Number']); + expect(divide?.fixtureCount).toBeGreaterThan(0); + expect(divide?.reviewTags).toContain('strict'); + + expect( + substrate.coreGraphEdges.find( + (edge) => + edge.from === 'Number.divide' && + edge.relation === 'returns' && + edge.to === 'Number' && + edge.operation === 'Number.divide', + ), + ).toEqual( + expect.objectContaining({ + from: 'Number.divide', + relation: 'returns', + to: 'Number', + operation: 'Number.divide', + }), + ); + }); + + test('exports portable review primitives as stable query objects', () => { + const substrate = buildKernSemanticSubstrate(); + const clamp = lookupSemanticPrimitive(substrate, 'number.clamp'); + + expect(clamp.kernName).toBe('clamp'); + expect(clamp.domain).toBe('number'); + expect(clamp.support.ts).toBe('stable'); + expect(clamp.support.python).toBe('stable'); + expect(semanticPrimitiveSupportSummary(clamp, ['ts', 'python', 'go'])).toBe('stable: ts, python; unsupported: go'); + }); + + test('throws when a review consumer asks for an unknown semantic primitive', () => { + const substrate = buildKernSemanticSubstrate(); + + expect(() => lookupSemanticPrimitive(substrate, 'number.missing' as never)).toThrow( + "KERN semantic substrate missing portable primitive 'number.missing'.", + ); + }); + + 
test('exports stdlib operation summaries for downstream review/doc consumers', () => { + const substrate = buildKernSemanticSubstrate(); + + expect(substrate.stdlibOperations.find((operation) => operation.id === 'stdlib.Text.trim')).toEqual( + expect.objectContaining({ + module: 'Text', + method: 'trim', + arity: 1, + }), + ); + expect(substrate.stdlibOperations.find((operation) => operation.id === 'stdlib.Json.stringify')).toEqual( + expect.objectContaining({ + module: 'Json', + method: 'stringify', + }), + ); + }); + + test('can include IR semantic contract summaries without touching the global registry', () => { + const fakeContract: NodeContract = { + nodeType: 'fixtureNode', + preconditions: () => true, + effects: () => ({ events: [], completion: { kind: 'normal' } }), + completion: () => ({ kind: 'normal' }), + forbiddenRewrites: ['erase fixture node'], + fixtures: [ + { + description: 'fixture node completes normally', + ir: { type: 'fixtureNode', props: {} }, + env: makeEnv(), + expected: { events: [], completion: { kind: 'normal' } }, + }, + ], + }; + + const substrate = buildKernSemanticSubstrate({ + irContracts: new Map([[fakeContract.nodeType, fakeContract]]), + }); + + expect(substrate.irContracts).toEqual([ + { + nodeType: 'fixtureNode', + forbiddenRewrites: ['erase fixture node'], + fixtureCount: 1, + }, + ]); + }); +}); diff --git a/packages/review/src/rules/suggest-kern-primitive.ts b/packages/review/src/rules/suggest-kern-primitive.ts index fb67e391..48c37d6b 100644 --- a/packages/review/src/rules/suggest-kern-primitive.ts +++ b/packages/review/src/rules/suggest-kern-primitive.ts @@ -28,10 +28,11 @@ */ import { + buildKernSemanticSubstrate, + lookupSemanticPrimitive, type PortableLogicPrimitiveId, - type PortableLogicSupport, type PortableLogicTarget, - portableLogicSupportForTarget, + semanticPrimitiveSupportSummary, } from '@kernlang/core'; import type { ArrowFunction, @@ -88,6 +89,7 @@ const ARRAY_METHODS: Record = { }; const 
PORTABLE_LOGIC_TARGETS: readonly PortableLogicTarget[] = ['ts', 'python', 'go']; +const KERN_SEMANTIC_SUBSTRATE = buildKernSemanticSubstrate(); // Node kinds whose descendants should be skipped — don't flag opportunities // inside test files, type-only files, or generated code paths by path hint. @@ -235,22 +237,7 @@ function nodeColumn(node: TsNode): number { } function portableLogicSupportSummary(id: PortableLogicPrimitiveId): string { - const bySupport: Record = { - preview: [], - stable: [], - unsupported: [], - }; - for (const target of PORTABLE_LOGIC_TARGETS) { - const support = portableLogicSupportForTarget(id, target); - bySupport[support].push(target); - } - - const parts: string[] = []; - for (const support of ['stable', 'preview', 'unsupported'] satisfies PortableLogicSupport[]) { - const targets = bySupport[support]; - if (targets.length > 0) parts.push(`${support}: ${targets.join(', ')}`); - } - return parts.join('; '); + return semanticPrimitiveSupportSummary(lookupSemanticPrimitive(KERN_SEMANTIC_SUBSTRATE, id), PORTABLE_LOGIC_TARGETS); } function portableLogicFinding( @@ -259,11 +246,12 @@ function portableLogicFinding( id: PortableLogicPrimitiveId, label: string, ): ReviewFinding { + const primitive = lookupSemanticPrimitive(KERN_SEMANTIC_SUBSTRATE, id); return finding( 'suggest-kern-primitive', 'info', 'pattern', - `JS ${label} is covered by KERN portable logic primitive \`${id}\` (${portableLogicSupportSummary(id)})`, + `JS ${label} is covered by KERN portable logic primitive \`${id}\` / \`${primitive.kernName}\` (${portableLogicSupportSummary(id)})`, ctx.filePath, node.getStartLineNumber(), nodeColumn(node), diff --git a/packages/review/tests/rules-suggest-kern-primitive.test.ts b/packages/review/tests/rules-suggest-kern-primitive.test.ts index a34dc5af..d2446e29 100644 --- a/packages/review/tests/rules-suggest-kern-primitive.test.ts +++ b/packages/review/tests/rules-suggest-kern-primitive.test.ts @@ -213,6 +213,7 @@ 
describe('suggest-kern-primitive rule', () => { 'clamp name=inverted value={{ score }} min={{ config.min }} max={{ config.max }}', ); expect(portable[0].message).toContain('number.clamp'); + expect(portable[0].message).toContain('`clamp`'); expect(portable[0].message).toContain('stable: ts, python'); }); From 7f75f40bf36b6ee880f64c0bb8063c542df38d26 Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 22:29:26 +0200 Subject: [PATCH 08/63] fix(core): harden semantic substrate typing --- packages/core/src/semantic-substrate.ts | 54 +++++++++++++++++-------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index a310e9da..64381971 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -124,19 +124,7 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp support: { ...primitive.targets }, }; }), - stdlibOperations: Object.entries(KERN_STDLIB).flatMap(([module, entries]) => - Object.entries(entries).map(([method, entry]) => ({ - id: `stdlib.${module}.${method}`, - module, - method, - arity: entry.arity, - support: { - ts: entry.ts ? 'stable' : 'unsupported', - python: entry.py ? 'stable' : 'unsupported', - go: 'unsupported', - }, - })), - ), + stdlibOperations: stdlibOperationSummaries(), irContracts: options.irContracts ? snapshotRegistry(options.irContracts).contracts.map((contract) => ({ nodeType: contract.nodeType, @@ -168,7 +156,7 @@ export function semanticPrimitiveSupportSummary( unsupported: [], }; for (const target of targets) { - bySupport[primitive.support[target] ?? 
'unsupported'].push(target); + bySupport[semanticSupportForTarget(primitive.support, target)].push(target); } const parts: string[] = []; @@ -179,11 +167,45 @@ export function semanticPrimitiveSupportSummary( return parts.join('; '); } +function semanticSupportForTarget( + support: KernSemanticSupport, + target: KernSemanticSubstrateTarget, +): PortableLogicSupport { + switch (target) { + case 'ts': + return support.ts; + case 'python': + return support.python; + case 'go': + return support.go; + } +} + +function stdlibOperationSummaries(): KernSemanticStdlibOperation[] { + return typedEntries(KERN_STDLIB).flatMap(([module, entries]) => + typedEntries(entries).map(([method, entry]) => ({ + id: `stdlib.${module}.${method}`, + module, + method, + arity: entry.arity, + support: { + ts: entry.ts ? 'stable' : 'unsupported', + python: entry.py ? 'stable' : 'unsupported', + go: 'unsupported', + }, + })), + ); +} + +function typedEntries(record: Record): Array<[string, T]> { + return Object.entries(record) as Array<[string, T]>; +} + function normalizeReturns(returns: CoreOperationReturns): readonly string[] { return typeof returns === 'string' ? 
[returns] : [...returns]; } -const KERN_PRIMITIVE_NAMES = { +const KERN_PRIMITIVE_NAMES: Record = { 'collection.has': 'includes', 'collection.count': 'count', 'collection.filter': 'filter', @@ -222,7 +244,7 @@ const KERN_PRIMITIVE_NAMES = { 'string.replaceAll': 'replaceAll', 'logic.firstDefined': 'firstDefined', 'string.coerce': 'string', -} as const satisfies Record; +}; function kernPrimitiveName(id: PortableLogicPrimitiveId): string { const name = KERN_PRIMITIVE_NAMES[id]; From b1d7cdcdb8edfb8207c3fb161f142ea8d1766730 Mon Sep 17 00:00:00 2001 From: cukas Date: Sun, 7 Jun 2026 22:35:39 +0200 Subject: [PATCH 09/63] fix(core): avoid unknown stdlib entries --- packages/core/src/semantic-substrate.ts | 41 +++++++++++++++---------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index 64381971..98c12293 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -1,4 +1,4 @@ -import { KERN_STDLIB } from './codegen/kern-stdlib.js'; +import { KERN_STDLIB, type StdlibEntry } from './codegen/kern-stdlib.js'; import { PORTABLE_LOGIC_PRIMITIVE_IDS, PORTABLE_LOGIC_PRIMITIVES, @@ -182,23 +182,30 @@ function semanticSupportForTarget( } function stdlibOperationSummaries(): KernSemanticStdlibOperation[] { - return typedEntries(KERN_STDLIB).flatMap(([module, entries]) => - typedEntries(entries).map(([method, entry]) => ({ - id: `stdlib.${module}.${method}`, - module, - method, - arity: entry.arity, - support: { - ts: entry.ts ? 'stable' : 'unsupported', - python: entry.py ? 
'stable' : 'unsupported', - go: 'unsupported', - }, - })), - ); -} + const stdlib: Record> = KERN_STDLIB; + const operations: KernSemanticStdlibOperation[] = []; + + for (const module of Object.keys(stdlib)) { + const entries = stdlib[module]; + if (!entries) continue; + for (const method of Object.keys(entries)) { + const entry = entries[method]; + if (!entry) continue; + operations.push({ + id: `stdlib.${module}.${method}`, + module, + method, + arity: entry.arity, + support: { + ts: entry.ts ? 'stable' : 'unsupported', + python: entry.py ? 'stable' : 'unsupported', + go: 'unsupported', + }, + }); + } + } -function typedEntries(record: Record): Array<[string, T]> { - return Object.entries(record) as Array<[string, T]>; + return operations; } function normalizeReturns(returns: CoreOperationReturns): readonly string[] { From 2307eacf2aa67fe68daac315df4485a075aec16a Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 07:19:39 +0200 Subject: [PATCH 10/63] feat(core): add class object semantic validation --- packages/core/src/semantic-validator.ts | 468 ++++++++++++++++++++ packages/core/tests/class-semantics.test.ts | 262 +++++++++++ 2 files changed, 730 insertions(+) create mode 100644 packages/core/tests/class-semantics.test.ts diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 0651b21e..f1a59767 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -17,7 +17,10 @@ import { collectExternalImportSymbols, type ExternalImportSymbolTable } from './external-symbols.js'; import { importRegistryOf } from './import-metadata.js'; +import { parseExpression } from './parser-expression.js'; +import { splitPortableExpressionList } from './portable-expression-list.js'; import type { IRNode } from './types.js'; +import type { ValueIR } from './value-ir.js'; export interface SemanticViolation { rule: string; @@ -33,6 +36,7 @@ export interface SemanticViolation { */ export 
function validateSemantics(root: IRNode): SemanticViolation[] { const violations: SemanticViolation[] = []; + validateClassGraph(root, violations); validateNode(root, violations, [], []); return violations; } @@ -438,6 +442,470 @@ function validateNode( } } +type ClassMemberKind = 'field' | 'method' | 'getter' | 'setter'; + +interface ClassInfo { + node: IRNode; + name: string; + baseName?: string; + members: ClassMemberInfo[]; + constructors: IRNode[]; +} + +interface ClassMemberInfo { + node: IRNode; + name: string; + kind: ClassMemberKind; + static: boolean; + arity: number; +} + +const BUILTIN_CLASS_BASES = new Set(['Error']); +const BODY_EXPRESSION_PROPS = [ + 'value', + 'expr', + 'target', + 'cond', + 'on', + 'in', + 'from', + 'to', + 'initial', + 'source', + 'sources', + 'cleanup', + 'min', + 'max', +] as const; + +function validateClassGraph(root: IRNode, violations: SemanticViolation[]): void { + const classes = collectClassInfos(root); + if (classes.length === 0) return; + + const classByName = new Map(); + const visibleNames = collectVisibleClassBaseNames(root); + for (const info of classes) { + const prev = classByName.get(info.name); + if (!prev) { + classByName.set(info.name, info); + } + visibleNames.add(info.name); + } + + for (const info of classes) { + validateClassBaseReference(info, visibleNames, violations); + validateClassConstructors(info, violations); + validateClassMemberConflicts(info, violations); + validateClassSuperUsage(info, violations); + } + + validateClassInheritanceCycles(classes, classByName, violations); + validateClassOverrides(classes, classByName, violations); +} + +function collectClassInfos(root: IRNode): ClassInfo[] { + const out: ClassInfo[] = []; + walkSemanticTree(root, (node) => { + if (node.type !== 'class') return; + const name = stringProp(node, 'name'); + if (!name) return; + out.push({ + node, + name, + baseName: classBaseName(node.props?.extends), + members: collectClassMembers(node), + constructors: 
(node.children ?? []).filter((child) => child.type === 'constructor'), + }); + }); + return out; +} + +function collectClassMembers(node: IRNode): ClassMemberInfo[] { + const members: ClassMemberInfo[] = []; + for (const child of node.children ?? []) { + if (!isClassMemberNode(child)) continue; + const name = stringProp(child, 'name'); + if (!name) continue; + members.push({ + node: child, + name, + kind: child.type, + static: isTrueFlag(child.props?.static), + arity: memberArity(child), + }); + } + return members; +} + +function isClassMemberNode(node: IRNode): node is IRNode & { type: ClassMemberKind } { + return node.type === 'field' || node.type === 'method' || node.type === 'getter' || node.type === 'setter'; +} + +function validateClassBaseReference( + info: ClassInfo, + visibleNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!info.baseName) return; + if (visibleNames.has(info.baseName) || BUILTIN_CLASS_BASES.has(info.baseName)) return; + violations.push({ + rule: 'class-extends-unknown', + nodeType: 'class', + message: `Class '${info.name}' extends unknown base '${info.baseName}'. Declare or import the base class before extending it.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); +} + +function validateClassConstructors(info: ClassInfo, violations: SemanticViolation[]): void { + if (info.constructors.length <= 1) return; + for (const extra of info.constructors.slice(1)) { + violations.push({ + rule: 'class-single-constructor-only', + nodeType: 'constructor', + message: `Class '${info.name}' declares more than one constructor. KERN classes have exactly one construction path.`, + line: extra.loc?.line, + col: extra.loc?.col, + }); + } +} + +function validateClassMemberConflicts(info: ClassInfo, violations: SemanticViolation[]): void { + const seen = new Map(); + for (const member of info.members) { + const key = `${member.static ? 'static' : 'instance'}:${member.name}`; + const prev = seen.get(key) ?? 
[]; + const next = [...prev, member]; + if (isAllowedMemberGroup(next)) { + seen.set(key, next); + continue; + } + const first = prev[0] ?? member; + violations.push({ + rule: 'class-member-conflict', + nodeType: member.node.type, + message: `Class '${info.name}' has conflicting ${member.static ? 'static' : 'instance'} member '${member.name}' (${first.kind} and ${member.kind}). Use one field/method/accessor surface per name.`, + line: member.node.loc?.line, + col: member.node.loc?.col, + }); + seen.set(key, next); + } +} + +function validateClassSuperUsage(info: ClassInfo, violations: SemanticViolation[]): void { + const hasBase = Boolean(info.baseName); + for (const ctor of info.constructors) { + const callsSuper = nodeBodyCallsSuperConstructor(ctor); + if (hasBase && !callsSuper) { + violations.push({ + rule: 'class-constructor-missing-super', + nodeType: 'constructor', + message: `Class '${info.name}' extends '${info.baseName}' but its constructor does not call \`super(...)\`. Derived constructors must initialize the base class explicitly.`, + line: ctor.loc?.line, + col: ctor.loc?.col, + }); + } + if (!hasBase && nodeBodyUsesSuper(ctor)) { + violations.push({ + rule: 'class-super-without-base', + nodeType: 'constructor', + message: `Class '${info.name}' uses \`super\` but does not extend a base class.`, + line: ctor.loc?.line, + col: ctor.loc?.col, + }); + } + } + + if (!hasBase) { + for (const member of info.members) { + if (!nodeBodyUsesSuper(member.node)) continue; + violations.push({ + rule: 'class-super-without-base', + nodeType: member.node.type, + message: `Class '${info.name}' member '${member.name}' uses \`super\` but the class does not extend a base class.`, + line: member.node.loc?.line, + col: member.node.loc?.col, + }); + } + } +} + +function validateClassInheritanceCycles( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + const emitted = new Set(); + for (const info of classes) { + const 
path: string[] = []; + const seen = new Set(); + let current: ClassInfo | undefined = info; + while (current) { + if (seen.has(current.name)) { + const cycleStart = path.indexOf(current.name); + const cycleNames = path.slice(cycleStart); + const cycleKey = normalizedCycleKey(cycleNames); + const cycle = [...cycleNames, current.name].join(' -> '); + if (!emitted.has(cycleKey)) { + emitted.add(cycleKey); + violations.push({ + rule: 'class-inheritance-cycle', + nodeType: 'class', + message: `Class inheritance cycle detected: ${cycle}.`, + line: current.node.loc?.line, + col: current.node.loc?.col, + }); + } + break; + } + seen.add(current.name); + path.push(current.name); + current = current.baseName ? classByName.get(current.baseName) : undefined; + } + } +} + +function validateClassOverrides( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + for (const info of classes) { + for (const member of info.members) { + const baseMember = findBaseMember(info, member, classByName); + if (!baseMember) continue; + if (!sameOverrideKind(member, baseMember)) { + violations.push({ + rule: 'class-override-kind-mismatch', + nodeType: member.node.type, + message: `Class '${info.name}' member '${member.name}' overrides base ${baseMember.kind} with ${member.kind}. 
Overrides must preserve field/method/accessor kind.`, + line: member.node.loc?.line, + col: member.node.loc?.col, + }); + continue; + } + if (member.kind === 'method' && baseMember.kind === 'method' && member.arity !== baseMember.arity) { + violations.push({ + rule: 'class-override-arity-mismatch', + nodeType: member.node.type, + message: `Class '${info.name}' method '${member.name}' overrides a base method with ${baseMember.arity} parameter(s), but declares ${member.arity}.`, + line: member.node.loc?.line, + col: member.node.loc?.col, + }); + } + } + } +} + +function normalizedCycleKey(cycleNames: readonly string[]): string { + if (cycleNames.length === 0) return ''; + let best = cycleNames.join('\0'); + for (let index = 1; index < cycleNames.length; index++) { + const rotated = [...cycleNames.slice(index), ...cycleNames.slice(0, index)].join('\0'); + if (rotated < best) best = rotated; + } + return best; +} + +function findBaseMember( + info: ClassInfo, + member: ClassMemberInfo, + classByName: ReadonlyMap, +): ClassMemberInfo | undefined { + let current = info.baseName ? classByName.get(info.baseName) : undefined; + const visited = new Set(); + while (current) { + if (visited.has(current.name)) return undefined; + visited.add(current.name); + const found = current.members.find( + (candidate) => candidate.name === member.name && candidate.static === member.static, + ); + if (found) return found; + current = current.baseName ? 
classByName.get(current.baseName) : undefined; + } + return undefined; +} + +function sameOverrideKind(member: ClassMemberInfo, baseMember: ClassMemberInfo): boolean { + if (isAccessorPair(member, baseMember)) return true; + return member.kind === baseMember.kind; +} + +function isAccessorPair(a: ClassMemberInfo, b: ClassMemberInfo): boolean { + return (a.kind === 'getter' && b.kind === 'setter') || (a.kind === 'setter' && b.kind === 'getter'); +} + +function isAllowedMemberGroup(members: readonly ClassMemberInfo[]): boolean { + if (members.length <= 1) return true; + if (members.length > 2) return false; + if (!members.every((member) => member.kind === 'getter' || member.kind === 'setter')) return false; + return isAccessorPair(members[0], members[1]); +} + +function collectVisibleClassBaseNames(root: IRNode): Set { + const names = new Set(BUILTIN_CLASS_BASES); + walkSemanticTree(root, (node) => { + const name = stringProp(node, 'name'); + if (name && isVisibleClassBaseDeclaration(node.type)) names.add(name); + if (node.type === 'import') { + for (const binding of importLocalBindings(node)) names.add(binding.name); + } + if (node.type === 'use') { + for (const child of node.children ?? []) { + if (child.type !== 'from') continue; + if (!isUseClassBaseBinding(child)) continue; + const localName = stringProp(child, 'as') ?? stringProp(child, 'name'); + if (localName) names.add(localName); + } + } + }); + return names; +} + +function isVisibleClassBaseDeclaration(nodeType: string): boolean { + return nodeType === 'class' || nodeType === 'error'; +} + +function isUseClassBaseBinding(node: IRNode): boolean { + const kind = stringProp(node, 'kind'); + return !kind || kind === 'class' || kind === 'error'; +} + +function memberArity(node: IRNode): number { + const childParams = node.children?.filter((child) => child.type === 'param').length ?? 
0; + if (childParams > 0) return childParams; + const params = node.props?.params; + if (typeof params !== 'string' || !params.trim()) return 0; + try { + return splitPortableExpressionList(params, `${node.type} params=`).length; + } catch { + return 0; + } +} + +function nodeBodyCallsSuperConstructor(node: IRNode): boolean { + return nodeBodyExpressions(node).some((expr) => { + try { + return valueIRCallsSuperConstructor(parseExpression(expr)); + } catch { + return false; + } + }); +} + +function nodeBodyUsesSuper(node: IRNode): boolean { + return nodeBodyExpressions(node).some((expr) => { + try { + return valueIRUsesSuper(parseExpression(expr)); + } catch { + return false; + } + }); +} + +function nodeBodyExpressions(node: IRNode): string[] { + const out: string[] = []; + walkSemanticTreeUntil(node, (candidate) => { + for (const prop of BODY_EXPRESSION_PROPS) { + const text = expressionPropText(candidate.props?.[prop]); + if (text) out.push(text); + } + return candidate !== node && candidate.type === 'class' ? 
'stop' : 'continue'; + }); + return out; +} + +function expressionPropText(value: unknown): string | undefined { + if (typeof value === 'string') return value; + if (isExpressionObject(value)) return value.code; + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + return undefined; +} + +function valueIRCallsSuperConstructor(value: ValueIR): boolean { + if (value.kind === 'call' && value.callee.kind === 'ident' && value.callee.name === 'super') return true; + if (value.kind === 'lambda') return false; + return valueIRChildren(value).some(valueIRCallsSuperConstructor); +} + +function valueIRUsesSuper(value: ValueIR): boolean { + if (value.kind === 'ident' && value.name === 'super') return true; + return valueIRChildren(value).some(valueIRUsesSuper); +} + +function valueIRChildren(value: ValueIR): ValueIR[] { + switch (value.kind) { + case 'call': + return [value.callee, ...value.args]; + case 'member': + return [value.object]; + case 'index': + return [value.object, value.index]; + case 'tmplLit': + return [...value.expressions]; + case 'arrayLit': + return [...value.items]; + case 'objectLit': + return value.entries.map((entry) => ('kind' in entry ? 
entry.argument : entry.value)); + case 'unary': + case 'await': + case 'new': + case 'spread': + case 'propagate': + return [value.argument]; + case 'typeAssert': + case 'nonNull': + return [value.expression]; + case 'binary': + return [value.left, value.right]; + case 'conditional': + return [value.test, value.consequent, value.alternate]; + case 'lambda': + return [value.body]; + case 'numLit': + case 'strLit': + case 'boolLit': + case 'nullLit': + case 'undefLit': + case 'regexLit': + case 'ident': + return []; + } +} + +function classBaseName(value: unknown): string | undefined { + if (typeof value !== 'string' || !value.trim()) return undefined; + const match = /^([A-Za-z_$][\w$]*)/.exec(value.trim()); + return match?.[1]; +} + +function stringProp(node: IRNode, prop: string): string | undefined; +function stringProp(props: IRNode['props'] | undefined, prop: string): string | undefined; +function stringProp(nodeOrProps: IRNode | IRNode['props'] | undefined, prop: string): string | undefined { + const props = nodeOrProps && 'type' in nodeOrProps ? nodeOrProps.props : nodeOrProps; + const value = props ? (props as Record)[prop] : undefined; + return typeof value === 'string' && value.length > 0 ? value : undefined; +} + +function walkSemanticTree(node: IRNode, visit: (node: IRNode) => void): void { + visit(node); + for (const child of node.children ?? []) walkSemanticTree(child, visit); +} + +function walkSemanticTreeUntil(node: IRNode, visit: (node: IRNode) => 'continue' | 'stop'): void { + if (visit(node) === 'stop') return; + for (const child of node.children ?? 
[]) walkSemanticTreeUntil(child, visit); +} + +function isExpressionObject(value: unknown): value is { code: string } { + return ( + typeof value === 'object' && + value !== null && + (value as { readonly __expr?: unknown }).__expr === true && + typeof (value as { readonly code?: unknown }).code === 'string' + ); +} + interface ExportBinding { source: string; alias?: string; diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts new file mode 100644 index 00000000..0ad7c116 --- /dev/null +++ b/packages/core/tests/class-semantics.test.ts @@ -0,0 +1,262 @@ +import { parseDocumentWithDiagnostics } from '../src/parser.js'; +import { validateSemantics } from '../src/semantic-validator.js'; + +function violationsFor(source: string) { + return validateSemantics(parseDocumentWithDiagnostics(source).root); +} + +function rulesFor(source: string): string[] { + return violationsFor(source).map((violation) => violation.rule); +} + +describe('semantic-validator — class object model', () => { + test('accepts valid inheritance with explicit constructor super and method override', () => { + const source = [ + 'class name=Entity', + ' field name=id type=string', + ' constructor', + ' param name=id type=string', + ' handler lang=kern', + ' assign target="this.id" value="id"', + ' method name=kind returns=string', + ' handler lang=kern', + ' return value="\'entity\'"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler lang=kern', + ' do value="super(id)"', + ' method name=kind returns=string', + ' handler lang=kern', + ' return value="`user/${super.kind()}`"', + ].join('\n'); + + expect(rulesFor(source)).toEqual([]); + }); + + test('accepts imported base class names as visible extension targets', () => { + const source = [ + 'import from="./base" names=BaseEntity', + 'class name=User extends=BaseEntity', + ' field name=id type=string', + ].join('\n'); + + 
expect(rulesFor(source)).not.toContain('class-extends-unknown'); + }); + + test('accepts external package imports as visible extension targets', () => { + const source = [ + 'import from="@kern/base" registry=npm names=ExternalBase', + 'class name=User extends=ExternalBase', + ' field name=id type=string', + ].join('\n'); + + expect(rulesFor(source)).not.toContain('class-extends-unknown'); + }); + + test('reports unknown base class names', () => { + const violations = violationsFor('class name=User extends=MissingBase'); + + expect(violations).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + rule: 'class-extends-unknown', + message: expect.stringContaining("extends unknown base 'MissingBase'"), + }), + ]), + ); + }); + + test('reports non-class declarations used as superclass targets', () => { + const violations = violationsFor(['interface name=Shape', 'class name=Circle extends=Shape'].join('\n')); + + expect(violations.map((violation) => violation.rule)).toContain('class-extends-unknown'); + }); + + test('reports inheritance cycles across known local classes', () => { + const violations = violationsFor( + ['class name=A extends=B', 'class name=B extends=C', 'class name=C extends=A'].join('\n'), + ); + + expect(violations).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + rule: 'class-inheritance-cycle', + message: expect.stringContaining('A -> B -> C -> A'), + }), + ]), + ); + expect(violations.filter((violation) => violation.rule === 'class-inheritance-cycle')).toHaveLength(1); + }); + + test('reports duplicate constructors', () => { + const violations = violationsFor( + [ + 'class name=User', + ' constructor', + ' handler lang=kern', + ' do value="1"', + ' constructor', + ' handler lang=kern', + ' do value="2"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-single-constructor-only'); + }); + + test('reports class member conflicts while allowing getter/setter pairs', () => { + 
const conflict = violationsFor( + [ + 'class name=Bad', + ' field name=value type=number', + ' method name=value returns=number', + ' handler lang=kern', + ' return value=1', + ].join('\n'), + ); + expect(conflict.map((violation) => violation.rule)).toContain('class-member-conflict'); + + const accessorPair = rulesFor( + [ + 'class name=Good', + ' getter name=value returns=number', + ' handler lang=kern', + ' return value="this._value"', + ' setter name=value', + ' param name=next type=number', + ' handler lang=kern', + ' assign target="this._value" value="next"', + ].join('\n'), + ); + expect(accessorPair).not.toContain('class-member-conflict'); + }); + + test('reports duplicate accessors for the same member name', () => { + const violations = violationsFor( + [ + 'class name=Bad', + ' getter name=value returns=number', + ' handler lang=kern', + ' return value=1', + ' setter name=value', + ' param name=next type=number', + ' handler lang=kern', + ' assign target="this._value" value="next"', + ' setter name=value', + ' param name=other type=number', + ' handler lang=kern', + ' assign target="this._value" value="other"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-member-conflict'); + }); + + test('reports derived constructors that omit super', () => { + const violations = violationsFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' assign target="this.name" value="\'Ada\'"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-constructor-missing-super'); + }); + + test('does not accept delayed super calls inside constructor lambdas', () => { + const violations = violationsFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="(() => super())"', + ].join('\n'), + ); + + expect(violations.map((violation) => 
violation.rule)).toContain('class-constructor-missing-super'); + }); + + test('reports super usage in classes without a base', () => { + const violations = violationsFor( + [ + 'class name=User', + ' method name=kind returns=string', + ' handler lang=kern', + ' return value="super.kind()"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-super-without-base'); + }); + + test('finds super usage in control-flow expression props', () => { + const violations = violationsFor( + [ + 'class name=User', + ' method name=check returns=void', + ' handler lang=kern', + ' if cond="super.ready()"', + ' do value="1"', + ].join('\n'), + ); + + expect(violations.map((violation) => violation.rule)).toContain('class-super-without-base'); + }); + + test('does not attribute nested class super usage to the outer class', () => { + const source = [ + 'class name=Base', + 'class name=Outer', + ' method name=install returns=void', + ' handler lang=kern', + ' class name=Inner extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ].join('\n'); + + expect(rulesFor(source)).not.toContain('class-super-without-base'); + }); + + test('reports override kind and arity mismatches', () => { + const source = [ + 'class name=Base', + ' method name=load returns=string', + ' param name=id type=string', + ' handler lang=kern', + ' return value=id', + ' field name=status type=string', + 'class name=Derived extends=Base', + ' method name=load returns=string', + ' handler lang=kern', + ' return value="\'missing id\'"', + ' method name=status returns=string', + ' handler lang=kern', + ' return value="\'ok\'"', + ].join('\n'); + const rules = rulesFor(source); + + expect(rules).toContain('class-override-arity-mismatch'); + expect(rules).toContain('class-override-kind-mismatch'); + }); + + test('override validation terminates when an inheritance cycle has no matching member', () => { + const rules = rulesFor( + [ + 'class name=A 
extends=B', + ' method name=onlyA returns=number', + ' handler lang=kern', + ' return value=1', + 'class name=B extends=C', + 'class name=C extends=B', + ].join('\n'), + ); + + expect(rules).toContain('class-inheritance-cycle'); + }); +}); From 2d6a94e2ac553fd481a77f6cd2366572b6544cb9 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 08:01:24 +0200 Subject: [PATCH 11/63] feat(core): harden class runtime setters --- packages/core/src/core-runtime/index.ts | 59 ++++++++- packages/core/src/semantic-validator.ts | 2 + packages/core/tests/core-runtime.test.ts | 159 +++++++++++++++++++++++ 3 files changed, 217 insertions(+), 3 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index b50efa91..920414c2 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -16,6 +16,7 @@ import { import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; +const ACTIVE_INSTANCE_SETTERS = new WeakMap>(); export type KernValue = | { kind: 'null' } @@ -818,6 +819,54 @@ function evalClassMember(object: KernClassValue, property: string): KernValue { return kUndefined(); } +function assignInstanceMember(object: KernInstanceValue, property: string, value: KernValue): void { + const setter = findClassMember(object.classValue, 'setter', property); + if (setter) { + callSetterBody(object, setter.node, setter.owner, property, value); + return; + } + if (findClassMember(object.classValue, 'getter', property)) { + throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + } + object.fields[property] = value; +} + +function assignSuperMember(object: KernSuperValue, property: string, value: KernValue): void { + const base = resolveBaseClass(object.ownerClass); + if (!base) throw new Error(`KERN core runtime class ${object.ownerClass.name} has no base class.`); + const setter = findClassMember(base, 'setter', 
property); + if (setter) { + callSetterBody(object.receiver, setter.node, setter.owner, property, value); + return; + } + if (findClassMember(base, 'getter', property)) { + throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + } + object.receiver.fields[property] = value; +} + +function callSetterBody( + receiver: KernInstanceValue, + setterNode: IRNode, + ownerClass: KernClassValue, + property: string, + value: KernValue, +): void { + const key = `${ownerClass.name}.${property}`; + const activeSetters = ACTIVE_INSTANCE_SETTERS.get(receiver) ?? new Set(); + if (activeSetters.has(key)) { + throw new Error(`KERN core runtime recursive setter assignment: ${key}.`); + } + activeSetters.add(key); + ACTIVE_INSTANCE_SETTERS.set(receiver, activeSetters); + try { + callClassMemberBody(setterNode, ownerClass, receiver, [value]); + } finally { + activeSetters.delete(key); + if (activeSetters.size === 0) ACTIVE_INSTANCE_SETTERS.delete(receiver); + } +} + function callBoundMethodValue( method: KernBoundMethodValue, args: readonly KernValue[], @@ -875,7 +924,7 @@ function callClassMemberBody( function findClassMember( klass: KernClassValue, - type: 'method' | 'getter', + type: 'method' | 'getter' | 'setter', name: string, staticOnly = false, ): { node: IRNode; owner: KernClassValue } | undefined { @@ -968,7 +1017,11 @@ function assignRuntimeTarget(target: string, value: KernValue, env: CoreRuntimeE if (parsed.kind === 'member') { const object = evalValueIR(parsed.object, env); if (object.kind === 'instance') { - object.fields[parsed.property] = value; + assignInstanceMember(object, parsed.property, value); + return; + } + if (object.kind === 'super') { + assignSuperMember(object, parsed.property, value); return; } if (object.kind === 'record') { @@ -1063,7 +1116,7 @@ function valueIRCallsSuper(value: ValueIR): boolean { case 'conditional': return valueIRCallsSuper(value.test) || valueIRCallsSuper(value.consequent) || 
valueIRCallsSuper(value.alternate); case 'lambda': - return valueIRCallsSuper(value.body); + return false; case 'numLit': case 'strLit': case 'boolLit': diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index f1a59767..3745d163 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -871,6 +871,8 @@ function valueIRChildren(value: ValueIR): ValueIR[] { case 'ident': return []; } + const exhaustive: never = value; + return exhaustive; } function classBaseName(value: unknown): string | undefined { diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 92bf9739..7977b290 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -483,6 +483,165 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User("u1").id', env))).toBe('u1'); }); + + test('dispatches instance assignment through setters', () => { + const root = parse( + [ + 'class name=Gauge', + ' field name=_value type=number value={{ 0 }}', + ' setter name=value', + ' param name=next type=number', + ' handler', + ' assign target="this._value" value="next * 2"', + ' getter name=value returns=number', + ' handler', + ' return value="this._value"', + 'fn name=setGauge returns=number', + ' handler', + ' let name=g value="new Gauge()"', + ' assign target="g.value" value="7"', + ' return value="g.value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setGauge()', env))).toBe(14); + }); + + test('dispatches inherited and super assignment through setters', () => { + const root = parse( + [ + 'class name=Base', + ' field name=_value type=number value={{ 0 }}', + ' setter name=value', + ' param name=next type=number', + ' handler', + ' assign target="this._value" value="next + 1"', + ' getter name=value 
returns=number', + ' handler', + ' return value="this._value"', + 'class name=Derived extends=Base', + ' method name=setViaSuper returns=number', + ' param name=next type=number', + ' handler', + ' assign target="super.value" value="next"', + ' return value="this.value"', + 'fn name=setDerived returns=number', + ' handler', + ' let name=d value="new Derived()"', + ' assign target="d.value" value="4"', + ' return value="d.setViaSuper(9) + d.value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setDerived()', env))).toBe(20); + }); + + test('supports setter-only properties and rejects getter-only assignment', () => { + const root = parse( + [ + 'class name=WriteOnly', + ' field name=stored type=number value={{ 0 }}', + ' setter name=value', + ' param name=next type=number', + ' handler', + ' assign target="this.stored" value="next"', + 'class name=ReadOnly', + ' getter name=value returns=number', + ' handler', + ' return value="1"', + 'fn name=setWriteOnly returns=number', + ' handler', + ' let name=w value="new WriteOnly()"', + ' assign target="w.value" value="5"', + ' return value="w.stored"', + 'fn name=setReadOnly returns=number', + ' handler', + ' let name=r value="new ReadOnly()"', + ' assign target="r.value" value="5"', + ' return value="r.value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setWriteOnly()', env))).toBe(5); + expect(() => evalCoreExpression('setReadOnly()', env)).toThrow('cannot assign getter-only property: value'); + }); + + test('rejects recursive setter assignment', () => { + const root = parse( + [ + 'class name=Loop', + ' setter name=value', + ' param name=next type=number', + ' handler', + ' assign target="this.value" value="next"', + 'fn name=setLoop returns=number', + ' handler', + ' let name=loop value="new Loop()"', + ' assign target="loop.value" value="5"', + ' 
return value="0"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('setLoop()', env)).toThrow('recursive setter assignment: Loop.value'); + }); + + test('allows chained setters for different properties', () => { + const root = parse( + [ + 'class name=Chain', + ' field name=_b type=number value={{ 0 }}', + ' setter name=a', + ' param name=next type=number', + ' handler', + ' assign target="this.b" value="next + 1"', + ' setter name=b', + ' param name=next type=number', + ' handler', + ' assign target="this._b" value="next * 2"', + ' getter name=b returns=number', + ' handler', + ' return value="this._b"', + 'fn name=setChain returns=number', + ' handler', + ' let name=chain value="new Chain()"', + ' assign target="chain.a" value="4"', + ' return value="chain.b"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setChain()', env))).toBe(10); + }); + + test('does not count delayed lambda super calls as constructor initialization', () => { + const root = parse( + [ + 'class name=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' do value="(() => super(id))"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User("u1")', env)).toThrow('missing required argument: id'); + }); }); describe('KERN core runtime functions', () => { From cdcb60a9dd0bdac04fc6ae4b4cca3c2276388ec9 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 13:55:18 +0200 Subject: [PATCH 12/63] feat(core): add static class member runtime --- packages/core/src/core-runtime/index.ts | 201 ++++++++++++++++++++--- packages/core/tests/core-runtime.test.ts | 123 ++++++++++++++ 2 files changed, 305 
insertions(+), 19 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 920414c2..8fc48a60 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -17,6 +17,7 @@ import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; const ACTIVE_INSTANCE_SETTERS = new WeakMap>(); +const ACTIVE_CLASS_SETTERS = new WeakMap>(); export type KernValue = | { kind: 'null' } @@ -52,6 +53,7 @@ export interface KernClassValue { name: string; node: IRNode; env: CoreRuntimeEnv; + staticFields: Record; } export interface KernInstanceValue { @@ -71,9 +73,9 @@ export interface KernBoundMethodValue { export interface KernSuperValue { kind: 'super'; - receiver: KernInstanceValue; + receiver: KernInstanceValue | KernClassValue; ownerClass: KernClassValue; - mode: 'constructor' | 'method'; + mode: 'constructor' | 'method' | 'static'; } export interface RuntimeParam { @@ -295,6 +297,7 @@ function executeNode(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { case 'class': { const klass = makeClass(node, env); env.define(klass.name, klass); + initializeClassStaticFields(klass); return { kind: 'normal', value: kUndefined() }; } case 'assign': @@ -710,9 +713,22 @@ function makeClass(node: IRNode, env: CoreRuntimeEnv): KernClassValue { name: requiredString(node.props?.name, 'class name='), node, env, + staticFields: createRecordEntries(), }); } +function initializeClassStaticFields(klass: KernClassValue): void { + for (const field of runtimeChildNodes(klass.node, 'field')) { + if (field.props?.static !== true && field.props?.static !== 'true') continue; + const name = requiredString(field.props?.name, 'field name='); + const value = + Object.hasOwn(field.props ?? {}, 'value') || Object.hasOwn(field.props ?? {}, 'default') + ? 
evalCoreExpression(runtimeFieldInitializerExpr(field), classStaticEnv(klass)) + : kUndefined(); + klass.staticFields[name] = value; + } +} + function constructClassValue(klass: KernClassValue, args: readonly KernValue[]): KernInstanceValue { const instance = brandValue({ kind: 'instance' as const, @@ -791,6 +807,7 @@ function evalInstanceMember(object: KernInstanceValue, property: string): KernVa function evalSuperMember(object: KernSuperValue, property: string): KernValue { const base = resolveBaseClass(object.ownerClass); if (!base) return kUndefined(); + if (object.receiver.kind === 'class') return evalClassMemberFrom(base, property, object.receiver); const getter = findClassMember(base, 'getter', property); if (getter) return callClassMemberBody(getter.node, getter.owner, object.receiver, []).value; const method = findClassMember(base, 'method', property); @@ -808,15 +825,23 @@ function evalSuperMember(object: KernSuperValue, property: string): KernValue { } function evalClassMember(object: KernClassValue, property: string): KernValue { - const method = findClassMember(object, 'method', property, true); + return evalClassMemberFrom(object, property, object); +} + +function evalClassMemberFrom(owner: KernClassValue, property: string, receiver: KernClassValue): KernValue { + if (Object.hasOwn(owner.staticFields, property)) return owner.staticFields[property] ?? 
kUndefined(); + const getter = findOwnClassMember(owner, 'getter', property, true); + if (getter) return callStaticClassMemberBody(getter.node, getter.owner, receiver, []).value; + const method = findOwnClassMember(owner, 'method', property, true); if (method) { return brandValue({ - kind: 'builtin', - name: `${object.name}.${property}`, - call: (args) => callClassMemberBody(method.node, method.owner, undefined, args).value, + kind: 'builtin' as const, + name: `${receiver.name}.${property}`, + call: (args) => callStaticClassMemberBody(method.node, method.owner, receiver, args).value, }); } - return kUndefined(); + const base = resolveBaseClass(owner); + return base ? evalClassMemberFrom(base, property, receiver) : kUndefined(); } function assignInstanceMember(object: KernInstanceValue, property: string, value: KernValue): void { @@ -834,6 +859,10 @@ function assignInstanceMember(object: KernInstanceValue, property: string, value function assignSuperMember(object: KernSuperValue, property: string, value: KernValue): void { const base = resolveBaseClass(object.ownerClass); if (!base) throw new Error(`KERN core runtime class ${object.ownerClass.name} has no base class.`); + if (object.receiver.kind === 'class') { + assignClassMemberFrom(base, object.receiver, property, value); + return; + } const setter = findClassMember(base, 'setter', property); if (setter) { callSetterBody(object.receiver, setter.node, setter.owner, property, value); @@ -845,6 +874,36 @@ function assignSuperMember(object: KernSuperValue, property: string, value: Kern object.receiver.fields[property] = value; } +function assignClassMember(object: KernClassValue, property: string, value: KernValue): void { + assignClassMemberFrom(object, object, property, value); +} + +function assignClassMemberFrom( + owner: KernClassValue, + receiver: KernClassValue, + property: string, + value: KernValue, +): void { + if (Object.hasOwn(owner.staticFields, property)) { + receiver.staticFields[property] = value; + 
return; + } + const setter = findOwnClassMember(owner, 'setter', property, true); + if (setter) { + callStaticSetterBody(receiver, setter.node, setter.owner, property, value); + return; + } + if (findOwnClassMember(owner, 'getter', property, true)) { + throw new Error(`KERN core runtime cannot assign getter-only static property: ${property}.`); + } + const base = resolveBaseClass(owner); + if (base) { + assignClassMemberFrom(base, receiver, property, value); + return; + } + receiver.staticFields[property] = value; +} + function callSetterBody( receiver: KernInstanceValue, setterNode: IRNode, @@ -867,6 +926,28 @@ function callSetterBody( } } +function callStaticSetterBody( + receiver: KernClassValue, + setterNode: IRNode, + ownerClass: KernClassValue, + property: string, + value: KernValue, +): void { + const key = `${ownerClass.name}.${property}`; + const activeSetters = ACTIVE_CLASS_SETTERS.get(receiver) ?? new Set(); + if (activeSetters.has(key)) { + throw new Error(`KERN core runtime recursive static setter assignment: ${key}.`); + } + activeSetters.add(key); + ACTIVE_CLASS_SETTERS.set(receiver, activeSetters); + try { + callStaticClassMemberBody(setterNode, ownerClass, receiver, [value]); + } finally { + activeSetters.delete(key); + if (activeSetters.size === 0) ACTIVE_CLASS_SETTERS.delete(receiver); + } +} + function callBoundMethodValue( method: KernBoundMethodValue, args: readonly KernValue[], @@ -878,6 +959,9 @@ function callSuperConstructor(value: KernSuperValue, args: readonly KernValue[]) if (value.mode !== 'constructor') { throw new Error('KERN core runtime super(...) is only valid inside a constructor.'); } + if (value.receiver.kind !== 'instance') { + throw new Error('KERN core runtime super(...) 
requires an instance receiver.'); + } const base = resolveBaseClass(value.ownerClass); if (!base) throw new Error(`KERN core runtime class ${value.ownerClass.name} has no base class.`); initializeClassLayer(value.receiver, base, args, true); @@ -922,6 +1006,56 @@ function callClassMemberBody( return { value: completion.value, env: callEnv }; } +function callStaticClassMemberBody( + memberNode: IRNode, + ownerClass: KernClassValue, + receiver: KernClassValue, + args: readonly KernValue[], +): { value: KernValue; env: CoreRuntimeEnv } { + const callEnv = ownerClass.env.child(); + callEnv.define('this', receiver); + if (resolveBaseClass(ownerClass)) { + callEnv.define( + 'super', + brandValue({ + kind: 'super', + receiver, + ownerClass, + mode: 'static', + }), + ); + } + const params = runtimeParams(memberNode); + validateRuntimeArgs(`${ownerClass.name}.${memberNode.type}`, params, args); + params.forEach((param, index) => { + const provided = args[index]; + const value = + provided === undefined || (provided.kind === 'undefined' && param.defaultExpr) + ? param.defaultExpr + ? evalCoreExpression(param.defaultExpr, callEnv) + : kUndefined() + : provided; + callEnv.define(param.name, value); + }); + const completion = executeSequence(runtimeFunctionBody(memberNode), callEnv); + return { value: completion.value, env: callEnv }; +} + +function findOwnClassMember( + klass: KernClassValue, + type: 'method' | 'getter' | 'setter', + name: string, + staticOnly = false, +): { node: IRNode; owner: KernClassValue } | undefined { + for (const child of klass.node.children ?? 
[]) { + if (child.type !== type || child.props?.name !== name) continue; + const isStatic = child.props?.static === true || child.props?.static === 'true'; + if (staticOnly !== isStatic) continue; + return { node: child, owner: klass }; + } + return undefined; +} + function findClassMember( klass: KernClassValue, type: 'method' | 'getter' | 'setter', @@ -958,6 +1092,23 @@ function classThisEnv(klass: KernClassValue, receiver: KernInstanceValue): CoreR return env; } +function classStaticEnv(klass: KernClassValue): CoreRuntimeEnv { + const env = klass.env.child(); + env.define('this', klass); + if (resolveBaseClass(klass)) { + env.define( + 'super', + brandValue({ + kind: 'super', + receiver: klass, + ownerClass: klass, + mode: 'static', + }), + ); + } + return env; +} + function makeFunction(node: IRNode, env: CoreRuntimeEnv): KernFunctionValue { return brandValue({ kind: 'function', @@ -1028,6 +1179,10 @@ function assignRuntimeTarget(target: string, value: KernValue, env: CoreRuntimeE object.entries[parsed.property] = value; return; } + if (object.kind === 'class') { + assignClassMember(object, parsed.property, value); + return; + } throw new Error(`KERN core runtime cannot assign member on ${object.kind}.`); } if (parsed.kind === 'index') { @@ -1237,6 +1392,10 @@ function isNullish(value: KernValue): boolean { } function isKernValue(value: unknown): value is KernValue { + return isKernValueShape(value, new WeakSet()); +} + +function isKernValueShape(value: unknown, seen: WeakSet): value is KernValue { if ( !isPlainRecord(value) || (value as { [KERN_VALUE_BRAND]?: true })[KERN_VALUE_BRAND] !== true || @@ -1244,6 +1403,8 @@ function isKernValue(value: unknown): value is KernValue { ) { return false; } + if (seen.has(value)) return true; + seen.add(value); switch (value.kind) { case 'null': case 'undefined': @@ -1259,13 +1420,13 @@ function isKernValue(value: unknown): value is KernValue { hasOnlyKeys(value, ['kind', 'items']) && Array.isArray(value.items) && 
!hasArrayHoles(value.items) && - value.items.every(isKernValue) + value.items.every((item) => isKernValueShape(item, seen)) ); case 'record': return ( hasOnlyKeys(value, ['kind', 'entries']) && isPlainRecord(value.entries) && - Object.values(value.entries).every(isKernValue) + Object.values(value.entries).every((entry) => isKernValueShape(entry, seen)) ); case 'function': return ( @@ -1283,38 +1444,40 @@ function isKernValue(value: unknown): value is KernValue { ); case 'class': return ( - hasOnlyKeys(value, ['kind', 'name', 'node', 'env']) && + hasOnlyKeys(value, ['kind', 'name', 'node', 'env', 'staticFields']) && typeof value.name === 'string' && isPlainRecord(value.node) && - value.env instanceof CoreRuntimeEnv + value.env instanceof CoreRuntimeEnv && + isPlainRecord(value.staticFields) && + Object.values(value.staticFields).every((entry) => isKernValueShape(entry, seen)) ); case 'instance': return ( hasOnlyKeys(value, ['kind', 'classValue', 'fields', 'initializedClasses']) && - isKernValue(value.classValue) && + isKernValueShape(value.classValue, seen) && value.classValue.kind === 'class' && isPlainRecord(value.fields) && - Object.values(value.fields).every(isKernValue) && + Object.values(value.fields).every((entry) => isKernValueShape(entry, seen)) && value.initializedClasses instanceof Set ); case 'bound-method': return ( hasOnlyKeys(value, ['kind', 'name', 'receiver', 'methodNode', 'ownerClass']) && typeof value.name === 'string' && - isKernValue(value.receiver) && + isKernValueShape(value.receiver, seen) && value.receiver.kind === 'instance' && isPlainRecord(value.methodNode) && - isKernValue(value.ownerClass) && + isKernValueShape(value.ownerClass, seen) && value.ownerClass.kind === 'class' ); case 'super': return ( hasOnlyKeys(value, ['kind', 'receiver', 'ownerClass', 'mode']) && - isKernValue(value.receiver) && - value.receiver.kind === 'instance' && - isKernValue(value.ownerClass) && + isKernValueShape(value.receiver, seen) && + (value.receiver.kind === 
'instance' || value.receiver.kind === 'class') && + isKernValueShape(value.ownerClass, seen) && value.ownerClass.kind === 'class' && - (value.mode === 'constructor' || value.mode === 'method') + (value.mode === 'constructor' || value.mode === 'method' || value.mode === 'static') ); default: return false; diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 7977b290..4d262e66 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -389,6 +389,129 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User().label()', env))).toBe('user/entity:base:Ada'); }); + test('executes static fields getters methods and inherited static receiver dispatch', () => { + const root = parse( + [ + 'class name=Base', + ' field name=count type=number static=true value={{ 1 }}', + ' field name=seed type=number static=true value={{ 2 }}', + ' getter name=label static=true returns=string', + ' handler', + ' return value="`count=${this.count}`"', + ' method name=bump static=true returns=number', + ' param name=step type=number value={{ 1 }}', + ' handler', + ' assign target="this.count" value="this.count + step"', + ' return value="this.count"', + ' method name=tag static=true returns=string', + ' handler', + ' return value="\'base\'"', + 'class name=Derived extends=Base', + ' field name=own type=number static=true value={{ this.count + 9 }}', + ' field name=fromBase type=number static=true value={{ super.seed + this.count }}', + ' method name=tag static=true returns=string', + ' handler', + ' return value="`derived/${super.tag()}/${this.own}`"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('Base.count', env))).toBe(1); + expect(toHostValue(evalCoreExpression('Derived.count', env))).toBe(1); + expect(toHostValue(evalCoreExpression('Derived.own', env))).toBe(10); + 
expect(toHostValue(evalCoreExpression('Derived.fromBase', env))).toBe(3); + expect(toHostValue(evalCoreExpression('Derived.label', env))).toBe('count=1'); + expect(toHostValue(evalCoreExpression('Derived.tag()', env))).toBe('derived/base/10'); + expect(toHostValue(evalCoreExpression('Derived.bump(4)', env))).toBe(5); + expect(toHostValue(evalCoreExpression('Derived.count', env))).toBe(5); + expect(toHostValue(evalCoreExpression('Base.count', env))).toBe(1); + }); + + test('dispatches static assignment through setters and rejects getter-only static assignment', () => { + const root = parse( + [ + 'class name=Gauge', + ' field name=_value type=number static=true value={{ 0 }}', + ' setter name=value static=true', + ' param name=next type=number', + ' handler', + ' assign target="this._value" value="next * 3"', + ' getter name=value static=true returns=number', + ' handler', + ' return value="this._value"', + 'class name=ReadOnly', + ' getter name=value static=true returns=number', + ' handler', + ' return value="1"', + 'class name=Dual', + ' field name=value type=number value={{ 2 }}', + ' field name=value type=number static=true value={{ 1 }}', + 'class name=ParentReadOnly', + ' getter name=value static=true returns=number', + ' handler', + ' return value="1"', + 'class name=ChildShadow extends=ParentReadOnly', + ' field name=value type=number static=true value={{ 2 }}', + 'fn name=setGaugeStatic returns=number', + ' handler', + ' assign target="Gauge.value" value="7"', + ' return value="Gauge.value"', + 'fn name=setReadOnlyStatic returns=number', + ' handler', + ' assign target="ReadOnly.value" value="7"', + ' return value="ReadOnly.value"', + 'fn name=setDualStatic returns=number', + ' handler', + ' assign target="Dual.value" value="8"', + ' return value="new Dual().value"', + 'fn name=setChildShadowStatic returns=number', + ' handler', + ' assign target="ChildShadow.value" value="3"', + ' return value="ChildShadow.value"', + ].join('\n'), + ); + const env = 
createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('setGaugeStatic()', env))).toBe(21); + expect(toHostValue(evalCoreExpression('setDualStatic()', env))).toBe(2); + expect(toHostValue(evalCoreExpression('Dual.value', env))).toBe(8); + expect(toHostValue(evalCoreExpression('setChildShadowStatic()', env))).toBe(3); + expect(toHostValue(evalCoreExpression('ParentReadOnly.value', env))).toBe(1); + expect(() => evalCoreExpression('setReadOnlyStatic()', env)).toThrow( + 'cannot assign getter-only static property: value', + ); + }); + + test('rejects recursive static setter assignment', () => { + const root = parse( + [ + 'class name=Loop', + ' setter name=value static=true', + ' param name=next type=number', + ' handler', + ' assign target="this.value" value="next"', + 'fn name=setLoopStatic returns=number', + ' handler', + ' assign target="Loop.value" value="5"', + ' return value="0"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('setLoopStatic()', env)).toThrow('recursive static setter assignment: Loop.value'); + }); + + test('accepts self-referential static fields as branded KERN values', () => { + const root = parse(['class name=SelfRef', ' field name=self static=true value={{ this }}'].join('\n')); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(fromHostValue(env.lookup('SelfRef'))).toBe(env.lookup('SelfRef')); + }); + test('executes derived constructors with super constructor arguments', () => { const root = parse( [ From 82c4581901ef35e085560ce0c5c3d8db01542d05 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 14:24:06 +0200 Subject: [PATCH 13/63] feat(core): enforce constructor super discipline --- packages/core/src/core-runtime/index.ts | 137 ++++++------ packages/core/src/semantic-validator.ts | 232 ++++++++++++++++++-- packages/core/tests/class-semantics.test.ts | 138 ++++++++++++ 
packages/core/tests/core-runtime.test.ts | 85 ++++++- 4 files changed, 497 insertions(+), 95 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 8fc48a60..3df3285c 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -18,6 +18,7 @@ import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; const ACTIVE_INSTANCE_SETTERS = new WeakMap>(); const ACTIVE_CLASS_SETTERS = new WeakMap>(); +const ACTIVE_CONSTRUCTORS = new WeakMap(); export type KernValue = | { kind: 'null' } @@ -84,6 +85,11 @@ export interface RuntimeParam { defaultExpr?: string; } +interface RuntimeConstructionFrame { + ownerClass: KernClassValue; + superCalled: boolean; +} + export type CoreCompletion = { kind: 'normal'; value: KernValue } | { kind: 'return'; value: KernValue }; export interface CoreRuntimeResult { @@ -375,8 +381,11 @@ function evalValueIR(node: ValueIR, env: CoreRuntimeEnv): KernValue { return kNull(); case 'undefLit': return kUndefined(); - case 'ident': - return env.lookup(node.name); + case 'ident': { + const value = env.lookup(node.name); + if (node.name === 'this' && value.kind === 'instance') guardConstructedInstanceAccess(value); + return value; + } case 'tmplLit': return kString( node.quasis.reduce((out, quasi, index) => { @@ -751,9 +760,8 @@ function initializeClassLayer( } const base = resolveBaseClass(klass); const ctor = firstRuntimeChild(klass.node, 'constructor'); - const ctorCallsSuper = Boolean(base && ctor && constructorCallsSuper(ctor)); - if (base && !ctorCallsSuper) initializeClassLayer(instance, base, [], false); - if (!ctorCallsSuper) initializeClassFields(instance, klass); + if (base && !ctor) initializeClassLayer(instance, base, [], false); + if (!base || !ctor) initializeClassFields(instance, klass); if (!ctor) { if (receivesConstructorArgs && args.length > 0) { throw new Error(`KERN core runtime class 
${klass.name} has no constructor.`); @@ -761,8 +769,14 @@ function initializeClassLayer( instance.initializedClasses.add(klass.name); return; } - callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; - if (base && ctorCallsSuper && !instance.initializedClasses.has(base.name)) { + if (base) { + withConstructionFrame(instance, klass, () => { + callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; + }); + } else { + callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; + } + if (base && !instance.initializedClasses.has(base.name)) { throw new Error(`KERN core runtime constructor ${klass.name} must call super(...).`); } instance.initializedClasses.add(klass.name); @@ -788,6 +802,7 @@ function runtimeFieldInitializerExpr(node: IRNode): string { } function evalInstanceMember(object: KernInstanceValue, property: string): KernValue { + guardConstructedInstanceAccess(object); if (Object.hasOwn(object.fields, property)) return object.fields[property] ?? 
kUndefined(); const getter = findClassMember(object.classValue, 'getter', property); if (getter) return callClassMemberBody(getter.node, getter.owner, object, []).value; @@ -808,6 +823,7 @@ function evalSuperMember(object: KernSuperValue, property: string): KernValue { const base = resolveBaseClass(object.ownerClass); if (!base) return kUndefined(); if (object.receiver.kind === 'class') return evalClassMemberFrom(base, property, object.receiver); + guardConstructedSuperAccess(object.receiver); const getter = findClassMember(base, 'getter', property); if (getter) return callClassMemberBody(getter.node, getter.owner, object.receiver, []).value; const method = findClassMember(base, 'method', property); @@ -845,6 +861,7 @@ function evalClassMemberFrom(owner: KernClassValue, property: string, receiver: } function assignInstanceMember(object: KernInstanceValue, property: string, value: KernValue): void { + guardConstructedInstanceAccess(object); const setter = findClassMember(object.classValue, 'setter', property); if (setter) { callSetterBody(object, setter.node, setter.owner, property, value); @@ -863,6 +880,7 @@ function assignSuperMember(object: KernSuperValue, property: string, value: Kern assignClassMemberFrom(base, object.receiver, property, value); return; } + guardConstructedSuperAccess(object.receiver); const setter = findClassMember(base, 'setter', property); if (setter) { callSetterBody(object.receiver, setter.node, setter.owner, property, value); @@ -964,11 +982,51 @@ function callSuperConstructor(value: KernSuperValue, args: readonly KernValue[]) } const base = resolveBaseClass(value.ownerClass); if (!base) throw new Error(`KERN core runtime class ${value.ownerClass.name} has no base class.`); + const frame = activeConstructionFrame(value.receiver); + if (!frame || frame.ownerClass !== value.ownerClass) { + throw new Error(`KERN core runtime super(...) 
is not active for constructor ${value.ownerClass.name}.`); + } + if (frame.superCalled || value.receiver.initializedClasses.has(base.name)) { + throw new Error(`KERN core runtime constructor ${value.ownerClass.name} called super(...) more than once.`); + } + frame.superCalled = true; initializeClassLayer(value.receiver, base, args, true); initializeClassFields(value.receiver, value.ownerClass); return value.receiver; } +function withConstructionFrame(instance: KernInstanceValue, ownerClass: KernClassValue, run: () => void): void { + const stack = ACTIVE_CONSTRUCTORS.get(instance) ?? []; + const frame: RuntimeConstructionFrame = { ownerClass, superCalled: false }; + stack.push(frame); + ACTIVE_CONSTRUCTORS.set(instance, stack); + try { + run(); + } finally { + stack.pop(); + if (stack.length === 0) ACTIVE_CONSTRUCTORS.delete(instance); + } +} + +function activeConstructionFrame(instance: KernInstanceValue): RuntimeConstructionFrame | undefined { + const stack = ACTIVE_CONSTRUCTORS.get(instance); + return stack?.[stack.length - 1]; +} + +function guardConstructedInstanceAccess(instance: KernInstanceValue): void { + const frame = activeConstructionFrame(instance); + if (!frame || frame.superCalled) return; + if (!resolveBaseClass(frame.ownerClass)) return; + throw new Error(`KERN core runtime cannot access this before super(...) in ${frame.ownerClass.name}.`); +} + +function guardConstructedSuperAccess(instance: KernInstanceValue): void { + const frame = activeConstructionFrame(instance); + if (!frame || frame.superCalled) return; + if (!resolveBaseClass(frame.ownerClass)) return; + throw new Error(`KERN core runtime cannot access super members before super(...) in ${frame.ownerClass.name}.`); +} + function callClassMemberBody( memberNode: IRNode, ownerClass: KernClassValue, @@ -1219,71 +1277,6 @@ function runtimeChildNodes(node: IRNode, type: string): IRNode[] { return node.children?.filter((child) => child.type === type) ?? 
[]; } -function constructorCallsSuper(node: IRNode): boolean { - return runtimeFunctionBody(node).some(statementCallsSuper); -} - -function statementCallsSuper(node: IRNode): boolean { - const rawValue = node.type === 'do' ? node.props?.value : undefined; - if (rawValue !== undefined && expressionCallsSuper(rawValue)) return true; - return (node.children ?? []).some(statementCallsSuper); -} - -function expressionCallsSuper(value: unknown): boolean { - try { - return valueIRCallsSuper(parseExpression(unwrapExpr(value, 'super expression'))); - } catch { - return false; - } -} - -function valueIRCallsSuper(value: ValueIR): boolean { - switch (value.kind) { - case 'call': - return ( - (value.callee.kind === 'ident' && value.callee.name === 'super') || - valueIRCallsSuper(value.callee) || - value.args.some(valueIRCallsSuper) - ); - case 'member': - return valueIRCallsSuper(value.object); - case 'index': - return valueIRCallsSuper(value.object) || valueIRCallsSuper(value.index); - case 'tmplLit': - return value.expressions.some(valueIRCallsSuper); - case 'arrayLit': - return value.items.some(valueIRCallsSuper); - case 'objectLit': - return value.entries.some((entry) => - 'kind' in entry ? 
valueIRCallsSuper(entry.argument) : valueIRCallsSuper(entry.value), - ); - case 'unary': - case 'await': - case 'new': - case 'spread': - case 'propagate': - return valueIRCallsSuper(value.argument); - case 'typeAssert': - case 'nonNull': - return valueIRCallsSuper(value.expression); - case 'binary': - return valueIRCallsSuper(value.left) || valueIRCallsSuper(value.right); - case 'conditional': - return valueIRCallsSuper(value.test) || valueIRCallsSuper(value.consequent) || valueIRCallsSuper(value.alternate); - case 'lambda': - return false; - case 'numLit': - case 'strLit': - case 'boolLit': - case 'nullLit': - case 'undefLit': - case 'regexLit': - case 'ident': - return false; - } - return false; -} - function runtimeChildren(node: IRNode): IRNode[] { if (node.type === 'document' || node.type === 'handler' || node.type === '__block') return node.children ?? []; return [node]; diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index f9ec1e39..da6d4e31 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -595,15 +595,8 @@ function validateClassMemberConflicts(info: ClassInfo, violations: SemanticViola function validateClassSuperUsage(info: ClassInfo, violations: SemanticViolation[]): void { const hasBase = Boolean(info.baseName); for (const ctor of info.constructors) { - const callsSuper = nodeBodyCallsSuperConstructor(ctor); - if (hasBase && !callsSuper) { - violations.push({ - rule: 'class-constructor-missing-super', - nodeType: 'constructor', - message: `Class '${info.name}' extends '${info.baseName}' but its constructor does not call \`super(...)\`. 
Derived constructors must initialize the base class explicitly.`, - line: ctor.loc?.line, - col: ctor.loc?.col, - }); + if (hasBase) { + validateDerivedConstructorDiscipline(info, ctor, violations); } if (!hasBase && nodeBodyUsesSuper(ctor)) { violations.push({ @@ -630,6 +623,204 @@ function validateClassSuperUsage(info: ClassInfo, violations: SemanticViolation[ } } +type ConstructorSuperState = 'uninit' | 'init' | 'maybe'; + +interface ConstructorDisciplineContext { + info: ClassInfo; + violations: SemanticViolation[]; + sawSuper: boolean; + emittedConditionalSuper: boolean; +} + +interface ConstructorAnalysis { + state: ConstructorSuperState; + sawSuper: boolean; +} + +function validateDerivedConstructorDiscipline(info: ClassInfo, ctor: IRNode, violations: SemanticViolation[]): void { + const ctx: ConstructorDisciplineContext = { + info, + violations, + sawSuper: false, + emittedConditionalSuper: false, + }; + const analysis = analyzeConstructorStatements(constructorBodyStatements(ctor), 'uninit', ctx); + if (analysis.state !== 'init') { + if (ctx.sawSuper) { + emitConstructorConditionalSuper(ctx, ctor); + } else { + violations.push({ + rule: 'class-constructor-missing-super', + nodeType: 'constructor', + message: `Class '${info.name}' extends '${info.baseName}' but its constructor does not call \`super(...)\`. Derived constructors must initialize the base class explicitly.`, + line: ctor.loc?.line, + col: ctor.loc?.col, + }); + } + } +} + +function analyzeConstructorStatements( + statements: readonly IRNode[], + initialState: ConstructorSuperState, + ctx: ConstructorDisciplineContext, +): ConstructorAnalysis { + let state = initialState; + let sawSuper = false; + for (let index = 0; index < statements.length; index += 1) { + const statement = statements[index]; + if (statement.type === 'else') continue; + const maybeElse = + statement.type === 'if' && statements[index + 1]?.type === 'else' ? 
statements[index + 1] : undefined; + const result = analyzeConstructorStatement(statement, maybeElse, state, ctx); + state = result.state; + sawSuper = sawSuper || result.sawSuper; + if (maybeElse) index += 1; + } + return { state, sawSuper }; +} + +function analyzeConstructorStatement( + statement: IRNode, + maybeElse: IRNode | undefined, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, +): ConstructorAnalysis { + if (statement.type === 'class') return { state, sawSuper: false }; + const directSuper = directSuperConstructorCall(statement); + if (directSuper) { + scanValueIRForPreSuperAccess(directSuper, state, ctx, statement); + ctx.sawSuper = true; + if (state === 'init' || state === 'maybe' || directSuper.args.some(valueIRCallsSuperConstructor)) { + emitConstructorDoubleSuper(ctx, statement); + } + if (state === 'maybe') emitConstructorConditionalSuper(ctx, statement); + return { state: 'init', sawSuper: true }; + } + if (statement.type === 'if') return analyzeConstructorIf(statement, maybeElse, state, ctx); + + const sawSuper = scanConstructorStatementExpressions(statement, state, ctx); + if (sawSuper && state === 'init') emitConstructorDoubleSuper(ctx, statement); + if (sawSuper && state !== 'init') emitConstructorConditionalSuper(ctx, statement); + return { state, sawSuper }; +} + +function analyzeConstructorIf( + statement: IRNode, + maybeElse: IRNode | undefined, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, +): ConstructorAnalysis { + const cond = expressionPropText(statement.props?.cond); + if (cond) scanExpressionForConstructorEffects(cond, state, ctx, statement); + const thenResult = analyzeConstructorStatements(statement.children ?? [], state, ctx); + const elseResult = maybeElse + ? analyzeConstructorStatements(maybeElse.children ?? 
[], state, ctx) + : { state, sawSuper: false }; + const merged = mergeConstructorStates(thenResult.state, elseResult.state); + const sawSuper = thenResult.sawSuper || elseResult.sawSuper; + if (sawSuper && merged !== 'init') emitConstructorConditionalSuper(ctx, statement); + return { state: merged, sawSuper }; +} + +function mergeConstructorStates(left: ConstructorSuperState, right: ConstructorSuperState): ConstructorSuperState { + if (left === 'init' && right === 'init') return 'init'; + if (left === 'uninit' && right === 'uninit') return 'uninit'; + return 'maybe'; +} + +function constructorBodyStatements(node: IRNode): IRNode[] { + const handler = node.children?.find((child) => child.type === 'handler'); + const body = handler ? (handler.children ?? []) : (node.children ?? []); + return body.filter((child) => child.type !== 'param' && child.type !== 'decorator'); +} + +function directSuperConstructorCall(node: IRNode): Extract | undefined { + if (node.type !== 'do') return undefined; + const text = expressionPropText(node.props?.value); + if (!text) return undefined; + try { + const value = parseExpression(text); + return value.kind === 'call' && value.callee.kind === 'ident' && value.callee.name === 'super' ? 
value : undefined; + } catch { + return undefined; + } +} + +function scanConstructorStatementExpressions( + node: IRNode, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, +): boolean { + let sawSuper = false; + walkSemanticTreeUntil(node, (candidate) => { + if (candidate !== node && candidate.type === 'class') return 'stop'; + for (const prop of BODY_EXPRESSION_PROPS) { + const text = expressionPropText(candidate.props?.[prop]); + if (!text) continue; + sawSuper = scanExpressionForConstructorEffects(text, state, ctx, candidate) || sawSuper; + } + return 'continue'; + }); + return sawSuper; +} + +function scanExpressionForConstructorEffects( + text: string, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, + node: IRNode, +): boolean { + try { + const value = parseExpression(text); + scanValueIRForPreSuperAccess(value, state, ctx, node); + const sawSuper = valueIRCallsSuperConstructor(value); + if (sawSuper) ctx.sawSuper = true; + return sawSuper; + } catch { + return false; + } +} + +function scanValueIRForPreSuperAccess( + value: ValueIR, + state: ConstructorSuperState, + ctx: ConstructorDisciplineContext, + node: IRNode, +): void { + if (state === 'init') return; + if (!valueIRUsesThisOrSuperMember(value)) return; + ctx.violations.push({ + rule: 'class-constructor-this-before-super', + nodeType: node.type, + message: `Class '${ctx.info.name}' constructor uses \`this\` or \`super\` member access before \`super(...)\`. Derived constructors must initialize the base class first.`, + line: node.loc?.line, + col: node.loc?.col, + }); +} + +function emitConstructorDoubleSuper(ctx: ConstructorDisciplineContext, node: IRNode): void { + ctx.violations.push({ + rule: 'class-constructor-double-super', + nodeType: node.type, + message: `Class '${ctx.info.name}' constructor calls \`super(...)\` more than once. 
Derived constructors may initialize the base class once.`, + line: node.loc?.line, + col: node.loc?.col, + }); +} + +function emitConstructorConditionalSuper(ctx: ConstructorDisciplineContext, node: IRNode): void { + if (ctx.emittedConditionalSuper) return; + ctx.emittedConditionalSuper = true; + ctx.violations.push({ + rule: 'class-constructor-conditional-super', + nodeType: node.type, + message: `Class '${ctx.info.name}' constructor must call \`super(...)\` definitely on every path before using derived state. Move \`super(...)\` to a straight-line statement or cover every branch.`, + line: node.loc?.line, + col: node.loc?.col, + }); +} + function validateClassInheritanceCycles( classes: readonly ClassInfo[], classByName: ReadonlyMap, @@ -783,16 +974,6 @@ function memberArity(node: IRNode): number { } } -function nodeBodyCallsSuperConstructor(node: IRNode): boolean { - return nodeBodyExpressions(node).some((expr) => { - try { - return valueIRCallsSuperConstructor(parseExpression(expr)); - } catch { - return false; - } - }); -} - function nodeBodyUsesSuper(node: IRNode): boolean { return nodeBodyExpressions(node).some((expr) => { try { @@ -833,6 +1014,19 @@ function valueIRUsesSuper(value: ValueIR): boolean { return valueIRChildren(value).some(valueIRUsesSuper); } +function valueIRUsesThisOrSuperMember(value: ValueIR): boolean { + if (value.kind === 'ident' && value.name === 'this') return true; + if ( + (value.kind === 'member' || value.kind === 'index') && + value.object.kind === 'ident' && + value.object.name === 'super' + ) { + return true; + } + if (value.kind === 'lambda') return false; + return valueIRChildren(value).some(valueIRUsesThisOrSuperMember); +} + function valueIRChildren(value: ValueIR): ValueIR[] { switch (value.kind) { case 'call': diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 0ad7c116..71b54eeb 100644 --- a/packages/core/tests/class-semantics.test.ts +++ 
b/packages/core/tests/class-semantics.test.ts @@ -181,6 +181,144 @@ describe('semantic-validator — class object model', () => { expect(violations.map((violation) => violation.rule)).toContain('class-constructor-missing-super'); }); + test('reports this and super member access before constructor super', () => { + const rules = rulesFor( + [ + 'class name=Entity', + ' method name=kind returns=string', + ' handler lang=kern', + ' return value="\'entity\'"', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' assign target="this.name" value="\'Ada\'"', + ' do value="super()"', + 'class name=Admin extends=Entity', + ' constructor', + ' handler lang=kern', + ' return value="super.kind()"', + ].join('\n'), + ); + + expect(rules.filter((rule) => rule === 'class-constructor-this-before-super')).toHaveLength(2); + }); + + test('reports double constructor super calls', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' do value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-double-super'); + }); + + test('reports direct constructor super after maybe-initialized state', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + ' do value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-double-super'); + expect(rules).toContain('class-constructor-conditional-super'); + }); + + test('reports nested constructor super inside super arguments as double super', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="super(super())"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-double-super'); + 
}); + + test('reports non-direct constructor super after initialization as double super', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' return value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-double-super'); + }); + + test('reports conditional constructor super when not every path initializes', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-conditional-super'); + }); + + test('accepts branch-complete constructor super before derived this usage', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + ' else', + ' do value="super()"', + ' assign target="this.name" value="\'Ada\'"', + ].join('\n'), + ); + + expect(rules).not.toContain('class-constructor-conditional-super'); + expect(rules).not.toContain('class-constructor-this-before-super'); + expect(rules).not.toContain('class-constructor-missing-super'); + }); + + test('reports constructor this usage in conditions before super', () => { + const rules = rulesFor( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' if cond="this.ready"', + ' do value="super()"', + ' else', + ' do value="super()"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-this-before-super'); + }); + test('reports super usage in classes without a base', () => { const violations = violationsFor( [ diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 
4d262e66..c072aff0 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -538,6 +538,22 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User("u1", "Ada").name', env))).toBe('Ada'); }); + test('initializes fields before running a base-less constructor body', () => { + const root = parse( + [ + 'class name=Plain', + ' field name=count type=number value={{ 2 }}', + ' constructor', + ' handler', + ' assign target="this.count" value="this.count + 3"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Plain().count', env))).toBe(5); + }); + test('initializes derived fields after super constructor state', () => { const root = parse( [ @@ -584,7 +600,66 @@ describe('KERN core runtime statements', () => { expect(() => evalCoreExpression('new Box(1, 2)', env)).toThrow('received too many arguments'); }); - test('detects nested constructor super calls structurally', () => { + test('requires explicit super before this access in derived constructors', () => { + const root = parse( + [ + 'class name=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + ' do value="super(id)"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User("u1")', env)).toThrow('cannot access this before super(...)'); + }); + + test('allows reading a separate initialized instance before constructor super', () => { + const root = parse( + [ + 'class name=Entity', + ' field name=id type=string value="base"', + 'class name=User extends=Entity', + ' constructor', + ' param name=other type=Entity', + ' handler', + ' let name=otherId value="other.id"', + ' 
do value="super()"', + ' assign target="this.id" value="otherId"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User(new Entity()).id', env))).toBe('base'); + }); + + test('rejects double super calls in derived constructors', () => { + const root = parse( + [ + 'class name=Entity', + 'class name=User extends=Entity', + ' constructor', + ' handler', + ' do value="super()"', + ' do value="super()"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('called super(...) more than once'); + }); + + test('missing runtime super path fails instead of auto-initializing the base', () => { const root = parse( [ 'class name=Entity', @@ -596,15 +671,17 @@ describe('KERN core runtime statements', () => { 'class name=User extends=Entity', ' constructor', ' param name=id type=string', + ' param name=ready type=boolean', ' handler', - ' if cond=true', + ' if cond=ready', ' do value="super(id)"', ].join('\n'), ); const env = createCoreRuntimeEnv(); runCoreRuntime(root, env); - expect(toHostValue(evalCoreExpression('new User("u1").id', env))).toBe('u1'); + expect(toHostValue(evalCoreExpression('new User("u1", true).id', env))).toBe('u1'); + expect(() => evalCoreExpression('new User("u2", false)', env)).toThrow('must call super(...)'); }); test('dispatches instance assignment through setters', () => { @@ -763,7 +840,7 @@ describe('KERN core runtime statements', () => { const env = createCoreRuntimeEnv(); runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User("u1")', env)).toThrow('missing required argument: id'); + expect(() => evalCoreExpression('new User("u1")', env)).toThrow('lambda expressions are not supported'); }); }); From 274396baf9ed4abedeae22098243329685bd3bf2 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 15:00:33 +0200 Subject: [PATCH 14/63] feat(core): 
enforce declared class shapes --- packages/core/src/core-runtime/index.ts | 231 +++++++++++++------- packages/core/src/semantic-validator.ts | 173 +++++++++++++++ packages/core/tests/class-semantics.test.ts | 63 ++++++ packages/core/tests/core-runtime.test.ts | 63 ++++++ 4 files changed, 451 insertions(+), 79 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 3df3285c..0f68395f 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -803,20 +803,24 @@ function runtimeFieldInitializerExpr(node: IRNode): string { function evalInstanceMember(object: KernInstanceValue, property: string): KernValue { guardConstructedInstanceAccess(object); - if (Object.hasOwn(object.fields, property)) return object.fields[property] ?? kUndefined(); - const getter = findClassMember(object.classValue, 'getter', property); - if (getter) return callClassMemberBody(getter.node, getter.owner, object, []).value; - const method = findClassMember(object.classValue, 'method', property); - if (method) { - return brandValue({ - kind: 'bound-method', - name: `${object.classValue.name}.${property}`, - receiver: object, - methodNode: method.node, - ownerClass: method.owner, - }); + const member = findReadableClassShapeMember(object.classValue, property, false); + if (!member) throw new Error(`KERN core runtime unknown instance property: ${object.classValue.name}.${property}.`); + switch (member.kind) { + case 'field': + return object.fields[property] ?? 
kUndefined(); + case 'getter': + return callClassMemberBody(member.node, member.owner, object, []).value; + case 'method': + return brandValue({ + kind: 'bound-method', + name: `${object.classValue.name}.${property}`, + receiver: object, + methodNode: member.node, + ownerClass: member.owner, + }); + case 'setter': + throw new Error(`KERN core runtime cannot read setter-only property: ${property}.`); } - return kUndefined(); } function evalSuperMember(object: KernSuperValue, property: string): KernValue { @@ -824,20 +828,24 @@ function evalSuperMember(object: KernSuperValue, property: string): KernValue { if (!base) return kUndefined(); if (object.receiver.kind === 'class') return evalClassMemberFrom(base, property, object.receiver); guardConstructedSuperAccess(object.receiver); - const getter = findClassMember(base, 'getter', property); - if (getter) return callClassMemberBody(getter.node, getter.owner, object.receiver, []).value; - const method = findClassMember(base, 'method', property); - if (method) { - return brandValue({ - kind: 'bound-method', - name: `${base.name}.${property}`, - receiver: object.receiver, - methodNode: method.node, - ownerClass: method.owner, - }); + const member = findReadableClassShapeMember(base, property, false); + if (!member) throw new Error(`KERN core runtime unknown super property: ${object.ownerClass.name}.${property}.`); + switch (member.kind) { + case 'field': + return object.receiver.fields[property] ?? kUndefined(); + case 'getter': + return callClassMemberBody(member.node, member.owner, object.receiver, []).value; + case 'method': + return brandValue({ + kind: 'bound-method', + name: `${base.name}.${property}`, + receiver: object.receiver, + methodNode: member.node, + ownerClass: member.owner, + }); + case 'setter': + throw new Error(`KERN core runtime cannot read setter-only super property: ${property}.`); } - if (Object.hasOwn(object.receiver.fields, property)) return object.receiver.fields[property] ?? 
kUndefined(); - return kUndefined(); } function evalClassMember(object: KernClassValue, property: string): KernValue { @@ -845,32 +853,42 @@ function evalClassMember(object: KernClassValue, property: string): KernValue { } function evalClassMemberFrom(owner: KernClassValue, property: string, receiver: KernClassValue): KernValue { - if (Object.hasOwn(owner.staticFields, property)) return owner.staticFields[property] ?? kUndefined(); - const getter = findOwnClassMember(owner, 'getter', property, true); - if (getter) return callStaticClassMemberBody(getter.node, getter.owner, receiver, []).value; - const method = findOwnClassMember(owner, 'method', property, true); - if (method) { - return brandValue({ - kind: 'builtin' as const, - name: `${receiver.name}.${property}`, - call: (args) => callStaticClassMemberBody(method.node, method.owner, receiver, args).value, - }); + const member = findReadableClassShapeMember(owner, property, true); + if (!member) throw new Error(`KERN core runtime unknown static property: ${receiver.name}.${property}.`); + switch (member.kind) { + case 'field': + return member.owner === receiver + ? (receiver.staticFields[property] ?? kUndefined()) + : evalClassStaticField(member.owner, receiver, property); + case 'getter': + return callStaticClassMemberBody(member.node, member.owner, receiver, []).value; + case 'method': + return brandValue({ + kind: 'builtin' as const, + name: `${receiver.name}.${property}`, + call: (args) => callStaticClassMemberBody(member.node, member.owner, receiver, args).value, + }); + case 'setter': + throw new Error(`KERN core runtime cannot read setter-only static property: ${property}.`); } - const base = resolveBaseClass(owner); - return base ? 
evalClassMemberFrom(base, property, receiver) : kUndefined(); } function assignInstanceMember(object: KernInstanceValue, property: string, value: KernValue): void { guardConstructedInstanceAccess(object); - const setter = findClassMember(object.classValue, 'setter', property); - if (setter) { - callSetterBody(object, setter.node, setter.owner, property, value); - return; - } - if (findClassMember(object.classValue, 'getter', property)) { - throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + const member = findWritableClassShapeMember(object.classValue, property, false); + if (!member) throw new Error(`KERN core runtime cannot assign undeclared instance property: ${property}.`); + switch (member.kind) { + case 'field': + object.fields[property] = value; + return; + case 'setter': + callSetterBody(object, member.node, member.owner, property, value); + return; + case 'getter': + throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + case 'method': + throw new Error(`KERN core runtime cannot assign method property: ${property}.`); } - object.fields[property] = value; } function assignSuperMember(object: KernSuperValue, property: string, value: KernValue): void { @@ -881,15 +899,20 @@ function assignSuperMember(object: KernSuperValue, property: string, value: Kern return; } guardConstructedSuperAccess(object.receiver); - const setter = findClassMember(base, 'setter', property); - if (setter) { - callSetterBody(object.receiver, setter.node, setter.owner, property, value); - return; - } - if (findClassMember(base, 'getter', property)) { - throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + const member = findWritableClassShapeMember(base, property, false); + if (!member) throw new Error(`KERN core runtime cannot assign undeclared super property: ${property}.`); + switch (member.kind) { + case 'field': + object.receiver.fields[property] = value; + return; + case 
'setter': + callSetterBody(object.receiver, member.node, member.owner, property, value); + return; + case 'getter': + throw new Error(`KERN core runtime cannot assign getter-only property: ${property}.`); + case 'method': + throw new Error(`KERN core runtime cannot assign method property: ${property}.`); } - object.receiver.fields[property] = value; } function assignClassMember(object: KernClassValue, property: string, value: KernValue): void { @@ -902,24 +925,20 @@ function assignClassMemberFrom( property: string, value: KernValue, ): void { - if (Object.hasOwn(owner.staticFields, property)) { - receiver.staticFields[property] = value; - return; - } - const setter = findOwnClassMember(owner, 'setter', property, true); - if (setter) { - callStaticSetterBody(receiver, setter.node, setter.owner, property, value); - return; - } - if (findOwnClassMember(owner, 'getter', property, true)) { - throw new Error(`KERN core runtime cannot assign getter-only static property: ${property}.`); - } - const base = resolveBaseClass(owner); - if (base) { - assignClassMemberFrom(base, receiver, property, value); - return; + const member = findWritableClassShapeMember(owner, property, true); + if (!member) throw new Error(`KERN core runtime cannot assign undeclared static property: ${property}.`); + switch (member.kind) { + case 'field': + receiver.staticFields[property] = value; + return; + case 'setter': + callStaticSetterBody(receiver, member.node, member.owner, property, value); + return; + case 'getter': + throw new Error(`KERN core runtime cannot assign getter-only static property: ${property}.`); + case 'method': + throw new Error(`KERN core runtime cannot assign static method property: ${property}.`); } - receiver.staticFields[property] = value; } function callSetterBody( @@ -1114,20 +1133,74 @@ function findOwnClassMember( return undefined; } -function findClassMember( +type RuntimeClassShapeKind = 'field' | 'getter' | 'setter' | 'method'; + +interface RuntimeClassShapeMember 
{ + kind: RuntimeClassShapeKind; + node: IRNode; + owner: KernClassValue; +} + +function findReadableClassShapeMember( klass: KernClassValue, - type: 'method' | 'getter' | 'setter', name: string, - staticOnly = false, -): { node: IRNode; owner: KernClassValue } | undefined { + staticOnly: boolean, +): RuntimeClassShapeMember | undefined { + return findClassShapeMember(klass, name, staticOnly, ['field', 'getter', 'method', 'setter']); +} + +function findWritableClassShapeMember( + klass: KernClassValue, + name: string, + staticOnly: boolean, +): RuntimeClassShapeMember | undefined { + return findClassShapeMember(klass, name, staticOnly, ['field', 'setter', 'getter', 'method']); +} + +function findClassShapeMember( + klass: KernClassValue, + name: string, + staticOnly: boolean, + precedence: readonly RuntimeClassShapeKind[], +): RuntimeClassShapeMember | undefined { + for (const kind of precedence) { + const member = + kind === 'field' + ? findOwnClassField(klass, name, staticOnly) + : findOwnClassMethodShapeMember(klass, kind, name, staticOnly); + if (member) return member; + } + const base = resolveBaseClass(klass); + return base ? findClassShapeMember(base, name, staticOnly, precedence) : undefined; +} + +function findOwnClassMethodShapeMember( + klass: KernClassValue, + kind: 'getter' | 'setter' | 'method', + name: string, + staticOnly: boolean, +): RuntimeClassShapeMember | undefined { + const member = findOwnClassMember(klass, kind, name, staticOnly); + return member ? { kind, node: member.node, owner: member.owner } : undefined; +} + +function findOwnClassField( + klass: KernClassValue, + name: string, + staticOnly: boolean, +): RuntimeClassShapeMember | undefined { for (const child of klass.node.children ?? 
[]) { - if (child.type !== type || child.props?.name !== name) continue; + if (child.type !== 'field' || child.props?.name !== name) continue; const isStatic = child.props?.static === true || child.props?.static === 'true'; if (staticOnly !== isStatic) continue; - return { node: child, owner: klass }; + return { kind: 'field', node: child, owner: klass }; } - const base = resolveBaseClass(klass); - return base ? findClassMember(base, type, name, staticOnly) : undefined; + return undefined; +} + +function evalClassStaticField(owner: KernClassValue, receiver: KernClassValue, property: string): KernValue { + if (Object.hasOwn(receiver.staticFields, property)) return receiver.staticFields[property] ?? kUndefined(); + return owner.staticFields[property] ?? kUndefined(); } function resolveBaseClass(klass: KernClassValue): KernClassValue | undefined { diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index da6d4e31..b07e6e8f 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -501,6 +501,7 @@ function validateClassGraph(root: IRNode, violations: SemanticViolation[]): void validateClassInheritanceCycles(classes, classByName, violations); validateClassOverrides(classes, classByName, violations); + validateClassShapeUsage(classes, classByName, violations); } function collectClassInfos(root: IRNode): ClassInfo[] { @@ -888,6 +889,178 @@ function validateClassOverrides( } } +type ClassShapeAccessKind = 'read' | 'write'; + +function validateClassShapeUsage( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + for (const info of classes) { + for (const ctor of info.constructors) validateClassShapeNode(info, ctor, false, classByName, violations); + for (const member of info.members) + validateClassShapeNode(info, member.node, member.static, classByName, violations); + } +} + +function validateClassShapeNode( + info: ClassInfo, + 
node: IRNode, + staticContext: boolean, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + walkSemanticTreeUntil(node, (candidate) => { + if (candidate !== node && candidate.type === 'class') return 'stop'; + if (candidate.type === 'assign') { + const target = expressionPropText(candidate.props?.target); + if (target && validateClassShapeTarget(info, candidate, target, staticContext, classByName, violations)) { + const value = expressionPropText(candidate.props?.value); + if (value) validateClassShapeExpression(info, candidate, value, staticContext, classByName, violations); + return 'continue'; + } + } + for (const prop of BODY_EXPRESSION_PROPS) { + const text = expressionPropText(candidate.props?.[prop]); + if (!text) continue; + validateClassShapeExpression(info, candidate, text, staticContext, classByName, violations); + } + return 'continue'; + }); +} + +function validateClassShapeTarget( + info: ClassInfo, + node: IRNode, + text: string, + staticContext: boolean, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): boolean { + try { + const value = parseExpression(text); + if (value.kind !== 'member') return false; + if (value.object.kind !== 'ident' || (value.object.name !== 'this' && value.object.name !== 'super')) return false; + validateClassShapeAccess( + info, + node, + value.object.name, + value.property, + 'write', + staticContext, + classByName, + violations, + ); + return true; + } catch { + return false; + } +} + +function validateClassShapeExpression( + info: ClassInfo, + node: IRNode, + text: string, + staticContext: boolean, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + try { + validateClassShapeValueIR(info, node, parseExpression(text), staticContext, classByName, violations); + } catch { + return; + } +} + +function validateClassShapeValueIR( + info: ClassInfo, + node: IRNode, + value: ValueIR, + staticContext: boolean, + classByName: ReadonlyMap, + violations: 
SemanticViolation[], +): void { + if (value.kind === 'member' && value.object.kind === 'ident') { + if (value.object.name === 'this' || value.object.name === 'super') { + validateClassShapeAccess( + info, + node, + value.object.name, + value.property, + 'read', + staticContext, + classByName, + violations, + ); + } + } + for (const child of valueIRChildren(value)) { + validateClassShapeValueIR(info, node, child, staticContext, classByName, violations); + } +} + +function validateClassShapeAccess( + info: ClassInfo, + node: IRNode, + receiver: 'this' | 'super', + property: string, + accessKind: ClassShapeAccessKind, + staticContext: boolean, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + const start = receiver === 'super' ? (info.baseName ? classByName.get(info.baseName) : undefined) : info; + if (!start) return; + const member = findClassShapeMember(start, property, staticContext, classByName, accessKind); + if (!member) { + violations.push({ + rule: 'class-member-undeclared', + nodeType: node.type, + message: `Class '${info.name}' ${receiver}.${property} is not declared on the ${staticContext ? 'static' : 'instance'} class shape.`, + line: node.loc?.line, + col: node.loc?.col, + }); + return; + } + if (accessKind === 'read' && member.kind === 'setter') { + violations.push({ + rule: 'class-member-read-not-readable', + nodeType: node.type, + message: `Class '${info.name}' reads setter-only ${receiver}.${property}. Add a getter or read a declared field.`, + line: node.loc?.line, + col: node.loc?.col, + }); + } + if (accessKind === 'write' && (member.kind === 'getter' || member.kind === 'method')) { + violations.push({ + rule: 'class-member-write-not-writable', + nodeType: node.type, + message: `Class '${info.name}' writes non-writable ${receiver}.${property}. 
Declare a field or setter for writes.`, + line: node.loc?.line, + col: node.loc?.col, + }); + } +} + +function findClassShapeMember( + info: ClassInfo, + property: string, + staticContext: boolean, + classByName: ReadonlyMap, + accessKind: ClassShapeAccessKind, +): ClassMemberInfo | undefined { + const precedence: readonly ClassMemberKind[] = + accessKind === 'read' ? ['field', 'getter', 'method', 'setter'] : ['field', 'setter', 'getter', 'method']; + for (const kind of precedence) { + const found = info.members.find( + (member) => member.name === property && member.static === staticContext && member.kind === kind, + ); + if (found) return found; + } + const base = info.baseName ? classByName.get(info.baseName) : undefined; + return base ? findClassShapeMember(base, property, staticContext, classByName, accessKind) : undefined; +} + function normalizedCycleKey(cycleNames: readonly string[]): string { if (cycleNames.length === 0) return ''; let best = cycleNames.join('\0'); diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 71b54eeb..50cb7d0b 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -223,6 +223,7 @@ describe('semantic-validator — class object model', () => { [ 'class name=Entity', 'class name=User extends=Entity', + ' field name=name type=string', ' constructor', ' param name=ready type=boolean', ' handler lang=kern', @@ -302,6 +303,68 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-constructor-missing-super'); }); + test('reports undeclared this and super class-shape member access', () => { + const rules = rulesFor( + [ + 'class name=Base', + ' field name=known type=number', + 'class name=User extends=Base', + ' field name=own type=number', + ' method name=readMissing returns=number', + ' handler lang=kern', + ' return value="this.missing"', + ' method name=writeMissing returns=void', + ' 
handler lang=kern', + ' assign target="this.missing" value=1', + ' method name=readMissingSuper returns=number', + ' handler lang=kern', + ' return value="super.missing"', + ].join('\n'), + ); + + expect(rules.filter((rule) => rule === 'class-member-undeclared')).toHaveLength(3); + }); + + test('reports static and instance shape mismatches for this access', () => { + const rules = rulesFor( + [ + 'class name=Shape', + ' field name=instanceOnly type=number', + ' field name=staticOnly type=number static=true', + ' method name=badInstance returns=number', + ' handler lang=kern', + ' return value="this.staticOnly"', + ' method name=badStatic static=true returns=number', + ' handler lang=kern', + ' return value="this.instanceOnly"', + ].join('\n'), + ); + + expect(rules.filter((rule) => rule === 'class-member-undeclared')).toHaveLength(2); + }); + + test('reports non-readable and non-writable class-shape members', () => { + const rules = rulesFor( + [ + 'class name=Access', + ' setter name=writeOnly', + ' param name=value type=number', + ' handler lang=kern', + ' do value=value', + ' getter name=readOnly returns=number', + ' handler lang=kern', + ' return value=1', + ' method name=run returns=number', + ' handler lang=kern', + ' assign target="this.readOnly" value=2', + ' return value="this.writeOnly"', + ].join('\n'), + ); + + expect(rules).toContain('class-member-read-not-readable'); + expect(rules).toContain('class-member-write-not-writable'); + }); + test('reports constructor this usage in conditions before super', () => { const rules = rulesFor( [ diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index c072aff0..8c9cbd17 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -772,6 +772,69 @@ describe('KERN core runtime statements', () => { expect(() => evalCoreExpression('setReadOnly()', env)).toThrow('cannot assign getter-only property: value'); }); + test('rejects 
undeclared instance and super property reads and writes', () => { + const root = parse( + [ + 'class name=Base', + ' field name=known type=number value={{ 1 }}', + 'class name=Derived extends=Base', + ' method name=readMissingSuper returns=number', + ' handler', + ' return value="super.missing"', + ' method name=writeMissingSuper returns=number', + ' handler', + ' assign target="super.missing" value="2"', + ' return value="this.known"', + 'fn name=readMissing returns=number', + ' handler', + ' let name=d value="new Derived()"', + ' return value="d.missing"', + 'fn name=writeMissing returns=number', + ' handler', + ' let name=d value="new Derived()"', + ' assign target="d.missing" value="2"', + ' return value="d.known"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('readMissing()', env)).toThrow('unknown instance property'); + expect(() => evalCoreExpression('writeMissing()', env)).toThrow('undeclared instance property'); + expect(() => evalCoreExpression('new Derived().readMissingSuper()', env)).toThrow('unknown super property'); + expect(() => evalCoreExpression('new Derived().writeMissingSuper()', env)).toThrow('undeclared super property'); + }); + + test('rejects undeclared static property reads and writes', () => { + const root = parse( + [ + 'class name=Closed', + ' field name=known type=number static=true value={{ 1 }}', + 'fn name=writeMissingStatic returns=number', + ' handler', + ' assign target="Closed.missing" value="2"', + ' return value="Closed.known"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('Closed.missing', env)).toThrow('unknown static property'); + expect(() => evalCoreExpression('writeMissingStatic()', env)).toThrow('undeclared static property'); + }); + + test('keeps records open while class instances are shape-checked', () => { + const result = runCoreRuntime( + handler([ + { type: 
'let', props: { name: 'r', value: '{ a: 1 }' } }, + { type: 'assign', props: { target: 'r.b', value: '2' } }, + { type: 'return', props: { value: 'r.b' } }, + ]), + ); + + expect(toHostValue(result.completion.value)).toBe(2); + }); + test('rejects recursive setter assignment', () => { const root = parse( [ From 4decdf4c3a2788f00fbaa19e3d89d8fc7f35a5e9 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 15:50:07 +0200 Subject: [PATCH 15/63] feat(core): expose class semantic facts --- packages/core/src/index.ts | 15 +- packages/core/src/semantic-substrate.ts | 32 +++ packages/core/src/semantic-validator.ts | 220 +++++++++++++++++- .../core/tests/semantic-substrate.test.ts | 192 +++++++++++++++ 4 files changed, 450 insertions(+), 9 deletions(-) diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 5602ae6c..309fb323 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -447,6 +447,7 @@ export type { KernSemanticSubstrateSource, KernSemanticSubstrateTarget, KernSemanticSupport, + KernSemanticValidationSummary, } from './semantic-substrate.js'; export { buildKernSemanticSubstrate, @@ -454,8 +455,18 @@ export { semanticPrimitiveSupportSummary, } from './semantic-substrate.js'; // Semantic validation -export type { SemanticViolation } from './semantic-validator.js'; -export { validateSemantics } from './semantic-validator.js'; +export type { + ClassSemanticClassFact, + ClassSemanticFacts, + ClassSemanticInheritanceEdge, + ClassSemanticLocation, + ClassSemanticMemberFact, + ClassSemanticMemberKind, + ClassSemanticOverrideFact, + ClassSemanticOverrideStatus, + SemanticViolation, +} from './semantic-validator.js'; +export { collectClassSemanticFacts, validateClassSemantics, validateSemantics } from './semantic-validator.js'; export type { ShadowAnalyzeOptions, ShadowDiagnostic } from './shadow-analyzer.js'; export { analyzeShadow } from './shadow-analyzer.js'; export type { SourceMapV3 } from './source-map.js'; diff --git 
a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index 98c12293..cccc594e 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -9,6 +9,13 @@ import { import { CORE_TYPE_CONTRACTS, type CoreOperationReturns, contractToGraphEdges } from './core-contracts/index.js'; import type { NodeContract } from './ir/semantics/index.js'; import { snapshotRegistry } from './ir/semantics/index.js'; +import { + type ClassSemanticFacts, + collectClassSemanticFacts, + type SemanticViolation, + validateClassSemantics, +} from './semantic-validator.js'; +import type { IRNode } from './types.js'; export type KernSemanticSubstrateSource = 'codegen-from-ts' | 'native-kern'; export type KernSemanticSubstrateTarget = PortableLogicTarget; @@ -64,6 +71,11 @@ export interface KernSemanticIrContract { readonly fixtureCount: number; } +export interface KernSemanticValidationSummary { + readonly total: number; + readonly byRule: Readonly>; +} + export interface KernSemanticSubstrate { readonly schemaVersion: 1; readonly generatedBy: 'kern-semantic-substrate'; @@ -79,11 +91,15 @@ export interface KernSemanticSubstrate { readonly portablePrimitives: readonly KernSemanticPrimitive[]; readonly stdlibOperations: readonly KernSemanticStdlibOperation[]; readonly irContracts: readonly KernSemanticIrContract[]; + readonly classFacts?: ClassSemanticFacts; + readonly classValidationSummary?: KernSemanticValidationSummary; } export interface BuildKernSemanticSubstrateOptions { readonly source?: KernSemanticSubstrateSource; readonly irContracts?: ReadonlyMap; + readonly documentClasses?: IRNode | readonly IRNode[]; + readonly includeClassValidationSummary?: boolean; } export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { @@ -132,6 +148,10 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp fixtureCount: contract.fixtureCount, })) : 
[], + ...(options.documentClasses ? { classFacts: collectClassSemanticFacts(options.documentClasses) } : {}), + ...(options.documentClasses && options.includeClassValidationSummary + ? { classValidationSummary: semanticValidationSummary(options.documentClasses) } + : {}), }; } @@ -212,6 +232,18 @@ function normalizeReturns(returns: CoreOperationReturns): readonly string[] { return typeof returns === 'string' ? [returns] : [...returns]; } +function semanticValidationSummary(root: IRNode | readonly IRNode[]): KernSemanticValidationSummary { + return summarizeSemanticViolations(validateClassSemantics(root)); +} + +function summarizeSemanticViolations(violations: readonly SemanticViolation[]): KernSemanticValidationSummary { + const byRule: Record = {}; + for (const violation of violations) { + byRule[violation.rule] = (byRule[violation.rule] ?? 0) + 1; + } + return { total: violations.length, byRule }; +} + const KERN_PRIMITIVE_NAMES: Record = { 'collection.has': 'includes', 'collection.count': 'count', diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index b07e6e8f..397309e3 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -30,6 +30,65 @@ export interface SemanticViolation { col?: number; } +export type ClassSemanticMemberKind = 'field' | 'method' | 'getter' | 'setter'; + +export type ClassSemanticOverrideStatus = 'compatible' | 'kind-mismatch' | 'arity-mismatch'; + +export interface ClassSemanticLocation { + readonly line: number; + readonly col: number; +} + +export interface ClassSemanticMemberFact { + readonly className: string; + readonly owner: string; + readonly name: string; + readonly kind: ClassSemanticMemberKind; + readonly static: boolean; + readonly arity: number; + readonly readable: boolean; + readonly writable: boolean; + readonly loc?: ClassSemanticLocation; +} + +export interface ClassSemanticClassFact { + readonly name: string; + readonly baseName?: 
string; + readonly hasConstructor: boolean; + readonly constructorCount: number; + readonly members: readonly ClassSemanticMemberFact[]; + readonly loc?: ClassSemanticLocation; +} + +export interface ClassSemanticInheritanceEdge { + readonly from: string; + readonly to: string; + readonly relation: 'extends'; + readonly resolved: boolean; + readonly builtin: boolean; +} + +export interface ClassSemanticOverrideFact { + readonly className: string; + readonly memberName: string; + readonly static: boolean; + readonly kind: ClassSemanticMemberKind; + readonly arity: number; + readonly baseClassName: string; + readonly baseKind: ClassSemanticMemberKind; + readonly baseArity: number; + readonly status: ClassSemanticOverrideStatus; + readonly loc?: ClassSemanticLocation; +} + +export interface ClassSemanticFacts { + readonly classes: readonly ClassSemanticClassFact[]; + readonly inheritanceEdges: readonly ClassSemanticInheritanceEdge[]; + readonly overrides: readonly ClassSemanticOverrideFact[]; + readonly unresolvedBases: readonly string[]; + readonly cycles: readonly (readonly string[])[]; +} + /** * Run semantic validation on an IR tree. * Returns an empty array when the tree is valid. @@ -41,6 +100,12 @@ export function validateSemantics(root: IRNode): SemanticViolation[] { return violations; } +export function validateClassSemantics(root: IRNode | readonly IRNode[]): SemanticViolation[] { + const violations: SemanticViolation[] = []; + validateClassGraphRoots(Array.isArray(root) ? root : [root], violations); + return violations; +} + // True when the *innermost* handler ancestor is opted into native body- // statement mode (`lang="kern"`). 
Body statements like `let`/`assign`/`do`/ // `if`/`try` nest freely inside that scope, so the let-parent rule has to @@ -446,6 +511,7 @@ type ClassMemberKind = 'field' | 'method' | 'getter' | 'setter'; interface ClassInfo { node: IRNode; + rootIndex: number; name: string; baseName?: string; members: ClassMemberInfo[]; @@ -454,6 +520,7 @@ interface ClassInfo { interface ClassMemberInfo { node: IRNode; + owner: string; name: string; kind: ClassMemberKind; static: boolean; @@ -479,21 +546,31 @@ const BODY_EXPRESSION_PROPS = [ ] as const; function validateClassGraph(root: IRNode, violations: SemanticViolation[]): void { - const classes = collectClassInfos(root); + validateClassGraphRoots([root], violations); +} + +function validateClassGraphRoots(roots: readonly IRNode[], violations: SemanticViolation[]): void { + const classesByRoot = roots.map((root, rootIndex) => collectClassInfos(root, rootIndex)); + const classes = classesByRoot.flat(); if (classes.length === 0) return; const classByName = new Map(); - const visibleNames = collectVisibleClassBaseNames(root); + const declaredClassNames = new Set(); for (const info of classes) { const prev = classByName.get(info.name); if (!prev) { classByName.set(info.name, info); } - visibleNames.add(info.name); + declaredClassNames.add(info.name); } + const visibleNamesByRoot = roots.map((root) => { + const visibleNames = collectVisibleClassBaseNames(root); + for (const className of declaredClassNames) visibleNames.add(className); + return visibleNames; + }); for (const info of classes) { - validateClassBaseReference(info, visibleNames, violations); + validateClassBaseReference(info, visibleNamesByRoot[info.rootIndex] ?? 
declaredClassNames, violations); validateClassConstructors(info, violations); validateClassMemberConflicts(info, violations); validateClassSuperUsage(info, violations); @@ -504,7 +581,7 @@ function validateClassGraph(root: IRNode, violations: SemanticViolation[]): void validateClassShapeUsage(classes, classByName, violations); } -function collectClassInfos(root: IRNode): ClassInfo[] { +function collectClassInfos(root: IRNode, rootIndex = 0): ClassInfo[] { const out: ClassInfo[] = []; walkSemanticTree(root, (node) => { if (node.type !== 'class') return; @@ -512,16 +589,17 @@ function collectClassInfos(root: IRNode): ClassInfo[] { if (!name) return; out.push({ node, + rootIndex, name, baseName: classBaseName(node.props?.extends), - members: collectClassMembers(node), + members: collectClassMembers(node, name), constructors: (node.children ?? []).filter((child) => child.type === 'constructor'), }); }); return out; } -function collectClassMembers(node: IRNode): ClassMemberInfo[] { +function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { const members: ClassMemberInfo[] = []; for (const child of node.children ?? []) { if (!isClassMemberNode(child)) continue; @@ -529,6 +607,7 @@ function collectClassMembers(node: IRNode): ClassMemberInfo[] { if (!name) continue; members.push({ node: child, + owner, name, kind: child.type, static: isTrueFlag(child.props?.static), @@ -538,6 +617,133 @@ function collectClassMembers(node: IRNode): ClassMemberInfo[] { return members; } +export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): ClassSemanticFacts { + const roots = Array.isArray(root) ? 
root : [root]; + const classes = roots.flatMap((candidate, rootIndex) => collectClassInfos(candidate, rootIndex)); + const classByName = new Map(); + for (const info of classes) { + if (!classByName.has(info.name)) classByName.set(info.name, info); + } + const visibleNamesByRoot = roots.map((candidate) => collectVisibleClassBaseNames(candidate)); + + const inheritanceEdges: ClassSemanticInheritanceEdge[] = []; + const unresolvedBases = new Set(); + for (const info of classes) { + if (!info.baseName) continue; + const resolved = + classByName.has(info.baseName) || (visibleNamesByRoot[info.rootIndex] ?? BUILTIN_CLASS_BASES).has(info.baseName); + const builtin = BUILTIN_CLASS_BASES.has(info.baseName); + inheritanceEdges.push({ + from: info.name, + to: info.baseName, + relation: 'extends', + resolved, + builtin, + }); + if (!resolved) unresolvedBases.add(info.baseName); + } + + return { + classes: classes.map(classSemanticFact), + inheritanceEdges, + overrides: collectClassOverrideFacts(classes, classByName), + unresolvedBases: [...unresolvedBases].sort(), + cycles: collectClassCycleFacts(classes, classByName), + }; +} + +function classSemanticFact(info: ClassInfo): ClassSemanticClassFact { + return { + name: info.name, + ...(info.baseName ? { baseName: info.baseName } : {}), + hasConstructor: info.constructors.length > 0, + constructorCount: info.constructors.length, + members: info.members.map(classMemberSemanticFact), + ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), + }; +} + +function classMemberSemanticFact(member: ClassMemberInfo): ClassSemanticMemberFact { + return { + className: member.owner, + owner: member.owner, + name: member.name, + kind: member.kind, + static: member.static, + arity: member.arity, + readable: member.kind === 'field' || member.kind === 'getter' || member.kind === 'method', + writable: member.kind === 'field' || member.kind === 'setter', + ...(member.node.loc ? 
{ loc: semanticLocation(member.node) } : {}), + }; +} + +function collectClassOverrideFacts( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, +): ClassSemanticOverrideFact[] { + const overrides: ClassSemanticOverrideFact[] = []; + for (const info of classes) { + for (const member of info.members) { + const baseMember = findBaseMember(info, member, classByName); + if (!baseMember) continue; + overrides.push({ + className: info.name, + memberName: member.name, + static: member.static, + kind: member.kind, + arity: member.arity, + baseClassName: baseMember.owner, + baseKind: baseMember.kind, + baseArity: baseMember.arity, + status: classOverrideStatus(member, baseMember), + ...(member.node.loc ? { loc: semanticLocation(member.node) } : {}), + }); + } + } + return overrides; +} + +function classOverrideStatus(member: ClassMemberInfo, baseMember: ClassMemberInfo): ClassSemanticOverrideStatus { + if (!sameOverrideKind(member, baseMember)) return 'kind-mismatch'; + if (member.kind === 'method' && baseMember.kind === 'method' && member.arity !== baseMember.arity) { + return 'arity-mismatch'; + } + return 'compatible'; +} + +function collectClassCycleFacts( + classes: readonly ClassInfo[], + classByName: ReadonlyMap, +): readonly (readonly string[])[] { + const cycles: string[][] = []; + const emitted = new Set(); + for (const info of classes) { + const path: string[] = []; + const seen = new Set(); + let current: ClassInfo | undefined = info; + while (current) { + if (seen.has(current.name)) { + const cycleStart = path.indexOf(current.name); + const cycleNames = path.slice(cycleStart); + const cycleKey = normalizedCycleKey(cycleNames); + if (!emitted.has(cycleKey)) { + emitted.add(cycleKey); + cycles.push([...cycleNames, current.name]); + } + break; + } + seen.add(current.name); + path.push(current.name); + current = current.baseName ? 
classByName.get(current.baseName) : undefined; + } + } + return cycles; +} + +function semanticLocation(node: IRNode): ClassSemanticLocation | undefined { + return node.loc ? { line: node.loc.line, col: node.loc.col } : undefined; +} + function isClassMemberNode(node: IRNode): node is IRNode & { type: ClassMemberKind } { return node.type === 'field' || node.type === 'method' || node.type === 'getter' || node.type === 'setter'; } diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index a7e44065..a51d2ccd 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -1,10 +1,16 @@ import { buildKernSemanticSubstrate, + collectClassSemanticFacts, lookupSemanticPrimitive, makeEnv, type NodeContract, semanticPrimitiveSupportSummary, } from '../src/index.js'; +import { parseDocumentWithDiagnostics } from '../src/parser.js'; + +function parseRoot(source: string) { + return parseDocumentWithDiagnostics(source).root; +} describe('KERN semantic substrate', () => { test('exports core runtime contracts as reviewable semantic operations', () => { @@ -42,6 +48,192 @@ describe('KERN semantic substrate', () => { ); }); + test('keeps document class facts opt-in for existing review consumers', () => { + const substrate = buildKernSemanticSubstrate(); + + expect(Object.keys(substrate)).toEqual([ + 'schemaVersion', + 'generatedBy', + 'source', + 'coreTypes', + 'coreGraphEdges', + 'portablePrimitives', + 'stdlibOperations', + 'irContracts', + ]); + expect(Object.hasOwn(substrate, 'classFacts')).toBe(false); + expect(Object.hasOwn(substrate, 'classValidationSummary')).toBe(false); + }); + + test('exports document class member inheritance and override facts when requested', () => { + const root = parseRoot( + [ + 'class name=Base', + ' field name=id type=string', + ' method name=load returns=string', + ' param name=id type=string', + ' getter name=label returns=string', + 
'class name=Derived extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' method name=load returns=string', + ' param name=id type=string', + ' param name=extra type=string', + ' field name=count type=number static=true', + ' setter name=label', + ' param name=value type=string', + ].join('\n'), + ); + + const substrate = buildKernSemanticSubstrate({ documentClasses: root }); + + expect(substrate.classFacts?.inheritanceEdges).toEqual([ + { from: 'Derived', to: 'Base', relation: 'extends', resolved: true, builtin: false }, + ]); + expect(substrate.classFacts?.unresolvedBases).toEqual([]); + + const derived = substrate.classFacts?.classes.find((candidate) => candidate.name === 'Derived'); + expect(derived).toEqual( + expect.objectContaining({ + name: 'Derived', + baseName: 'Base', + hasConstructor: true, + constructorCount: 1, + }), + ); + expect(derived?.members).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + owner: 'Derived', + name: 'count', + kind: 'field', + static: true, + arity: 0, + readable: true, + writable: true, + }), + expect.objectContaining({ + owner: 'Derived', + name: 'label', + kind: 'setter', + static: false, + arity: 1, + readable: false, + writable: true, + }), + ]), + ); + + expect(substrate.classFacts?.overrides).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'Derived', + memberName: 'load', + baseClassName: 'Base', + baseKind: 'method', + kind: 'method', + arity: 2, + baseArity: 1, + status: 'arity-mismatch', + }), + expect.objectContaining({ + className: 'Derived', + memberName: 'label', + baseClassName: 'Base', + baseKind: 'getter', + kind: 'setter', + status: 'compatible', + }), + ]), + ); + }); + + test('reports unresolved bases and inheritance cycles as class facts', () => { + const facts = collectClassSemanticFacts( + parseRoot( + ['class name=UsesExternal extends=ExternalBase', 'class name=A extends=B', 'class name=B extends=A'].join('\n'), + ), + ); + + 
expect(facts.unresolvedBases).toEqual(['ExternalBase']); + expect(facts.inheritanceEdges).toEqual( + expect.arrayContaining([ + { from: 'UsesExternal', to: 'ExternalBase', relation: 'extends', resolved: false, builtin: false }, + { from: 'A', to: 'B', relation: 'extends', resolved: true, builtin: false }, + ]), + ); + expect(facts.cycles).toEqual([['A', 'B', 'A']]); + }); + + test('resolves imported and cross-root class bases consistently with validation', () => { + const importedFacts = collectClassSemanticFacts( + parseRoot(['import from="./base" names=ExternalBase', 'class name=UsesExternal extends=ExternalBase'].join('\n')), + ); + expect(importedFacts.unresolvedBases).toEqual([]); + expect(importedFacts.inheritanceEdges).toEqual([ + { from: 'UsesExternal', to: 'ExternalBase', relation: 'extends', resolved: true, builtin: false }, + ]); + + const importedElsewhere = collectClassSemanticFacts([ + parseRoot('import from="./base" names=ExternalBase'), + parseRoot('class name=Leaky extends=ExternalBase'), + ]); + expect(importedElsewhere.unresolvedBases).toEqual(['ExternalBase']); + expect(importedElsewhere.inheritanceEdges).toEqual([ + { from: 'Leaky', to: 'ExternalBase', relation: 'extends', resolved: false, builtin: false }, + ]); + + const baseRoot = parseRoot('class name=Base'); + const childRoot = parseRoot('class name=Child extends=Base'); + const substrate = buildKernSemanticSubstrate({ + documentClasses: [baseRoot, childRoot], + includeClassValidationSummary: true, + }); + + expect(substrate.classFacts?.inheritanceEdges).toEqual([ + { from: 'Child', to: 'Base', relation: 'extends', resolved: true, builtin: false }, + ]); + expect(substrate.classValidationSummary?.byRule['class-extends-unknown']).toBeUndefined(); + + const invalidSubstrate = buildKernSemanticSubstrate({ + documentClasses: [baseRoot, parseRoot('class name=Broken extends=Missing')], + includeClassValidationSummary: true, + }); + 
expect(invalidSubstrate.classValidationSummary?.byRule['class-extends-unknown']).toBe(1); + }); + + test('can summarize class validation rules alongside class facts', () => { + const root = parseRoot( + [ + 'class name=Base', + 'class name=Bad extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + 'machine name=Flow', + ' transition name=go from=Missing to=Missing', + ].join('\n'), + ); + + const substrate = buildKernSemanticSubstrate({ + documentClasses: root, + includeClassValidationSummary: true, + }); + + expect(substrate.classFacts?.classes.find((candidate) => candidate.name === 'Bad')?.constructorCount).toBe(2); + expect(substrate.classValidationSummary?.total).toBeGreaterThan(0); + expect(substrate.classValidationSummary?.byRule).toEqual( + expect.objectContaining({ + 'class-single-constructor-only': 1, + }), + ); + expect(substrate.classValidationSummary?.byRule['machine-transition-from']).toBeUndefined(); + }); + test('exports portable review primitives as stable query objects', () => { const substrate = buildKernSemanticSubstrate(); const clamp = lookupSemanticPrimitive(substrate, 'number.clamp'); From 6ea95d18fefc0a34f8855ca53a92b4c6b04be184 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 16:24:51 +0200 Subject: [PATCH 16/63] test(core): expand class object conformance --- examples/native-test/conformance-classes.kern | 63 +++++++++++++++++++ .../native-test/conformance-classes.test.kern | 19 ++++++ packages/test/src/index.ts | 24 ++++--- 3 files changed, 96 insertions(+), 10 deletions(-) diff --git a/examples/native-test/conformance-classes.kern b/examples/native-test/conformance-classes.kern index a1e84ff1..3a5dffad 100644 --- a/examples/native-test/conformance-classes.kern +++ b/examples/native-test/conformance-classes.kern @@ -1,9 +1,16 @@ class name=UserDirectory export=true field name=items type="object[]" private=true value={{ [] }} + field 
name=kind type=string static=true value={{ "directory" }} + field name=labelStore type=string static=true value={{ "UserDirectory" }} + field name=empty type=object static=true value={{ new UserDirectory([]) }} constructor param name=initial type="object[]" value={{ [] }} handler assign target="this.items" value="initial" + method name=make static=true returns=object + param name=users type="object[]" + handler + return value="new UserDirectory(users)" method name=list returns="object[]" handler return value="this.items" @@ -16,6 +23,41 @@ class name=UserDirectory export=true getter name=count returns=number handler return value="this.items.length" + getter name=label static=true returns=string + handler + return value="this.labelStore" + setter name=label static=true + param name=next type=string + handler + assign target="this.labelStore" value="next" + setter name=users + param name=next type="object[]" + handler + assign target="this.items" value="next" + +class name=AuditedDirectory extends=UserDirectory export=true + field name=audit type=string value={{ "" }} + field name=kind type=string static=true value={{ "audited" }} + constructor + param name=initial type="object[]" value={{ [] }} + param name=audit type=string value={{ "sync" }} + handler + do value="super(initial)" + assign target="this.audit" value="audit" + method name=describe returns=string + handler + return value="`${this.audit}:${super.active().length}:${this.count}`" + getter name=summary returns=object + handler + return value="{ total: this.count, active: super.active().length, audit: this.audit }" + getter name=label static=true returns=string + handler + return value="`audited:${super.label}`" + method name=makeAudited static=true returns=object + param name=users type="object[]" + param name=audit type=string value={{ "sync" }} + handler + return value="new AuditedDirectory(users, audit)" fn name=makeDirectory returns=object param name=users type="object[]" @@ -28,4 +70,25 @@ fn 
name=activeNames returns="string[]" let name=directory value="makeDirectory(users)" return value="directory.names()" +fn name=setDirectoryLabel returns=string + param name=label type=string + handler + assign target="UserDirectory.label" value="label" + return value="UserDirectory.label" + +fn name=auditedDescription returns=string + param name=users type="object[]" + param name=audit type=string + handler + return value="AuditedDirectory.makeAudited(users, audit).describe()" + +fn name=resetAuditedCount returns=number + param name=users type="object[]" + param name=next type="object[]" + handler + let name=directory value="new AuditedDirectory(users, 'reset')" + assign target="directory.users" value="next" + return value="directory.count" + derive name=emptyDirectoryCount expr={{new UserDirectory([]).count}} +derive name=emptyAuditedSummary expr={{new AuditedDirectory([], "empty").summary}} diff --git a/examples/native-test/conformance-classes.test.kern b/examples/native-test/conformance-classes.test.kern index 29fc73f8..3e03b870 100644 --- a/examples/native-test/conformance-classes.test.kern +++ b/examples/native-test/conformance-classes.test.kern @@ -1,5 +1,6 @@ test name="Class and function conformance" target="./conformance-classes.kern" fixture name=sampleUsers value={{[{ id: "u1", name: "Ada", active: true }, { id: "u2", name: "Grace", active: false }, { id: "u3", name: "Lin", active: true }]}} + fixture name=nextUsers value={{[{ id: "u4", name: "Katherine", active: true }]}} it name="class target stays schema and semantic valid" expect no=schemaViolations @@ -10,17 +11,35 @@ test name="Class and function conformance" target="./conformance-classes.kern" expect node=class name=UserDirectory child=constructor expect node=class name=UserDirectory child=method childName=active expect node=class name=UserDirectory child=getter childName=count + expect node=class name=UserDirectory child=field childName=kind + expect node=class name=AuditedDirectory child=method 
childName=describe + expect node=class name=AuditedDirectory child=getter childName=summary expect node=fn name=activeNames child=param count=1 it name="classes execute before codegen" expect expr={{new UserDirectory(sampleUsers).count}} equals=3 expect expr={{new UserDirectory(sampleUsers).active().map((user) => user.id)}} equals={{["u1", "u3"]}} expect expr={{new UserDirectory(sampleUsers).names()}} equals={{["Ada", "Lin"]}} + expect expr={{new AuditedDirectory(sampleUsers, "sync").summary}} equals={{({ total: 3, active: 2, audit: "sync" })}} + expect expr={{new AuditedDirectory(sampleUsers, "sync").describe()}} equals={{"sync:2:3"}} + + it name="static members and inherited dispatch execute before codegen" + expect expr={{UserDirectory.kind}} equals={{"directory"}} + expect expr={{UserDirectory.label}} equals={{"UserDirectory"}} + expect expr={{UserDirectory.empty.count}} equals=0 + expect expr={{UserDirectory.make(sampleUsers).count}} equals=3 + expect expr={{AuditedDirectory.kind}} equals={{"audited"}} + expect expr={{AuditedDirectory.label}} equals={{"audited:UserDirectory"}} + expect expr={{AuditedDirectory.makeAudited(sampleUsers, "sync").describe()}} equals={{"sync:2:3"}} it name="functions compose class behavior" expect fn=makeDirectory with=sampleUsers equals={{new UserDirectory(sampleUsers)}} expect fn=activeNames with=sampleUsers equals={{["Ada", "Lin"]}} + expect fn=setDirectoryLabel with={{"Directory"}} equals={{"Directory"}} + expect fn=auditedDescription args={{[sampleUsers, "sync"]}} equals={{"sync:2:3"}} + expect fn=resetAuditedCount args={{[sampleUsers, nextUsers]}} equals=1 expect derive=emptyDirectoryCount equals=0 + expect derive=emptyAuditedSummary equals={{({ total: 0, active: 0, audit: "empty" })}} it name="classes and functions reach core codegen" expect no=codegenErrors diff --git a/packages/test/src/index.ts b/packages/test/src/index.ts index 9dbfa0ad..d521e382 100644 --- a/packages/test/src/index.ts +++ b/packages/test/src/index.ts @@ 
-2977,15 +2977,16 @@ function runtimeHandlerLines(node: IRNode, spaces = 4): string[] { return code.split('\n').map((line) => `${prefix}${line}`); } -function runtimeClassFieldInitializers(node: IRNode): string[] { +function runtimeClassFieldLines(node: IRNode): string[] | undefined { const lines: string[] = []; for (const field of getChildren(node, 'field')) { const props = getProps(field); - if (isTruthy(props.static)) continue; const name = str(props.name); - if (!isRuntimeBindingName(name)) return []; + if (!isRuntimeBindingName(name)) return undefined; const value = exprPropToRuntimeSource(field, 'value') || rawPropToRuntimeSource(field, 'default'); - if (value) lines.push(` this.${name} = (${value});`); + if (!value) continue; + const staticKw = isTruthy(props.static) ? 'static ' : ''; + lines.push(` ${staticKw}${name} = (${value});`); } return lines; } @@ -3034,17 +3035,20 @@ function runtimeClassSetterLines(node: IRNode): string[] | undefined { function runtimeClassExpr(node: IRNode): string { const name = str(getProps(node).name); if (!isRuntimeBindingName(name)) return ''; + const baseName = str(getProps(node).extends); + if (baseName && !isRuntimeBindingName(baseName)) return ''; const ctorNode = getChildren(node, 'constructor')[0]; const ctorParams = ctorNode ? runtimeParamNames(ctorNode) : []; if (!ctorParams.every(isRuntimeBindingName)) return ''; - const fieldInitializers = runtimeClassFieldInitializers(node); - const lines = ['(class {']; - if (ctorNode || fieldInitializers.length > 0) { + const fieldLines = runtimeClassFieldLines(node); + if (!fieldLines) return ''; + const lines = [`(class ${name}${baseName ? 
` extends ${baseName}` : ''} {`]; + lines.push(...fieldLines); + if (ctorNode) { lines.push(` constructor(${ctorParams.join(', ')}) {`); - lines.push(...fieldInitializers); - if (ctorNode) lines.push(...runtimeHandlerLines(ctorNode)); + lines.push(...runtimeHandlerLines(ctorNode)); lines.push(' }'); } @@ -4429,7 +4433,7 @@ function orderRuntimeBindings(bindings: RuntimeBinding[], entryExpr: string): Ru visiting.add(name); stack.push(name); for (const dep of depsIn(binding.expr)) { - if (dep === name && binding.kind === 'fn') continue; + if (dep === name && (binding.kind === 'fn' || binding.kind === 'class')) continue; const error = visit(dep); if (error) return error; } From eebf2942154509bf790e1e1b01ad25cec02835bb Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 17:02:39 +0200 Subject: [PATCH 17/63] test(core): add object protocol negative conformance --- .../native-test/conformance-bad-cases.kern | 105 ++++++++++++++++++ .../conformance-bad-cases.test.kern | 17 +++ packages/test/src/index.ts | 73 ++++++------ packages/test/tests/native-test.test.ts | 62 +++++++++++ 4 files changed, 224 insertions(+), 33 deletions(-) diff --git a/examples/native-test/conformance-bad-cases.kern b/examples/native-test/conformance-bad-cases.kern index 412b3d20..fa25261d 100644 --- a/examples/native-test/conformance-bad-cases.kern +++ b/examples/native-test/conformance-bad-cases.kern @@ -61,3 +61,108 @@ fn name=loadRemote handler <<< return fetch(url); >>> + +class name=UnknownBase extends=MissingProtocolBase + +class name=CycleA extends=CycleB +class name=CycleB extends=CycleA + +class name=MultiCtor + constructor + handler + do value="1" + constructor + handler + do value="2" + +class name=MemberConflict + field name=value type=number + method name=value returns=number + handler + return value=1 + +class name=PlainSuper + method name=kind returns=string + handler + return value="super.kind()" + +class name=ProtocolBase + field name=id type=string + method name=load 
returns=string + param name=id type=string + handler + return value=id + field name=status type=string + +class name=MissingSuper extends=ProtocolBase + constructor + handler + do value=1 + +class name=ThisBeforeSuper extends=ProtocolBase + field name=label type=string + constructor + handler + assign target="this.label" value="'early'" + do value="super('u1')" + +class name=DoubleSuper extends=ProtocolBase + constructor + handler + do value="super('u1')" + do value="super('u2')" + +class name=ConditionalSuper extends=ProtocolBase + constructor + param name=ready type=boolean + handler + if cond=ready + do value="super('u1')" + +class name=ClosedShape + field name=known type=number + method name=readMissing returns=number + handler + return value="this.missing" + method name=writeMissing returns=void + handler + assign target="this.missing" value=2 + +class name=ShapeBase + field name=known type=number + +class name=ShapeChild extends=ShapeBase + method name=readMissingSuper returns=number + handler + return value="super.missing" + +class name=StaticShape + field name=instanceOnly type=number + field name=staticOnly type=number static=true + method name=badInstance returns=number + handler + return value="this.staticOnly" + method name=badStatic returns=number static=true + handler + return value="this.instanceOnly" + +class name=AccessShape + setter name=writeOnly + param name=next type=number + handler + do value=next + getter name=readOnly returns=number + handler + return value=1 + method name=misuseAccessors returns=number + handler + assign target="this.readOnly" value=2 + return value="this.writeOnly" + +class name=OverrideShape extends=ProtocolBase + method name=load returns=string + handler + return value="'missing id'" + method name=status returns=string + handler + return value="'ok'" diff --git a/examples/native-test/conformance-bad-cases.test.kern b/examples/native-test/conformance-bad-cases.test.kern index 482f73dc..ab8b8c51 100644 --- 
a/examples/native-test/conformance-bad-cases.test.kern +++ b/examples/native-test/conformance-bad-cases.test.kern @@ -17,3 +17,20 @@ test name="Bad KERN conformance" target="./conformance-bad-cases.kern" coverage= expect has=routePathParams count=1 matches="id" expect has=effectWithoutCleanup count=1 matches="cleanup" expect has=unrecoveredAsync count=1 matches="recover" + expect has=semanticViolations matches="extends unknown base 'MissingProtocolBase'" + expect has=semanticViolations matches="Class inheritance cycle detected: CycleA -> CycleB -> CycleA" + expect has=semanticViolations matches="declares more than one constructor" + expect has=semanticViolations matches="conflicting instance member 'value'" + expect has=semanticViolations matches="uses .*super.* does not extend a base class" + expect has=semanticViolations matches="constructor does not call .*super" + expect has=semanticViolations matches="member access before .*super" + expect has=semanticViolations matches="calls .*super.* more than once" + expect has=semanticViolations matches="must call .*super.* definitely on every path" + expect has=semanticViolations matches="this.missing is not declared on the instance class shape" + expect has=semanticViolations matches="super.missing is not declared on the instance class shape" + expect has=semanticViolations matches="this.staticOnly is not declared on the instance class shape" + expect has=semanticViolations matches="this.instanceOnly is not declared on the static class shape" + expect has=semanticViolations matches="reads setter-only this.writeOnly" + expect has=semanticViolations matches="writes non-writable this.readOnly" + expect has=semanticViolations matches="overrides base field with method" + expect has=semanticViolations matches="overrides a base method with 1 parameter" diff --git a/packages/test/src/index.ts b/packages/test/src/index.ts index d521e382..21d2849b 100644 --- a/packages/test/src/index.ts +++ b/packages/test/src/index.ts @@ -6111,6 
+6111,28 @@ function nativeInvariantFindings( return { message: `Unsupported native invariant: ${propName}=${str(props.has) || str(props.no)}` }; } +function evaluateFindingsMatch( + invariant: string, + pattern: string, + findings: readonly string[], +): { passed: boolean; message?: string } { + const message = findings.join('; '); + try { + const regex = new RegExp(pattern); + return findings.some((finding) => regex.test(finding)) + ? { passed: true } + : { + passed: false, + message: `Expected ${invariant || ''} findings to match /${pattern}/, got: ${message || ''}`, + }; + } catch (error) { + return { + passed: false, + message: `Native has assertion has invalid matches regex: ${error instanceof Error ? error.message : String(error)}`, + }; + } +} + function evaluateHasInvariant( node: IRNode, target: LoadedKernDocument, @@ -6143,21 +6165,7 @@ function evaluateHasInvariant( if ('matches' in props) { const pattern = runtimePatternValue(node, 'matches') || ''; - const message = findings.join('; '); - try { - const regex = new RegExp(pattern); - return regex.test(message) - ? { passed: true } - : { - passed: false, - message: `Expected ${invariant || ''} findings to match /${pattern}/, got: ${message || ''}`, - }; - } catch (error) { - return { - passed: false, - message: `Native has assertion has invalid matches regex: ${error instanceof Error ? 
error.message : String(error)}`, - }; - } + return evaluateFindingsMatch(invariant, pattern, findings); } return { passed: true }; @@ -6167,6 +6175,23 @@ function evaluateHasInvariant( const blocking = targetBlockingMessage(target); if (blocking) return { passed: false, message: blocking }; } + + if ('matches' in props) { + const collected = nativeInvariantFindings(node, target, context); + if (collected.message) return { passed: false, message: collected.message }; + + const findings = collected.findings || []; + if (findings.length === 0) { + return { + passed: false, + message: `Expected target to have ${invariant || ''}, but none was found`, + }; + } + + const pattern = runtimePatternValue(node, 'matches') || ''; + return evaluateFindingsMatch(invariant, pattern, findings); + } + const evaluated = evaluateNoInvariant(nodeWithProps(node, { ...props, no: invariant }), target, context); if (isAssertionConfigurationFailure(evaluated.message)) { @@ -6180,24 +6205,6 @@ function evaluateHasInvariant( }; } - if ('matches' in props) { - const pattern = runtimePatternValue(node, 'matches') || ''; - try { - const regex = new RegExp(pattern); - return regex.test(evaluated.message || '') - ? { passed: true } - : { - passed: false, - message: `Expected ${invariant || ''} message to match /${pattern}/, got: ${evaluated.message || ''}`, - }; - } catch (error) { - return { - passed: false, - message: `Native has assertion has invalid matches regex: ${error instanceof Error ? 
error.message : String(error)}`, - }; - } - } - return { passed: true }; } diff --git a/packages/test/tests/native-test.test.ts b/packages/test/tests/native-test.test.ts index 40ce986b..45f79f2f 100644 --- a/packages/test/tests/native-test.test.ts +++ b/packages/test/tests/native-test.test.ts @@ -148,6 +148,68 @@ describe('native kern test runner', () => { expect(summary.results.map((result) => result.ruleId)).toEqual(['has:duplicatenames', 'has:derivecycles']); }); + test('matches positive semantic invariant assertions against all findings', () => { + writeFileSync( + join(tmpDir, 'bad-semantics.kern'), + [ + 'class name=UnknownBase extends=MissingBase', + 'class name=MultiCtor', + ' constructor', + ' handler', + ' do value=1', + ' constructor', + ' handler', + ' do value=2', + ].join('\n'), + ); + const testFile = join(tmpDir, 'bad-semantics.test.kern'); + writeFileSync( + testFile, + [ + 'test name="Bad semantic target" target="./bad-semantics.kern"', + ' it name="matches non-first semantic violation"', + ' expect has=semanticViolations matches="declares more than one constructor"', + ].join('\n'), + ); + + const summary = runNativeKernTests(testFile); + + expect(summary.failed).toBe(0); + expect(summary.passed).toBe(1); + expect(summary.results[0].ruleId).toBe('has:semanticviolations'); + }); + + test('does not match positive invariant regexes across unrelated findings', () => { + writeFileSync( + join(tmpDir, 'bad-semantics-span.kern'), + [ + 'class name=UnknownBase extends=MissingBase', + 'class name=MultiCtor', + ' constructor', + ' handler', + ' do value=1', + ' constructor', + ' handler', + ' do value=2', + ].join('\n'), + ); + const testFile = join(tmpDir, 'bad-semantics-span.test.kern'); + writeFileSync( + testFile, + [ + 'test name="Bad semantic target" target="./bad-semantics-span.kern"', + ' it name="does not span diagnostics"', + ' expect has=semanticViolations matches="MissingBase.*more than one constructor"', + ].join('\n'), + ); + + const summary 
= runNativeKernTests(testFile); + + expect(summary.failed).toBe(1); + expect(summary.results[0].ruleId).toBe('has:semanticviolations'); + expect(summary.results[0].message).toContain('findings to match'); + }); + test('fails positive invariant assertions with incorrect expected counts', () => { writeFileSync( join(tmpDir, 'bad-count.kern'), From 732a8f96ad3ac615f91ed012020357b3768b9f71 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 18:21:17 +0200 Subject: [PATCH 18/63] feat(core): add rag language contracts --- .../conformance-rag-bad-cases.kern | 16 + .../conformance-rag-bad-cases.test.kern | 17 + examples/native-test/conformance-rag.kern | 11 + .../native-test/conformance-rag.test.kern | 12 + packages/core/src/codegen-core.ts | 21 + packages/core/src/index.ts | 18 +- packages/core/src/schema.ts | 108 +++ packages/core/src/semantic-substrate.ts | 15 + packages/core/src/semantic-validator.ts | 796 ++++++++++++++++++ packages/core/src/spec.ts | 9 + packages/core/tests/rag-semantics.test.ts | 241 ++++++ packages/core/tests/schema-validation.test.ts | 43 + .../core/tests/semantic-substrate.test.ts | 55 ++ 13 files changed, 1361 insertions(+), 1 deletion(-) create mode 100644 examples/native-test/conformance-rag-bad-cases.kern create mode 100644 examples/native-test/conformance-rag-bad-cases.test.kern create mode 100644 examples/native-test/conformance-rag.kern create mode 100644 examples/native-test/conformance-rag.test.kern create mode 100644 packages/core/tests/rag-semantics.test.ts diff --git a/examples/native-test/conformance-rag-bad-cases.kern b/examples/native-test/conformance-rag-bad-cases.kern new file mode 100644 index 00000000..e989e2e8 --- /dev/null +++ b/examples/native-test/conformance-rag-bad-cases.kern @@ -0,0 +1,16 @@ +corpus name=Docs + source name=manuals uri="./docs/**/*.md" + chunking source=missing strategy=semantic maxTokens=64 overlap=64 + +embed name=BadEmbedding corpus=Missing dims=0 +embed name=OtherEmbedding corpus=Docs + +corpus 
name=OtherDocs + +retriever name=BadRetriever corpus=Missing embed=MissingEmbed topK=0 minScore=1.1 +retriever name=MismatchRetriever corpus=OtherDocs embed=OtherEmbedding + +rag name=BadRag retriever=MissingRetriever citations=true + +grounding rag=MissingRag maxContext=0 +ragEval rag=MissingRag threshold=1.1 diff --git a/examples/native-test/conformance-rag-bad-cases.test.kern b/examples/native-test/conformance-rag-bad-cases.test.kern new file mode 100644 index 00000000..c6fb8223 --- /dev/null +++ b/examples/native-test/conformance-rag-bad-cases.test.kern @@ -0,0 +1,17 @@ +test name="Bad RAG conformance" target="./conformance-rag-bad-cases.kern" coverage=false + it name="rag semantic assertions prove detector coverage" + expect has=semanticViolations matches="RAG chunking references unknown source 'missing'" + expect has=semanticViolations matches="RAG chunking overlap must be smaller than maxTokens" + expect has=semanticViolations matches="RAG embed 'BadEmbedding' references unknown corpus 'Missing'" + expect has=semanticViolations matches="RAG embed dims must be a positive integer" + expect has=semanticViolations matches="RAG retriever 'BadRetriever' references unknown corpus 'Missing'" + expect has=semanticViolations matches="RAG retriever 'BadRetriever' references unknown embed 'MissingEmbed'" + expect has=semanticViolations matches="RAG retriever topK must be a positive integer" + expect has=semanticViolations matches="RAG retriever minScore must be between 0 and 1" + expect has=semanticViolations matches="RAG retriever 'MismatchRetriever' uses embed 'OtherEmbedding'" + expect has=semanticViolations matches="RAG pipeline 'BadRag' references unknown retriever 'MissingRetriever'" + expect has=semanticViolations matches="RAG pipeline 'BadRag' requires citations" + expect has=semanticViolations matches="RAG grounding references unknown rag 'MissingRag'" + expect has=semanticViolations matches="RAG grounding maxContext must be a positive integer" + expect 
has=semanticViolations matches="RAG eval references unknown rag 'MissingRag'" + expect has=semanticViolations matches="RAG eval threshold must be between 0 and 1" diff --git a/examples/native-test/conformance-rag.kern b/examples/native-test/conformance-rag.kern new file mode 100644 index 00000000..652d877f --- /dev/null +++ b/examples/native-test/conformance-rag.kern @@ -0,0 +1,11 @@ +corpus name=Docs title="Support docs" + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + chunking source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens + +embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine + +retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72 + +rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true + grounding name=StrictGrounding requireCitations=true policy=strict maxContext=6000 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 diff --git a/examples/native-test/conformance-rag.test.kern b/examples/native-test/conformance-rag.test.kern new file mode 100644 index 00000000..89b7ac9c --- /dev/null +++ b/examples/native-test/conformance-rag.test.kern @@ -0,0 +1,12 @@ +test name="RAG language conformance" target="./conformance-rag.kern" coverage=false + it name="rag declarations stay schema and semantic valid" + expect no=schemaViolations + expect no=semanticViolations + + it name="rag declarations keep their KERN shape" + expect node=corpus name=Docs child=source childName=manuals + expect node=corpus name=Docs child=chunking + expect node=embed name=DocsEmbedding prop=corpus is=Docs + expect node=retriever name=DocsSearch prop=topK is=8 + expect node=rag name=AnswerDocs child=grounding childName=StrictGrounding + expect node=rag name=AnswerDocs child=ragEval childName=Faithfulness diff --git a/packages/core/src/codegen-core.ts b/packages/core/src/codegen-core.ts index 4a3e962e..7d49750f 100644 --- 
a/packages/core/src/codegen-core.ts +++ b/packages/core/src/codegen-core.ts @@ -721,6 +721,15 @@ export const CORE_NODE_TYPES = new Set([ 'evidence', // Confidence layer 'needs', + // RAG contract layer + 'corpus', + 'source', + 'chunking', + 'embed', + 'retriever', + 'rag', + 'grounding', + 'ragEval', // Backend data layer (graduated nodes) 'model', 'column', @@ -1010,6 +1019,18 @@ export function generateCoreNode(node: IRNode, target?: string, runtime?: KernRu return []; case 'needs': return []; + // RAG declarations are semantic contracts consumed by validators, + // substrate, MCP/review tooling, and future adapters. They intentionally + // emit no JavaScript in core codegen. + case 'corpus': + case 'source': + case 'chunking': + case 'embed': + case 'retriever': + case 'rag': + case 'grounding': + case 'ragEval': + return []; // Graduated nodes — backend data layer case 'model': return generateModel(node); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 309fb323..835b6ff0 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -464,9 +464,25 @@ export type { ClassSemanticMemberKind, ClassSemanticOverrideFact, ClassSemanticOverrideStatus, + RagSemanticChunkingFact, + RagSemanticCorpusFact, + RagSemanticEmbedFact, + RagSemanticEvalFact, + RagSemanticFacts, + RagSemanticGroundingFact, + RagSemanticLocation, + RagSemanticPipelineFact, + RagSemanticRetrieverFact, + RagSemanticSourceFact, SemanticViolation, } from './semantic-validator.js'; -export { collectClassSemanticFacts, validateClassSemantics, validateSemantics } from './semantic-validator.js'; +export { + collectClassSemanticFacts, + collectRagSemanticFacts, + validateClassSemantics, + validateRagSemantics, + validateSemantics, +} from './semantic-validator.js'; export type { ShadowAnalyzeOptions, ShadowDiagnostic } from './shadow-analyzer.js'; export { analyzeShadow } from './shadow-analyzer.js'; export type { SourceMapV3 } from './source-map.js'; diff --git 
a/packages/core/src/schema.ts b/packages/core/src/schema.ts index e10b6ae4..3172e8e1 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2381,6 +2381,114 @@ export const NODE_SCHEMAS: Record = { }, }, + // ── RAG (retrieval-augmented generation) contract nodes ───────────── + + corpus: { + description: + 'RAG corpus declaration — names a document collection and its source/chunking contract without binding to a provider runtime.', + example: + 'corpus name=Docs title="Support docs"\n source name=manuals kind=local uri="./docs/**/*.md"\n chunking strategy=semantic maxTokens=600 overlap=80', + props: { + name: { required: true, kind: 'identifier' }, + title: { kind: 'string' }, + tenant: { kind: 'identifier' }, + refresh: { kind: 'string' }, + }, + allowedChildren: ['source', 'chunking'], + }, + source: { + description: 'RAG corpus source — a raw document location such as local files, S3, HTTP, or an MCP resource.', + example: 'source name=manuals kind=local uri="./docs/**/*.md" media=markdown', + props: { + name: { kind: 'identifier' }, + kind: { kind: 'identifier' }, + uri: { required: true, kind: 'string' }, + media: { kind: 'identifier' }, + acl: { kind: 'identifier' }, + }, + allowedChildren: [], + }, + chunking: { + description: + 'RAG chunking policy — describes document segmentation. Named `chunking` to avoid colliding with the collection `chunk` primitive.', + example: 'chunking corpus=Docs source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens', + props: { + name: { kind: 'identifier' }, + corpus: { kind: 'identifier' }, + source: { kind: 'identifier' }, + strategy: { kind: 'identifier' }, + maxTokens: { kind: 'number' }, + overlap: { kind: 'number' }, + unit: { kind: 'identifier' }, + }, + allowedChildren: [], + }, + embed: { + description: + 'RAG embedding contract — names the embedding model/dimension contract for a corpus. 
Provider execution is adapter-owned.', + example: 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', + props: { + name: { required: true, kind: 'identifier' }, + corpus: { required: true, kind: 'identifier' }, + model: { kind: 'string' }, + dims: { kind: 'number' }, + metric: { kind: 'identifier' }, + }, + allowedChildren: [], + }, + retriever: { + description: + 'RAG retriever declaration — binds a corpus and optional embedding contract to search policy such as topK/minScore.', + example: 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', + props: { + name: { required: true, kind: 'identifier' }, + corpus: { required: true, kind: 'identifier' }, + embed: { kind: 'identifier' }, + mode: { kind: 'identifier' }, + topK: { kind: 'number' }, + minScore: { kind: 'number' }, + rerank: { kind: 'string' }, + }, + allowedChildren: [], + }, + rag: { + description: + 'RAG pipeline declaration — connects a query/answer flow to a retriever and grounding/evaluation requirements.', + example: + 'rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true\n grounding requireCitations=true policy=strict\n ragEval metric=faithfulness threshold=0.85', + props: { + name: { required: true, kind: 'identifier' }, + retriever: { required: true, kind: 'identifier' }, + prompt: { kind: 'string' }, + answer: { kind: 'string' }, + citations: { kind: 'boolean' }, + }, + allowedChildren: ['grounding', 'ragEval'], + }, + grounding: { + description: 'RAG grounding policy — declares citation and context constraints for a RAG pipeline.', + example: 'grounding rag=AnswerDocs requireCitations=true policy=strict maxContext=6000', + props: { + name: { kind: 'identifier' }, + rag: { kind: 'identifier' }, + requireCitations: { kind: 'boolean' }, + policy: { kind: 'identifier' }, + maxContext: { kind: 'number' }, + }, + allowedChildren: [], + }, + ragEval: { + description: 'RAG evaluation contract — 
declares a metric threshold for a RAG pipeline.', + example: 'ragEval rag=AnswerDocs metric=faithfulness threshold=0.85', + props: { + name: { kind: 'identifier' }, + rag: { kind: 'identifier' }, + metric: { kind: 'identifier' }, + threshold: { kind: 'number' }, + }, + allowedChildren: [], + }, + // ── React / UI element nodes ────────────────────────────────────────── screen: { diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index cccc594e..b4bca982 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -12,8 +12,11 @@ import { snapshotRegistry } from './ir/semantics/index.js'; import { type ClassSemanticFacts, collectClassSemanticFacts, + collectRagSemanticFacts, + type RagSemanticFacts, type SemanticViolation, validateClassSemantics, + validateRagSemantics, } from './semantic-validator.js'; import type { IRNode } from './types.js'; @@ -93,6 +96,8 @@ export interface KernSemanticSubstrate { readonly irContracts: readonly KernSemanticIrContract[]; readonly classFacts?: ClassSemanticFacts; readonly classValidationSummary?: KernSemanticValidationSummary; + readonly ragFacts?: RagSemanticFacts; + readonly ragValidationSummary?: KernSemanticValidationSummary; } export interface BuildKernSemanticSubstrateOptions { @@ -100,6 +105,8 @@ export interface BuildKernSemanticSubstrateOptions { readonly irContracts?: ReadonlyMap; readonly documentClasses?: IRNode | readonly IRNode[]; readonly includeClassValidationSummary?: boolean; + readonly documentRag?: IRNode | readonly IRNode[]; + readonly includeRagValidationSummary?: boolean; } export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { @@ -152,6 +159,10 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp ...(options.documentClasses && options.includeClassValidationSummary ? 
{ classValidationSummary: semanticValidationSummary(options.documentClasses) } : {}), + ...(options.documentRag ? { ragFacts: collectRagSemanticFacts(options.documentRag) } : {}), + ...(options.documentRag && options.includeRagValidationSummary + ? { ragValidationSummary: ragValidationSummary(options.documentRag) } + : {}), }; } @@ -236,6 +247,10 @@ function semanticValidationSummary(root: IRNode | readonly IRNode[]): KernSemant return summarizeSemanticViolations(validateClassSemantics(root)); } +function ragValidationSummary(root: IRNode | readonly IRNode[]): KernSemanticValidationSummary { + return summarizeSemanticViolations(validateRagSemantics(root)); +} + function summarizeSemanticViolations(violations: readonly SemanticViolation[]): KernSemanticValidationSummary { const byRule: Record = {}; for (const violation of violations) { diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 397309e3..9d3becb2 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -89,6 +89,102 @@ export interface ClassSemanticFacts { readonly cycles: readonly (readonly string[])[]; } +export interface RagSemanticLocation { + readonly line: number; + readonly col: number; +} + +export interface RagSemanticSourceFact { + readonly name?: string; + readonly corpusName?: string; + readonly kind?: string; + readonly uri: string; + readonly media?: string; + readonly acl?: string; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticChunkingFact { + readonly name?: string; + readonly corpusName?: string; + readonly sourceName?: string; + readonly strategy?: string; + readonly maxTokens?: number; + readonly overlap?: number; + readonly unit?: string; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticEmbedFact { + readonly name: string; + readonly corpusName: string; + readonly model?: string; + readonly dims?: number; + readonly metric?: string; + readonly 
loc?: RagSemanticLocation; +} + +export interface RagSemanticCorpusFact { + readonly name: string; + readonly title?: string; + readonly tenant?: string; + readonly refresh?: string; + readonly sources: readonly RagSemanticSourceFact[]; + readonly chunking: readonly RagSemanticChunkingFact[]; + readonly embeds: readonly RagSemanticEmbedFact[]; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticRetrieverFact { + readonly name: string; + readonly corpusName: string; + readonly embedName?: string; + readonly mode?: string; + readonly topK?: number; + readonly minScore?: number; + readonly rerank?: string; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticGroundingFact { + readonly name?: string; + readonly ragName?: string; + readonly requireCitations: boolean; + readonly policy?: string; + readonly maxContext?: number; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticEvalFact { + readonly name?: string; + readonly ragName?: string; + readonly metric?: string; + readonly threshold?: number; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticPipelineFact { + readonly name: string; + readonly retrieverName: string; + readonly prompt?: string; + readonly answer?: string; + readonly citations: boolean; + readonly groundings: readonly RagSemanticGroundingFact[]; + readonly evals: readonly RagSemanticEvalFact[]; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticFacts { + readonly corpora: readonly RagSemanticCorpusFact[]; + readonly retrievers: readonly RagSemanticRetrieverFact[]; + readonly pipelines: readonly RagSemanticPipelineFact[]; + readonly unresolvedCorpusRefs: readonly string[]; + readonly unresolvedRetrieverRefs: readonly string[]; + readonly unresolvedEmbedRefs: readonly string[]; + readonly unresolvedRagRefs: readonly string[]; + readonly unresolvedSourceRefs: readonly string[]; +} + /** * Run semantic validation on an IR tree. 
* Returns an empty array when the tree is valid. @@ -96,6 +192,7 @@ export interface ClassSemanticFacts { export function validateSemantics(root: IRNode): SemanticViolation[] { const violations: SemanticViolation[] = []; validateClassGraph(root, violations); + validateRagGraph(root, violations); validateNode(root, violations, [], []); return violations; } @@ -106,6 +203,12 @@ export function validateClassSemantics(root: IRNode | readonly IRNode[]): Semant return violations; } +export function validateRagSemantics(root: IRNode | readonly IRNode[]): SemanticViolation[] { + const violations: SemanticViolation[] = []; + validateRagGraphRoots(Array.isArray(root) ? root : [root], violations); + return violations; +} + // True when the *innermost* handler ancestor is opted into native body- // statement mode (`lang="kern"`). Body statements like `let`/`assign`/`do`/ // `if`/`try` nest freely inside that scope, so the let-parent rule has to @@ -507,6 +610,699 @@ function validateNode( } } +interface RagCorpusInfo { + node: IRNode; + rootIndex: number; + name: string; +} + +interface RagSourceInfo { + node: IRNode; + rootIndex: number; + name?: string; + corpusName?: string; +} + +interface RagChunkingInfo { + node: IRNode; + rootIndex: number; + name?: string; + corpusName?: string; + sourceName?: string; +} + +interface RagEmbedInfo { + node: IRNode; + rootIndex: number; + name: string; + corpusName: string; +} + +interface RagRetrieverInfo { + node: IRNode; + rootIndex: number; + name: string; + corpusName: string; + embedName?: string; +} + +interface RagPipelineInfo { + node: IRNode; + rootIndex: number; + name: string; + retrieverName: string; +} + +interface RagGroundingInfo { + node: IRNode; + rootIndex: number; + ragName?: string; +} + +interface RagEvalInfo { + node: IRNode; + rootIndex: number; + ragName?: string; +} + +interface RagInfos { + corpora: RagCorpusInfo[]; + sources: RagSourceInfo[]; + chunking: RagChunkingInfo[]; + embeds: RagEmbedInfo[]; + 
retrievers: RagRetrieverInfo[]; + pipelines: RagPipelineInfo[]; + groundings: RagGroundingInfo[]; + evals: RagEvalInfo[]; +} + +function validateRagGraph(root: IRNode, violations: SemanticViolation[]): void { + validateRagGraphRoots([root], violations); +} + +function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticViolation[]): void { + const infos = collectRagInfosForRoots(roots); + if ( + infos.corpora.length === 0 && + infos.sources.length === 0 && + infos.chunking.length === 0 && + infos.embeds.length === 0 && + infos.retrievers.length === 0 && + infos.pipelines.length === 0 && + infos.groundings.length === 0 && + infos.evals.length === 0 + ) { + return; + } + + const corpusByName = new Map(infos.corpora.map((info) => [info.name, info])); + const embedByName = new Map(infos.embeds.map((info) => [info.name, info])); + const retrieverByName = new Map(infos.retrievers.map((info) => [info.name, info])); + const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); + const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); + const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); + + validateRagUniqueNames(infos, violations); + + for (const source of infos.sources) { + validateRagSource(source, violations); + } + for (const chunking of infos.chunking) { + validateRagChunking(chunking, corpusByName, sourceNamesByCorpus, globalSourceNames, violations); + } + for (const embed of infos.embeds) { + validateRagEmbed(embed, corpusByName, violations); + } + for (const retriever of infos.retrievers) { + validateRagRetriever(retriever, corpusByName, embedByName, violations); + } + for (const pipeline of infos.pipelines) { + validateRagPipeline(pipeline, retrieverByName, infos.groundings, violations); + } + for (const grounding of infos.groundings) { + validateRagGrounding(grounding, ragByName, violations); + } + for (const evaluation of infos.evals) { + 
validateRagEval(evaluation, ragByName, violations); + } +} + +function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { + const out: RagInfos = { + corpora: [], + sources: [], + chunking: [], + embeds: [], + retrievers: [], + pipelines: [], + groundings: [], + evals: [], + }; + for (const [rootIndex, root] of roots.entries()) { + collectRagInfos(root, rootIndex, out); + } + return out; +} + +function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { + function visit(node: IRNode, nearestCorpusName?: string, nearestRagName?: string): void { + const nextCorpusName = node.type === 'corpus' ? stringProp(node, 'name') || nearestCorpusName : nearestCorpusName; + const nextRagName = node.type === 'rag' ? stringProp(node, 'name') || nearestRagName : nearestRagName; + + if (node.type === 'corpus') { + const name = stringProp(node, 'name'); + if (name) out.corpora.push({ node, rootIndex, name }); + } else if (node.type === 'source') { + out.sources.push({ node, rootIndex, name: stringProp(node, 'name'), corpusName: nearestCorpusName }); + } else if (node.type === 'chunking') { + out.chunking.push({ + node, + rootIndex, + name: stringProp(node, 'name'), + corpusName: stringProp(node, 'corpus') || nearestCorpusName, + sourceName: stringProp(node, 'source'), + }); + } else if (node.type === 'embed') { + const name = stringProp(node, 'name'); + const corpusName = stringProp(node, 'corpus') || nearestCorpusName; + if (name && corpusName) out.embeds.push({ node, rootIndex, name, corpusName }); + } else if (node.type === 'retriever') { + const name = stringProp(node, 'name'); + const corpusName = stringProp(node, 'corpus'); + if (name && corpusName) { + out.retrievers.push({ node, rootIndex, name, corpusName, embedName: stringProp(node, 'embed') }); + } + } else if (node.type === 'rag') { + const name = stringProp(node, 'name'); + const retrieverName = stringProp(node, 'retriever'); + if (name && retrieverName) out.pipelines.push({ node, 
rootIndex, name, retrieverName }); + } else if (node.type === 'grounding') { + out.groundings.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); + } else if (node.type === 'ragEval') { + out.evals.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); + } + + for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName); + } + visit(root); +} + +function collectRagSourceNamesByCorpus(sources: readonly RagSourceInfo[]): Map> { + const out = new Map>(); + for (const source of sources) { + if (!source.corpusName || !source.name) continue; + const names = out.get(source.corpusName) ?? new Set(); + names.add(source.name); + out.set(source.corpusName, names); + } + return out; +} + +function validateRagUniqueNames(infos: RagInfos, violations: SemanticViolation[]): void { + validateRagUniqueNameSet('corpus', infos.corpora, violations); + validateRagUniqueSourceNames(infos.sources, violations); + validateRagUniqueNameSet('embed', infos.embeds, violations); + validateRagUniqueNameSet('retriever', infos.retrievers, violations); + validateRagUniqueNameSet('rag', infos.pipelines, violations); +} + +function validateRagUniqueNameSet( + kind: string, + infos: readonly { name: string; node: IRNode }[], + violations: SemanticViolation[], +): void { + const seen = new Map(); + for (const info of infos) { + const prev = seen.get(info.name); + if (prev) { + pushRagViolation( + violations, + `rag-duplicate-${kind}-name`, + info.node, + `Duplicate RAG ${kind} named '${info.name}' — first defined at line ${prev.loc?.line ?? 
'?'}.`, + ); + } else { + seen.set(info.name, info.node); + } + } +} + +function validateRagUniqueSourceNames(sources: readonly RagSourceInfo[], violations: SemanticViolation[]): void { + const seen = new Map(); + for (const source of sources) { + if (!source.name || !source.corpusName) continue; + const key = `${source.corpusName}:${source.name}`; + const prev = seen.get(key); + if (prev) { + pushRagViolation( + violations, + 'rag-duplicate-source-name', + source.node, + `Duplicate RAG source named '${source.name}' in corpus '${source.corpusName}' — first defined at line ${prev.loc?.line ?? '?'}.`, + ); + } else { + seen.set(key, source.node); + } + } +} + +function validateRagSource(source: RagSourceInfo, violations: SemanticViolation[]): void { + if (!source.corpusName) { + pushRagViolation(violations, 'rag-source-missing-corpus', source.node, 'RAG source must be nested under a corpus.'); + } + + const uri = stringProp(source.node, 'uri'); + if (uri !== undefined && uri.trim() === '') { + pushRagViolation( + violations, + 'rag-source-uri-empty', + source.node, + "RAG source 'uri=' must be a non-empty document location.", + ); + } +} + +function validateRagChunking( + chunking: RagChunkingInfo, + corpusByName: ReadonlyMap, + sourceNamesByCorpus: ReadonlyMap>, + globalSourceNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!chunking.corpusName) { + pushRagViolation( + violations, + 'rag-chunking-missing-corpus', + chunking.node, + 'RAG chunking must be nested under a corpus or declare corpus=.', + ); + } + if (chunking.corpusName && !corpusByName.has(chunking.corpusName)) { + pushRagViolation( + violations, + 'rag-chunking-unknown-corpus', + chunking.node, + `RAG chunking references unknown corpus '${chunking.corpusName}'. Declare a corpus before chunking it.`, + ); + } + + if (chunking.sourceName) { + const sourceNames = chunking.corpusName ? 
sourceNamesByCorpus.get(chunking.corpusName) : undefined; + const sourceKnown = chunking.corpusName + ? Boolean(sourceNames?.has(chunking.sourceName)) + : globalSourceNames.has(chunking.sourceName); + if (!sourceKnown) { + pushRagViolation( + violations, + 'rag-chunking-unknown-source', + chunking.node, + `RAG chunking references unknown source '${chunking.sourceName}'. Declare a named source in the same corpus.`, + ); + } + } + + const maxTokens = numberProp(chunking.node, 'maxTokens'); + if ( + invalidNumberProp(chunking.node, 'maxTokens') || + (maxTokens !== undefined && (!Number.isInteger(maxTokens) || maxTokens <= 0)) + ) { + pushRagViolation( + violations, + 'rag-chunking-max-tokens-invalid', + chunking.node, + 'RAG chunking maxTokens must be a positive integer.', + ); + } + + const overlap = numberProp(chunking.node, 'overlap'); + if ( + invalidNumberProp(chunking.node, 'overlap') || + (overlap !== undefined && (!Number.isInteger(overlap) || overlap < 0)) + ) { + pushRagViolation( + violations, + 'rag-chunking-overlap-invalid', + chunking.node, + 'RAG chunking overlap must be a non-negative integer.', + ); + } else if (overlap !== undefined && maxTokens !== undefined && overlap >= maxTokens) { + pushRagViolation( + violations, + 'rag-chunking-overlap-invalid', + chunking.node, + 'RAG chunking overlap must be smaller than maxTokens.', + ); + } +} + +function validateRagEmbed( + embed: RagEmbedInfo, + corpusByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!corpusByName.has(embed.corpusName)) { + pushRagViolation( + violations, + 'rag-embed-unknown-corpus', + embed.node, + `RAG embed '${embed.name}' references unknown corpus '${embed.corpusName}'.`, + ); + } + + const dims = numberProp(embed.node, 'dims'); + if (invalidNumberProp(embed.node, 'dims') || (dims !== undefined && (!Number.isInteger(dims) || dims <= 0))) { + pushRagViolation(violations, 'rag-embed-dims-invalid', embed.node, 'RAG embed dims must be a positive integer.'); + } +} 
+ +function validateRagRetriever( + retriever: RagRetrieverInfo, + corpusByName: ReadonlyMap, + embedByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!corpusByName.has(retriever.corpusName)) { + pushRagViolation( + violations, + 'rag-retriever-unknown-corpus', + retriever.node, + `RAG retriever '${retriever.name}' references unknown corpus '${retriever.corpusName}'.`, + ); + } + + if (retriever.embedName) { + const embed = embedByName.get(retriever.embedName); + if (!embed) { + pushRagViolation( + violations, + 'rag-retriever-unknown-embed', + retriever.node, + `RAG retriever '${retriever.name}' references unknown embed '${retriever.embedName}'.`, + ); + } else if (embed.corpusName !== retriever.corpusName) { + pushRagViolation( + violations, + 'rag-retriever-embed-corpus-mismatch', + retriever.node, + `RAG retriever '${retriever.name}' uses embed '${retriever.embedName}' for corpus '${embed.corpusName}', not '${retriever.corpusName}'.`, + ); + } + } + + const topK = numberProp(retriever.node, 'topK'); + if (invalidNumberProp(retriever.node, 'topK') || (topK !== undefined && (!Number.isInteger(topK) || topK <= 0))) { + pushRagViolation( + violations, + 'rag-retriever-topk-invalid', + retriever.node, + 'RAG retriever topK must be a positive integer.', + ); + } + + const minScore = numberProp(retriever.node, 'minScore'); + if (invalidNumberProp(retriever.node, 'minScore') || (minScore !== undefined && (minScore < 0 || minScore > 1))) { + pushRagViolation( + violations, + 'rag-retriever-minscore-invalid', + retriever.node, + 'RAG retriever minScore must be between 0 and 1.', + ); + } +} + +function validateRagPipeline( + pipeline: RagPipelineInfo, + retrieverByName: ReadonlyMap, + groundings: readonly RagGroundingInfo[], + violations: SemanticViolation[], +): void { + if (!retrieverByName.has(pipeline.retrieverName)) { + pushRagViolation( + violations, + 'rag-unknown-retriever', + pipeline.node, + `RAG pipeline '${pipeline.name}' references 
unknown retriever '${pipeline.retrieverName}'.`, + ); + } + + if (ragBooleanProp(pipeline.node, 'citations')) { + const hasCitationGrounding = groundings.some( + (grounding) => grounding.ragName === pipeline.name && ragBooleanProp(grounding.node, 'requireCitations'), + ); + if (!hasCitationGrounding) { + pushRagViolation( + violations, + 'rag-citations-require-grounding', + pipeline.node, + `RAG pipeline '${pipeline.name}' requires citations but has no grounding requireCitations=true policy.`, + ); + } + } +} + +function validateRagGrounding( + grounding: RagGroundingInfo, + ragByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!grounding.ragName) { + pushRagViolation( + violations, + 'rag-grounding-missing-rag', + grounding.node, + 'RAG grounding must be nested under a rag pipeline or declare rag=.', + ); + } + if (grounding.ragName && !ragByName.has(grounding.ragName)) { + pushRagViolation( + violations, + 'rag-grounding-unknown-rag', + grounding.node, + `RAG grounding references unknown rag '${grounding.ragName}'.`, + ); + } + + const maxContext = numberProp(grounding.node, 'maxContext'); + if ( + invalidNumberProp(grounding.node, 'maxContext') || + (maxContext !== undefined && (!Number.isInteger(maxContext) || maxContext <= 0)) + ) { + pushRagViolation( + violations, + 'rag-grounding-max-context-invalid', + grounding.node, + 'RAG grounding maxContext must be a positive integer.', + ); + } +} + +function validateRagEval( + evaluation: RagEvalInfo, + ragByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!evaluation.ragName) { + pushRagViolation( + violations, + 'rag-eval-missing-rag', + evaluation.node, + 'RAG eval must be nested under a rag pipeline or declare rag=.', + ); + } + if (evaluation.ragName && !ragByName.has(evaluation.ragName)) { + pushRagViolation( + violations, + 'rag-eval-unknown-rag', + evaluation.node, + `RAG eval references unknown rag '${evaluation.ragName}'.`, + ); + } + + const threshold = 
numberProp(evaluation.node, 'threshold'); + if ( + invalidNumberProp(evaluation.node, 'threshold') || + (threshold !== undefined && (threshold < 0 || threshold > 1)) + ) { + pushRagViolation( + violations, + 'rag-eval-threshold-invalid', + evaluation.node, + 'RAG eval threshold must be between 0 and 1.', + ); + } +} + +function pushRagViolation(violations: SemanticViolation[], rule: string, node: IRNode, message: string): void { + violations.push({ rule, nodeType: node.type, message, line: node.loc?.line, col: node.loc?.col }); +} + +export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSemanticFacts { + const roots = Array.isArray(root) ? root : [root]; + const infos = collectRagInfosForRoots(roots); + const corpusNames = new Set(infos.corpora.map((info) => info.name)); + const embedNames = new Set(infos.embeds.map((info) => info.name)); + const retrieverNames = new Set(infos.retrievers.map((info) => info.name)); + const ragNames = new Set(infos.pipelines.map((info) => info.name)); + const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); + const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); + + return { + corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), + retrievers: infos.retrievers.map(ragRetrieverFact), + pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), + unresolvedCorpusRefs: sortedUnique([ + ...infos.chunking + .map((info) => info.corpusName) + .filter((name): name is string => !!name && !corpusNames.has(name)), + ...infos.embeds.map((info) => info.corpusName).filter((name) => !corpusNames.has(name)), + ...infos.retrievers.map((info) => info.corpusName).filter((name) => !corpusNames.has(name)), + ]), + unresolvedRetrieverRefs: sortedUnique( + infos.pipelines.map((info) => info.retrieverName).filter((name) => !retrieverNames.has(name)), + ), + unresolvedEmbedRefs: sortedUnique( + 
infos.retrievers.map((info) => info.embedName).filter((name): name is string => !!name && !embedNames.has(name)), + ), + unresolvedRagRefs: sortedUnique( + [...infos.groundings.map((info) => info.ragName), ...infos.evals.map((info) => info.ragName)].filter( + (name): name is string => !!name && !ragNames.has(name), + ), + ), + unresolvedSourceRefs: sortedUnique( + infos.chunking + .filter((info) => { + if (!info.sourceName) return false; + const sourceNames = info.corpusName ? sourceNamesByCorpus.get(info.corpusName) : undefined; + return info.corpusName ? !sourceNames?.has(info.sourceName) : !globalSourceNames.has(info.sourceName); + }) + .map((info) => info.sourceName) + .filter((name): name is string => !!name), + ), + }; +} + +function ragCorpusFact(info: RagCorpusInfo, all: RagInfos): RagSemanticCorpusFact { + return { + name: info.name, + ...optionalStringFact(info.node, 'title', 'title'), + ...optionalStringFact(info.node, 'tenant', 'tenant'), + ...optionalStringFact(info.node, 'refresh', 'refresh'), + sources: all.sources.filter((source) => source.corpusName === info.name).map(ragSourceFact), + chunking: all.chunking.filter((chunking) => chunking.corpusName === info.name).map(ragChunkingFact), + embeds: all.embeds.filter((embed) => embed.corpusName === info.name).map(ragEmbedFact), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragSourceFact(info: RagSourceInfo): RagSemanticSourceFact { + return { + ...optionalStringValue('name', info.name), + ...optionalStringValue('corpusName', info.corpusName), + ...optionalStringFact(info.node, 'kind', 'kind'), + uri: stringProp(info.node, 'uri') ?? '', + ...optionalStringFact(info.node, 'media', 'media'), + ...optionalStringFact(info.node, 'acl', 'acl'), + ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), + }; +} + +function ragChunkingFact(info: RagChunkingInfo): RagSemanticChunkingFact { + return { + ...optionalStringValue('name', info.name), + ...optionalStringValue('corpusName', info.corpusName), + ...optionalStringValue('sourceName', info.sourceName), + ...optionalStringFact(info.node, 'strategy', 'strategy'), + ...optionalNumberFact(info.node, 'maxTokens', 'maxTokens'), + ...optionalNumberFact(info.node, 'overlap', 'overlap'), + ...optionalStringFact(info.node, 'unit', 'unit'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragEmbedFact(info: RagEmbedInfo): RagSemanticEmbedFact { + return { + name: info.name, + corpusName: info.corpusName, + ...optionalStringFact(info.node, 'model', 'model'), + ...optionalNumberFact(info.node, 'dims', 'dims'), + ...optionalStringFact(info.node, 'metric', 'metric'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragRetrieverFact(info: RagRetrieverInfo): RagSemanticRetrieverFact { + return { + name: info.name, + corpusName: info.corpusName, + ...optionalStringValue('embedName', info.embedName), + ...optionalStringFact(info.node, 'mode', 'mode'), + ...optionalNumberFact(info.node, 'topK', 'topK'), + ...optionalNumberFact(info.node, 'minScore', 'minScore'), + ...optionalStringFact(info.node, 'rerank', 'rerank'), + ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), + }; +} + +function ragPipelineFact( + info: RagPipelineInfo, + groundings: readonly RagGroundingInfo[], + evals: readonly RagEvalInfo[], +): RagSemanticPipelineFact { + return { + name: info.name, + retrieverName: info.retrieverName, + ...optionalStringFact(info.node, 'prompt', 'prompt'), + ...optionalStringFact(info.node, 'answer', 'answer'), + citations: ragBooleanProp(info.node, 'citations'), + groundings: groundings.filter((grounding) => grounding.ragName === info.name).map(ragGroundingFact), + evals: evals.filter((evaluation) => evaluation.ragName === info.name).map(ragEvalFact), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragGroundingFact(info: RagGroundingInfo): RagSemanticGroundingFact { + return { + ...optionalStringFact(info.node, 'name', 'name'), + ...optionalStringValue('ragName', info.ragName), + requireCitations: ragBooleanProp(info.node, 'requireCitations'), + ...optionalStringFact(info.node, 'policy', 'policy'), + ...optionalNumberFact(info.node, 'maxContext', 'maxContext'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragEvalFact(info: RagEvalInfo): RagSemanticEvalFact { + return { + ...optionalStringFact(info.node, 'name', 'name'), + ...optionalStringValue('ragName', info.ragName), + ...optionalStringFact(info.node, 'metric', 'metric'), + ...optionalNumberFact(info.node, 'threshold', 'threshold'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragLocation(node: IRNode): RagSemanticLocation | undefined { + return node.loc ? { line: node.loc.line, col: node.loc.col } : undefined; +} + +function optionalStringFact(node: IRNode, prop: string, factName: string): Record { + return optionalStringValue(factName, stringProp(node, prop)); +} + +function optionalStringValue(factName: string, value: string | undefined): Record { + return value ? 
{ [factName]: value } : {}; +} + +function optionalNumberFact(node: IRNode, prop: string, factName: string): Record { + const value = numberProp(node, prop); + return value === undefined ? {} : { [factName]: value }; +} + +function numberProp(node: IRNode, prop: string): number | undefined { + const raw = node.props?.[prop]; + if (typeof raw === 'number') return Number.isFinite(raw) ? raw : undefined; + if (typeof raw !== 'string' || raw.trim() === '') return undefined; + const value = Number(raw); + return Number.isFinite(value) ? value : undefined; +} + +function invalidNumberProp(node: IRNode, prop: string): boolean { + const raw = node.props?.[prop]; + if (raw === undefined || raw === null || raw === '') return false; + if (typeof raw === 'number') return !Number.isFinite(raw); + if (typeof raw === 'string') return raw.trim() !== '' && !Number.isFinite(Number(raw)); + return true; +} + +function ragBooleanProp(node: IRNode, prop: string): boolean { + const raw = node.props?.[prop]; + return raw === true || (typeof raw === 'string' && raw.trim().toLowerCase() === 'true'); +} + +function sortedUnique(values: readonly string[]): string[] { + return [...new Set(values)].sort(); +} + type ClassMemberKind = 'field' | 'method' | 'getter' | 'setter'; interface ClassInfo { diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index fc777c31..328fa458 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -344,6 +344,15 @@ export const NODE_TYPES = [ 'description', 'sampling', 'elicitation', + // RAG — retrieval, grounding, and evaluation contracts + 'corpus', + 'source', + 'chunking', + 'embed', + 'retriever', + 'rag', + 'grounding', + 'ragEval', 'expression-v1', ] as const; diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts new file mode 100644 index 00000000..abf23434 --- /dev/null +++ b/packages/core/tests/rag-semantics.test.ts @@ -0,0 +1,241 @@ +import { generateCoreNode, isCoreNode } 
from '../src/codegen-core.js'; +import { parseDocumentWithDiagnostics } from '../src/parser.js'; +import { validateSchema } from '../src/schema.js'; +import { collectRagSemanticFacts, validateRagSemantics, validateSemantics } from '../src/semantic-validator.js'; + +function parseRoot(source: string) { + return parseDocumentWithDiagnostics(source).root; +} + +function rulesFor(source: string): string[] { + return validateSemantics(parseRoot(source)).map((violation) => violation.rule); +} + +describe('RAG language semantics', () => { + test('registers RAG declarations as core language nodes', () => { + for (const type of ['corpus', 'source', 'chunking', 'embed', 'retriever', 'rag', 'grounding', 'ragEval']) { + expect(isCoreNode(type)).toBe(true); + expect(generateCoreNode({ type, props: {} })).toEqual([]); + } + }); + + test('accepts a minimal grounded RAG declaration graph', () => { + const source = [ + 'corpus name=Docs title="Support docs"', + ' source name=manuals kind=local uri="./docs/**/*.md" media=markdown', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens', + 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true', + ' grounding requireCitations=true policy=strict maxContext=6000', + ' ragEval metric=faithfulness threshold=0.85', + ].join('\n'); + + expect(validateSchema(parseRoot(source))).toEqual([]); + expect(validateSemantics(parseRoot(source))).toEqual([]); + }); + + test('collects RAG semantic facts for corpus retriever and pipeline contracts', () => { + const root = parseRoot( + [ + 'corpus name=Docs title="Support docs"', + ' source name=manuals kind=local uri="./docs/**/*.md" media=markdown', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens', + 'embed name=DocsEmbedding 
corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true', + ' grounding name=StrictGrounding requireCitations=true policy=strict maxContext=6000', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85', + ].join('\n'), + ); + + const facts = collectRagSemanticFacts(root); + + expect(facts.unresolvedCorpusRefs).toEqual([]); + expect(facts.unresolvedRetrieverRefs).toEqual([]); + expect(facts.corpora).toEqual([ + expect.objectContaining({ + name: 'Docs', + title: 'Support docs', + sources: [ + expect.objectContaining({ + name: 'manuals', + corpusName: 'Docs', + kind: 'local', + uri: './docs/**/*.md', + media: 'markdown', + }), + ], + chunking: [ + expect.objectContaining({ + corpusName: 'Docs', + sourceName: 'manuals', + strategy: 'semantic', + maxTokens: 600, + overlap: 80, + unit: 'tokens', + }), + ], + embeds: [ + expect.objectContaining({ + name: 'DocsEmbedding', + corpusName: 'Docs', + model: 'text-embedding-3-small', + dims: 1536, + metric: 'cosine', + }), + ], + }), + ]); + expect(facts.retrievers).toEqual([ + expect.objectContaining({ + name: 'DocsSearch', + corpusName: 'Docs', + embedName: 'DocsEmbedding', + mode: 'hybrid', + topK: 8, + minScore: 0.72, + }), + ]); + expect(facts.pipelines).toEqual([ + expect.objectContaining({ + name: 'AnswerDocs', + retrieverName: 'DocsSearch', + citations: true, + groundings: [ + expect.objectContaining({ + name: 'StrictGrounding', + ragName: 'AnswerDocs', + requireCitations: true, + policy: 'strict', + maxContext: 6000, + }), + ], + evals: [ + expect.objectContaining({ + name: 'Faithfulness', + ragName: 'AnswerDocs', + metric: 'faithfulness', + threshold: 0.85, + }), + ], + }), + ]); + }); + + test('treats explicit false RAG booleans as false', () => { + const root = parseRoot( + [ + 'corpus name=Docs', + ' source name=manuals 
uri="./docs/**/*.md"', + 'embed name=DocsEmbedding corpus=Docs', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding', + 'rag name=AnswerDocs retriever=DocsSearch citations=false', + ' grounding requireCitations=false', + ].join('\n'), + ); + + expect(validateSemantics(root)).toEqual([]); + expect(collectRagSemanticFacts(root).pipelines[0]).toEqual( + expect.objectContaining({ + citations: false, + groundings: [expect.objectContaining({ requireCitations: false })], + }), + ); + }); + + test('reports invalid RAG references and numeric contracts', () => { + const source = [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + ' chunking source=missing strategy=semantic maxTokens=64 overlap=64', + 'embed name=BadEmbedding corpus=Missing dims=0', + 'embed name=OtherEmbedding corpus=Docs', + 'corpus name=OtherDocs', + 'retriever name=BadRetriever corpus=Missing embed=MissingEmbed topK=0 minScore=1.1', + 'retriever name=MismatchRetriever corpus=OtherDocs embed=OtherEmbedding', + 'rag name=BadRag retriever=MissingRetriever citations=true', + 'grounding rag=MissingRag maxContext=0', + 'ragEval rag=MissingRag threshold=1.1', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'rag-chunking-unknown-source', + 'rag-chunking-overlap-invalid', + 'rag-embed-unknown-corpus', + 'rag-embed-dims-invalid', + 'rag-retriever-unknown-corpus', + 'rag-retriever-unknown-embed', + 'rag-retriever-topk-invalid', + 'rag-retriever-minscore-invalid', + 'rag-retriever-embed-corpus-mismatch', + 'rag-unknown-retriever', + 'rag-citations-require-grounding', + 'rag-grounding-unknown-rag', + 'rag-grounding-max-context-invalid', + 'rag-eval-unknown-rag', + 'rag-eval-threshold-invalid', + ]), + ); + }); + + test('reports disconnected and duplicate RAG declarations', () => { + const source = [ + 'corpus name=Docs', + 'corpus name=Docs', + 'source name=topLevel uri="./loose.md"', + 'corpus name=DuplicatedSources', + ' source name=manuals 
uri="./a.md"', + ' source name=manuals uri="./b.md"', + 'embed name=DocsEmbedding corpus=Docs', + 'embed name=DocsEmbedding corpus=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + 'rag name=AnswerDocs retriever=DocsSearch', + 'chunking source=manuals maxTokens=abc', + 'grounding maxContext=abc', + 'ragEval threshold=abc', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'rag-duplicate-corpus-name', + 'rag-source-missing-corpus', + 'rag-duplicate-source-name', + 'rag-duplicate-embed-name', + 'rag-duplicate-retriever-name', + 'rag-duplicate-rag-name', + 'rag-chunking-missing-corpus', + 'rag-chunking-max-tokens-invalid', + 'rag-grounding-missing-rag', + 'rag-grounding-max-context-invalid', + 'rag-eval-missing-rag', + 'rag-eval-threshold-invalid', + ]), + ); + }); + + test('requires chunking source refs to resolve inside the referenced corpus', () => { + const source = [ + 'corpus name=Docs', + 'corpus name=OtherDocs', + ' source name=manuals uri="./other/**/*.md"', + 'chunking corpus=Docs source=manuals maxTokens=100', + ].join('\n'); + + expect(rulesFor(source)).toContain('rag-chunking-unknown-source'); + expect(collectRagSemanticFacts(parseRoot(source)).unresolvedSourceRefs).toEqual(['manuals']); + }); + + test('can validate only RAG rules when consumers need a focused pass', () => { + const root = parseRoot( + ['machine name=Flow', ' transition name=go from=Missing to=Missing', 'rag name=Bad retriever=Missing'].join( + '\n', + ), + ); + + expect(validateRagSemantics(root).map((violation) => violation.rule)).toEqual(['rag-unknown-retriever']); + }); +}); diff --git a/packages/core/tests/schema-validation.test.ts b/packages/core/tests/schema-validation.test.ts index df1afe75..397b8f3c 100644 --- a/packages/core/tests/schema-validation.test.ts +++ b/packages/core/tests/schema-validation.test.ts @@ -95,6 +95,49 @@ describe('Schema Validation', 
() => { expect(v).toHaveLength(0); }); + it('passes valid RAG declarations and flags missing required graph props', () => { + const valid = validate( + [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', + 'embed name=DocsEmbedding corpus=Docs', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch', + ' grounding requireCitations=true maxContext=6000', + ' ragEval metric=faithfulness threshold=0.85', + ].join('\n'), + ); + expect(valid).toHaveLength(0); + + const missing = validate( + [ + 'corpus', + 'source name=missingUri', + 'embed name=NoCorpus', + 'retriever name=NoCorpus', + 'rag name=NoRetriever', + ].join('\n'), + ); + expect(missing.some((violation) => violation.message.includes("'corpus' requires prop 'name'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'source' requires prop 'uri'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'embed' requires prop 'corpus'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'retriever' requires prop 'corpus'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'rag' requires prop 'retriever'"))).toBe(true); + + const misplaced = validate( + ['retriever name=DocsSearch corpus=Docs', ' grounding requireCitations=true'].join('\n'), + ); + expect( + misplaced.some((violation) => violation.message.includes("'retriever' does not allow child type 'grounding'")), + ).toBe(true); + + const nestedEmbed = validate(['corpus name=Docs', ' embed name=DocsEmbedding corpus=Docs'].join('\n')); + expect( + nestedEmbed.some((violation) => violation.message.includes("'corpus' does not allow child type 'embed'")), + ).toBe(true); + }); + it('passes explicit foreign handler metadata', () => { const v = validate( [ diff --git 
a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index a51d2ccd..35ce6a00 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -63,6 +63,8 @@ describe('KERN semantic substrate', () => { ]); expect(Object.hasOwn(substrate, 'classFacts')).toBe(false); expect(Object.hasOwn(substrate, 'classValidationSummary')).toBe(false); + expect(Object.hasOwn(substrate, 'ragFacts')).toBe(false); + expect(Object.hasOwn(substrate, 'ragValidationSummary')).toBe(false); }); test('exports document class member inheritance and override facts when requested', () => { @@ -234,6 +236,59 @@ describe('KERN semantic substrate', () => { expect(substrate.classValidationSummary?.byRule['machine-transition-from']).toBeUndefined(); }); + test('exports document RAG facts and validation summaries when requested', () => { + const root = parseRoot( + [ + 'corpus name=Docs title="Support docs"', + ' source name=manuals kind=local uri="./docs/**/*.md"', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', + 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true policy=strict maxContext=6000', + ' ragEval metric=faithfulness threshold=0.85', + ].join('\n'), + ); + + const substrate = buildKernSemanticSubstrate({ + documentRag: root, + includeRagValidationSummary: true, + }); + + expect(substrate.ragValidationSummary).toEqual({ total: 0, byRule: {} }); + expect(substrate.ragFacts?.corpora).toEqual([ + expect.objectContaining({ + name: 'Docs', + sources: [expect.objectContaining({ name: 'manuals', uri: './docs/**/*.md' })], + embeds: [expect.objectContaining({ name: 'DocsEmbedding', corpusName: 'Docs' })], + }), + ]); + 
expect(substrate.ragFacts?.retrievers).toEqual([ + expect.objectContaining({ + name: 'DocsSearch', + corpusName: 'Docs', + embedName: 'DocsEmbedding', + topK: 8, + minScore: 0.72, + }), + ]); + expect(substrate.ragFacts?.pipelines).toEqual([ + expect.objectContaining({ + name: 'AnswerDocs', + retrieverName: 'DocsSearch', + citations: true, + groundings: [expect.objectContaining({ requireCitations: true, policy: 'strict' })], + evals: [expect.objectContaining({ metric: 'faithfulness', threshold: 0.85 })], + }), + ]); + + const invalidSubstrate = buildKernSemanticSubstrate({ + documentRag: parseRoot('rag name=Broken retriever=Missing'), + includeRagValidationSummary: true, + }); + expect(invalidSubstrate.ragValidationSummary?.byRule['rag-unknown-retriever']).toBe(1); + }); + test('exports portable review primitives as stable query objects', () => { const substrate = buildKernSemanticSubstrate(); const clamp = lookupSemanticPrimitive(substrate, 'number.clamp'); From 27eff2eaa9996bc47800f485f10a46af7cdb1b92 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 19:34:51 +0200 Subject: [PATCH 19/63] feat(core): bind mcp tools to rag contracts --- .../conformance-mcp-rag-bad-cases.kern | 16 ++ .../conformance-mcp-rag-bad-cases.test.kern | 13 + examples/native-test/conformance-mcp-rag.kern | 17 ++ .../native-test/conformance-mcp-rag.test.kern | 5 + packages/core/src/schema.ts | 20 +- packages/core/src/semantic-validator.ts | 252 +++++++++++++++++- packages/core/src/spec.ts | 1 + packages/core/tests/rag-semantics.test.ts | 122 +++++++++ .../core/tests/semantic-substrate.test.ts | 15 ++ 9 files changed, 453 insertions(+), 8 deletions(-) create mode 100644 examples/native-test/conformance-mcp-rag-bad-cases.kern create mode 100644 examples/native-test/conformance-mcp-rag-bad-cases.test.kern create mode 100644 examples/native-test/conformance-mcp-rag.kern create mode 100644 examples/native-test/conformance-mcp-rag.test.kern diff --git 
a/examples/native-test/conformance-mcp-rag-bad-cases.kern b/examples/native-test/conformance-mcp-rag-bad-cases.kern new file mode 100644 index 00000000..89f629b2 --- /dev/null +++ b/examples/native-test/conformance-mcp-rag-bad-cases.kern @@ -0,0 +1,16 @@ +corpus name=Docs +retriever name=DocsSearch corpus=Docs + +rag name=AnswerDocs retriever=DocsSearch citations=true + grounding requireCitations=true + +mcp name=Support + tool name=badTool + param name=question type=string required=true + retrieve rag=AnswerDocs retriever=MissingRetriever queryParam=missing query={{question}} topK=0 minScore=1.2 requireGrounding=false + retrieve retriever=AlsoMissing queryParam=question + + prompt name=badPrompt + retrieve retriever=DocsSearch + +retrieve rag=MissingRag diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern new file mode 100644 index 00000000..dcbd4a2f --- /dev/null +++ b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern @@ -0,0 +1,13 @@ +test name="Bad MCP RAG conformance" target="./conformance-mcp-rag-bad-cases.kern" coverage=false + it name="mcp rag semantic assertions prove detector coverage" + expect has=semanticViolations matches="MCP retrieve cannot combine retriever= and rag=" + expect has=semanticViolations matches="MCP retrieve references unknown retriever 'MissingRetriever'" + expect has=semanticViolations matches="MCP retrieve queryParam 'missing' is not declared on tool 'badTool'" + expect has=semanticViolations matches="MCP retrieve cannot combine queryParam= and query=" + expect has=semanticViolations matches="MCP retrieve must declare queryParam= or query=" + expect has=semanticViolations matches="MCP retrieve topK must be a positive integer" + expect has=semanticViolations matches="MCP retrieve minScore must be between 0 and 1" + expect has=semanticViolations matches="MCP retrieve references citation-grounded rag 'AnswerDocs' but sets 
requireGrounding=false" + expect has=semanticViolations matches="MCP tool 'badTool' cannot declare more than one retrieve binding" + expect has=semanticViolations matches="MCP retrieve must be nested under a tool or prompt" + expect has=semanticViolations matches="MCP retrieve references unknown rag 'MissingRag'" diff --git a/examples/native-test/conformance-mcp-rag.kern b/examples/native-test/conformance-mcp-rag.kern new file mode 100644 index 00000000..2c4e2585 --- /dev/null +++ b/examples/native-test/conformance-mcp-rag.kern @@ -0,0 +1,17 @@ +corpus name=Docs + source name=manuals uri="./docs/**/*.md" + chunking source=manuals strategy=semantic maxTokens=600 overlap=80 + +retriever name=DocsSearch corpus=Docs mode=hybrid topK=8 minScore=0.72 + +rag name=AnswerDocs retriever=DocsSearch citations=true + grounding requireCitations=true policy=strict + +mcp name=Support + tool name=answerQuestion + param name=question type=string required=true + retrieve rag=AnswerDocs queryParam=question as=context topK=4 + + prompt name=summarizeDocs + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question as=chunks diff --git a/examples/native-test/conformance-mcp-rag.test.kern b/examples/native-test/conformance-mcp-rag.test.kern new file mode 100644 index 00000000..c4bf6d17 --- /dev/null +++ b/examples/native-test/conformance-mcp-rag.test.kern @@ -0,0 +1,5 @@ +test name="MCP RAG conformance" target="./conformance-mcp-rag.kern" coverage=false + it name="mcp retrieve declarations bind to rag contracts" + expect no=schemaViolations + expect no=semanticViolations + expect node=retrieve count=2 diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 3172e8e1..99267c62 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2288,6 +2288,7 @@ export const NODE_SCHEMAS: Record = { 'guard', 'sampling', 'elicitation', + 'retrieve', 'derive', 'effect', 'respond', @@ -2355,7 +2356,24 @@ export const 
NODE_SCHEMAS: Record = { props: { name: { required: true, kind: 'identifier' }, }, - allowedChildren: ['param', 'handler', 'description'], + allowedChildren: ['param', 'handler', 'description', 'retrieve'], + }, + retrieve: { + description: + 'MCP retrieval intent — declaratively binds a tool or prompt to a RAG retriever or pipeline without executing provider retrieval in core.', + example: 'retrieve rag=AnswerDocs queryParam=question as=context requireGrounding=true topK=4', + props: { + name: { kind: 'identifier' }, + retriever: { kind: 'identifier' }, + rag: { kind: 'identifier' }, + queryParam: { kind: 'identifier' }, + query: { kind: 'expression' }, + as: { kind: 'identifier' }, + topK: { kind: 'number' }, + minScore: { kind: 'number' }, + requireGrounding: { kind: 'boolean' }, + }, + allowedChildren: [], }, description: { description: 'Documentation text for a tool, resource, or prompt', diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 9d3becb2..02d93585 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -174,10 +174,26 @@ export interface RagSemanticPipelineFact { readonly loc?: RagSemanticLocation; } +export interface RagSemanticMcpRetrievalFact { + readonly containerKind?: 'tool' | 'prompt'; + readonly containerName?: string; + readonly targetKind: 'retriever' | 'rag'; + readonly targetName: string; + readonly name?: string; + readonly queryParam?: string; + readonly query?: string; + readonly as?: string; + readonly topK?: number; + readonly minScore?: number; + readonly requireGrounding: boolean; + readonly loc?: RagSemanticLocation; +} + export interface RagSemanticFacts { readonly corpora: readonly RagSemanticCorpusFact[]; readonly retrievers: readonly RagSemanticRetrieverFact[]; readonly pipelines: readonly RagSemanticPipelineFact[]; + readonly mcpRetrievals: readonly RagSemanticMcpRetrievalFact[]; readonly unresolvedCorpusRefs: readonly string[]; 
readonly unresolvedRetrieverRefs: readonly string[]; readonly unresolvedEmbedRefs: readonly string[]; @@ -665,6 +681,20 @@ interface RagEvalInfo { ragName?: string; } +interface RagMcpContainerInfo { + node: IRNode; + rootIndex: number; + kind: 'tool' | 'prompt'; + name?: string; + paramNames: ReadonlySet; +} + +interface RagMcpRetrievalInfo { + node: IRNode; + rootIndex: number; + container?: RagMcpContainerInfo; +} + interface RagInfos { corpora: RagCorpusInfo[]; sources: RagSourceInfo[]; @@ -674,6 +704,7 @@ interface RagInfos { pipelines: RagPipelineInfo[]; groundings: RagGroundingInfo[]; evals: RagEvalInfo[]; + mcpRetrievals: RagMcpRetrievalInfo[]; } function validateRagGraph(root: IRNode, violations: SemanticViolation[]): void { @@ -690,7 +721,8 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio infos.retrievers.length === 0 && infos.pipelines.length === 0 && infos.groundings.length === 0 && - infos.evals.length === 0 + infos.evals.length === 0 && + infos.mcpRetrievals.length === 0 ) { return; } @@ -725,6 +757,10 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio for (const evaluation of infos.evals) { validateRagEval(evaluation, ragByName, violations); } + validateRagMcpRetrievalDuplicates(infos.mcpRetrievals, violations); + for (const retrieval of infos.mcpRetrievals) { + validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, violations); + } } function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { @@ -737,6 +773,7 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { pipelines: [], groundings: [], evals: [], + mcpRetrievals: [], }; for (const [rootIndex, root] of roots.entries()) { collectRagInfos(root, rootIndex, out); @@ -745,9 +782,18 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { } function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { - function visit(node: IRNode, nearestCorpusName?: string, 
nearestRagName?: string): void { + function visit( + node: IRNode, + nearestCorpusName?: string, + nearestRagName?: string, + nearestMcpContainer?: RagMcpContainerInfo, + ): void { const nextCorpusName = node.type === 'corpus' ? stringProp(node, 'name') || nearestCorpusName : nearestCorpusName; const nextRagName = node.type === 'rag' ? stringProp(node, 'name') || nearestRagName : nearestRagName; + const nextMcpContainer = + node.type === 'tool' || node.type === 'prompt' + ? ragMcpContainerInfo(node, rootIndex, node.type === 'tool' ? 'tool' : 'prompt') + : nearestMcpContainer; if (node.type === 'corpus') { const name = stringProp(node, 'name'); @@ -780,13 +826,26 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { out.groundings.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); } else if (node.type === 'ragEval') { out.evals.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); + } else if (node.type === 'retrieve') { + out.mcpRetrievals.push({ node, rootIndex, container: nearestMcpContainer }); } - for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName); + for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName, nextMcpContainer); } visit(root); } +function ragMcpContainerInfo(node: IRNode, rootIndex: number, kind: 'tool' | 'prompt'): RagMcpContainerInfo { + const name = stringProp(node, 'name'); + const paramNames = new Set(); + for (const child of node.children ?? 
[]) { + if (child.type !== 'param') continue; + const paramName = stringProp(child, 'name'); + if (paramName) paramNames.add(paramName); + } + return { node, rootIndex, kind, ...optionalStringValue('name', name), paramNames }; +} + function collectRagSourceNamesByCorpus(sources: readonly RagSourceInfo[]): Map> { const out = new Map>(); for (const source of sources) { @@ -1112,6 +1171,139 @@ function validateRagEval( } } +function validateRagMcpRetrievalDuplicates( + retrievals: readonly RagMcpRetrievalInfo[], + violations: SemanticViolation[], +): void { + const seen = new Map(); + for (const retrieval of retrievals) { + const containerNode = retrieval.container?.node; + if (!containerNode) continue; + const prev = seen.get(containerNode); + if (prev) { + pushRagViolation( + violations, + 'mcp-retrieve-duplicate', + retrieval.node, + `MCP ${retrieval.container?.kind} '${retrieval.container?.name ?? ''}' cannot declare more than one retrieve binding — first defined at line ${prev.loc?.line ?? 
'?'}.`, + ); + } else { + seen.set(containerNode, retrieval.node); + } + } +} + +function validateRagMcpRetrieval( + retrieval: RagMcpRetrievalInfo, + retrieverByName: ReadonlyMap, + ragByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (!retrieval.container) { + pushRagViolation( + violations, + 'mcp-retrieve-missing-container', + retrieval.node, + 'MCP retrieve must be nested under a tool or prompt.', + ); + } + + const retrieverName = stringProp(retrieval.node, 'retriever'); + const ragName = stringProp(retrieval.node, 'rag'); + if (!retrieverName && !ragName) { + pushRagViolation( + violations, + 'mcp-retrieve-target-required', + retrieval.node, + 'MCP retrieve must declare retriever= or rag=.', + ); + } + if (retrieverName && ragName) { + pushRagViolation( + violations, + 'mcp-retrieve-target-exclusive', + retrieval.node, + 'MCP retrieve cannot combine retriever= and rag=.', + ); + } + if (retrieverName && !retrieverByName.has(retrieverName)) { + pushRagViolation( + violations, + 'mcp-retrieve-unknown-retriever', + retrieval.node, + `MCP retrieve references unknown retriever '${retrieverName}'.`, + ); + } + if (ragName && !ragByName.has(ragName)) { + pushRagViolation( + violations, + 'mcp-retrieve-unknown-rag', + retrieval.node, + `MCP retrieve references unknown rag '${ragName}'.`, + ); + } + + const queryParam = stringProp(retrieval.node, 'queryParam'); + const query = expressionPropText(retrieval.node.props?.query); + if (!queryParam && !query) { + pushRagViolation( + violations, + 'mcp-retrieve-query-required', + retrieval.node, + 'MCP retrieve must declare queryParam= or query={{...}}.', + ); + } + if (queryParam && query) { + pushRagViolation( + violations, + 'mcp-retrieve-query-exclusive', + retrieval.node, + 'MCP retrieve cannot combine queryParam= and query={{...}}.', + ); + } + if (queryParam && retrieval.container && !retrieval.container.paramNames.has(queryParam)) { + pushRagViolation( + violations, + 
'mcp-retrieve-query-param-unknown', + retrieval.node, + `MCP retrieve queryParam '${queryParam}' is not declared on ${retrieval.container.kind} '${retrieval.container.name ?? ''}'.`, + ); + } + + const topK = numberProp(retrieval.node, 'topK'); + if (invalidNumberProp(retrieval.node, 'topK') || (topK !== undefined && (!Number.isInteger(topK) || topK <= 0))) { + pushRagViolation( + violations, + 'mcp-retrieve-topk-invalid', + retrieval.node, + 'MCP retrieve topK must be a positive integer.', + ); + } + + const minScore = numberProp(retrieval.node, 'minScore'); + if (invalidNumberProp(retrieval.node, 'minScore') || (minScore !== undefined && (minScore < 0 || minScore > 1))) { + pushRagViolation( + violations, + 'mcp-retrieve-minscore-invalid', + retrieval.node, + 'MCP retrieve minScore must be between 0 and 1.', + ); + } + + if (ragName && ragBooleanPropIsFalse(retrieval.node, 'requireGrounding')) { + const pipeline = ragByName.get(ragName); + const requiresCitations = pipeline && ragBooleanProp(pipeline.node, 'citations'); + if (requiresCitations) { + pushRagViolation( + violations, + 'mcp-retrieve-citations-require-grounding', + retrieval.node, + `MCP retrieve references citation-grounded rag '${ragName}' but sets requireGrounding=false.`, + ); + } + } +} + function pushRagViolation(violations: SemanticViolation[], rule: string, node: IRNode, message: string): void { violations.push({ rule, nodeType: node.type, message, line: node.loc?.line, col: node.loc?.col }); } @@ -1123,6 +1315,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe const embedNames = new Set(infos.embeds.map((info) => info.name)); const retrieverNames = new Set(infos.retrievers.map((info) => info.name)); const ragNames = new Set(infos.pipelines.map((info) => info.name)); + const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); const globalSourceNames = new 
Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); @@ -1130,6 +1323,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), retrievers: infos.retrievers.map(ragRetrieverFact), pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), + mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, ragByName)), unresolvedCorpusRefs: sortedUnique([ ...infos.chunking .map((info) => info.corpusName) @@ -1138,15 +1332,20 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe ...infos.retrievers.map((info) => info.corpusName).filter((name) => !corpusNames.has(name)), ]), unresolvedRetrieverRefs: sortedUnique( - infos.pipelines.map((info) => info.retrieverName).filter((name) => !retrieverNames.has(name)), + [ + ...infos.pipelines.map((info) => info.retrieverName), + ...infos.mcpRetrievals.map((info) => stringProp(info.node, 'retriever')), + ].filter((name): name is string => !!name && !retrieverNames.has(name)), ), unresolvedEmbedRefs: sortedUnique( infos.retrievers.map((info) => info.embedName).filter((name): name is string => !!name && !embedNames.has(name)), ), unresolvedRagRefs: sortedUnique( - [...infos.groundings.map((info) => info.ragName), ...infos.evals.map((info) => info.ragName)].filter( - (name): name is string => !!name && !ragNames.has(name), - ), + [ + ...infos.groundings.map((info) => info.ragName), + ...infos.evals.map((info) => info.ragName), + ...infos.mcpRetrievals.map((info) => stringProp(info.node, 'rag')), + ].filter((name): name is string => !!name && !ragNames.has(name)), ), unresolvedSourceRefs: sortedUnique( infos.chunking @@ -1261,6 +1460,40 @@ function ragEvalFact(info: RagEvalInfo): RagSemanticEvalFact { }; } +function ragMcpRetrievalFact( + info: RagMcpRetrievalInfo, + ragByName: ReadonlyMap, +): RagSemanticMcpRetrievalFact { + const 
ragName = stringProp(info.node, 'rag'); + const retrieverName = stringProp(info.node, 'retriever'); + const targetKind = ragName ? 'rag' : 'retriever'; + const targetName = ragName || retrieverName || ''; + return { + ...(info.container ? { containerKind: info.container.kind, containerName: info.container.name ?? '' } : {}), + targetKind, + targetName, + ...optionalStringFact(info.node, 'name', 'name'), + ...optionalStringFact(info.node, 'queryParam', 'queryParam'), + ...optionalStringValue('query', expressionPropText(info.node.props?.query)), + ...optionalStringFact(info.node, 'as', 'as'), + ...optionalNumberFact(info.node, 'topK', 'topK'), + ...optionalNumberFact(info.node, 'minScore', 'minScore'), + requireGrounding: ragMcpRetrieveRequiresGrounding(info.node, ragName, ragByName), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragMcpRetrieveRequiresGrounding( + node: IRNode, + ragName: string | undefined, + ragByName: ReadonlyMap, +): boolean { + if (ragBooleanPropIsFalse(node, 'requireGrounding')) return false; + if (ragBooleanProp(node, 'requireGrounding')) return true; + const pipeline = ragName ? ragByName.get(ragName) : undefined; + return pipeline ? ragBooleanProp(pipeline.node, 'citations') : false; +} + function ragLocation(node: IRNode): RagSemanticLocation | undefined { return node.loc ? 
{ line: node.loc.line, col: node.loc.col } : undefined; } @@ -1299,6 +1532,11 @@ function ragBooleanProp(node: IRNode, prop: string): boolean { return raw === true || (typeof raw === 'string' && raw.trim().toLowerCase() === 'true'); } +function ragBooleanPropIsFalse(node: IRNode, prop: string): boolean { + const raw = node.props?.[prop]; + return raw === false || (typeof raw === 'string' && raw.trim().toLowerCase() === 'false'); +} + function sortedUnique(values: readonly string[]): string[] { return [...new Set(values)].sort(); } diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index 328fa458..7b76bff6 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -344,6 +344,7 @@ export const NODE_TYPES = [ 'description', 'sampling', 'elicitation', + 'retrieve', // RAG — retrieval, grounding, and evaluation contracts 'corpus', 'source', diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index abf23434..94d525b9 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -123,6 +123,74 @@ describe('RAG language semantics', () => { ]); }); + test('accepts MCP tool and prompt retrieval intents against RAG contracts', () => { + const source = [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + 'embed name=DocsEmbedding corpus=Docs', + 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding topK=8 minScore=0.72', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true policy=strict', + 'mcp name=Support', + ' tool name=answerQuestion', + ' param name=question type=string required=true', + ' retrieve rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8', + ' prompt name=summarizeDocs', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true', + ].join('\n'); + + 
expect(validateSchema(parseRoot(source))).toEqual([]); + expect(validateSemantics(parseRoot(source))).toEqual([]); + }); + + test('collects MCP retrieval intent facts from tools and prompts', () => { + const facts = collectRagSemanticFacts( + parseRoot( + [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + 'mcp name=Support', + ' tool name=answerQuestion', + ' param name=question type=string required=true', + ' retrieve name=answerDocs rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8', + ' prompt name=summarizeDocs', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true', + ].join('\n'), + ), + ); + + expect(facts.unresolvedRetrieverRefs).toEqual([]); + expect(facts.unresolvedRagRefs).toEqual([]); + expect(facts.mcpRetrievals).toEqual([ + expect.objectContaining({ + containerKind: 'tool', + containerName: 'answerQuestion', + targetKind: 'rag', + targetName: 'AnswerDocs', + name: 'answerDocs', + queryParam: 'question', + as: 'context', + topK: 4, + minScore: 0.8, + requireGrounding: true, + }), + expect.objectContaining({ + containerKind: 'prompt', + containerName: 'summarizeDocs', + targetKind: 'retriever', + targetName: 'DocsSearch', + queryParam: 'question', + as: 'chunks', + requireGrounding: true, + }), + ]); + }); + test('treats explicit false RAG booleans as false', () => { const root = parseRoot( [ @@ -180,6 +248,60 @@ describe('RAG language semantics', () => { ); }); + test('reports invalid MCP retrieval bindings into RAG contracts', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + 'mcp name=Support', + ' tool name=badTool', + ' param name=question 
type=string required=true', + ' retrieve rag=AnswerDocs retriever=MissingRetriever queryParam=missing query={{question}} topK=0 minScore=1.2 requireGrounding=false', + ' retrieve retriever=AlsoMissing queryParam=question', + 'retrieve rag=MissingRag', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'mcp-retrieve-target-exclusive', + 'mcp-retrieve-unknown-retriever', + 'mcp-retrieve-query-param-unknown', + 'mcp-retrieve-query-exclusive', + 'mcp-retrieve-topk-invalid', + 'mcp-retrieve-minscore-invalid', + 'mcp-retrieve-citations-require-grounding', + 'mcp-retrieve-duplicate', + 'mcp-retrieve-missing-container', + 'mcp-retrieve-unknown-rag', + ]), + ); + + const facts = collectRagSemanticFacts(parseRoot(source)); + expect(facts.unresolvedRetrieverRefs).toEqual(['AlsoMissing', 'MissingRetriever']); + expect(facts.unresolvedRagRefs).toEqual(['MissingRag']); + }); + + test('reports MCP retrieval declarations without a target', () => { + expect( + rulesFor(['mcp name=Support', ' tool name=badTool', ' retrieve queryParam=question'].join('\n')), + ).toContain('mcp-retrieve-target-required'); + }); + + test('reports MCP retrieval declarations without a query source', () => { + expect( + rulesFor( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'mcp name=Support', + ' tool name=badTool', + ' retrieve retriever=DocsSearch', + ].join('\n'), + ), + ).toContain('mcp-retrieve-query-required'); + }); + test('reports disconnected and duplicate RAG declarations', () => { const source = [ 'corpus name=Docs', diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 35ce6a00..3c2862b8 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -247,6 +247,10 @@ describe('KERN semantic substrate', () => { 'rag name=AnswerDocs retriever=DocsSearch citations=true', ' grounding requireCitations=true policy=strict 
maxContext=6000', ' ragEval metric=faithfulness threshold=0.85', + 'mcp name=Support', + ' tool name=answerQuestion', + ' param name=question type=string required=true', + ' retrieve rag=AnswerDocs queryParam=question as=context', ].join('\n'), ); @@ -281,6 +285,17 @@ describe('KERN semantic substrate', () => { evals: [expect.objectContaining({ metric: 'faithfulness', threshold: 0.85 })], }), ]); + expect(substrate.ragFacts?.mcpRetrievals).toEqual([ + expect.objectContaining({ + containerKind: 'tool', + containerName: 'answerQuestion', + targetKind: 'rag', + targetName: 'AnswerDocs', + queryParam: 'question', + as: 'context', + requireGrounding: true, + }), + ]); const invalidSubstrate = buildKernSemanticSubstrate({ documentRag: parseRoot('rag name=Broken retriever=Missing'), From 335225a515489fb2248fa7e0c95209123ab1a14c Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 20:24:22 +0200 Subject: [PATCH 20/63] feat(core): add mcp resource rag ingress --- .../conformance-mcp-rag-bad-cases.kern | 13 ++ .../conformance-mcp-rag-bad-cases.test.kern | 6 + examples/native-test/conformance-mcp-rag.kern | 4 + .../native-test/conformance-mcp-rag.test.kern | 1 + packages/core/src/schema.ts | 1 + packages/core/src/semantic-validator.ts | 145 +++++++++++++++++- packages/core/tests/rag-semantics.test.ts | 73 +++++++++ .../core/tests/semantic-substrate.test.ts | 14 +- 8 files changed, 252 insertions(+), 5 deletions(-) diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.kern b/examples/native-test/conformance-mcp-rag-bad-cases.kern index 89f629b2..b0a62f8d 100644 --- a/examples/native-test/conformance-mcp-rag-bad-cases.kern +++ b/examples/native-test/conformance-mcp-rag-bad-cases.kern @@ -5,6 +5,8 @@ rag name=AnswerDocs retriever=DocsSearch citations=true grounding requireCitations=true mcp name=Support + resource name=DocsResource uri="docs://manuals" + tool name=badTool param name=question type=string required=true retrieve rag=AnswerDocs 
retriever=MissingRetriever queryParam=missing query={{question}} topK=0 minScore=1.2 requireGrounding=false @@ -13,4 +15,15 @@ mcp name=Support prompt name=badPrompt retrieve retriever=DocsSearch +mcp name=OtherSupport + resource name=DocsResource uri="docs://other-manuals" + +corpus name=BadIngress + source name=missingResource kind=mcp uri="mcp://MissingResource" + source name=unknownResource kind=mcp resource=MissingResource uri="mcp://MissingResource" + source name=toolResource kind=mcp resource=badTool uri="mcp://badTool" + source name=promptResource kind=mcp resource=badPrompt uri="mcp://badPrompt" + source name=ambiguousResource kind=mcp resource=DocsResource uri="mcp://DocsResource" + source name=fileResource kind=local resource=DocsResource uri="./docs/**/*.md" + retrieve rag=MissingRag diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern index dcbd4a2f..8f1b1170 100644 --- a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern +++ b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern @@ -11,3 +11,9 @@ test name="Bad MCP RAG conformance" target="./conformance-mcp-rag-bad-cases.kern expect has=semanticViolations matches="MCP tool 'badTool' cannot declare more than one retrieve binding" expect has=semanticViolations matches="MCP retrieve must be nested under a tool or prompt" expect has=semanticViolations matches="MCP retrieve references unknown rag 'MissingRag'" + expect has=semanticViolations matches="RAG source kind=mcp requires resource=" + expect has=semanticViolations matches="RAG source references unknown MCP resource 'MissingResource'" + expect has=semanticViolations matches="RAG source resource 'badTool' resolves to MCP tool" + expect has=semanticViolations matches="RAG source resource 'badPrompt' resolves to MCP prompt" + expect has=semanticViolations matches="RAG source resource 'DocsResource' is ambiguous" + expect has=semanticViolations 
matches="RAG source resource= is only valid with kind=mcp" diff --git a/examples/native-test/conformance-mcp-rag.kern b/examples/native-test/conformance-mcp-rag.kern index 2c4e2585..d367ac46 100644 --- a/examples/native-test/conformance-mcp-rag.kern +++ b/examples/native-test/conformance-mcp-rag.kern @@ -1,6 +1,8 @@ corpus name=Docs source name=manuals uri="./docs/**/*.md" + source name=mcpManuals kind=mcp resource=DocsResource uri="mcp://DocsResource" chunking source=manuals strategy=semantic maxTokens=600 overlap=80 + chunking source=mcpManuals strategy=semantic maxTokens=600 overlap=80 retriever name=DocsSearch corpus=Docs mode=hybrid topK=8 minScore=0.72 @@ -8,6 +10,8 @@ rag name=AnswerDocs retriever=DocsSearch citations=true grounding requireCitations=true policy=strict mcp name=Support + resource name=DocsResource uri="docs://manuals" + tool name=answerQuestion param name=question type=string required=true retrieve rag=AnswerDocs queryParam=question as=context topK=4 diff --git a/examples/native-test/conformance-mcp-rag.test.kern b/examples/native-test/conformance-mcp-rag.test.kern index c4bf6d17..d164bdab 100644 --- a/examples/native-test/conformance-mcp-rag.test.kern +++ b/examples/native-test/conformance-mcp-rag.test.kern @@ -2,4 +2,5 @@ test name="MCP RAG conformance" target="./conformance-mcp-rag.kern" coverage=fal it name="mcp retrieve declarations bind to rag contracts" expect no=schemaViolations expect no=semanticViolations + expect node=source count=2 expect node=retrieve count=2 diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 99267c62..9bf22f8b 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2421,6 +2421,7 @@ export const NODE_SCHEMAS: Record = { name: { kind: 'identifier' }, kind: { kind: 'identifier' }, uri: { required: true, kind: 'string' }, + resource: { kind: 'identifier' }, media: { kind: 'identifier' }, acl: { kind: 'identifier' }, }, diff --git 
a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 02d93585..ea386e93 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -99,6 +99,7 @@ export interface RagSemanticSourceFact { readonly corpusName?: string; readonly kind?: string; readonly uri: string; + readonly resourceName?: string; readonly media?: string; readonly acl?: string; readonly loc?: RagSemanticLocation; @@ -189,16 +190,26 @@ export interface RagSemanticMcpRetrievalFact { readonly loc?: RagSemanticLocation; } +export interface RagSemanticResourceFeedFact { + readonly corpusName?: string; + readonly sourceName?: string; + readonly resourceName: string; + readonly uri: string; + readonly loc?: RagSemanticLocation; +} + export interface RagSemanticFacts { readonly corpora: readonly RagSemanticCorpusFact[]; readonly retrievers: readonly RagSemanticRetrieverFact[]; readonly pipelines: readonly RagSemanticPipelineFact[]; readonly mcpRetrievals: readonly RagSemanticMcpRetrievalFact[]; + readonly resourceFeedsCorpora: readonly RagSemanticResourceFeedFact[]; readonly unresolvedCorpusRefs: readonly string[]; readonly unresolvedRetrieverRefs: readonly string[]; readonly unresolvedEmbedRefs: readonly string[]; readonly unresolvedRagRefs: readonly string[]; readonly unresolvedSourceRefs: readonly string[]; + readonly unresolvedResourceRefs: readonly string[]; } /** @@ -695,6 +706,13 @@ interface RagMcpRetrievalInfo { container?: RagMcpContainerInfo; } +interface RagMcpSymbolInfo { + node: IRNode; + rootIndex: number; + kind: 'resource' | 'tool' | 'prompt'; + name: string; +} + interface RagInfos { corpora: RagCorpusInfo[]; sources: RagSourceInfo[]; @@ -705,6 +723,9 @@ interface RagInfos { groundings: RagGroundingInfo[]; evals: RagEvalInfo[]; mcpRetrievals: RagMcpRetrievalInfo[]; + mcpResources: RagMcpSymbolInfo[]; + mcpTools: RagMcpSymbolInfo[]; + mcpPrompts: RagMcpSymbolInfo[]; } function validateRagGraph(root: IRNode, 
violations: SemanticViolation[]): void { @@ -722,7 +743,10 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio infos.pipelines.length === 0 && infos.groundings.length === 0 && infos.evals.length === 0 && - infos.mcpRetrievals.length === 0 + infos.mcpRetrievals.length === 0 && + infos.mcpResources.length === 0 && + infos.mcpTools.length === 0 && + infos.mcpPrompts.length === 0 ) { return; } @@ -731,13 +755,18 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio const embedByName = new Map(infos.embeds.map((info) => [info.name, info])); const retrieverByName = new Map(infos.retrievers.map((info) => [info.name, info])); const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); + const mcpResourcesByName = collectRagMcpSymbolsByName(infos.mcpResources); + const mcpCallableByName = new Map([ + ...infos.mcpTools.map((info) => [info.name, info] as const), + ...infos.mcpPrompts.map((info) => [info.name, info] as const), + ]); const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); validateRagUniqueNames(infos, violations); for (const source of infos.sources) { - validateRagSource(source, violations); + validateRagSource(source, mcpResourcesByName, mcpCallableByName, violations); } for (const chunking of infos.chunking) { validateRagChunking(chunking, corpusByName, sourceNamesByCorpus, globalSourceNames, violations); @@ -774,6 +803,9 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { groundings: [], evals: [], mcpRetrievals: [], + mcpResources: [], + mcpTools: [], + mcpPrompts: [], }; for (const [rootIndex, root] of roots.entries()) { collectRagInfos(root, rootIndex, out); @@ -787,9 +819,11 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { nearestCorpusName?: string, nearestRagName?: string, 
nearestMcpContainer?: RagMcpContainerInfo, + nearestMcpName?: string, ): void { const nextCorpusName = node.type === 'corpus' ? stringProp(node, 'name') || nearestCorpusName : nearestCorpusName; const nextRagName = node.type === 'rag' ? stringProp(node, 'name') || nearestRagName : nearestRagName; + const nextMcpName = node.type === 'mcp' ? stringProp(node, 'name') || '' : nearestMcpName; const nextMcpContainer = node.type === 'tool' || node.type === 'prompt' ? ragMcpContainerInfo(node, rootIndex, node.type === 'tool' ? 'tool' : 'prompt') @@ -828,9 +862,21 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { out.evals.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); } else if (node.type === 'retrieve') { out.mcpRetrievals.push({ node, rootIndex, container: nearestMcpContainer }); + } else if ( + nextMcpName !== undefined && + (node.type === 'resource' || node.type === 'tool' || node.type === 'prompt') + ) { + const name = stringProp(node, 'name'); + if (name) { + const kind = node.type === 'resource' ? 'resource' : node.type === 'tool' ? 'tool' : 'prompt'; + const info: RagMcpSymbolInfo = { node, rootIndex, kind, name }; + if (node.type === 'resource') out.mcpResources.push(info); + else if (node.type === 'tool') out.mcpTools.push(info); + else out.mcpPrompts.push(info); + } } - for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName, nextMcpContainer); + for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName, nextMcpContainer, nextMcpName); } visit(root); } @@ -857,6 +903,16 @@ function collectRagSourceNamesByCorpus(sources: readonly RagSourceInfo[]): Map { + const out = new Map(); + for (const symbol of symbols) { + const matches = out.get(symbol.name) ?? 
[]; + matches.push(symbol); + out.set(symbol.name, matches); + } + return out; +} + function validateRagUniqueNames(infos: RagInfos, violations: SemanticViolation[]): void { validateRagUniqueNameSet('corpus', infos.corpora, violations); validateRagUniqueSourceNames(infos.sources, violations); @@ -905,11 +961,63 @@ function validateRagUniqueSourceNames(sources: readonly RagSourceInfo[], violati } } -function validateRagSource(source: RagSourceInfo, violations: SemanticViolation[]): void { +function validateRagSource( + source: RagSourceInfo, + mcpResourcesByName: ReadonlyMap, + mcpCallableByName: ReadonlyMap, + violations: SemanticViolation[], +): void { if (!source.corpusName) { pushRagViolation(violations, 'rag-source-missing-corpus', source.node, 'RAG source must be nested under a corpus.'); } + const kind = stringProp(source.node, 'kind'); + const resourceName = stringProp(source.node, 'resource'); + if (kind === 'mcp') { + if (!resourceName) { + pushRagViolation( + violations, + 'rag-source-mcp-resource-required', + source.node, + 'RAG source kind=mcp requires resource=.', + ); + } else { + const resources = mcpResourcesByName.get(resourceName) ?? 
[]; + if (resources.length > 1) { + pushRagViolation( + violations, + 'rag-source-mcp-resource-ambiguous', + source.node, + `RAG source resource '${resourceName}' is ambiguous because multiple MCP resources use that name.`, + ); + } else if (resources.length === 0) { + const callable = mcpCallableByName.get(resourceName); + if (callable) { + pushRagViolation( + violations, + 'rag-source-mcp-resource-kind', + source.node, + `RAG source resource '${resourceName}' resolves to MCP ${callable.kind}, expected MCP resource.`, + ); + } else { + pushRagViolation( + violations, + 'rag-source-mcp-resource-unknown', + source.node, + `RAG source references unknown MCP resource '${resourceName}'.`, + ); + } + } + } + } else if (resourceName) { + pushRagViolation( + violations, + 'rag-source-resource-requires-mcp-kind', + source.node, + 'RAG source resource= is only valid with kind=mcp.', + ); + } + const uri = stringProp(source.node, 'uri'); if (uri !== undefined && uri.trim() === '') { pushRagViolation( @@ -1316,6 +1424,11 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe const retrieverNames = new Set(infos.retrievers.map((info) => info.name)); const ragNames = new Set(infos.pipelines.map((info) => info.name)); const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); + const mcpResourcesByName = collectRagMcpSymbolsByName(infos.mcpResources); + const mcpCallableNames = new Set([ + ...infos.mcpTools.map((info) => info.name), + ...infos.mcpPrompts.map((info) => info.name), + ]); const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); @@ -1324,6 +1437,13 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe retrievers: infos.retrievers.map(ragRetrieverFact), pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), 
mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, ragByName)), + resourceFeedsCorpora: infos.sources + .filter( + (info) => + stringProp(info.node, 'kind') === 'mcp' && + (mcpResourcesByName.get(stringProp(info.node, 'resource') ?? '')?.length ?? 0) === 1, + ) + .map(ragResourceFeedFact), unresolvedCorpusRefs: sortedUnique([ ...infos.chunking .map((info) => info.corpusName) @@ -1357,6 +1477,12 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe .map((info) => info.sourceName) .filter((name): name is string => !!name), ), + unresolvedResourceRefs: sortedUnique( + infos.sources + .filter((info) => stringProp(info.node, 'kind') === 'mcp') + .map((info) => stringProp(info.node, 'resource')) + .filter((name): name is string => !!name && !mcpResourcesByName.has(name) && !mcpCallableNames.has(name)), + ), }; } @@ -1379,12 +1505,23 @@ function ragSourceFact(info: RagSourceInfo): RagSemanticSourceFact { ...optionalStringValue('corpusName', info.corpusName), ...optionalStringFact(info.node, 'kind', 'kind'), uri: stringProp(info.node, 'uri') ?? '', + ...optionalStringFact(info.node, 'resource', 'resourceName'), ...optionalStringFact(info.node, 'media', 'media'), ...optionalStringFact(info.node, 'acl', 'acl'), ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), }; } +function ragResourceFeedFact(info: RagSourceInfo): RagSemanticResourceFeedFact { + return { + ...optionalStringValue('corpusName', info.corpusName), + ...optionalStringValue('sourceName', info.name), + resourceName: stringProp(info.node, 'resource') ?? '', + uri: stringProp(info.node, 'uri') ?? '', + ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), + }; +} + function ragChunkingFact(info: RagChunkingInfo): RagSemanticChunkingFact { return { ...optionalStringValue('name', info.name), diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index 94d525b9..e3523032 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -123,6 +123,40 @@ describe('RAG language semantics', () => { ]); }); + test('accepts MCP resource-backed corpus sources as static ingress contracts', () => { + const source = [ + 'mcp name=Support', + ' resource name=DocsResource uri="docs://manuals"', + 'corpus name=Docs', + ' source name=manuals kind=mcp resource=DocsResource uri="mcp://DocsResource" media=markdown', + ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', + 'retriever name=DocsSearch corpus=Docs', + ].join('\n'); + + expect(validateSchema(parseRoot(source))).toEqual([]); + expect(validateSemantics(parseRoot(source))).toEqual([]); + + const facts = collectRagSemanticFacts(parseRoot(source)); + expect(facts.unresolvedResourceRefs).toEqual([]); + expect(facts.corpora[0]?.sources).toEqual([ + expect.objectContaining({ + name: 'manuals', + corpusName: 'Docs', + kind: 'mcp', + uri: 'mcp://DocsResource', + resourceName: 'DocsResource', + }), + ]); + expect(facts.resourceFeedsCorpora).toEqual([ + expect.objectContaining({ + corpusName: 'Docs', + sourceName: 'manuals', + resourceName: 'DocsResource', + uri: 'mcp://DocsResource', + }), + ]); + }); + test('accepts MCP tool and prompt retrieval intents against RAG contracts', () => { const source = [ 'corpus name=Docs', @@ -282,6 +316,45 @@ describe('RAG language semantics', () => { expect(facts.unresolvedRagRefs).toEqual(['MissingRag']); }); + test('reports invalid MCP resource-backed corpus source bindings', () => { + const source = [ + 'mcp name=Support', + ' tool name=DocsTool', + ' prompt name=DocsPrompt', + ' resource name=DocsResource 
uri="docs://manuals"', + ' resource name=UniqueResource uri="docs://unique"', + 'mcp name=OtherSupport', + ' resource name=DocsResource uri="docs://other-manuals"', + 'corpus name=Docs', + ' source name=missingResource kind=mcp uri="mcp://MissingResource"', + ' source name=unknownResource kind=mcp resource=MissingResource uri="mcp://MissingResource"', + ' source name=toolResource kind=mcp resource=DocsTool uri="mcp://DocsTool"', + ' source name=promptResource kind=mcp resource=DocsPrompt uri="mcp://DocsPrompt"', + ' source name=ambiguousMcp kind=mcp resource=DocsResource uri="mcp://DocsResource"', + ' source name=validMcp kind=mcp resource=UniqueResource uri="mcp://UniqueResource"', + ' source name=fileResource kind=local resource=DocsResource uri="./docs/**/*.md"', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'rag-source-mcp-resource-required', + 'rag-source-mcp-resource-unknown', + 'rag-source-mcp-resource-kind', + 'rag-source-mcp-resource-ambiguous', + 'rag-source-resource-requires-mcp-kind', + ]), + ); + + const facts = collectRagSemanticFacts(parseRoot(source)); + expect(facts.resourceFeedsCorpora).toEqual([ + expect.objectContaining({ + sourceName: 'validMcp', + resourceName: 'UniqueResource', + }), + ]); + expect(facts.unresolvedResourceRefs).toEqual(['MissingResource']); + }); + test('reports MCP retrieval declarations without a target', () => { expect( rulesFor(['mcp name=Support', ' tool name=badTool', ' retrieve queryParam=question'].join('\n')), diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 3c2862b8..fd0a3e17 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -241,6 +241,7 @@ describe('KERN semantic substrate', () => { [ 'corpus name=Docs title="Support docs"', ' source name=manuals kind=local uri="./docs/**/*.md"', + ' source name=mcpManuals kind=mcp resource=DocsResource 
uri="mcp://DocsResource"', ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', @@ -248,6 +249,7 @@ describe('KERN semantic substrate', () => { ' grounding requireCitations=true policy=strict maxContext=6000', ' ragEval metric=faithfulness threshold=0.85', 'mcp name=Support', + ' resource name=DocsResource uri="docs://manuals"', ' tool name=answerQuestion', ' param name=question type=string required=true', ' retrieve rag=AnswerDocs queryParam=question as=context', @@ -263,7 +265,10 @@ describe('KERN semantic substrate', () => { expect(substrate.ragFacts?.corpora).toEqual([ expect.objectContaining({ name: 'Docs', - sources: [expect.objectContaining({ name: 'manuals', uri: './docs/**/*.md' })], + sources: [ + expect.objectContaining({ name: 'manuals', uri: './docs/**/*.md' }), + expect.objectContaining({ name: 'mcpManuals', resourceName: 'DocsResource', uri: 'mcp://DocsResource' }), + ], embeds: [expect.objectContaining({ name: 'DocsEmbedding', corpusName: 'Docs' })], }), ]); @@ -296,6 +301,13 @@ describe('KERN semantic substrate', () => { requireGrounding: true, }), ]); + expect(substrate.ragFacts?.resourceFeedsCorpora).toEqual([ + expect.objectContaining({ + corpusName: 'Docs', + sourceName: 'mcpManuals', + resourceName: 'DocsResource', + }), + ]); const invalidSubstrate = buildKernSemanticSubstrate({ documentRag: parseRoot('rag name=Broken retriever=Missing'), From 177c69570ce6415cc706467387dba51a7aebae43 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 21:11:26 +0200 Subject: [PATCH 21/63] feat(core): add typed rag retrieval outputs --- .../conformance-mcp-rag-bad-cases.kern | 28 +++ .../conformance-mcp-rag-bad-cases.test.kern | 7 + examples/native-test/conformance-mcp-rag.kern | 4 +- .../native-test/conformance-mcp-rag.test.kern | 1 + 
packages/core/src/schema.ts | 9 +- packages/core/src/semantic-validator.ts | 162 ++++++++++++++++-- packages/core/tests/rag-semantics.test.ts | 77 ++++++++- .../core/tests/semantic-substrate.test.ts | 11 +- 8 files changed, 281 insertions(+), 18 deletions(-) diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.kern b/examples/native-test/conformance-mcp-rag-bad-cases.kern index b0a62f8d..bd111914 100644 --- a/examples/native-test/conformance-mcp-rag-bad-cases.kern +++ b/examples/native-test/conformance-mcp-rag-bad-cases.kern @@ -15,6 +15,34 @@ mcp name=Support prompt name=badPrompt retrieve retriever=DocsSearch + tool name=badOutput + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question output="Foo[]" + + tool name=scalarOutput + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question output=RetrievedChunk + + tool name=fieldWithoutOutput + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question citationField=citation + + tool name=requireCitationsWithoutOutput + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question requireCitations=true + + tool name=missingOutputCitation + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question output="RetrievedChunk[]" requireCitations=true provenance=source + + tool name=missingOutputSource + param name=question type=string required=true + retrieve retriever=DocsSearch queryParam=question output="RetrievedChunk[]" requireCitations=true citationField=citation + + tool name=weakensRagCitations + param name=question type=string required=true + retrieve rag=AnswerDocs queryParam=question output="RetrievedChunk[]" requireCitations=false + mcp name=OtherSupport resource name=DocsResource uri="docs://other-manuals" diff --git a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern 
b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern index 8f1b1170..d6745e79 100644 --- a/examples/native-test/conformance-mcp-rag-bad-cases.test.kern +++ b/examples/native-test/conformance-mcp-rag-bad-cases.test.kern @@ -11,6 +11,13 @@ test name="Bad MCP RAG conformance" target="./conformance-mcp-rag-bad-cases.kern expect has=semanticViolations matches="MCP tool 'badTool' cannot declare more than one retrieve binding" expect has=semanticViolations matches="MCP retrieve must be nested under a tool or prompt" expect has=semanticViolations matches="MCP retrieve references unknown rag 'MissingRag'" + expect has=semanticViolations matches="MCP retrieve output 'Foo\\[\\]' is not supported" + expect has=semanticViolations matches="MCP retrieve output must be RetrievedChunk\\[\\]" + expect has=semanticViolations matches="MCP retrieve output fields require output=RetrievedChunk\\[\\]" + expect has=semanticViolations matches="MCP retrieve requireCitations=.*requires output=RetrievedChunk\\[\\]" + expect has=semanticViolations matches="MCP retrieve output requires citationField=" + expect has=semanticViolations matches="MCP retrieve output requires sourceField=" + expect has=semanticViolations matches="MCP retrieve references citation-grounded rag 'AnswerDocs' but sets requireCitations=false" expect has=semanticViolations matches="RAG source kind=mcp requires resource=" expect has=semanticViolations matches="RAG source references unknown MCP resource 'MissingResource'" expect has=semanticViolations matches="RAG source resource 'badTool' resolves to MCP tool" diff --git a/examples/native-test/conformance-mcp-rag.kern b/examples/native-test/conformance-mcp-rag.kern index d367ac46..25934568 100644 --- a/examples/native-test/conformance-mcp-rag.kern +++ b/examples/native-test/conformance-mcp-rag.kern @@ -14,8 +14,8 @@ mcp name=Support tool name=answerQuestion param name=question type=string required=true - retrieve rag=AnswerDocs queryParam=question as=context topK=4 
+ retrieve rag=AnswerDocs queryParam=question as=context topK=4 output="RetrievedChunk[]" requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score prompt name=summarizeDocs param name=question type=string required=true - retrieve retriever=DocsSearch queryParam=question as=chunks + retrieve retriever=DocsSearch queryParam=question as=chunks output="RetrievedChunk[]" scoreField=score diff --git a/examples/native-test/conformance-mcp-rag.test.kern b/examples/native-test/conformance-mcp-rag.test.kern index d164bdab..c1c0d317 100644 --- a/examples/native-test/conformance-mcp-rag.test.kern +++ b/examples/native-test/conformance-mcp-rag.test.kern @@ -4,3 +4,4 @@ test name="MCP RAG conformance" target="./conformance-mcp-rag.kern" coverage=fal expect no=semanticViolations expect node=source count=2 expect node=retrieve count=2 + expect node=retrieve prop=output is="RetrievedChunk[]" diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 9bf22f8b..062454d6 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2361,7 +2361,8 @@ export const NODE_SCHEMAS: Record = { retrieve: { description: 'MCP retrieval intent — declaratively binds a tool or prompt to a RAG retriever or pipeline without executing provider retrieval in core.', - example: 'retrieve rag=AnswerDocs queryParam=question as=context requireGrounding=true topK=4', + example: + 'retrieve rag=AnswerDocs queryParam=question as=context output="RetrievedChunk[]" requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score', props: { name: { kind: 'identifier' }, retriever: { kind: 'identifier' }, @@ -2372,6 +2373,12 @@ export const NODE_SCHEMAS: Record = { topK: { kind: 'number' }, minScore: { kind: 'number' }, requireGrounding: { kind: 'boolean' }, + output: { kind: 'typeAnnotation' }, + requireCitations: { kind: 'boolean' }, + provenance: { kind: 'identifier' }, + citationField: { kind: 'identifier' 
}, + sourceField: { kind: 'identifier' }, + scoreField: { kind: 'identifier' }, }, allowedChildren: [], }, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index ea386e93..e1699fd8 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -187,6 +187,15 @@ export interface RagSemanticMcpRetrievalFact { readonly topK?: number; readonly minScore?: number; readonly requireGrounding: boolean; + readonly outputShape?: string; + readonly outputItemShape?: string; + readonly requireCitations?: boolean; + readonly effectiveRequiresCitations: boolean; + readonly provenance?: string; + readonly citationField?: string; + readonly sourceField?: string; + readonly scoreField?: string; + readonly contractStatus: 'absent' | 'valid' | 'invalid'; readonly loc?: RagSemanticLocation; } @@ -760,6 +769,7 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio ...infos.mcpTools.map((info) => [info.name, info] as const), ...infos.mcpPrompts.map((info) => [info.name, info] as const), ]); + const citationRequiredRagNames = collectRagCitationRequiredNames(infos.pipelines, infos.groundings); const sourceNamesByCorpus = collectRagSourceNamesByCorpus(infos.sources); const globalSourceNames = new Set(infos.sources.map((info) => info.name).filter((name): name is string => !!name)); @@ -788,7 +798,7 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio } validateRagMcpRetrievalDuplicates(infos.mcpRetrievals, violations); for (const retrieval of infos.mcpRetrievals) { - validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, violations); + validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, citationRequiredRagNames, violations); } } @@ -1305,6 +1315,7 @@ function validateRagMcpRetrieval( retrieval: RagMcpRetrievalInfo, retrieverByName: ReadonlyMap, ragByName: ReadonlyMap, + citationRequiredRagNames: ReadonlySet, violations: 
SemanticViolation[], ): void { if (!retrieval.container) { @@ -1399,8 +1410,7 @@ function validateRagMcpRetrieval( } if (ragName && ragBooleanPropIsFalse(retrieval.node, 'requireGrounding')) { - const pipeline = ragByName.get(ragName); - const requiresCitations = pipeline && ragBooleanProp(pipeline.node, 'citations'); + const requiresCitations = citationRequiredRagNames.has(ragName); if (requiresCitations) { pushRagViolation( violations, @@ -1410,6 +1420,101 @@ function validateRagMcpRetrieval( ); } } + + validateRagMcpRetrievalOutput(retrieval, ragName, citationRequiredRagNames, violations); +} + +function validateRagMcpRetrievalOutput( + retrieval: RagMcpRetrievalInfo, + ragName: string | undefined, + citationRequiredRagNames: ReadonlySet, + violations: SemanticViolation[], +): void { + const outputShape = stringProp(retrieval.node, 'output'); + const provenance = stringProp(retrieval.node, 'provenance'); + const citationField = stringProp(retrieval.node, 'citationField'); + const sourceField = stringProp(retrieval.node, 'sourceField'); + const scoreField = stringProp(retrieval.node, 'scoreField'); + const hasRequireCitations = Object.hasOwn(retrieval.node.props ?? 
{}, 'requireCitations'); + const hasOutputField = Boolean(provenance || citationField || sourceField || scoreField); + + if (outputShape === RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE) { + pushRagViolation( + violations, + 'mcp-retrieve-output-array-required', + retrieval.node, + 'MCP retrieve output must be RetrievedChunk[] because retrieval bindings expose ranked context sets.', + ); + } else if (outputShape && outputShape !== RAG_MCP_RETRIEVE_OUTPUT_SHAPE) { + pushRagViolation( + violations, + 'mcp-retrieve-output-unknown', + retrieval.node, + `MCP retrieve output '${outputShape}' is not supported; use RetrievedChunk[] for this slice.`, + ); + } + + if (!outputShape && hasOutputField) { + pushRagViolation( + violations, + 'mcp-retrieve-output-field-without-output', + retrieval.node, + 'MCP retrieve output fields require output=RetrievedChunk[].', + ); + } + if (!outputShape && hasRequireCitations) { + pushRagViolation( + violations, + 'mcp-retrieve-output-required', + retrieval.node, + 'MCP retrieve requireCitations= requires output=RetrievedChunk[].', + ); + } + + const targetRequiresCitations = ragName ? 
citationRequiredRagNames.has(ragName) : false; + if (ragBooleanPropIsFalse(retrieval.node, 'requireCitations') && targetRequiresCitations) { + pushRagViolation( + violations, + 'mcp-retrieve-output-citations-cannot-weaken-rag', + retrieval.node, + `MCP retrieve references citation-grounded rag '${ragName}' but sets requireCitations=false.`, + ); + } + + if (outputShape !== RAG_MCP_RETRIEVE_OUTPUT_SHAPE) return; + + const explicitRequiresCitations = ragBooleanProp(retrieval.node, 'requireCitations'); + const effectiveRequiresCitations = explicitRequiresCitations || targetRequiresCitations; + if (effectiveRequiresCitations && !citationField) { + pushRagViolation( + violations, + 'mcp-retrieve-output-citation-field-required', + retrieval.node, + 'MCP retrieve output requires citationField= when citations are required.', + ); + } + if (effectiveRequiresCitations && !sourceField && provenance !== 'source') { + pushRagViolation( + violations, + 'mcp-retrieve-output-source-required', + retrieval.node, + 'MCP retrieve output requires sourceField= or provenance=source when citations are required.', + ); + } +} + +function collectRagCitationRequiredNames( + pipelines: readonly RagPipelineInfo[], + groundings: readonly RagGroundingInfo[], +): ReadonlySet { + const out = new Set(); + for (const pipeline of pipelines) { + if (ragBooleanProp(pipeline.node, 'citations')) out.add(pipeline.name); + } + for (const grounding of groundings) { + if (grounding.ragName && ragBooleanProp(grounding.node, 'requireCitations')) out.add(grounding.ragName); + } + return out; } function pushRagViolation(violations: SemanticViolation[], rule: string, node: IRNode, message: string): void { @@ -1423,7 +1528,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe const embedNames = new Set(infos.embeds.map((info) => info.name)); const retrieverNames = new Set(infos.retrievers.map((info) => info.name)); const ragNames = new Set(infos.pipelines.map((info) => info.name)); 
- const ragByName = new Map(infos.pipelines.map((info) => [info.name, info])); + const citationRequiredRagNames = collectRagCitationRequiredNames(infos.pipelines, infos.groundings); const mcpResourcesByName = collectRagMcpSymbolsByName(infos.mcpResources); const mcpCallableNames = new Set([ ...infos.mcpTools.map((info) => info.name), @@ -1436,7 +1541,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), retrievers: infos.retrievers.map(ragRetrieverFact), pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), - mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, ragByName)), + mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, citationRequiredRagNames)), resourceFeedsCorpora: infos.sources .filter( (info) => @@ -1599,12 +1704,15 @@ function ragEvalFact(info: RagEvalInfo): RagSemanticEvalFact { function ragMcpRetrievalFact( info: RagMcpRetrievalInfo, - ragByName: ReadonlyMap, + citationRequiredRagNames: ReadonlySet, ): RagSemanticMcpRetrievalFact { const ragName = stringProp(info.node, 'rag'); const retrieverName = stringProp(info.node, 'retriever'); const targetKind = ragName ? 'rag' : 'retriever'; const targetName = ragName || retrieverName || ''; + const outputShape = stringProp(info.node, 'output'); + const targetRequiresCitations = ragName ? citationRequiredRagNames.has(ragName) : false; + const explicitRequiresCitations = ragBooleanProp(info.node, 'requireCitations'); return { ...(info.container ? { containerKind: info.container.kind, containerName: info.container.name ?? 
'' } : {}), targetKind, @@ -1615,20 +1723,52 @@ function ragMcpRetrievalFact( ...optionalStringFact(info.node, 'as', 'as'), ...optionalNumberFact(info.node, 'topK', 'topK'), ...optionalNumberFact(info.node, 'minScore', 'minScore'), - requireGrounding: ragMcpRetrieveRequiresGrounding(info.node, ragName, ragByName), + requireGrounding: ragMcpRetrieveRequiresGrounding(info.node, ragName, citationRequiredRagNames), + ...optionalStringValue('outputShape', outputShape), + ...(outputShape === RAG_MCP_RETRIEVE_OUTPUT_SHAPE ? { outputItemShape: RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE } : {}), + ...(Object.hasOwn(info.node.props ?? {}, 'requireCitations') + ? { requireCitations: explicitRequiresCitations } + : {}), + effectiveRequiresCitations: explicitRequiresCitations || targetRequiresCitations, + ...optionalStringFact(info.node, 'provenance', 'provenance'), + ...optionalStringFact(info.node, 'citationField', 'citationField'), + ...optionalStringFact(info.node, 'sourceField', 'sourceField'), + ...optionalStringFact(info.node, 'scoreField', 'scoreField'), + contractStatus: ragMcpRetrieveContractStatus(info.node, targetRequiresCitations), ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), }; } +function ragMcpRetrieveContractStatus( + node: IRNode, + targetRequiresCitations: boolean, +): RagSemanticMcpRetrievalFact['contractStatus'] { + const outputShape = stringProp(node, 'output'); + const hasRequireCitations = Object.hasOwn(node.props ?? {}, 'requireCitations'); + const hasOutputField = ['provenance', 'citationField', 'sourceField', 'scoreField'].some((prop) => + Boolean(stringProp(node, prop)), + ); + if (!outputShape) return hasOutputField || hasRequireCitations ? 
'invalid' : 'absent'; + if (outputShape !== RAG_MCP_RETRIEVE_OUTPUT_SHAPE) return 'invalid'; + if (ragBooleanPropIsFalse(node, 'requireCitations') && targetRequiresCitations) return 'invalid'; + if (ragBooleanProp(node, 'requireCitations') || targetRequiresCitations) { + const citationField = stringProp(node, 'citationField'); + const sourceField = stringProp(node, 'sourceField'); + const provenance = stringProp(node, 'provenance'); + if (!citationField) return 'invalid'; + if (!sourceField && provenance !== 'source') return 'invalid'; + } + return 'valid'; +} + function ragMcpRetrieveRequiresGrounding( node: IRNode, ragName: string | undefined, - ragByName: ReadonlyMap, + citationRequiredRagNames: ReadonlySet, ): boolean { if (ragBooleanPropIsFalse(node, 'requireGrounding')) return false; if (ragBooleanProp(node, 'requireGrounding')) return true; - const pipeline = ragName ? ragByName.get(ragName) : undefined; - return pipeline ? ragBooleanProp(pipeline.node, 'citations') : false; + return ragName ? 
citationRequiredRagNames.has(ragName) : false; } function ragLocation(node: IRNode): RagSemanticLocation | undefined { @@ -1699,6 +1839,8 @@ interface ClassMemberInfo { } const BUILTIN_CLASS_BASES = new Set(['Error']); +const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; +const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; const BODY_EXPRESSION_PROPS = [ 'value', 'expr', diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index e3523032..90e8d7a9 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -168,10 +168,10 @@ describe('RAG language semantics', () => { 'mcp name=Support', ' tool name=answerQuestion', ' param name=question type=string required=true', - ' retrieve rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8', + ' retrieve rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8 output="RetrievedChunk[]" requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score', ' prompt name=summarizeDocs', ' param name=question type=string required=true', - ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true', + ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true output="RetrievedChunk[]" scoreField=score', ].join('\n'); expect(validateSchema(parseRoot(source))).toEqual([]); @@ -190,10 +190,10 @@ describe('RAG language semantics', () => { 'mcp name=Support', ' tool name=answerQuestion', ' param name=question type=string required=true', - ' retrieve name=answerDocs rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8', + ' retrieve name=answerDocs rag=AnswerDocs queryParam=question as=context topK=4 minScore=0.8 output="RetrievedChunk[]" requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score', ' prompt name=summarizeDocs', ' param name=question type=string required=true', - ' 
retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true', + ' retrieve retriever=DocsSearch queryParam=question as=chunks requireGrounding=true output="RetrievedChunk[]" scoreField=score', ].join('\n'), ), ); @@ -212,6 +212,15 @@ describe('RAG language semantics', () => { topK: 4, minScore: 0.8, requireGrounding: true, + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + requireCitations: true, + effectiveRequiresCitations: true, + provenance: 'source', + citationField: 'citation', + sourceField: 'uri', + scoreField: 'score', + contractStatus: 'valid', }), expect.objectContaining({ containerKind: 'prompt', @@ -221,6 +230,11 @@ describe('RAG language semantics', () => { queryParam: 'question', as: 'chunks', requireGrounding: true, + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + effectiveRequiresCitations: false, + scoreField: 'score', + contractStatus: 'valid', }), ]); }); @@ -316,6 +330,61 @@ describe('RAG language semantics', () => { expect(facts.unresolvedRagRefs).toEqual(['MissingRag']); }); + test('reports invalid MCP retrieval output contracts', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + 'rag name=PlainAnswer retriever=DocsSearch', + 'mcp name=Support', + ' tool name=badOutput', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question output="Foo[]"', + ' tool name=scalarOutput', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question output=RetrievedChunk', + ' tool name=fieldWithoutOutput', + ' param name=question type=string required=true', + ' retrieve retriever=DocsSearch queryParam=question citationField=citation', + ' tool name=requireCitationsWithoutOutput', + ' param name=question type=string required=true', + ' retrieve 
retriever=DocsSearch queryParam=question requireCitations=true', + ' tool name=missingCitationField', + ' param name=question type=string required=true', + ' retrieve rag=PlainAnswer queryParam=question output="RetrievedChunk[]" requireCitations=true provenance=source', + ' tool name=missingSourceField', + ' param name=question type=string required=true', + ' retrieve rag=PlainAnswer queryParam=question output="RetrievedChunk[]" requireCitations=true citationField=citation', + ' tool name=weakensCitations', + ' param name=question type=string required=true', + ' retrieve rag=AnswerDocs queryParam=question output="RetrievedChunk[]" requireCitations=false', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'mcp-retrieve-output-unknown', + 'mcp-retrieve-output-array-required', + 'mcp-retrieve-output-field-without-output', + 'mcp-retrieve-output-required', + 'mcp-retrieve-output-citation-field-required', + 'mcp-retrieve-output-source-required', + 'mcp-retrieve-output-citations-cannot-weaken-rag', + ]), + ); + + const facts = collectRagSemanticFacts(parseRoot(source)); + expect(facts.mcpRetrievals.map((fact) => fact.contractStatus)).toEqual([ + 'invalid', + 'invalid', + 'invalid', + 'invalid', + 'invalid', + 'invalid', + 'invalid', + ]); + }); + test('reports invalid MCP resource-backed corpus source bindings', () => { const source = [ 'mcp name=Support', diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index fd0a3e17..87ee071a 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -252,7 +252,7 @@ describe('KERN semantic substrate', () => { ' resource name=DocsResource uri="docs://manuals"', ' tool name=answerQuestion', ' param name=question type=string required=true', - ' retrieve rag=AnswerDocs queryParam=question as=context', + ' retrieve rag=AnswerDocs queryParam=question as=context output="RetrievedChunk[]" 
requireCitations=true provenance=source citationField=citation sourceField=uri scoreField=score', ].join('\n'), ); @@ -299,6 +299,15 @@ describe('KERN semantic substrate', () => { queryParam: 'question', as: 'context', requireGrounding: true, + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + requireCitations: true, + effectiveRequiresCitations: true, + provenance: 'source', + citationField: 'citation', + sourceField: 'uri', + scoreField: 'score', + contractStatus: 'valid', }), ]); expect(substrate.ragFacts?.resourceFeedsCorpora).toEqual([ From 922b18b5ea41e9720dad5fa91a44a6e7cc2ae8bf Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 21:50:32 +0200 Subject: [PATCH 22/63] feat(core): add rag eval contract cases --- .../conformance-rag-bad-cases.kern | 17 + .../conformance-rag-bad-cases.test.kern | 17 + examples/native-test/conformance-rag.kern | 6 +- .../native-test/conformance-rag.test.kern | 2 + packages/core/src/codegen-core.ts | 4 + packages/core/src/index.ts | 2 + packages/core/src/schema.ts | 34 +- packages/core/src/semantic-validator.ts | 460 +++++++++++++++++- packages/core/src/spec.ts | 2 + packages/core/tests/rag-semantics.test.ts | 172 ++++++- 10 files changed, 707 insertions(+), 9 deletions(-) diff --git a/examples/native-test/conformance-rag-bad-cases.kern b/examples/native-test/conformance-rag-bad-cases.kern index e989e2e8..84e51325 100644 --- a/examples/native-test/conformance-rag-bad-cases.kern +++ b/examples/native-test/conformance-rag-bad-cases.kern @@ -11,6 +11,23 @@ retriever name=BadRetriever corpus=Missing embed=MissingEmbed topK=0 minScore=1. retriever name=MismatchRetriever corpus=OtherDocs embed=OtherEmbedding rag name=BadRag retriever=MissingRetriever citations=true + ragEval metric=faithfulness threshold=0.85 + ragCase name=badCase query="What changed?" 
topK=0 minScore=1.2 chunkCount=-1 + ragAssert kind=unknownKind + ragAssert kind=scoreGte threshold=1.5 + ragAssert kind=scoreLte + ragAssert kind=chunkHash value=not-a-hash + ragAssert kind=chunkCountEq count=-1 + ragAssert kind=latencyLte valueMs=-1 + ragAssert kind=sourceEq + +rag name=PlainRag retriever=MismatchRetriever + ragEval name=PlainEval metric=faithfulness threshold=0.85 mode=contract + ragCase name=needsCitations query="needs cite" sources="docs/refunds.md" + ragAssert kind=citesRequired grounding rag=MissingRag maxContext=0 ragEval rag=MissingRag threshold=1.1 + +ragCase name=loose query="outside eval" +ragAssert kind=citesRequired diff --git a/examples/native-test/conformance-rag-bad-cases.test.kern b/examples/native-test/conformance-rag-bad-cases.test.kern index c6fb8223..83d7c393 100644 --- a/examples/native-test/conformance-rag-bad-cases.test.kern +++ b/examples/native-test/conformance-rag-bad-cases.test.kern @@ -15,3 +15,20 @@ test name="Bad RAG conformance" target="./conformance-rag-bad-cases.kern" covera expect has=semanticViolations matches="RAG grounding maxContext must be a positive integer" expect has=semanticViolations matches="RAG eval references unknown rag 'MissingRag'" expect has=semanticViolations matches="RAG eval threshold must be between 0 and 1" + expect has=semanticViolations matches="RAG eval with ragCase children must declare name=" + expect has=semanticViolations matches="RAG eval with ragCase children must declare mode=contract" + expect has=semanticViolations matches="RAG eval case topK must be a positive integer" + expect has=semanticViolations matches="RAG eval case minScore must be between 0 and 1" + expect has=semanticViolations matches="RAG eval case chunkCount must be a non-negative integer" + expect has=semanticViolations matches="RAG eval case sources=.*requires a citation-grounded rag" + expect has=semanticViolations matches="RAG eval case must be nested under ragEval" + expect has=semanticViolations matches="RAG 
assert kind must be one of" + expect has=semanticViolations matches="RAG assert kind=scoreLte requires threshold" + expect has=semanticViolations matches="RAG assert kind=scoreGte threshold must be between 0 and 1" + expect has=semanticViolations matches="RAG assert kind=chunkHash value must be a 32-128 character hex hash" + expect has=semanticViolations matches="RAG assert kind=chunkCountEq requires a non-negative integer count" + expect has=semanticViolations matches="RAG assert kind=latencyLte requires a non-negative integer valueMs" + expect has=semanticViolations matches="RAG assert kind=sourceEq requires value=" + expect has=semanticViolations matches="RAG assert kind=citesRequired requires a citation-grounded rag" + expect has=semanticViolations matches="RAG assert must be nested under ragEval" + expect has=semanticViolations matches="RAG assert must be nested under ragCase" diff --git a/examples/native-test/conformance-rag.kern b/examples/native-test/conformance-rag.kern index 652d877f..84654d01 100644 --- a/examples/native-test/conformance-rag.kern +++ b/examples/native-test/conformance-rag.kern @@ -8,4 +8,8 @@ retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 min rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true grounding name=StrictGrounding requireCitations=true policy=strict maxContext=6000 - ragEval name=Faithfulness metric=faithfulness threshold=0.85 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="How do refunds work?" 
tags="smoke,policy" topK=4 minScore=0.72 sources="docs/refunds.md" + ragAssert kind=scoreGte threshold=0.72 required=true + ragAssert kind=sourceGlob value="docs/refunds.md" required=true + ragAssert kind=citesRequired diff --git a/examples/native-test/conformance-rag.test.kern b/examples/native-test/conformance-rag.test.kern index 89b7ac9c..a66308c0 100644 --- a/examples/native-test/conformance-rag.test.kern +++ b/examples/native-test/conformance-rag.test.kern @@ -10,3 +10,5 @@ test name="RAG language conformance" target="./conformance-rag.kern" coverage=fa expect node=retriever name=DocsSearch prop=topK is=8 expect node=rag name=AnswerDocs child=grounding childName=StrictGrounding expect node=rag name=AnswerDocs child=ragEval childName=Faithfulness + expect node=ragEval name=Faithfulness child=ragCase childName=refunds + expect node=ragAssert count=3 diff --git a/packages/core/src/codegen-core.ts b/packages/core/src/codegen-core.ts index 7d49750f..5d3290d2 100644 --- a/packages/core/src/codegen-core.ts +++ b/packages/core/src/codegen-core.ts @@ -730,6 +730,8 @@ export const CORE_NODE_TYPES = new Set([ 'rag', 'grounding', 'ragEval', + 'ragCase', + 'ragAssert', // Backend data layer (graduated nodes) 'model', 'column', @@ -1030,6 +1032,8 @@ export function generateCoreNode(node: IRNode, target?: string, runtime?: KernRu case 'rag': case 'grounding': case 'ragEval': + case 'ragCase': + case 'ragAssert': return []; // Graduated nodes — backend data layer case 'model': diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 835b6ff0..3a624744 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -467,6 +467,8 @@ export type { RagSemanticChunkingFact, RagSemanticCorpusFact, RagSemanticEmbedFact, + RagSemanticEvalAssertFact, + RagSemanticEvalCaseFact, RagSemanticEvalFact, RagSemanticFacts, RagSemanticGroundingFact, diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 062454d6..67ed0bbc 100644 --- 
a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2504,13 +2504,43 @@ export const NODE_SCHEMAS: Record = { allowedChildren: [], }, ragEval: { - description: 'RAG evaluation contract — declares a metric threshold for a RAG pipeline.', - example: 'ragEval rag=AnswerDocs metric=faithfulness threshold=0.85', + description: 'RAG evaluation contract — declares metric thresholds and static eval cases for a RAG pipeline.', + example: + 'ragEval rag=AnswerDocs metric=faithfulness threshold=0.85 mode=contract\n ragCase name=refunds query="How do refunds work?"\n ragAssert kind=sourceGlob value="docs/refunds.md" required=true', props: { name: { kind: 'identifier' }, rag: { kind: 'identifier' }, metric: { kind: 'identifier' }, threshold: { kind: 'number' }, + mode: { kind: 'identifier' }, + }, + allowedChildren: ['ragCase'], + }, + ragCase: { + description: 'RAG evaluation case — declares a single query and expected retrieval contract facts.', + example: + 'ragCase name=refunds query="How do refunds work?" 
tags=smoke minScore=0.72\n ragAssert kind=sourceGlob value="docs/refunds.md" required=true', + props: { + name: { required: true, kind: 'identifier' }, + query: { required: true, kind: 'string' }, + tags: { kind: 'string' }, + topK: { kind: 'number' }, + minScore: { kind: 'number' }, + chunkCount: { kind: 'number' }, + sources: { kind: 'string' }, + }, + allowedChildren: ['ragAssert'], + }, + ragAssert: { + description: 'RAG evaluation assertion — declares a closed static check over retrieved chunks or grounding.', + example: 'ragAssert kind=scoreGte threshold=0.72 required=true', + props: { + kind: { required: true, kind: 'identifier' }, + value: { kind: 'string' }, + threshold: { kind: 'number' }, + count: { kind: 'number' }, + valueMs: { kind: 'number' }, + required: { kind: 'boolean' }, }, allowedChildren: [], }, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index e1699fd8..30e79f67 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -161,6 +161,38 @@ export interface RagSemanticEvalFact { readonly ragName?: string; readonly metric?: string; readonly threshold?: number; + readonly mode?: string; + readonly caseCount?: number; + readonly assertCount?: number; + readonly cases?: readonly RagSemanticEvalCaseFact[]; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticEvalCaseFact { + readonly name: string; + readonly ragName?: string; + readonly evalName?: string; + readonly query: string; + readonly tags: readonly string[]; + readonly expected: { + readonly topK?: number; + readonly minScore?: number; + readonly chunkCount?: number; + readonly sources?: readonly string[]; + }; + readonly asserts: readonly RagSemanticEvalAssertFact[]; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticEvalAssertFact { + readonly ragName?: string; + readonly evalName?: string; + readonly caseName?: string; + readonly kind: string; + readonly 
target: 'retrieved-chunk' | 'retrieved-chunks' | 'grounding' | 'latency'; + readonly op: 'eq' | 'gte' | 'lte' | 'contains' | 'glob' | 'present'; + readonly value?: string | number | boolean; + readonly required: boolean; readonly loc?: RagSemanticLocation; } @@ -701,6 +733,29 @@ interface RagEvalInfo { ragName?: string; } +interface RagCaseInfo { + node: IRNode; + rootIndex: number; + name?: string; + query?: string; + ragName?: string; + evalName?: string; + evalNode?: IRNode; + evalBound: boolean; +} + +interface RagAssertInfo { + node: IRNode; + rootIndex: number; + ragName?: string; + evalName?: string; + caseName?: string; + evalNode?: IRNode; + caseNode?: IRNode; + evalBound: boolean; + caseBound: boolean; +} + interface RagMcpContainerInfo { node: IRNode; rootIndex: number; @@ -731,6 +786,8 @@ interface RagInfos { pipelines: RagPipelineInfo[]; groundings: RagGroundingInfo[]; evals: RagEvalInfo[]; + cases: RagCaseInfo[]; + asserts: RagAssertInfo[]; mcpRetrievals: RagMcpRetrievalInfo[]; mcpResources: RagMcpSymbolInfo[]; mcpTools: RagMcpSymbolInfo[]; @@ -752,6 +809,8 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio infos.pipelines.length === 0 && infos.groundings.length === 0 && infos.evals.length === 0 && + infos.cases.length === 0 && + infos.asserts.length === 0 && infos.mcpRetrievals.length === 0 && infos.mcpResources.length === 0 && infos.mcpTools.length === 0 && @@ -796,6 +855,12 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio for (const evaluation of infos.evals) { validateRagEval(evaluation, ragByName, violations); } + for (const evaluationCase of infos.cases) { + validateRagCase(evaluationCase, citationRequiredRagNames, violations); + } + for (const assertion of infos.asserts) { + validateRagAssert(assertion, citationRequiredRagNames, violations); + } validateRagMcpRetrievalDuplicates(infos.mcpRetrievals, violations); for (const retrieval of infos.mcpRetrievals) { 
validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, citationRequiredRagNames, violations); @@ -812,6 +877,8 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { pipelines: [], groundings: [], evals: [], + cases: [], + asserts: [], mcpRetrievals: [], mcpResources: [], mcpTools: [], @@ -828,11 +895,28 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { node: IRNode, nearestCorpusName?: string, nearestRagName?: string, + nearestRagEvalName?: string, + nearestRagCaseName?: string, + nearestRagEvalBound = false, + nearestRagCaseBound = false, + nearestRagEvalNode?: IRNode, + nearestRagCaseNode?: IRNode, nearestMcpContainer?: RagMcpContainerInfo, nearestMcpName?: string, ): void { const nextCorpusName = node.type === 'corpus' ? stringProp(node, 'name') || nearestCorpusName : nearestCorpusName; - const nextRagName = node.type === 'rag' ? stringProp(node, 'name') || nearestRagName : nearestRagName; + const nextRagName = + node.type === 'rag' + ? stringProp(node, 'name') || nearestRagName + : node.type === 'ragEval' + ? stringProp(node, 'rag') || nearestRagName + : nearestRagName; + const nextRagEvalName = node.type === 'ragEval' ? stringProp(node, 'name') : nearestRagEvalName; + const nextRagCaseName = node.type === 'ragCase' ? stringProp(node, 'name') : nearestRagCaseName; + const nextRagEvalBound = node.type === 'ragEval' || nearestRagEvalBound; + const nextRagCaseBound = node.type === 'ragCase' || nearestRagCaseBound; + const nextRagEvalNode = node.type === 'ragEval' ? node : nearestRagEvalNode; + const nextRagCaseNode = node.type === 'ragCase' ? node : nearestRagCaseNode; const nextMcpName = node.type === 'mcp' ? 
stringProp(node, 'name') || '' : nearestMcpName; const nextMcpContainer = node.type === 'tool' || node.type === 'prompt' @@ -870,6 +954,29 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { out.groundings.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); } else if (node.type === 'ragEval') { out.evals.push({ node, rootIndex, ragName: stringProp(node, 'rag') || nearestRagName }); + } else if (node.type === 'ragCase') { + out.cases.push({ + node, + rootIndex, + name: stringProp(node, 'name'), + query: stringProp(node, 'query'), + ragName: nearestRagName, + evalName: nearestRagEvalName, + evalNode: nearestRagEvalNode, + evalBound: nearestRagEvalBound, + }); + } else if (node.type === 'ragAssert') { + out.asserts.push({ + node, + rootIndex, + ragName: nearestRagName, + evalName: nearestRagEvalName, + caseName: nearestRagCaseName, + evalNode: nearestRagEvalNode, + caseNode: nearestRagCaseNode, + evalBound: nearestRagEvalBound, + caseBound: nearestRagCaseBound, + }); } else if (node.type === 'retrieve') { out.mcpRetrievals.push({ node, rootIndex, container: nearestMcpContainer }); } else if ( @@ -886,7 +993,20 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { } } - for (const child of node.children ?? []) visit(child, nextCorpusName, nextRagName, nextMcpContainer, nextMcpName); + for (const child of node.children ?? []) + visit( + child, + nextCorpusName, + nextRagName, + nextRagEvalName, + nextRagCaseName, + nextRagEvalBound, + nextRagCaseBound, + nextRagEvalNode, + nextRagCaseNode, + nextMcpContainer, + nextMcpName, + ); } visit(root); } @@ -1287,6 +1407,222 @@ function validateRagEval( 'RAG eval threshold must be between 0 and 1.', ); } + + const mode = stringProp(evaluation.node, 'mode'); + const hasCases = (evaluation.node.children ?? 
[]).some((child) => child.type === 'ragCase'); + if (hasCases && !stringProp(evaluation.node, 'name')) { + pushRagViolation( + violations, + 'rag-eval-name-required', + evaluation.node, + 'RAG eval with ragCase children must declare name= for stable eval facts.', + ); + } + if (hasCases && !mode) { + pushRagViolation( + violations, + 'rag-eval-mode-required', + evaluation.node, + 'RAG eval with ragCase children must declare mode=contract.', + ); + } + if (mode && mode !== 'contract') { + pushRagViolation( + violations, + 'rag-eval-mode-invalid', + evaluation.node, + "RAG eval mode only supports 'contract' in this slice.", + ); + } +} + +function validateRagCase( + evaluationCase: RagCaseInfo, + citationRequiredRagNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!evaluationCase.evalBound) { + pushRagViolation( + violations, + 'rag-case-missing-eval', + evaluationCase.node, + 'RAG eval case must be nested under ragEval.', + ); + } + if (!evaluationCase.name) { + pushRagViolation(violations, 'rag-case-name-required', evaluationCase.node, 'RAG eval case requires name=.'); + } + if (!evaluationCase.query) { + pushRagViolation( + violations, + 'rag-case-query-required', + evaluationCase.node, + 'RAG eval case requires query=.', + ); + } + + const topK = numberProp(evaluationCase.node, 'topK'); + if ( + invalidNumberProp(evaluationCase.node, 'topK') || + (topK !== undefined && (!Number.isInteger(topK) || topK <= 0)) + ) { + pushRagViolation( + violations, + 'rag-case-topk-invalid', + evaluationCase.node, + 'RAG eval case topK must be a positive integer.', + ); + } + + const minScore = numberProp(evaluationCase.node, 'minScore'); + if ( + invalidNumberProp(evaluationCase.node, 'minScore') || + (minScore !== undefined && (minScore < 0 || minScore > 1)) + ) { + pushRagViolation( + violations, + 'rag-case-minscore-invalid', + evaluationCase.node, + 'RAG eval case minScore must be between 0 and 1.', + ); + } + + const chunkCount = 
numberProp(evaluationCase.node, 'chunkCount'); + if ( + invalidNumberProp(evaluationCase.node, 'chunkCount') || + (chunkCount !== undefined && (!Number.isInteger(chunkCount) || chunkCount < 0)) + ) { + pushRagViolation( + violations, + 'rag-case-chunk-count-invalid', + evaluationCase.node, + 'RAG eval case chunkCount must be a non-negative integer.', + ); + } + + if ( + stringProp(evaluationCase.node, 'sources') && + (!evaluationCase.ragName || !citationRequiredRagNames.has(evaluationCase.ragName)) + ) { + pushRagViolation( + violations, + 'rag-case-sources-require-citations', + evaluationCase.node, + 'RAG eval case sources=<...> requires a citation-grounded rag.', + ); + } +} + +function validateRagAssert( + assertion: RagAssertInfo, + citationRequiredRagNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!assertion.evalBound) { + pushRagViolation(violations, 'rag-assert-missing-eval', assertion.node, 'RAG assert must be nested under ragEval.'); + } + if (!assertion.caseBound) { + pushRagViolation(violations, 'rag-assert-missing-case', assertion.node, 'RAG assert must be nested under ragCase.'); + } + + const kind = stringProp(assertion.node, 'kind'); + if (!kind || !RAG_ASSERT_KINDS.has(kind)) { + pushRagViolation( + violations, + 'rag-assert-kind-invalid', + assertion.node, + `RAG assert kind must be one of ${[...RAG_ASSERT_KINDS].join(', ')}.`, + ); + return; + } + + if ( + ['factId', 'chunkHash', 'contains', 'sourceEq', 'sourceGlob'].includes(kind) && + !stringProp(assertion.node, 'value') + ) { + pushRagViolation( + violations, + 'rag-assert-value-required', + assertion.node, + `RAG assert kind=${kind} requires value=.`, + ); + } + + const chunkHash = kind === 'chunkHash' ? 
stringProp(assertion.node, 'value') : undefined; + if (chunkHash && !/^[a-fA-F0-9]{32,128}$/.test(chunkHash)) { + pushRagViolation( + violations, + 'rag-assert-chunk-hash-invalid', + assertion.node, + 'RAG assert kind=chunkHash value must be a 32-128 character hex hash.', + ); + } + + if (kind === 'scoreGte' || kind === 'scoreLte') { + const threshold = numberProp(assertion.node, 'threshold'); + if (threshold === undefined && !invalidNumberProp(assertion.node, 'threshold')) { + pushRagViolation( + violations, + 'rag-assert-threshold-required', + assertion.node, + `RAG assert kind=${kind} requires threshold=.`, + ); + } else if ( + invalidNumberProp(assertion.node, 'threshold') || + threshold === undefined || + threshold < 0 || + threshold > 1 + ) { + pushRagViolation( + violations, + 'rag-assert-threshold-invalid', + assertion.node, + `RAG assert kind=${kind} threshold must be between 0 and 1.`, + ); + } + } + + if ( + kind === 'citesRequired' && + assertion.caseBound && + (!assertion.ragName || !citationRequiredRagNames.has(assertion.ragName)) + ) { + pushRagViolation( + violations, + 'rag-assert-citations-require-grounding', + assertion.node, + 'RAG assert kind=citesRequired requires a citation-grounded rag.', + ); + } + + if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') { + const count = numberProp(assertion.node, 'count'); + if (invalidNumberProp(assertion.node, 'count') || count === undefined || !Number.isInteger(count) || count < 0) { + pushRagViolation( + violations, + 'rag-assert-count-invalid', + assertion.node, + `RAG assert kind=${kind} requires a non-negative integer count.`, + ); + } + } + + if (kind === 'latencyLte') { + const valueMs = numberProp(assertion.node, 'valueMs'); + if ( + invalidNumberProp(assertion.node, 'valueMs') || + valueMs === undefined || + !Number.isInteger(valueMs) || + valueMs < 0 + ) { + pushRagViolation( + violations, + 'rag-assert-value-ms-invalid', + assertion.node, + 'RAG assert kind=latencyLte requires a 
non-negative integer valueMs.', + ); + } + } } function validateRagMcpRetrievalDuplicates( @@ -1540,7 +1876,9 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe return { corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), retrievers: infos.retrievers.map(ragRetrieverFact), - pipelines: infos.pipelines.map((info) => ragPipelineFact(info, infos.groundings, infos.evals)), + pipelines: infos.pipelines.map((info) => + ragPipelineFact(info, infos.groundings, infos.evals, infos.cases, infos.asserts), + ), mcpRetrievals: infos.mcpRetrievals.map((info) => ragMcpRetrievalFact(info, citationRequiredRagNames)), resourceFeedsCorpora: infos.sources .filter( @@ -1668,6 +2006,8 @@ function ragPipelineFact( info: RagPipelineInfo, groundings: readonly RagGroundingInfo[], evals: readonly RagEvalInfo[], + cases: readonly RagCaseInfo[], + asserts: readonly RagAssertInfo[], ): RagSemanticPipelineFact { return { name: info.name, @@ -1676,7 +2016,9 @@ function ragPipelineFact( ...optionalStringFact(info.node, 'answer', 'answer'), citations: ragBooleanProp(info.node, 'citations'), groundings: groundings.filter((grounding) => grounding.ragName === info.name).map(ragGroundingFact), - evals: evals.filter((evaluation) => evaluation.ragName === info.name).map(ragEvalFact), + evals: evals + .filter((evaluation) => evaluation.ragName === info.name) + .map((evaluation) => ragEvalFact(evaluation, cases, asserts)), ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), }; } @@ -1692,16 +2034,111 @@ function ragGroundingFact(info: RagGroundingInfo): RagSemanticGroundingFact { }; } -function ragEvalFact(info: RagEvalInfo): RagSemanticEvalFact { +function ragEvalFact( + info: RagEvalInfo, + cases: readonly RagCaseInfo[], + asserts: readonly RagAssertInfo[], +): RagSemanticEvalFact { + const evalCases = cases.filter((evaluationCase) => evaluationCase.evalNode === info.node); + const caseFacts = evalCases.map((evaluationCase) => ragEvalCaseFact(evaluationCase, asserts)); return { ...optionalStringFact(info.node, 'name', 'name'), ...optionalStringValue('ragName', info.ragName), ...optionalStringFact(info.node, 'metric', 'metric'), ...optionalNumberFact(info.node, 'threshold', 'threshold'), + ...optionalStringFact(info.node, 'mode', 'mode'), + caseCount: caseFacts.length, + assertCount: caseFacts.reduce((count, evaluationCase) => count + evaluationCase.asserts.length, 0), + cases: caseFacts, + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragEvalCaseFact(info: RagCaseInfo, asserts: readonly RagAssertInfo[]): RagSemanticEvalCaseFact { + const caseAsserts = asserts.filter((assertion) => assertion.caseNode === info.node); + return { + name: info.name ?? '', + ...optionalStringValue('ragName', info.ragName), + ...optionalStringValue('evalName', info.evalName), + query: info.query ?? '', + tags: splitRagList(stringProp(info.node, 'tags')), + expected: { + ...optionalNumberFact(info.node, 'topK', 'topK'), + ...optionalNumberFact(info.node, 'minScore', 'minScore'), + ...optionalNumberFact(info.node, 'chunkCount', 'chunkCount'), + ...(stringProp(info.node, 'sources') ? { sources: splitRagList(stringProp(info.node, 'sources')) } : {}), + }, + asserts: caseAsserts.map(ragEvalAssertFact), ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), }; } +function ragEvalAssertFact(info: RagAssertInfo): RagSemanticEvalAssertFact { + const kind = stringProp(info.node, 'kind') ?? 
''; + return { + ...optionalStringValue('ragName', info.ragName), + ...optionalStringValue('evalName', info.evalName), + ...optionalStringValue('caseName', info.caseName), + kind, + target: ragAssertTarget(kind), + op: ragAssertOp(kind), + ...ragAssertValueFact(info.node, kind), + required: ragBooleanProp(info.node, 'required'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + +function ragAssertTarget(kind: string): RagSemanticEvalAssertFact['target'] { + if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') return 'retrieved-chunks'; + if (kind === 'latencyLte') return 'latency'; + if (kind === 'citesRequired') return 'grounding'; + return 'retrieved-chunk'; +} + +function ragAssertOp(kind: string): RagSemanticEvalAssertFact['op'] { + switch (kind) { + case 'scoreGte': + case 'uniqueSourcesGte': + return 'gte'; + case 'scoreLte': + case 'latencyLte': + return 'lte'; + case 'contains': + return 'contains'; + case 'sourceGlob': + return 'glob'; + case 'citesRequired': + return 'present'; + default: + return 'eq'; + } +} + +function ragAssertValueFact(node: IRNode, kind: string): Record { + if (kind === 'scoreGte' || kind === 'scoreLte') { + const threshold = numberProp(node, 'threshold'); + return threshold === undefined ? {} : { value: threshold }; + } + if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') { + const count = numberProp(node, 'count'); + return count === undefined ? {} : { value: count }; + } + if (kind === 'latencyLte') { + const valueMs = numberProp(node, 'valueMs'); + return valueMs === undefined ? 
{} : { value: valueMs }; + } + if (kind === 'citesRequired') return { value: true }; + return optionalStringFact(node, 'value', 'value'); +} + +function splitRagList(value: string | undefined): string[] { + if (!value) return []; + return value + .split(',') + .map((item) => item.trim()) + .filter((item) => item.length > 0); +} + function ragMcpRetrievalFact( info: RagMcpRetrievalInfo, citationRequiredRagNames: ReadonlySet, @@ -1841,6 +2278,19 @@ interface ClassMemberInfo { const BUILTIN_CLASS_BASES = new Set(['Error']); const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; +const RAG_ASSERT_KINDS = new Set([ + 'factId', + 'chunkHash', + 'scoreGte', + 'scoreLte', + 'contains', + 'sourceEq', + 'sourceGlob', + 'uniqueSourcesGte', + 'chunkCountEq', + 'latencyLte', + 'citesRequired', +]); const BODY_EXPRESSION_PROPS = [ 'value', 'expr', diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index 7b76bff6..5bded53f 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -354,6 +354,8 @@ export const NODE_TYPES = [ 'rag', 'grounding', 'ragEval', + 'ragCase', + 'ragAssert', 'expression-v1', ] as const; diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index 90e8d7a9..5771851e 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -13,7 +13,18 @@ function rulesFor(source: string): string[] { describe('RAG language semantics', () => { test('registers RAG declarations as core language nodes', () => { - for (const type of ['corpus', 'source', 'chunking', 'embed', 'retriever', 'rag', 'grounding', 'ragEval']) { + for (const type of [ + 'corpus', + 'source', + 'chunking', + 'embed', + 'retriever', + 'rag', + 'grounding', + 'ragEval', + 'ragCase', + 'ragAssert', + ]) { expect(isCoreNode(type)).toBe(true); expect(generateCoreNode({ type, props: {} })).toEqual([]); } @@ -123,6 +134,123 @@ 
describe('RAG language semantics', () => { ]); }); + test('collects RAG eval case and assertion contracts as semantic facts', () => { + const facts = collectRagSemanticFacts( + parseRoot( + [ + 'corpus name=Docs', + ' source name=manuals uri="./docs/**/*.md"', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + ' ragEval name=SupportEval metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="How do refunds work?" tags="smoke,policy" topK=4 minScore=0.72 chunkCount=2 sources="docs/refunds.md,docs/policies.md"', + ' ragAssert kind=scoreGte threshold=0.72 required=true', + ' ragAssert kind=sourceGlob value="docs/refunds.md" required=true', + ' ragAssert kind=uniqueSourcesGte count=2', + ' ragAssert kind=latencyLte valueMs=250', + ' ragAssert kind=citesRequired', + ].join('\n'), + ), + ); + + expect(facts.pipelines[0]?.evals).toEqual([ + expect.objectContaining({ + name: 'SupportEval', + ragName: 'AnswerDocs', + metric: 'faithfulness', + threshold: 0.85, + mode: 'contract', + caseCount: 1, + assertCount: 5, + cases: [ + expect.objectContaining({ + name: 'refunds', + ragName: 'AnswerDocs', + evalName: 'SupportEval', + query: 'How do refunds work?', + tags: ['smoke', 'policy'], + expected: { + topK: 4, + minScore: 0.72, + chunkCount: 2, + sources: ['docs/refunds.md', 'docs/policies.md'], + }, + asserts: [ + expect.objectContaining({ + kind: 'scoreGte', + target: 'retrieved-chunk', + op: 'gte', + value: 0.72, + required: true, + }), + expect.objectContaining({ + kind: 'sourceGlob', + target: 'retrieved-chunk', + op: 'glob', + value: 'docs/refunds.md', + required: true, + }), + expect.objectContaining({ + kind: 'uniqueSourcesGte', + target: 'retrieved-chunks', + op: 'gte', + value: 2, + required: false, + }), + expect.objectContaining({ + kind: 'latencyLte', + target: 'latency', + op: 'lte', + value: 250, + required: false, + }), + 
expect.objectContaining({ + kind: 'citesRequired', + target: 'grounding', + op: 'present', + value: true, + required: false, + }), + ], + }), + ], + }), + ]); + }); + + test('keeps RAG eval case facts scoped to their parent eval node', () => { + const facts = collectRagSemanticFacts( + parseRoot( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + ' ragEval name=SupportEval metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=nested query="nested case"', + ' ragAssert kind=contains value="nested"', + 'ragEval rag=AnswerDocs name=SupportEval metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=topLevel query="top-level case"', + ' ragAssert kind=contains value="top-level"', + ].join('\n'), + ), + ); + + expect(facts.pipelines[0]?.evals).toEqual([ + expect.objectContaining({ + caseCount: 1, + assertCount: 1, + cases: [expect.objectContaining({ name: 'nested', query: 'nested case' })], + }), + expect.objectContaining({ + caseCount: 1, + assertCount: 1, + cases: [expect.objectContaining({ name: 'topLevel', query: 'top-level case' })], + }), + ]); + }); + test('accepts MCP resource-backed corpus sources as static ingress contracts', () => { const source = [ 'mcp name=Support', @@ -296,6 +424,48 @@ describe('RAG language semantics', () => { ); }); + test('reports invalid RAG eval case and assertion contracts', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragEval metric=faithfulness threshold=0.85', + ' ragCase name=missingMode query="What changed?" 
sources="docs/refunds.md" topK=0 minScore=1.2 chunkCount=-1', + ' ragAssert kind=unknownKind', + ' ragAssert kind=scoreGte threshold=1.5', + ' ragAssert kind=scoreLte', + ' ragAssert kind=chunkHash value=not-a-hash', + ' ragAssert kind=chunkCountEq count=-1', + ' ragAssert kind=latencyLte valueMs=-1', + ' ragAssert kind=sourceEq', + ' ragAssert kind=citesRequired', + 'ragCase name=loose query="outside eval"', + 'ragAssert kind=citesRequired', + ].join('\n'); + + expect(rulesFor(source)).toEqual( + expect.arrayContaining([ + 'rag-eval-name-required', + 'rag-eval-mode-required', + 'rag-case-topk-invalid', + 'rag-case-minscore-invalid', + 'rag-case-chunk-count-invalid', + 'rag-case-sources-require-citations', + 'rag-case-missing-eval', + 'rag-assert-kind-invalid', + 'rag-assert-threshold-required', + 'rag-assert-threshold-invalid', + 'rag-assert-chunk-hash-invalid', + 'rag-assert-count-invalid', + 'rag-assert-value-ms-invalid', + 'rag-assert-value-required', + 'rag-assert-citations-require-grounding', + 'rag-assert-missing-eval', + 'rag-assert-missing-case', + ]), + ); + }); + test('reports invalid MCP retrieval bindings into RAG contracts', () => { const source = [ 'corpus name=Docs', From 99422a5b6b39713f3c546939f9a0f5a7d3238446 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 22:04:14 +0200 Subject: [PATCH 23/63] feat(core): harden rag eval contracts --- .../conformance-rag-bad-cases.kern | 4 ++ .../conformance-rag-bad-cases.test.kern | 2 + packages/core/src/index.ts | 2 + packages/core/src/rag-assertions.ts | 17 ++++++ packages/core/src/schema.ts | 21 ++++++- packages/core/src/semantic-validator.ts | 60 ++++++++++++++----- packages/core/tests/rag-semantics.test.ts | 32 ++++++++++ packages/core/tests/schema-validation.test.ts | 18 +++++- 8 files changed, 139 insertions(+), 17 deletions(-) create mode 100644 packages/core/src/rag-assertions.ts diff --git a/examples/native-test/conformance-rag-bad-cases.kern 
b/examples/native-test/conformance-rag-bad-cases.kern index 84e51325..27b4463a 100644 --- a/examples/native-test/conformance-rag-bad-cases.kern +++ b/examples/native-test/conformance-rag-bad-cases.kern @@ -25,6 +25,10 @@ rag name=PlainRag retriever=MismatchRetriever ragEval name=PlainEval metric=faithfulness threshold=0.85 mode=contract ragCase name=needsCitations query="needs cite" sources="docs/refunds.md" ragAssert kind=citesRequired + ragCase name=needsCitations query="duplicate case" + +ragEval rag=PlainRag name=PlainEval metric=faithfulness threshold=0.85 mode=contract + ragCase name=outside query="duplicate eval" grounding rag=MissingRag maxContext=0 ragEval rag=MissingRag threshold=1.1 diff --git a/examples/native-test/conformance-rag-bad-cases.test.kern b/examples/native-test/conformance-rag-bad-cases.test.kern index 83d7c393..5362de69 100644 --- a/examples/native-test/conformance-rag-bad-cases.test.kern +++ b/examples/native-test/conformance-rag-bad-cases.test.kern @@ -11,6 +11,8 @@ test name="Bad RAG conformance" target="./conformance-rag-bad-cases.kern" covera expect has=semanticViolations matches="RAG retriever 'MismatchRetriever' uses embed 'OtherEmbedding'" expect has=semanticViolations matches="RAG pipeline 'BadRag' references unknown retriever 'MissingRetriever'" expect has=semanticViolations matches="RAG pipeline 'BadRag' requires citations" + expect has=semanticViolations matches="Duplicate RAG eval named 'PlainEval'" + expect has=semanticViolations matches="Duplicate RAG eval case named 'needsCitations'" expect has=semanticViolations matches="RAG grounding references unknown rag 'MissingRag'" expect has=semanticViolations matches="RAG grounding maxContext must be a positive integer" expect has=semanticViolations matches="RAG eval references unknown rag 'MissingRag'" diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3a624744..01b18119 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -427,6 
+427,8 @@ export { validatePortablePredicateAST, } from './portable-predicate.js'; export { parsePortableNonNegativeIntLiteral, parsePortablePathSegments } from './portable-route-collection.js'; +export type { RagAssertionKind } from './rag-assertions.js'; +export { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; export type { ParserHintsConfig } from './runtime.js'; // Runtime (instance-based state) export { defaultRuntime, KernRuntime } from './runtime.js'; diff --git a/packages/core/src/rag-assertions.ts b/packages/core/src/rag-assertions.ts new file mode 100644 index 00000000..5e812c26 --- /dev/null +++ b/packages/core/src/rag-assertions.ts @@ -0,0 +1,17 @@ +export const RAG_ASSERTION_KINDS = [ + 'factId', + 'chunkHash', + 'scoreGte', + 'scoreLte', + 'contains', + 'sourceEq', + 'sourceGlob', + 'uniqueSourcesGte', + 'chunkCountEq', + 'latencyLte', + 'citesRequired', +] as const; + +export type RagAssertionKind = (typeof RAG_ASSERTION_KINDS)[number]; + +export const RAG_ASSERTION_KIND_SET: ReadonlySet = new Set(RAG_ASSERTION_KINDS); diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 67ed0bbc..9e456045 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -24,6 +24,7 @@ import { import { type KernTarget, VALID_TARGETS } from './config.js'; import { validateCapabilityMetadata, validateImportMetadata } from './import-metadata.js'; import { parsePortablePredicateProp, validatePortablePredicateAST } from './portable-predicate.js'; +import { RAG_ASSERTION_KINDS } from './rag-assertions.js'; import { defaultRuntime, type KernRuntime } from './runtime.js'; import { KERN_VERSION, NODE_TYPES, STYLE_SHORTHANDS, VALUE_SHORTHANDS } from './spec.js'; import type { IRNode } from './types.js'; @@ -43,6 +44,7 @@ export type PropKind = export interface PropSchema { required?: boolean; kind: PropKind; + values?: readonly string[]; } export interface NodeSchema { @@ -2535,7 +2537,7 @@ export const 
NODE_SCHEMAS: Record = { description: 'RAG evaluation assertion — declares a closed static check over retrieved chunks or grounding.', example: 'ragAssert kind=scoreGte threshold=0.72 required=true', props: { - kind: { required: true, kind: 'identifier' }, + kind: { required: true, kind: 'identifier', values: RAG_ASSERTION_KINDS }, value: { kind: 'string' }, threshold: { kind: 'number' }, count: { kind: 'number' }, @@ -4316,11 +4318,28 @@ function checkAllowedChildren(node: IRNode, schema: NodeSchema, violations: Sche } } +function checkAllowedPropValues(node: IRNode, schema: NodeSchema, violations: SchemaViolation[]): void { + const props = node.props || {}; + for (const [propName, propSchema] of Object.entries(schema.props)) { + if (!propSchema.values || !(propName in props)) continue; + const value = props[propName]; + if (typeof value !== 'string' || !propSchema.values.includes(value)) { + violations.push({ + nodeType: node.type, + message: `'${node.type}' prop '${propName}' must be one of ${propSchema.values.join(', ')}`, + line: node.loc?.line, + col: node.loc?.col, + }); + } + } +} + function validateNode(node: IRNode, violations: SchemaViolation[], parent?: IRNode): void { const schema = Object.hasOwn(NODE_SCHEMAS, node.type) ? 
NODE_SCHEMAS[node.type] : undefined; if (schema) { checkRequiredProps(node, schema, violations, parent); checkCrossProps(node, violations, parent); + checkAllowedPropValues(node, schema, violations); checkAllowedChildren(node, schema, violations); } if (node.children) { diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 30e79f67..de067972 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -19,6 +19,7 @@ import { collectExternalImportSymbols, type ExternalImportSymbolTable } from './ import { importRegistryOf } from './import-metadata.js'; import { parseExpression } from './parser-expression.js'; import { splitPortableExpressionList } from './portable-expression-list.js'; +import { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; import type { IRNode } from './types.js'; import type { ValueIR } from './value-ir.js'; @@ -1049,6 +1050,8 @@ function validateRagUniqueNames(infos: RagInfos, violations: SemanticViolation[] validateRagUniqueNameSet('embed', infos.embeds, violations); validateRagUniqueNameSet('retriever', infos.retrievers, violations); validateRagUniqueNameSet('rag', infos.pipelines, violations); + validateRagUniqueEvalNames(infos.evals, violations); + validateRagUniqueCaseNames(infos.cases, violations); } function validateRagUniqueNameSet( @@ -1091,6 +1094,46 @@ function validateRagUniqueSourceNames(sources: readonly RagSourceInfo[], violati } } +function validateRagUniqueEvalNames(evals: readonly RagEvalInfo[], violations: SemanticViolation[]): void { + const seen = new Map(); + for (const evaluation of evals) { + const name = stringProp(evaluation.node, 'name'); + if (!name || !evaluation.ragName) continue; + const key = `${evaluation.ragName}:${name}`; + const prev = seen.get(key); + if (prev) { + pushRagViolation( + violations, + 'rag-duplicate-eval-name', + evaluation.node, + `Duplicate RAG eval named '${name}' in rag 
'${evaluation.ragName}' — first defined at line ${prev.loc?.line ?? '?'}.`, + ); + } else { + seen.set(key, evaluation.node); + } + } +} + +function validateRagUniqueCaseNames(cases: readonly RagCaseInfo[], violations: SemanticViolation[]): void { + const seen = new Map>(); + for (const evaluationCase of cases) { + if (!evaluationCase.name || !evaluationCase.evalNode) continue; + const evalCases = seen.get(evaluationCase.evalNode) ?? new Map(); + const prev = evalCases.get(evaluationCase.name); + if (prev) { + pushRagViolation( + violations, + 'rag-duplicate-case-name', + evaluationCase.node, + `Duplicate RAG eval case named '${evaluationCase.name}' in eval '${evaluationCase.evalName ?? '?'}' — first defined at line ${prev.loc?.line ?? '?'}.`, + ); + } else { + evalCases.set(evaluationCase.name, evaluationCase.node); + seen.set(evaluationCase.evalNode, evalCases); + } + } +} + function validateRagSource( source: RagSourceInfo, mcpResourcesByName: ReadonlyMap, @@ -1526,12 +1569,12 @@ function validateRagAssert( } const kind = stringProp(assertion.node, 'kind'); - if (!kind || !RAG_ASSERT_KINDS.has(kind)) { + if (!kind || !RAG_ASSERTION_KIND_SET.has(kind)) { pushRagViolation( violations, 'rag-assert-kind-invalid', assertion.node, - `RAG assert kind must be one of ${[...RAG_ASSERT_KINDS].join(', ')}.`, + `RAG assert kind must be one of ${RAG_ASSERTION_KINDS.join(', ')}.`, ); return; } @@ -2278,19 +2321,6 @@ interface ClassMemberInfo { const BUILTIN_CLASS_BASES = new Set(['Error']); const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; -const RAG_ASSERT_KINDS = new Set([ - 'factId', - 'chunkHash', - 'scoreGte', - 'scoreLte', - 'contains', - 'sourceEq', - 'sourceGlob', - 'uniqueSourcesGte', - 'chunkCountEq', - 'latencyLte', - 'citesRequired', -]); const BODY_EXPRESSION_PROPS = [ 'value', 'expr', diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index 
5771851e..41e81290 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -651,6 +651,38 @@ describe('RAG language semantics', () => { ); }); + test('reports duplicate RAG eval and case names in their contract namespaces', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="first"', + ' ragCase name=refunds query="duplicate"', + 'ragEval rag=AnswerDocs name=Faithfulness metric=faithfulness threshold=0.9 mode=contract', + ' ragCase name=external query="duplicate eval"', + ].join('\n'); + + expect(rulesFor(source)).toEqual(expect.arrayContaining(['rag-duplicate-eval-name', 'rag-duplicate-case-name'])); + }); + + test('allows RAG eval and case name reuse across separate namespaces', () => { + const source = [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="answer docs"', + 'rag name=AuditDocs retriever=DocsSearch', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="audit docs"', + 'ragEval rag=AnswerDocs name=Relevance metric=relevance threshold=0.85 mode=contract', + ' ragCase name=refunds query="same case name, different eval"', + ].join('\n'); + + expect(validateSemantics(parseRoot(source))).toEqual([]); + }); + test('requires chunking source refs to resolve inside the referenced corpus', () => { const source = [ 'corpus name=Docs', diff --git a/packages/core/tests/schema-validation.test.ts b/packages/core/tests/schema-validation.test.ts index 397b8f3c..46242a26 100644 --- a/packages/core/tests/schema-validation.test.ts +++ b/packages/core/tests/schema-validation.test.ts @@ -105,7 
+105,9 @@ describe('Schema Validation', () => { 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding topK=8 minScore=0.72', 'rag name=AnswerDocs retriever=DocsSearch', ' grounding requireCitations=true maxContext=6000', - ' ragEval metric=faithfulness threshold=0.85', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="How do refunds work?"', + ' ragAssert kind=scoreGte threshold=0.72', ].join('\n'), ); expect(valid).toHaveLength(0); @@ -136,6 +138,20 @@ describe('Schema Validation', () => { expect( nestedEmbed.some((violation) => violation.message.includes("'corpus' does not allow child type 'embed'")), ).toBe(true); + + const invalidAssertKind = validate( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="How do refunds work?"', + ' ragAssert kind=unsupported', + ].join('\n'), + ); + expect( + invalidAssertKind.some((violation) => violation.message.includes("'ragAssert' prop 'kind' must be one of")), + ).toBe(true); }); it('passes explicit foreign handler metadata', () => { From 0d7b5f3ec030d796445880934a0e4994d1a86f2d Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 22:22:55 +0200 Subject: [PATCH 24/63] feat(core): add in-memory rag runtime --- packages/core/src/index.ts | 15 ++ packages/core/src/rag-runtime.ts | 185 ++++++++++++++++++++++++ packages/core/tests/rag-runtime.test.ts | 167 +++++++++++++++++++++ 3 files changed, 367 insertions(+) create mode 100644 packages/core/src/rag-runtime.ts create mode 100644 packages/core/tests/rag-runtime.test.ts diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 01b18119..e4364a0f 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -429,6 +429,21 @@ export { export { parsePortableNonNegativeIntLiteral, 
parsePortablePathSegments } from './portable-route-collection.js'; export type { RagAssertionKind } from './rag-assertions.js'; export { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; +export type { + InMemoryRagRetriever, + RagChunkInput, + RagCitation, + RetrievedChunk, + RetrieveOptions, + RetrieveResult, +} from './rag-runtime.js'; +export { + createInMemoryRetriever, + InMemoryRagCorpus, + MAX_IN_MEMORY_RAG_TOP_K, + retrieveFromInMemoryCorpus, + tokenizeForRetrieval, +} from './rag-runtime.js'; export type { ParserHintsConfig } from './runtime.js'; // Runtime (instance-based state) export { defaultRuntime, KernRuntime } from './runtime.js'; diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts new file mode 100644 index 00000000..74999209 --- /dev/null +++ b/packages/core/src/rag-runtime.ts @@ -0,0 +1,185 @@ +export interface RagCitation { + readonly uri?: string; + readonly locator?: string; +} + +export interface RagChunkInput { + readonly id: string; + readonly text: string; + readonly source: string; + readonly citation?: RagCitation; + readonly metadata?: Record; +} + +export interface RetrievedChunk { + readonly id: string; + readonly text: string; + readonly score: number; + readonly source: string; + readonly citation: RagCitation; + readonly metadata?: Record; +} + +export interface RetrieveOptions { + readonly topK?: number; + readonly minScore?: number; +} + +export interface RetrieveResult { + readonly query: string; + readonly chunks: RetrievedChunk[]; +} + +export type InMemoryRagRetriever = (query: string, options?: RetrieveOptions) => RetrieveResult; + +export const MAX_IN_MEMORY_RAG_TOP_K = 1000; + +interface StoredRagChunk { + readonly chunk: RagChunkInput; + readonly terms: ReadonlySet; +} + +export class InMemoryRagCorpus { + private readonly chunks = new Map(); + + constructor(chunks: Iterable = []) { + for (const chunk of chunks) this.add(chunk); + } + + get size(): number { + return 
this.chunks.size; + } + + add(chunk: RagChunkInput): void { + if (typeof chunk.id !== 'string' || !chunk.id.trim()) { + throw new Error('KERN RAG runtime chunk id must be a non-empty string.'); + } + if (typeof chunk.text !== 'string' || !chunk.text.trim()) { + throw new Error(`KERN RAG runtime chunk '${chunk.id}' text must be a non-empty string.`); + } + if (typeof chunk.source !== 'string' || !chunk.source.trim()) { + throw new Error(`KERN RAG runtime chunk '${chunk.id}' source must be a non-empty string.`); + } + const storedChunk = { + ...chunk, + citation: chunk.citation ? { ...chunk.citation } : undefined, + metadata: chunk.metadata ? cloneMetadata(chunk.metadata) : undefined, + }; + this.chunks.set(chunk.id, { chunk: storedChunk, terms: tokenizeForRetrieval(storedChunk.text) }); + } + + get(id: string): RagChunkInput | undefined { + const stored = this.chunks.get(id); + return stored ? cloneChunkInput(stored.chunk) : undefined; + } + + all(): RagChunkInput[] { + return Array.from(this.chunks.values(), (stored) => cloneChunkInput(stored.chunk)); + } + + retrieve(query: string, options: RetrieveOptions = {}): RetrieveResult { + if (typeof query !== 'string') throw new Error('KERN RAG runtime query must be a string.'); + const { topK, minScore } = normalizeRetrieveOptions(options); + const queryTerms = tokenizeForRetrieval(query); + if (queryTerms.size === 0) return { query, chunks: [] }; + + const chunks = Array.from(this.chunks.values()) + .map((stored) => ({ chunk: stored.chunk, score: jaccardScore(queryTerms, stored.terms) })) + .filter((candidate) => candidate.score > 0 && candidate.score >= minScore) + .sort((a, b) => b.score - a.score || a.chunk.id.localeCompare(b.chunk.id)) + .slice(0, topK) + .map(({ chunk, score }) => retrievedChunk(chunk, score)); + + return { query, chunks }; + } +} + +export function createInMemoryRetriever(corpus: InMemoryRagCorpus): InMemoryRagRetriever { + return (query: string, options: RetrieveOptions = {}): RetrieveResult => 
corpus.retrieve(query, options); +} + +export function retrieveFromInMemoryCorpus( + corpus: InMemoryRagCorpus, + query: string, + options: RetrieveOptions = {}, +): RetrieveResult { + return corpus.retrieve(query, options); +} + +function normalizeRetrieveOptions(options: RetrieveOptions): Required { + const topK = options.topK ?? 5; + const minScore = options.minScore ?? 0; + if (!Number.isInteger(topK) || topK <= 0 || topK > MAX_IN_MEMORY_RAG_TOP_K) { + throw new Error(`KERN RAG runtime topK must be a positive integer up to ${MAX_IN_MEMORY_RAG_TOP_K}.`); + } + if (!Number.isFinite(minScore) || minScore < 0 || minScore > 1) { + throw new Error('KERN RAG runtime minScore must be between 0 and 1.'); + } + return { topK, minScore }; +} + +export function tokenizeForRetrieval(value: string): ReadonlySet { + return new Set(value.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? []); +} + +function jaccardScore(queryTerms: ReadonlySet, chunkTerms: ReadonlySet): number { + if (queryTerms.size === 0 || chunkTerms.size === 0) return 0; + let intersection = 0; + for (const term of queryTerms) { + if (chunkTerms.has(term)) intersection += 1; + } + const union = queryTerms.size + chunkTerms.size - intersection; + return union === 0 ? 0 : intersection / union; +} + +function retrievedChunk(chunk: RagChunkInput, score: number): RetrievedChunk { + return { + id: chunk.id, + text: chunk.text, + score, + source: chunk.source, + citation: chunk.citation ? { ...chunk.citation } : { uri: chunk.source }, + ...(chunk.metadata ? { metadata: cloneMetadata(chunk.metadata) } : {}), + }; +} + +function cloneChunkInput(chunk: RagChunkInput): RagChunkInput { + return { + ...chunk, + citation: chunk.citation ? { ...chunk.citation } : undefined, + metadata: chunk.metadata ? 
cloneMetadata(chunk.metadata) : undefined, + }; +} + +function cloneMetadata(metadata: Record): Record { + return cloneMetadataValue(metadata, new WeakMap()) as Record; +} + +function cloneMetadataValue(value: unknown, seen: WeakMap): unknown { + if (Array.isArray(value)) { + const existing = seen.get(value); + if (existing) return existing; + const out: unknown[] = []; + seen.set(value, out); + for (const item of value) out.push(cloneMetadataValue(item, seen)); + return out; + } + if (isPlainMetadataObject(value)) { + const existing = seen.get(value); + if (existing) return existing; + const out: Record = {}; + seen.set(value, out); + for (const [key, entry] of Object.entries(value)) { + if (key === '__proto__' || key === 'constructor' || key === 'prototype') continue; + out[key] = cloneMetadataValue(entry, seen); + } + return out; + } + return value; +} + +function isPlainMetadataObject(value: unknown): value is Record { + if (value === null || typeof value !== 'object') return false; + const prototype = Object.getPrototypeOf(value); + return prototype === Object.prototype || prototype === null; +} diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts new file mode 100644 index 00000000..28bca45e --- /dev/null +++ b/packages/core/tests/rag-runtime.test.ts @@ -0,0 +1,167 @@ +import { + createInMemoryRetriever, + InMemoryRagCorpus, + MAX_IN_MEMORY_RAG_TOP_K, + retrieveFromInMemoryCorpus, + tokenizeForRetrieval, +} from '../src/index.js'; + +describe('RAG in-memory runtime retrieval', () => { + test('ranks exact lexical matches first', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'b', text: 'refund policy', source: 'docs/refunds.md' }, + { id: 'a', text: 'shipping policy', source: 'docs/shipping.md' }, + ]); + + const result = retrieveFromInMemoryCorpus(corpus, 'refund policy'); + + expect(result.query).toBe('refund policy'); + expect(result.chunks[0]).toEqual( + expect.objectContaining({ + id: 'b', + score: 1, + 
source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }), + ); + }); + + test('limits results by topK', () => { + const corpus = new InMemoryRagCorpus( + Array.from({ length: 10 }, (_, index) => ({ + id: `chunk-${index}`, + text: `refund policy ${index}`, + source: `docs/${index}.md`, + })), + ); + + expect(retrieveFromInMemoryCorpus(corpus, 'refund policy', { topK: 3 }).chunks).toHaveLength(3); + }); + + test('filters results by minScore', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'weak', text: 'refund unrelated unrelated unrelated', source: 'docs/weak.md' }, + { id: 'none', text: 'shipping delivery', source: 'docs/none.md' }, + ]); + + expect(retrieveFromInMemoryCorpus(corpus, 'refund policy', { minScore: 0.5 }).chunks).toEqual([]); + }); + + test('orders results by descending score', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'partial', text: 'refund shipping', source: 'docs/partial.md' }, + { id: 'exact', text: 'refund policy', source: 'docs/exact.md' }, + { id: 'weak', text: 'refund shipping returns', source: 'docs/weak.md' }, + ]); + + const scores = retrieveFromInMemoryCorpus(corpus, 'refund policy').chunks.map((chunk) => chunk.score); + + expect(scores.length).toBeGreaterThan(1); + for (let index = 0; index < scores.length - 1; index += 1) { + expect(scores[index]).toBeGreaterThanOrEqual(scores[index + 1]); + } + }); + + test('breaks score ties by chunk id deterministically', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'b', text: 'refund', source: 'docs/b.md' }, + { id: 'a', text: 'refund', source: 'docs/a.md' }, + ]); + const retrieve = createInMemoryRetriever(corpus); + + expect(retrieve('refund').chunks.map((chunk) => chunk.id)).toEqual(['a', 'b']); + expect(retrieve('refund').chunks.map((chunk) => chunk.id)).toEqual(['a', 'b']); + }); + + test('returns empty results for empty corpus and empty queries', () => { + const empty = new InMemoryRagCorpus(); + expect(retrieveFromInMemoryCorpus(empty, 
'refund').chunks).toEqual([]); + + const corpus = new InMemoryRagCorpus([{ id: 'refunds', text: 'refund policy', source: 'docs/refunds.md' }]); + expect(retrieveFromInMemoryCorpus(corpus, ' ').chunks).toEqual([]); + }); + + test('preserves citation and metadata provenance', () => { + const corpus = new InMemoryRagCorpus([ + { + id: 'refunds', + text: 'refund policy', + source: 'docs/refunds.md', + citation: { uri: 'file:///docs/refunds.md', locator: 'L10-L20' }, + metadata: { section: 'policy' }, + }, + ]); + + expect(retrieveFromInMemoryCorpus(corpus, 'refund policy').chunks[0]).toEqual( + expect.objectContaining({ + id: 'refunds', + citation: { uri: 'file:///docs/refunds.md', locator: 'L10-L20' }, + metadata: { section: 'policy' }, + }), + ); + }); + + test('returns defensive copies from corpus reads', () => { + const circularMetadata: Record = { section: 'policy', nested: { owner: 'support' } }; + circularMetadata.self = circularMetadata; + const corpus = new InMemoryRagCorpus([ + { + id: 'refunds', + text: 'refund policy', + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + metadata: circularMetadata, + }, + ]); + + const snapshot = corpus.get('refunds'); + if (!snapshot) throw new Error('missing fixture chunk'); + (snapshot.metadata as Record).section = 'mutated'; + ((snapshot.metadata as Record).nested as Record).owner = 'mutated'; + (snapshot.citation as Record).uri = 'mutated'; + + expect(corpus.retrieve('refund policy').chunks[0]).toEqual( + expect.objectContaining({ + citation: { uri: 'docs/refunds.md' }, + metadata: expect.objectContaining({ section: 'policy', nested: { owner: 'support' } }), + }), + ); + }); + + test('upserts chunks by id without changing retrieval determinism', () => { + const corpus = new InMemoryRagCorpus([{ id: 'refunds', text: 'old refund policy', source: 'docs/old.md' }]); + + corpus.add({ id: 'refunds', text: 'updated return policy', source: 'docs/new.md' }); + + expect(corpus.size).toBe(1); + 
expect(corpus.retrieve('updated return policy').chunks[0]).toEqual( + expect.objectContaining({ id: 'refunds', source: 'docs/new.md' }), + ); + }); + + test('validates retrieval options and chunk identity inputs', () => { + const corpus = new InMemoryRagCorpus([{ id: 'refunds', text: 'refund policy', source: 'docs/refunds.md' }]); + + expect(() => retrieveFromInMemoryCorpus(corpus, 'refund', { topK: 0 })).toThrow('topK'); + expect(() => retrieveFromInMemoryCorpus(corpus, 'refund', { topK: MAX_IN_MEMORY_RAG_TOP_K + 1 })).toThrow('topK'); + expect(() => retrieveFromInMemoryCorpus(corpus, 'refund', { minScore: 1.1 })).toThrow('minScore'); + expect(() => retrieveFromInMemoryCorpus(corpus, 1 as unknown as string)).toThrow('query'); + expect(() => corpus.add({ id: ' ', text: 'bad', source: 'docs/bad.md' })).toThrow('chunk id'); + expect(() => corpus.add({ id: 'bad', text: ' ', source: 'docs/bad.md' })).toThrow('text'); + expect(() => corpus.add({ id: 'bad', text: 'bad', source: ' ' })).toThrow('source'); + expect(() => + corpus.add({ id: 1, text: 'bad', source: 'docs/bad.md' } as unknown as Parameters[0]), + ).toThrow('chunk id'); + }); + + test('tokenizes Unicode text for non-English retrieval', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'resume', text: 'résumé policy', source: 'docs/resume.md' }, + { id: 'jp', text: '日本語 ガイド', source: 'docs/jp.md' }, + ]); + + expect([...tokenizeForRetrieval('résumé 日本語')]).toEqual(['résumé', '日本語']); + expect(retrieveFromInMemoryCorpus(corpus, 'résumé').chunks[0]?.id).toBe('resume'); + expect(retrieveFromInMemoryCorpus(corpus, '日本語').chunks[0]?.id).toBe('jp'); + }); +}); From 970bb843c293f83e2fb9d12a6b9c396341ce3554 Mon Sep 17 00:00:00 2001 From: cukas Date: Mon, 8 Jun 2026 23:13:40 +0200 Subject: [PATCH 25/63] feat(core): evaluate rag runtime contracts --- .../kernlang-typescript-surface.kern | 1 + packages/core/src/index.ts | 8 + packages/core/src/rag-runtime.ts | 498 +++++++++++++++++- 
packages/core/tests/rag-runtime.test.ts | 252 +++++++++ packages/core/tests/type-guards.test.ts | 2 +- 5 files changed, 759 insertions(+), 2 deletions(-) diff --git a/packages/core/native-test/kernlang-typescript-surface.kern b/packages/core/native-test/kernlang-typescript-surface.kern index 40f73103..e1ba92cb 100644 --- a/packages/core/native-test/kernlang-typescript-surface.kern +++ b/packages/core/native-test/kernlang-typescript-surface.kern @@ -71,6 +71,7 @@ fn name=assertUser params="x:unknown" returns="asserts x is User" throw value="new Error(\"not a user\")" class name=Account + field name=role type=string method name=isAdmin returns="this is AdminUser" handler return value="this.role === \"admin\"" diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index e4364a0f..3fa0ae67 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -433,12 +433,20 @@ export type { InMemoryRagRetriever, RagChunkInput, RagCitation, + RagContractRetriever, + RagEvalAssertionCode, + RagEvalAssertionResult, + RagEvalCaseResult, + RagEvalContractOptions, + RagEvalContractResult, RetrievedChunk, RetrieveOptions, RetrieveResult, } from './rag-runtime.js'; export { createInMemoryRetriever, + evaluateRagEvalContract, + hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, retrieveFromInMemoryCorpus, diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts index 74999209..8bcb6294 100644 --- a/packages/core/src/rag-runtime.ts +++ b/packages/core/src/rag-runtime.ts @@ -1,3 +1,5 @@ +import type { RagSemanticEvalAssertFact, RagSemanticEvalCaseFact, RagSemanticEvalFact } from './semantic-validator.js'; + export interface RagCitation { readonly uri?: string; readonly locator?: string; @@ -31,9 +33,54 @@ export interface RetrieveResult { } export type InMemoryRagRetriever = (query: string, options?: RetrieveOptions) => RetrieveResult; +export type RagContractRetriever = (query: string, options?: RetrieveOptions) => 
RetrieveResult; export const MAX_IN_MEMORY_RAG_TOP_K = 1000; +export type RagEvalAssertionCode = + | 'PASS' + | 'ASSERTION_FAIL' + | 'INVALID_ASSERTION' + | 'RETRIEVER_ERROR' + | 'UNSUPPORTED_ASSERTION'; + +export interface RagEvalContractOptions { + readonly sourceGlobCaseSensitive?: boolean; + readonly now?: () => number; +} + +export interface RagEvalAssertionResult { + readonly kind: string; + readonly required?: boolean; + readonly passed: boolean; + readonly code: RagEvalAssertionCode; + readonly message: string; + readonly expected?: unknown; + readonly actual?: unknown; +} + +export interface RagEvalCaseResult { + readonly name: string; + readonly query: string; + readonly passed: boolean; + readonly durationMs: number; + readonly retrieveOptions: RetrieveOptions; + readonly chunks: readonly RetrievedChunk[]; + readonly assertions: readonly RagEvalAssertionResult[]; +} + +export interface RagEvalContractResult { + readonly passed: boolean; + readonly ragName?: string; + readonly evalName?: string; + readonly caseCount: number; + readonly passedCaseCount: number; + readonly assertionCount: number; + readonly passedAssertionCount: number; + readonly durationMs: number; + readonly cases: readonly RagEvalCaseResult[]; +} + interface StoredRagChunk { readonly chunk: RagChunkInput; readonly terms: ReadonlySet; @@ -106,6 +153,47 @@ export function retrieveFromInMemoryCorpus( return corpus.retrieve(query, options); } +export function evaluateRagEvalContract( + evaluation: RagSemanticEvalFact, + retriever: RagContractRetriever, + options: RagEvalContractOptions = {}, +): RagEvalContractResult { + const startedAt = runtimeNow(options); + const cases = (evaluation.cases ?? 
[]).map((evaluationCase) => + evaluateRagCase(evaluation, evaluationCase, retriever, options), + ); + const assertionCount = cases.reduce((count, evaluationCase) => count + evaluationCase.assertions.length, 0); + const passedAssertionCount = cases.reduce( + (count, evaluationCase) => count + evaluationCase.assertions.filter((assertion) => assertion.passed).length, + 0, + ); + return { + // Empty eval contracts fail closed; a vacuous pass would hide unconfigured evals. + passed: cases.length > 0 && cases.every((evaluationCase) => evaluationCase.passed), + ...optionalStringValue('ragName', evaluation.ragName), + ...optionalStringValue('evalName', evaluation.name), + caseCount: cases.length, + passedCaseCount: cases.filter((evaluationCase) => evaluationCase.passed).length, + assertionCount, + passedAssertionCount, + durationMs: runtimeNow(options) - startedAt, + cases, + }; +} + +export function hashRetrievedChunkText(text: string): string { + let left = 0xcbf29ce484222325n; + let right = 0x84222325cbf29ce4n; + for (const byte of new TextEncoder().encode(text)) { + const value = BigInt(byte); + left ^= value; + left = BigInt.asUintN(64, left * 0x100000001b3n); + right ^= value + 0x9en; + right = BigInt.asUintN(64, right * 0x100000001b3n); + } + return `${left.toString(16).padStart(16, '0')}${right.toString(16).padStart(16, '0')}`; +} + function normalizeRetrieveOptions(options: RetrieveOptions): Required { const topK = options.topK ?? 5; const minScore = options.minScore ?? 
0; @@ -118,8 +206,416 @@ function normalizeRetrieveOptions(options: RetrieveOptions): Required + evaluateRagAssertion(evaluation, evaluationCase, assertion, chunks, durationMs, options), + ), + ]; + } + return { + name: evaluationCase.name, + query: evaluationCase.query, + passed: assertions.every(isPassingOrAdvisoryAssertion), + durationMs, + retrieveOptions, + chunks, + assertions, + }; +} + +function caseRetrieveOptions(evaluationCase: RagSemanticEvalCaseFact): RetrieveOptions { + return { + ...optionalNumberValue('topK', evaluationCase.expected?.topK), + ...optionalNumberValue('minScore', evaluationCase.expected?.minScore), + }; +} + +function evaluateExpectedCaseContracts( + evaluationCase: RagSemanticEvalCaseFact, + chunks: readonly RetrievedChunk[], +): RagEvalAssertionResult[] { + const results: RagEvalAssertionResult[] = []; + const { topK, minScore, chunkCount, sources } = evaluationCase.expected ?? {}; + if (topK !== undefined) { + results.push( + assertionResult('expected.topK', chunks.length <= topK, `expected at most ${topK} chunks`, topK, chunks.length), + ); + } + if (minScore !== undefined) { + const actual = + chunks.length === 0 + ? 
0 + : chunks.reduce((minimumScore, chunk) => Math.min(minimumScore, chunk.score), Number.POSITIVE_INFINITY); + results.push( + assertionResult( + 'expected.minScore', + chunks.length > 0 && chunks.every((chunk) => chunk.score >= minScore), + `expected all retrieved chunks to score >= ${minScore}`, + minScore, + actual, + ), + ); + } + if (chunkCount !== undefined) { + results.push( + assertionResult( + 'expected.chunkCount', + chunks.length === chunkCount, + `expected ${chunkCount} chunks`, + chunkCount, + chunks.length, + ), + ); + } + if (sources?.length) { + const actualSources = [...new Set(chunks.map((chunk) => chunk.source))].sort(); + const expectedSources = [...sources].sort(); + const allowed = new Set(expectedSources); + results.push( + assertionResult( + 'expected.sources', + expectedSources.every((source) => actualSources.includes(source)) && + chunks.every((chunk) => allowed.has(chunk.source)), + `expected retrieved chunks to cover only sources ${expectedSources.join(', ')}`, + expectedSources, + actualSources, + ), + ); + } + return results; +} + +function evaluateRagAssertion( + evaluation: RagSemanticEvalFact, + evaluationCase: RagSemanticEvalCaseFact, + assertion: RagSemanticEvalAssertFact, + chunks: readonly RetrievedChunk[], + durationMs: number, + options: RagEvalContractOptions, +): RagEvalAssertionResult { + switch (assertion.kind) { + case 'scoreGte': + return withRagAssertRequired( + assertion, + numericChunkAssertion(assertion, chunks, (chunk, value) => chunk.score >= value, 'score >='), + ); + case 'scoreLte': + return withRagAssertRequired( + assertion, + numericChunkAssertion(assertion, chunks, (chunk, value) => chunk.score <= value, 'score <='), + ); + case 'contains': + return withRagAssertRequired( + assertion, + stringChunkAssertion( + assertion, + chunks, + 'text', + (chunk, value) => chunk.text.toLowerCase().includes(value.toLowerCase()), + 'text contains', + ), + ); + case 'sourceEq': + return withRagAssertRequired( + assertion, + 
stringChunkAssertion(assertion, chunks, 'source', (chunk, value) => chunk.source === value, 'source equals'), + ); + case 'sourceGlob': + return withRagAssertRequired( + assertion, + stringChunkAssertion( + assertion, + chunks, + 'source', + (chunk, value) => globMatches(value, chunk.source, options.sourceGlobCaseSensitive ?? false), + 'source matches', + ), + ); + case 'uniqueSourcesGte': { + const expected = numberAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires numeric value.'); + const actual = new Set(chunks.map((chunk) => chunk.source)).size; + return withRagAssertRequired( + assertion, + assertionResult( + assertion.kind, + actual >= expected, + `expected at least ${expected} unique sources`, + expected, + actual, + ), + ); + } + case 'chunkCountEq': { + const expected = numberAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires numeric value.'); + return withRagAssertRequired( + assertion, + assertionResult( + assertion.kind, + chunks.length === expected, + `expected exactly ${expected} chunks`, + expected, + chunks.length, + ), + ); + } + case 'citesRequired': { + const actual = chunks.length > 0 && chunks.every((chunk) => !!chunk.citation.uri || !!chunk.citation.locator); + return withRagAssertRequired( + assertion, + assertionResult(assertion.kind, actual, 'expected every chunk to carry citation data', true, actual), + ); + } + case 'factId': { + const expected = stringAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires string value.'); + const actual = ragEvalCaseFactId(evaluation, evaluationCase); + return withRagAssertRequired( + assertion, + assertionResult(assertion.kind, actual === expected, `expected fact id ${expected}`, expected, actual), + ); + } + case 'chunkHash': { + const expected = stringAssertionValue(assertion); + if (expected === undefined) return 
invalidAssertionResult(assertion, 'requires string value.'); + const actual = chunks.map((chunk) => hashRetrievedChunkText(chunk.text)); + return withRagAssertRequired( + assertion, + assertionResult( + assertion.kind, + actual.includes(expected), + `expected retrieved chunk hash ${expected}`, + expected, + actual, + ), + ); + } + case 'latencyLte': { + const expected = numberAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires numeric value.'); + return withRagAssertRequired( + assertion, + assertionResult( + assertion.kind, + durationMs <= expected, + `expected retrieval latency <= ${expected}ms`, + expected, + durationMs, + ), + ); + } + default: + return { + kind: assertion.kind, + required: assertion.required, + passed: false, + code: 'UNSUPPORTED_ASSERTION', + message: `Unsupported RAG eval assertion kind '${assertion.kind}'.`, + }; + } +} + +function numericChunkAssertion( + assertion: RagSemanticEvalAssertFact, + chunks: readonly RetrievedChunk[], + check: (chunk: RetrievedChunk, value: number) => boolean, + label: string, +): RagEvalAssertionResult { + const expected = numberAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires numeric value.'); + const actual = chunks.map((chunk) => chunk.score); + return assertionResult( + assertion.kind, + chunks.length > 0 && chunks.every((chunk) => check(chunk, expected)), + `expected every retrieved chunk ${label} ${expected}`, + expected, + actual, + ); +} + +function stringChunkAssertion( + assertion: RagSemanticEvalAssertFact, + chunks: readonly RetrievedChunk[], + actualField: 'source' | 'text', + check: (chunk: RetrievedChunk, value: string) => boolean, + label: string, +): RagEvalAssertionResult { + const expected = stringAssertionValue(assertion); + if (expected === undefined) return invalidAssertionResult(assertion, 'requires non-empty string value.'); + const actual = chunks.map((chunk) => 
(actualField === 'source' ? chunk.source : chunk.text)); + return assertionResult( + assertion.kind, + chunks.some((chunk) => check(chunk, expected)), + `expected a retrieved chunk ${label} ${expected}`, + expected, + actual, + ); +} + +function invalidAssertionResult(assertion: RagSemanticEvalAssertFact, reason: string): RagEvalAssertionResult { + return { + kind: assertion.kind, + required: assertion.required, + passed: false, + code: 'INVALID_ASSERTION', + message: `RAG eval assertion kind=${assertion.kind} ${reason}`, + }; +} + +function assertionResult( + kind: string, + passed: boolean, + message: string, + expected?: unknown, + actual?: unknown, +): RagEvalAssertionResult { + return { + kind, + passed, + code: passed ? 'PASS' : 'ASSERTION_FAIL', + message, + ...optionalAssertionValue('expected', expected), + ...optionalAssertionValue('actual', actual), + }; +} + +function numberAssertionValue(assertion: RagSemanticEvalAssertFact): number | undefined { + return typeof assertion.value === 'number' && Number.isFinite(assertion.value) ? assertion.value : undefined; +} + +function stringAssertionValue(assertion: RagSemanticEvalAssertFact): string | undefined { + return typeof assertion.value === 'string' && assertion.value.length > 0 ? assertion.value : undefined; +} + +function withRagAssertRequired( + assertion: RagSemanticEvalAssertFact, + result: RagEvalAssertionResult, +): RagEvalAssertionResult { + return { ...result, required: assertion.required }; +} + +function isPassingOrAdvisoryAssertion(assertion: RagEvalAssertionResult): boolean { + return assertion.passed || (assertion.required === false && assertion.code === 'ASSERTION_FAIL'); +} + +function ragEvalCaseFactId(evaluation: RagSemanticEvalFact, evaluationCase: RagSemanticEvalCaseFact): string { + return [evaluationCase.ragName ?? evaluation.ragName, evaluationCase.evalName ?? 
evaluation.name, evaluationCase.name] + .filter((part): part is string => !!part) + .join(':'); +} + +function globMatches(pattern: string, value: string, caseSensitive: boolean): boolean { + const normalizedPattern = caseSensitive ? pattern : pattern.toLowerCase(); + const normalizedValue = caseSensitive ? value : value.toLowerCase(); + return wildcardMatches(normalizedPattern, normalizedValue); +} + +function wildcardMatches(pattern: string, value: string): boolean { + let patternIndex = 0; + let valueIndex = 0; + let starIndex = -1; + let starValueIndex = 0; + while (valueIndex < value.length) { + if ( + patternIndex < pattern.length && + (pattern[patternIndex] === '?' || pattern[patternIndex] === value[valueIndex]) + ) { + patternIndex += 1; + valueIndex += 1; + } else if (patternIndex < pattern.length && pattern[patternIndex] === '*') { + starIndex = patternIndex; + starValueIndex = valueIndex; + patternIndex += 1; + } else if (starIndex !== -1) { + patternIndex = starIndex + 1; + starValueIndex += 1; + valueIndex = starValueIndex; + } else { + return false; + } + } + while (patternIndex < pattern.length && pattern[patternIndex] === '*') patternIndex += 1; + return patternIndex === pattern.length; +} + +function runtimeNow(options: RagEvalContractOptions): number { + return options.now?.() ?? Date.now(); +} + +function optionalStringValue(key: string, value: string | undefined): Record { + return value === undefined ? {} : { [key]: value }; +} + +function optionalNumberValue(key: string, value: number | undefined): Record { + return value === undefined ? {} : { [key]: value }; +} + +function optionalAssertionValue(key: 'expected' | 'actual', value: unknown): Record { + return value === undefined ? 
{} : { [key]: value }; +} + +function validateRetrieveResult(result: RetrieveResult): RetrieveResult { + if (!result || !Array.isArray(result.chunks)) throw new Error('retriever result must include chunks array.'); + for (const [index, chunk] of result.chunks.entries()) { + if ( + !chunk || + typeof chunk.id !== 'string' || + typeof chunk.text !== 'string' || + typeof chunk.score !== 'number' || + !Number.isFinite(chunk.score) || + chunk.score < 0 || + chunk.score > 1 || + typeof chunk.source !== 'string' || + !isValidCitation(chunk.citation) + ) { + throw new Error(`retriever chunk at index ${index} is not a RetrievedChunk.`); + } + } + return result; +} + +function isValidCitation(value: unknown): value is RagCitation { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false; + const citation = value as RagCitation; + return ( + (citation.uri === undefined || typeof citation.uri === 'string') && + (citation.locator === undefined || typeof citation.locator === 'string') + ); +} + export function tokenizeForRetrieval(value: string): ReadonlySet { - return new Set(value.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? []); + return new Set( + value + .normalize('NFKC') + .toLowerCase() + .match(/[\p{L}\p{M}\p{N}]+/gu) ?? 
[], + ); } function jaccardScore(queryTerms: ReadonlySet, chunkTerms: ReadonlySet): number { diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts index 28bca45e..3bebf945 100644 --- a/packages/core/tests/rag-runtime.test.ts +++ b/packages/core/tests/rag-runtime.test.ts @@ -1,5 +1,8 @@ +import type { RagSemanticEvalFact } from '../src/index.js'; import { createInMemoryRetriever, + evaluateRagEvalContract, + hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, retrieveFromInMemoryCorpus, @@ -161,7 +164,256 @@ describe('RAG in-memory runtime retrieval', () => { ]); expect([...tokenizeForRetrieval('résumé 日本語')]).toEqual(['résumé', '日本語']); + expect([...tokenizeForRetrieval('résumé')]).toEqual(['résumé']); expect(retrieveFromInMemoryCorpus(corpus, 'résumé').chunks[0]?.id).toBe('resume'); + expect(retrieveFromInMemoryCorpus(corpus, 'résumé').chunks[0]?.id).toBe('resume'); expect(retrieveFromInMemoryCorpus(corpus, '日本語').chunks[0]?.id).toBe('jp'); }); }); + +describe('RAG eval runtime contracts', () => { + test('evaluates passing RAG eval cases against retrieved chunks', () => { + const corpus = new InMemoryRagCorpus([ + { + id: 'refunds', + text: 'refund policy', + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md', locator: 'L1-L2' }, + metadata: { section: 'policy' }, + }, + { + id: 'policy', + text: 'policy details', + source: 'docs/policies.md', + citation: { uri: 'docs/policies.md' }, + }, + ]); + const refundHash = hashRetrievedChunkText('refund policy'); + expect(refundHash).toMatch(/^[a-f0-9]{32}$/); + const evalFact: RagSemanticEvalFact = { + name: 'Faithfulness', + ragName: 'AnswerDocs', + mode: 'contract', + cases: [ + { + name: 'refunds', + ragName: 'AnswerDocs', + evalName: 'Faithfulness', + query: 'refund policy', + tags: ['smoke'], + expected: { topK: 1, minScore: 0.25, sources: ['docs/refunds.md'] }, + asserts: [ + assertFact('scoreGte', 0.25), + assertFact('sourceGlob', 'docs/*.md'), 
+ assertFact('contains', 'refund'), + assertFact('uniqueSourcesGte', 1), + assertFact('chunkCountEq', 1), + assertFact('citesRequired', true), + assertFact('factId', 'AnswerDocs:Faithfulness:refunds'), + assertFact('chunkHash', refundHash), + assertFact('latencyLte', 1), + ], + }, + ], + }; + let now = 10; + + const result = evaluateRagEvalContract(evalFact, createInMemoryRetriever(corpus), { + now: () => now++, + }); + + expect(result.passed).toBe(true); + expect(result.caseCount).toBe(1); + expect(result.passedAssertionCount).toBe(result.assertionCount); + expect(result.cases[0]?.retrieveOptions).toEqual({ topK: 1, minScore: 0.25 }); + expect(result.cases[0]?.assertions.map((assertion) => assertion.code)).toEqual( + new Array(result.cases[0]?.assertions.length).fill('PASS'), + ); + expect(JSON.parse(JSON.stringify(result))).toEqual(result); + }); + + test('reports failing RAG eval contracts and retriever errors as structured diagnostics', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'shipping', text: 'shipping details', source: 'docs/shipping.md', citation: { uri: 'docs/shipping.md' } }, + ]); + const evalFact: RagSemanticEvalFact = { + name: 'Faithfulness', + ragName: 'AnswerDocs', + mode: 'contract', + cases: [ + { + name: 'refunds', + query: 'refund policy', + tags: [], + expected: { chunkCount: 1, sources: ['docs/refunds.md'] }, + asserts: [ + assertFact('scoreGte', 0.5), + assertFact('sourceEq', 'docs/refunds.md'), + assertFact('scoreLte', 0.1), + { ...assertFact('contains', ''), value: '' }, + { ...assertFact('unknownKind', 'x'), kind: 'unknownKind' }, + ], + }, + ], + }; + + const result = evaluateRagEvalContract(evalFact, createInMemoryRetriever(corpus)); + const errorResult = evaluateRagEvalContract(evalFact, () => { + throw new Error('offline'); + }); + + expect(result.passed).toBe(false); + expect(result.cases[0]?.assertions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ kind: 'expected.chunkCount', passed: false, code: 
'ASSERTION_FAIL' }), + expect.objectContaining({ kind: 'expected.sources', passed: false, code: 'ASSERTION_FAIL' }), + expect.objectContaining({ kind: 'contains', required: false, passed: false, code: 'INVALID_ASSERTION' }), + expect.objectContaining({ kind: 'unknownKind', required: false, passed: false, code: 'UNSUPPORTED_ASSERTION' }), + ]), + ); + expect(errorResult.cases[0]?.assertions).toEqual([ + expect.objectContaining({ kind: 'retriever', passed: false, code: 'RETRIEVER_ERROR' }), + ]); + const missingChunksResult = evaluateRagEvalContract( + evalFact, + () => + ({ + query: 'refund policy', + }) as unknown as ReturnType>, + ); + const malformedChunkResult = evaluateRagEvalContract( + evalFact, + () => + ({ + query: 'refund policy', + chunks: [{ id: 'bad' }], + }) as unknown as ReturnType>, + ); + const invalidScoreAndCitationResult = evaluateRagEvalContract( + evalFact, + () => + ({ + query: 'refund policy', + chunks: [{ id: 'bad', text: 'bad', score: Number.NaN, source: 'docs/bad.md', citation: { uri: 1 } }], + }) as unknown as ReturnType>, + ); + + for (const invalidResult of [missingChunksResult, malformedChunkResult, invalidScoreAndCitationResult]) { + expect(invalidResult).toEqual(expect.objectContaining({ passed: false })); + expect(invalidResult.cases[0]?.assertions).toEqual([ + expect.objectContaining({ kind: 'retriever', passed: false, code: 'RETRIEVER_ERROR' }), + ]); + } + }); + + test('handles empty and assertion-less eval facts without crashing', () => { + const corpus = new InMemoryRagCorpus([{ id: 'refunds', text: 'refund policy', source: 'docs/refunds.md' }]); + + expect(evaluateRagEvalContract({ name: 'Empty', cases: [] }, createInMemoryRetriever(corpus))).toEqual( + expect.objectContaining({ passed: false, caseCount: 0 }), + ); + expect( + evaluateRagEvalContract( + { + name: 'NoAsserts', + cases: [ + { + name: 'refunds', + query: 'refund policy', + tags: [], + expected: {}, + } as unknown as NonNullable[number], + ], + }, + 
createInMemoryRetriever(corpus), + ), + ).toEqual(expect.objectContaining({ passed: true, caseCount: 1 })); + }); + + test('treats non-required assertion failures as advisory diagnostics', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'refunds', text: 'refund policy', source: 'docs/refunds.md', citation: { uri: 'docs/refunds.md' } }, + ]); + const optionalFailure = evaluateRagEvalContract( + { + name: 'Advisory', + ragName: 'AnswerDocs', + cases: [ + { + name: 'refunds', + query: 'refund policy', + tags: [], + expected: { chunkCount: 1 }, + asserts: [assertFact('sourceEq', 'docs/missing.md')], + }, + ], + }, + createInMemoryRetriever(corpus), + ); + const requiredFailure = evaluateRagEvalContract( + { + name: 'Required', + ragName: 'AnswerDocs', + cases: [ + { + name: 'refunds', + query: 'refund policy', + tags: [], + expected: { chunkCount: 1 }, + asserts: [{ ...assertFact('sourceEq', 'docs/missing.md'), required: true }], + }, + ], + }, + createInMemoryRetriever(corpus), + ); + + expect(optionalFailure).toEqual(expect.objectContaining({ passed: true, passedCaseCount: 1 })); + expect(optionalFailure.cases[0]?.assertions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ kind: 'sourceEq', required: false, passed: false, code: 'ASSERTION_FAIL' }), + ]), + ); + expect(requiredFailure).toEqual(expect.objectContaining({ passed: false, passedCaseCount: 0 })); + expect(requiredFailure.cases[0]?.assertions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ kind: 'sourceEq', required: true, passed: false, code: 'ASSERTION_FAIL' }), + ]), + ); + }); +}); + +function assertFact(kind: string, value: string | number | boolean) { + return { + kind, + target: ragAssertTarget(kind), + op: ragAssertOp(kind), + value, + required: false, + }; +} + +function ragAssertTarget(kind: string) { + if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') return 'retrieved-chunks' as const; + if (kind === 'latencyLte') return 'latency' as const; + if 
(kind === 'citesRequired') return 'grounding' as const; + return 'retrieved-chunk' as const; +} + +function ragAssertOp(kind: string) { + switch (kind) { + case 'scoreGte': + case 'uniqueSourcesGte': + return 'gte' as const; + case 'scoreLte': + case 'latencyLte': + return 'lte' as const; + case 'contains': + return 'contains' as const; + case 'sourceGlob': + return 'glob' as const; + case 'citesRequired': + return 'present' as const; + default: + return 'eq' as const; + } +} diff --git a/packages/core/tests/type-guards.test.ts b/packages/core/tests/type-guards.test.ts index 7af95c8e..51362cb5 100644 --- a/packages/core/tests/type-guards.test.ts +++ b/packages/core/tests/type-guards.test.ts @@ -51,7 +51,7 @@ describe('Type guards (Slice 2d)', () => { // Gemini review of slice 2d: confirm method-in-class path also preserves // predicate return types via emitClassBody's emitTypeAnnotation route. const src = - 'class name=User\n method name=isAdmin returns="this is AdminUser"\n handler <<<\n return this.role === "admin";\n >>>'; + 'class name=User\n field name=role type=string\n method name=isAdmin returns="this is AdminUser"\n handler <<<\n return this.role === "admin";\n >>>'; const out = gen(src); expect(out).toContain('export class User {'); expect(out).toContain('isAdmin(): this is AdminUser {'); From 479a88c30a1168adbcf4cc6416835407c9a8267c Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 00:09:28 +0200 Subject: [PATCH 26/63] feat(core): add rag runtime provenance --- packages/core/src/index.ts | 11 ++ packages/core/src/rag-runtime.ts | 230 +++++++++++++++++++++- packages/core/src/semantic-validator.ts | 4 +- packages/core/tests/rag-runtime.test.ts | 250 ++++++++++++++++++++++++ 4 files changed, 490 insertions(+), 5 deletions(-) diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 3fa0ae67..2f9110f8 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -431,6 +431,7 @@ export type { RagAssertionKind } from 
'./rag-assertions.js'; export { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; export type { InMemoryRagRetriever, + ProvenancedRetrieveResult, RagChunkInput, RagCitation, RagContractRetriever, @@ -439,18 +440,25 @@ export type { RagEvalCaseResult, RagEvalContractOptions, RagEvalContractResult, + RagMcpRetrieveProvenanceMapping, + RagRuntimeProvenance, + RagRuntimeProvenanceOptions, + RagRuntimeProvenanceStatus, RetrievedChunk, RetrieveOptions, RetrieveResult, } from './rag-runtime.js'; export { createInMemoryRetriever, + createRagRuntimeProvenance, evaluateRagEvalContract, hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, + ragMcpRetrieveProvenanceMapping, retrieveFromInMemoryCorpus, tokenizeForRetrieval, + withRagRuntimeProvenance, } from './rag-runtime.js'; export type { ParserHintsConfig } from './runtime.js'; // Runtime (instance-based state) @@ -498,6 +506,7 @@ export type { RagSemanticFacts, RagSemanticGroundingFact, RagSemanticLocation, + RagSemanticMcpRetrievalFact, RagSemanticPipelineFact, RagSemanticRetrieverFact, RagSemanticSourceFact, @@ -506,6 +515,8 @@ export type { export { collectClassSemanticFacts, collectRagSemanticFacts, + RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE, + RAG_MCP_RETRIEVE_OUTPUT_SHAPE, validateClassSemantics, validateRagSemantics, validateSemantics, diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts index 8bcb6294..a7af688e 100644 --- a/packages/core/src/rag-runtime.ts +++ b/packages/core/src/rag-runtime.ts @@ -1,4 +1,10 @@ -import type { RagSemanticEvalAssertFact, RagSemanticEvalCaseFact, RagSemanticEvalFact } from './semantic-validator.js'; +import type { + RagSemanticEvalAssertFact, + RagSemanticEvalCaseFact, + RagSemanticEvalFact, + RagSemanticMcpRetrievalFact, +} from './semantic-validator.js'; +import { RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE, RAG_MCP_RETRIEVE_OUTPUT_SHAPE } from './semantic-validator.js'; export interface RagCitation { readonly uri?: string; @@ 
-29,7 +35,7 @@ export interface RetrieveOptions { export interface RetrieveResult { readonly query: string; - readonly chunks: RetrievedChunk[]; + readonly chunks: readonly RetrievedChunk[]; } export type InMemoryRagRetriever = (query: string, options?: RetrieveOptions) => RetrieveResult; @@ -37,6 +43,52 @@ export type RagContractRetriever = (query: string, options?: RetrieveOptions) => export const MAX_IN_MEMORY_RAG_TOP_K = 1000; +export type RagRuntimeProvenanceStatus = 'success' | 'retriever_error' | 'eval_failed'; + +export interface RagRuntimeProvenance { + readonly runId: string; + readonly retrieverName?: string; + readonly targetKind?: 'retriever' | 'rag'; + readonly targetName?: string; + readonly query: string; + readonly retrieveOptions: RetrieveOptions; + readonly citationsRequired: boolean; + readonly startedAtMs: number; + readonly durationMs: number; + readonly chunkCount: number; + readonly chunkHashes: readonly string[]; + readonly sources: readonly string[]; + readonly contractStatus: RagRuntimeProvenanceStatus; +} + +export interface RagRuntimeProvenanceOptions { + readonly runId?: string; + readonly retrieverName?: string; + readonly targetKind?: 'retriever' | 'rag'; + readonly targetName?: string; + readonly retrieveOptions?: RetrieveOptions; + readonly citationsRequired?: boolean; + readonly startedAtMs?: number; + readonly durationMs?: number; + readonly contractStatus?: RagRuntimeProvenanceStatus; +} + +export interface ProvenancedRetrieveResult extends RetrieveResult { + readonly provenance: RagRuntimeProvenance; +} + +export interface RagMcpRetrieveProvenanceMapping { + readonly outputShape?: string; + readonly outputItemShape?: string; + readonly citationField?: string; + readonly sourceField?: string; + readonly scoreField?: string; + readonly provenance?: string; + readonly citationsRequired: boolean; + readonly contractStatus: RagSemanticMcpRetrievalFact['contractStatus']; + readonly compatible: boolean; +} + export type 
RagEvalAssertionCode = | 'PASS' | 'ASSERTION_FAIL' @@ -153,6 +205,87 @@ export function retrieveFromInMemoryCorpus( return corpus.retrieve(query, options); } +export function createRagRuntimeProvenance( + result: RetrieveResult, + options: RagRuntimeProvenanceOptions = {}, +): RagRuntimeProvenance { + const validResult = validateRetrieveResult(result); + const retrieveOptions = normalizeProvenanceRetrieveOptions(options.retrieveOptions); + const chunkHashes = validResult.chunks.map((chunk) => hashRetrievedChunkText(chunk.text)); + const chunkProvenance = validResult.chunks.map((chunk, index) => ({ + index, + id: chunk.id, + source: chunk.source, + score: chunk.score, + citation: { ...chunk.citation }, + textHash: chunkHashes[index], + })); + const sources = uniqueOrdered(validResult.chunks.map((chunk) => chunk.source)); + const startedAtMs = options.startedAtMs ?? Date.now(); + const durationMs = options.durationMs ?? 0; + const contractStatus = options.contractStatus ?? 'success'; + return { + runId: + options.runId ?? + hashRetrievedChunkText( + stableStringify({ + retrieverName: options.retrieverName, + targetKind: options.targetKind, + targetName: options.targetName, + query: validResult.query, + retrieveOptions, + citationsRequired: options.citationsRequired ?? false, + chunks: chunkProvenance, + contractStatus, + }), + ), + ...optionalStringValue('retrieverName', options.retrieverName), + ...(options.targetKind ? { targetKind: options.targetKind } : {}), + ...optionalStringValue('targetName', options.targetName), + query: validResult.query, + retrieveOptions, + citationsRequired: options.citationsRequired ?? 
false, + startedAtMs, + durationMs, + chunkCount: validResult.chunks.length, + chunkHashes, + sources, + contractStatus, + }; +} + +export function withRagRuntimeProvenance( + result: RetrieveResult, + options: RagRuntimeProvenanceOptions = {}, +): ProvenancedRetrieveResult { + const validResult = validateRetrieveResult(result); + return { + query: validResult.query, + chunks: validResult.chunks.map(cloneRetrievedChunk), + provenance: createRagRuntimeProvenance(validResult, options), + }; +} + +export function ragMcpRetrieveProvenanceMapping( + retrieval: RagSemanticMcpRetrievalFact | null | undefined, +): RagMcpRetrieveProvenanceMapping { + if (!retrieval) throw new Error('KERN RAG MCP provenance mapping requires a retrieval fact.'); + return { + ...optionalStringValue('outputShape', retrieval.outputShape), + ...optionalStringValue('outputItemShape', retrieval.outputItemShape), + ...optionalStringValue('citationField', retrieval.citationField), + ...optionalStringValue('sourceField', retrieval.sourceField), + ...optionalStringValue('scoreField', retrieval.scoreField), + ...optionalStringValue('provenance', retrieval.provenance), + citationsRequired: retrieval.effectiveRequiresCitations, + contractStatus: retrieval.contractStatus, + compatible: + retrieval.contractStatus === 'valid' && + retrieval.outputShape === RAG_MCP_RETRIEVE_OUTPUT_SHAPE && + (retrieval.outputItemShape === undefined || retrieval.outputItemShape === RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE), + }; +} + export function evaluateRagEvalContract( evaluation: RagSemanticEvalFact, retriever: RagContractRetriever, @@ -580,9 +713,92 @@ function optionalAssertionValue(key: 'expected' | 'actual', value: unknown): Rec return value === undefined ? 
{} : { [key]: value }; } +function normalizeProvenanceRetrieveOptions(options: RetrieveOptions | undefined): RetrieveOptions { + if (options === undefined) return {}; + const out: { topK?: number; minScore?: number } = {}; + if (options.topK !== undefined) { + if (!Number.isInteger(options.topK) || options.topK <= 0 || options.topK > MAX_IN_MEMORY_RAG_TOP_K) { + throw new Error(`KERN RAG runtime topK must be a positive integer up to ${MAX_IN_MEMORY_RAG_TOP_K}.`); + } + out.topK = options.topK; + } + if (options.minScore !== undefined) { + if (!Number.isFinite(options.minScore) || options.minScore < 0 || options.minScore > 1) { + throw new Error('KERN RAG runtime minScore must be between 0 and 1.'); + } + out.minScore = options.minScore; + } + return out; +} + +function uniqueOrdered(values: readonly string[]): string[] { + const seen = new Set(); + const out: string[] = []; + for (const value of values) { + if (seen.has(value)) continue; + seen.add(value); + out.push(value); + } + return out; +} + +function stableStringify(value: unknown): string { + return JSON.stringify(stableJsonValue(value, new WeakSet())); +} + +function stableJsonValue(value: unknown, seen: WeakSet): unknown { + if (typeof value === 'bigint') return value.toString(); + if (typeof value === 'symbol') return value.description ?? 
value.toString(); + if (typeof value === 'function') return `[Function:${value.name || 'anonymous'}]`; + if (value === null || typeof value !== 'object') return value; + if (seen.has(value)) return '[Circular]'; + seen.add(value); + try { + if (Array.isArray(value)) return value.map((item) => stableJsonValue(item, seen)); + if (value instanceof Date) return value.toISOString(); + if (value instanceof RegExp) return value.toString(); + if (value instanceof Map) { + return Array.from(value.entries()) + .map(([key, entry]) => [stableJsonValue(key, seen), stableJsonValue(entry, seen)] as const) + .sort(([left], [right]) => stableStringCompare(left, right)); + } + if (value instanceof Set) { + return Array.from(value.values()) + .map((entry) => stableJsonValue(entry, seen)) + .sort(stableStringCompare); + } + if (isPlainMetadataObject(value)) { + const out: Record = {}; + for (const key of Object.keys(value).sort()) { + const entry = value[key]; + if (entry !== undefined) out[key] = stableJsonValue(entry, seen); + } + return out; + } + return String(value); + } finally { + seen.delete(value); + } +} + +function stableStringCompare(left: unknown, right: unknown): number { + const leftText = String(left); + const rightText = String(right); + return leftText < rightText ? -1 : leftText > rightText ? 
1 : 0; +} + function validateRetrieveResult(result: RetrieveResult): RetrieveResult { - if (!result || !Array.isArray(result.chunks)) throw new Error('retriever result must include chunks array.'); + if (!result || typeof result.query !== 'string' || !Array.isArray(result.chunks)) { + throw new Error('retriever result must include query string and chunks array.'); + } for (const [index, chunk] of result.chunks.entries()) { + if ( + chunk && + typeof chunk.score === 'number' && + (!Number.isFinite(chunk.score) || chunk.score < 0 || chunk.score > 1) + ) { + throw new Error(`retriever chunk at index ${index} score must be between 0 and 1.`); + } if ( !chunk || typeof chunk.id !== 'string' || @@ -639,6 +855,14 @@ function retrievedChunk(chunk: RagChunkInput, score: number): RetrievedChunk { }; } +function cloneRetrievedChunk(chunk: RetrievedChunk): RetrievedChunk { + return { + ...chunk, + citation: { ...chunk.citation }, + ...(chunk.metadata ? { metadata: cloneMetadata(chunk.metadata) } : {}), + }; +} + function cloneChunkInput(chunk: RagChunkInput): RagChunkInput { return { ...chunk, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index de067972..4a59e838 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -2319,8 +2319,8 @@ interface ClassMemberInfo { } const BUILTIN_CLASS_BASES = new Set(['Error']); -const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; -const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; +export const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; +export const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; const BODY_EXPRESSION_PROPS = [ 'value', 'expr', diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts index 3bebf945..a39c5191 100644 --- a/packages/core/tests/rag-runtime.test.ts +++ b/packages/core/tests/rag-runtime.test.ts @@ -1,12 +1,15 @@ import type { RagSemanticEvalFact } 
from '../src/index.js'; import { createInMemoryRetriever, + createRagRuntimeProvenance, evaluateRagEvalContract, hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, + ragMcpRetrieveProvenanceMapping, retrieveFromInMemoryCorpus, tokenizeForRetrieval, + withRagRuntimeProvenance, } from '../src/index.js'; describe('RAG in-memory runtime retrieval', () => { @@ -382,6 +385,253 @@ describe('RAG eval runtime contracts', () => { }); }); +describe('RAG runtime provenance envelopes', () => { + test('creates deterministic provenance for retrieved chunks', () => { + const corpus = new InMemoryRagCorpus([ + { id: 'refunds', text: 'refund policy', source: 'docs/refunds.md', citation: { uri: 'docs/refunds.md' } }, + { id: 'shipping', text: 'refund shipping', source: 'docs/refunds.md', citation: { uri: 'docs/refunds.md' } }, + { id: 'policy', text: 'refund terms', source: 'docs/policies.md', citation: { uri: 'docs/policies.md' } }, + ]); + const result = retrieveFromInMemoryCorpus(corpus, 'refund', { topK: 3, minScore: 0.25 }); + const firstChunk = result.chunks[0]; + if (!firstChunk) throw new Error('missing provenance fixture chunk'); + + const provenance = createRagRuntimeProvenance(result, { + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: 'AnswerDocs', + retrieveOptions: { minScore: 0.25, topK: 3 }, + citationsRequired: true, + startedAtMs: 100, + durationMs: 7, + }); + const sameProvenance = createRagRuntimeProvenance(result, { + targetName: 'AnswerDocs', + targetKind: 'rag', + retrieverName: 'DocsSearch', + retrieveOptions: { topK: 3, minScore: 0.25 }, + citationsRequired: true, + startedAtMs: 999, + durationMs: 1, + }); + const differentQuery = createRagRuntimeProvenance( + { query: 'shipping', chunks: result.chunks }, + { retrieverName: 'DocsSearch', targetKind: 'rag', targetName: 'AnswerDocs' }, + ); + const differentSource = createRagRuntimeProvenance( + { + query: 'refund', + chunks: [ + { + ...firstChunk, + id: 'mirror', + source: 
'docs/mirror.md', + citation: { uri: 'docs/mirror.md' }, + }, + ], + }, + { + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: 'AnswerDocs', + retrieveOptions: { topK: 3, minScore: 0.25 }, + }, + ); + const citationUriThenLocator = createRagRuntimeProvenance( + { + query: 'refund', + chunks: [ + { + ...firstChunk, + citation: { uri: 'docs/refunds.md', locator: 'L1' }, + }, + ], + }, + { retrieverName: 'DocsSearch', targetKind: 'rag', targetName: 'AnswerDocs' }, + ); + const citationLocatorThenUri = createRagRuntimeProvenance( + { + query: 'refund', + chunks: [ + { + ...firstChunk, + citation: { locator: 'L1', uri: 'docs/refunds.md' }, + }, + ], + }, + { retrieverName: 'DocsSearch', targetKind: 'rag', targetName: 'AnswerDocs' }, + ); + + expect(provenance).toEqual( + expect.objectContaining({ + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: 'AnswerDocs', + query: 'refund', + retrieveOptions: { topK: 3, minScore: 0.25 }, + citationsRequired: true, + startedAtMs: 100, + durationMs: 7, + chunkCount: 3, + sources: ['docs/policies.md', 'docs/refunds.md'], + contractStatus: 'success', + }), + ); + expect(provenance.runId).toMatch(/^[a-f0-9]{32}$/); + expect(sameProvenance.runId).toBe(provenance.runId); + expect(differentQuery.runId).not.toBe(provenance.runId); + expect(differentSource.runId).not.toBe(provenance.runId); + expect(citationUriThenLocator.runId).toBe(citationLocatorThenUri.runId); + expect(provenance.chunkHashes).toHaveLength(result.chunks.length); + expect(JSON.parse(JSON.stringify(provenance))).toEqual(provenance); + }); + + test('records only retrieve options supplied for provenance', () => { + const result = { + query: 'refund', + chunks: [ + { + id: 'refunds', + text: 'refund policy', + score: 0.9, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }; + + expect(createRagRuntimeProvenance(result).retrieveOptions).toEqual({}); + expect(createRagRuntimeProvenance(result, { retrieveOptions: { 
topK: 1 } }).retrieveOptions).toEqual({ topK: 1 }); + }); + + test('wraps retrieval results with provenance without mutating chunks', () => { + const result = { + query: 'refund', + chunks: [ + { + id: 'refunds', + text: 'refund policy', + score: 0.9, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md', locator: 'L1' }, + metadata: { section: 'policy' }, + }, + ], + }; + + const wrapped = withRagRuntimeProvenance(result, { + retrieverName: 'DocsSearch', + targetKind: 'retriever', + targetName: 'DocsSearch', + retrieveOptions: { topK: 1 }, + }); + (wrapped.chunks[0]?.metadata as Record).section = 'mutated'; + (wrapped.chunks[0]?.citation as Record).uri = 'mutated'; + + expect(result.chunks[0]?.metadata).toEqual({ section: 'policy' }); + expect(result.chunks[0]?.citation).toEqual({ uri: 'docs/refunds.md', locator: 'L1' }); + expect(wrapped.provenance).toEqual( + expect.objectContaining({ + targetKind: 'retriever', + targetName: 'DocsSearch', + chunkCount: 1, + sources: ['docs/refunds.md'], + }), + ); + }); + + test('validates malformed retrieval results before provenance creation', () => { + expect(() => + createRagRuntimeProvenance({ + query: 1 as unknown as string, + chunks: [], + }), + ).toThrow('query string'); + expect(() => + createRagRuntimeProvenance({ + query: 'refund', + chunks: [ + { + id: 'bad', + text: 'bad', + score: 1.5, + source: 'docs/bad.md', + citation: { uri: 'docs/bad.md' }, + }, + ], + }), + ).toThrow('score'); + expect(() => createRagRuntimeProvenance({ query: 'refund', chunks: [] }, { retrieveOptions: { topK: 0 } })).toThrow( + 'topK', + ); + expect(() => + createRagRuntimeProvenance( + { query: 'refund', chunks: [] }, + { retrieveOptions: { topK: MAX_IN_MEMORY_RAG_TOP_K + 1 } }, + ), + ).toThrow('topK'); + expect(() => + createRagRuntimeProvenance({ query: 'refund', chunks: [] }, { retrieveOptions: { minScore: Number.NaN } }), + ).toThrow('minScore'); + }); + + test('maps MCP retrieve facts to provenance-compatible output 
contracts', () => { + expect( + ragMcpRetrieveProvenanceMapping({ + targetKind: 'rag', + targetName: 'AnswerDocs', + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + citationField: 'citation', + sourceField: 'uri', + scoreField: 'score', + provenance: 'source', + requireGrounding: true, + effectiveRequiresCitations: true, + contractStatus: 'valid', + }), + ).toEqual({ + outputShape: 'RetrievedChunk[]', + outputItemShape: 'RetrievedChunk', + citationField: 'citation', + sourceField: 'uri', + scoreField: 'score', + provenance: 'source', + citationsRequired: true, + contractStatus: 'valid', + compatible: true, + }); + expect( + ragMcpRetrieveProvenanceMapping({ + targetKind: 'retriever', + targetName: 'DocsSearch', + requireGrounding: false, + effectiveRequiresCitations: false, + contractStatus: 'absent', + }), + ).toEqual({ citationsRequired: false, contractStatus: 'absent', compatible: false }); + expect( + ragMcpRetrieveProvenanceMapping({ + targetKind: 'retriever', + targetName: 'DocsSearch', + outputShape: 'RetrievedChunk[]', + outputItemShape: 'OtherChunk', + requireGrounding: false, + effectiveRequiresCitations: false, + contractStatus: 'valid', + }), + ).toEqual({ + outputShape: 'RetrievedChunk[]', + outputItemShape: 'OtherChunk', + citationsRequired: false, + contractStatus: 'valid', + compatible: false, + }); + expect(() => ragMcpRetrieveProvenanceMapping(undefined)).toThrow('retrieval fact'); + expect(() => ragMcpRetrieveProvenanceMapping(null)).toThrow('retrieval fact'); + }); +}); + function assertFact(kind: string, value: string | number | boolean) { return { kind, From c9e1737f40861d4bb72e1a501d464784e1474c65 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 00:47:35 +0200 Subject: [PATCH 27/63] feat(core): add rag answer contracts --- packages/core/src/index.ts | 9 + packages/core/src/rag-runtime.ts | 314 ++++++++++++++++++ packages/core/src/semantic-substrate.ts | 136 +++++++- packages/core/tests/rag-runtime.test.ts | 
235 +++++++++++++ .../core/tests/semantic-substrate.test.ts | 34 +- 5 files changed, 725 insertions(+), 3 deletions(-) diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 2f9110f8..4f432b34 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -432,6 +432,12 @@ export { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js export type { InMemoryRagRetriever, ProvenancedRetrieveResult, + RagAnswerContract, + RagAnswerContractDiagnostic, + RagAnswerContractDiagnosticCode, + RagAnswerContractResult, + RagAnswerContractStatus, + RagAnswerGroundingSpan, RagChunkInput, RagCitation, RagContractRetriever, @@ -451,6 +457,7 @@ export type { export { createInMemoryRetriever, createRagRuntimeProvenance, + evaluateRagAnswerContract, evaluateRagEvalContract, hashRetrievedChunkText, InMemoryRagCorpus, @@ -475,6 +482,8 @@ export type { KernSemanticCoreType, KernSemanticIrContract, KernSemanticPrimitive, + KernSemanticRagAnswerReviewFact, + KernSemanticRagAnswerReviewStatus, KernSemanticStdlibOperation, KernSemanticSubstrate, KernSemanticSubstrateSource, diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts index a7af688e..fa2295f1 100644 --- a/packages/core/src/rag-runtime.ts +++ b/packages/core/src/rag-runtime.ts @@ -89,6 +89,61 @@ export interface RagMcpRetrieveProvenanceMapping { readonly compatible: boolean; } +export type RagAnswerContractStatus = 'grounded' | 'partially_grounded' | 'ungrounded' | 'invalid'; + +export type RagAnswerContractDiagnosticCode = + | 'ANSWER_EMPTY' + | 'QUERY_MISMATCH' + | 'RETRIEVER_ERROR' + | 'PROVENANCE_MISMATCH' + | 'SPAN_INVALID' + | 'SPAN_UNGROUNDED' + | 'CHUNK_REF_UNKNOWN' + | 'CITATION_REQUIRED' + | 'GROUNDING_BELOW_THRESHOLD'; + +export interface RagAnswerGroundingSpan { + readonly start: number; + readonly end: number; + readonly chunkIds: readonly string[]; + readonly required?: boolean; +} + +export interface RagAnswerContract { + readonly id?: 
string; + readonly ragName?: string; + readonly prompt?: string; + readonly query: string; + readonly answer: string; + readonly retrieval: RetrieveResult | ProvenancedRetrieveResult; + readonly provenance?: RagRuntimeProvenance; + readonly groundingSpans?: readonly RagAnswerGroundingSpan[]; + readonly requireCitations?: boolean; + readonly minGroundingCoverage?: number; +} + +export interface RagAnswerContractDiagnostic { + readonly code: RagAnswerContractDiagnosticCode; + readonly message: string; + readonly spanIndex?: number; + readonly chunkId?: string; +} + +export interface RagAnswerContractResult { + readonly id?: string; + readonly ragName?: string; + readonly query: string; + readonly passed: boolean; + readonly status: RagAnswerContractStatus; + readonly groundingCoverage: number; + readonly groundedChars: number; + readonly answerChars: number; + readonly citedChunkIds: readonly string[]; + readonly sources: readonly string[]; + readonly provenance?: RagRuntimeProvenance; + readonly diagnostics: readonly RagAnswerContractDiagnostic[]; +} + export type RagEvalAssertionCode = | 'PASS' | 'ASSERTION_FAIL' @@ -286,6 +341,155 @@ export function ragMcpRetrieveProvenanceMapping( }; } +export function evaluateRagAnswerContract(contract: RagAnswerContract): RagAnswerContractResult { + const base = { + ...optionalStringValue('id', contract.id), + ...optionalStringValue('ragName', contract.ragName), + query: typeof contract.query === 'string' ? contract.query : '', + }; + let retrieval: RetrieveResult; + try { + retrieval = validateRetrieveResult(contract.retrieval); + } catch (error) { + return { + ...base, + passed: false, + status: 'invalid', + groundingCoverage: 0, + groundedChars: 0, + answerChars: 0, + citedChunkIds: [], + sources: [], + ...(contract.provenance ? { provenance: contract.provenance } : {}), + diagnostics: [ + { + code: 'RETRIEVER_ERROR', + message: `RAG answer contract retrieval failed: ${error instanceof Error ? 
error.message : String(error)}.`, + }, + ], + }; + } + + const answer = typeof contract.answer === 'string' ? contract.answer : ''; + const answerChars = countAnswerChars(answer); + const diagnostics: RagAnswerContractDiagnostic[] = []; + if (typeof contract.answer !== 'string' || answerChars === 0) { + diagnostics.push({ code: 'ANSWER_EMPTY', message: 'RAG answer contract answer must be a non-empty string.' }); + } + + const minGroundingCoverage = normalizeGroundingCoverageThreshold(contract.minGroundingCoverage); + const provenance = contract.provenance ?? retrieveResultProvenance(contract.retrieval); + if (contract.query !== retrieval.query) { + diagnostics.push({ + code: 'QUERY_MISMATCH', + message: 'RAG answer contract query does not match the retrieval result query.', + }); + } + if (provenance && !provenanceMatchesRetrieval(provenance, retrieval)) { + diagnostics.push({ + code: 'PROVENANCE_MISMATCH', + message: 'RAG answer contract provenance does not match the retrieval result.', + }); + } + + const chunkById = new Map(retrieval.chunks.map((chunk) => [chunk.id, chunk])); + const grounded = new Array(answer.length).fill(false) as boolean[]; + const citedChunkIds = new Set(); + const citationBearingChunkIds = new Set(); + const groundingSpans = Array.isArray(contract.groundingSpans) ? 
contract.groundingSpans : []; + if (contract.groundingSpans !== undefined && !Array.isArray(contract.groundingSpans)) { + diagnostics.push({ + code: 'SPAN_INVALID', + message: 'RAG answer contract groundingSpans must be an array.', + }); + } + + for (const [spanIndex, span] of groundingSpans.entries()) { + if (!isValidGroundingSpan(span, answer.length)) { + diagnostics.push({ + code: 'SPAN_INVALID', + spanIndex, + message: `RAG answer grounding span at index ${spanIndex} is invalid or outside the answer text.`, + }); + continue; + } + + const validChunkIds: string[] = []; + let spanHasCitation = false; + for (const chunkId of span.chunkIds) { + if (chunkById.has(chunkId)) { + validChunkIds.push(chunkId); + citedChunkIds.add(chunkId); + if (chunkHasCitation(chunkById.get(chunkId))) { + spanHasCitation = true; + citationBearingChunkIds.add(chunkId); + } + } else { + diagnostics.push({ + code: 'CHUNK_REF_UNKNOWN', + spanIndex, + chunkId, + message: `RAG answer grounding span at index ${spanIndex} references unknown chunk '${chunkId}'.`, + }); + } + } + + if (validChunkIds.length === 0) { + diagnostics.push({ + code: span.required ? 'CITATION_REQUIRED' : 'SPAN_UNGROUNDED', + spanIndex, + message: `RAG answer grounding span at index ${spanIndex} has no valid retrieved chunk citation.`, + }); + continue; + } + + if (contract.requireCitations && !spanHasCitation) { + diagnostics.push({ + code: 'CITATION_REQUIRED', + spanIndex, + message: `RAG answer grounding span at index ${spanIndex} requires a non-empty retrieved chunk citation.`, + }); + continue; + } + + for (let index = span.start; index < span.end; index += 1) grounded[index] = true; + } + + const groundedChars = countGroundedAnswerChars(answer, grounded); + const groundingCoverage = answerChars === 0 ? 
0 : groundedChars / answerChars; + if ( + answerChars > 0 && + contract.requireCitations && + citationBearingChunkIds.size === 0 && + groundingSpans.length === 0 + ) { + diagnostics.push({ + code: 'CITATION_REQUIRED', + message: 'RAG answer contract requires citations but no retrieved chunks were cited.', + }); + } + if (answerChars > 0 && groundingCoverage < minGroundingCoverage) { + diagnostics.push({ + code: 'GROUNDING_BELOW_THRESHOLD', + message: `RAG answer grounding coverage ${groundingCoverage.toFixed(3)} is below required threshold ${minGroundingCoverage.toFixed(3)}.`, + }); + } + + const passed = diagnostics.length === 0; + return { + ...base, + passed, + status: passed ? 'grounded' : ragAnswerStatus(diagnostics, groundingCoverage), + groundingCoverage, + groundedChars, + answerChars, + citedChunkIds: [...citedChunkIds].sort(stableStringCompare), + sources: uniqueOrdered([...citedChunkIds].map((chunkId) => chunkById.get(chunkId)?.source).filter(isString)), + ...(provenance ? { provenance } : {}), + diagnostics, + }; +} + export function evaluateRagEvalContract( evaluation: RagSemanticEvalFact, retriever: RagContractRetriever, @@ -339,6 +543,101 @@ function normalizeRetrieveOptions(options: RetrieveOptions): Required 1) { + throw new Error('KERN RAG answer contract minGroundingCoverage must be between 0 and 1.'); + } + return value; +} + +function retrieveResultProvenance( + result: RetrieveResult | ProvenancedRetrieveResult, +): RagRuntimeProvenance | undefined { + return result && typeof result === 'object' && 'provenance' in result ? 
result.provenance : undefined; +} + +function provenanceMatchesRetrieval(provenance: RagRuntimeProvenance, retrieval: RetrieveResult): boolean { + if ( + !provenance || + typeof provenance.query !== 'string' || + typeof provenance.chunkCount !== 'number' || + !Array.isArray(provenance.chunkHashes) || + !Array.isArray(provenance.sources) + ) { + return false; + } + const chunkHashes = retrieval.chunks.map((chunk) => hashRetrievedChunkText(chunk.text)); + const sources = uniqueOrdered(retrieval.chunks.map((chunk) => chunk.source)); + return ( + provenance.query === retrieval.query && + provenance.chunkCount === retrieval.chunks.length && + arraysEqual(provenance.chunkHashes, chunkHashes) && + arraysEqual(provenance.sources, sources) + ); +} + +function chunkHasCitation(chunk: RetrievedChunk | undefined): boolean { + return ( + !!chunk && + ((typeof chunk.citation.uri === 'string' && chunk.citation.uri.trim().length > 0) || + (typeof chunk.citation.locator === 'string' && chunk.citation.locator.trim().length > 0)) + ); +} + +function isValidGroundingSpan(span: RagAnswerGroundingSpan, answerLength: number): boolean { + return ( + !!span && + Number.isInteger(span.start) && + Number.isInteger(span.end) && + span.start >= 0 && + span.end > span.start && + span.end <= answerLength && + Array.isArray(span.chunkIds) && + span.chunkIds.every(isString) + ); +} + +function countAnswerChars(answer: string): number { + if (typeof answer !== 'string') return 0; + let count = 0; + for (let index = 0; index < answer.length; index += 1) { + if (!/\s/u.test(answer[index] ?? '')) count += 1; + } + return count; +} + +function countGroundedAnswerChars(answer: string, grounded: readonly boolean[]): number { + let count = 0; + for (let index = 0; index < answer.length; index += 1) { + if (grounded[index] && !/\s/u.test(answer[index] ?? 
'')) count += 1; + } + return count; +} + +function ragAnswerStatus( + diagnostics: readonly RagAnswerContractDiagnostic[], + groundingCoverage: number, +): RagAnswerContractStatus { + if ( + diagnostics.some((diagnostic) => + [ + 'ANSWER_EMPTY', + 'CITATION_REQUIRED', + 'QUERY_MISMATCH', + 'RETRIEVER_ERROR', + 'PROVENANCE_MISMATCH', + 'SPAN_INVALID', + 'CHUNK_REF_UNKNOWN', + ].includes(diagnostic.code), + ) + ) { + return 'invalid'; + } + if (groundingCoverage === 0) return 'ungrounded'; + return 'partially_grounded'; +} + function evaluateRagCase( evaluation: RagSemanticEvalFact, evaluationCase: RagSemanticEvalCaseFact, @@ -787,6 +1086,10 @@ function stableStringCompare(left: unknown, right: unknown): number { return leftText < rightText ? -1 : leftText > rightText ? 1 : 0; } +function arraysEqual(left: readonly string[], right: readonly string[]): boolean { + return left.length === right.length && left.every((value, index) => value === right[index]); +} + function validateRetrieveResult(result: RetrieveResult): RetrieveResult { if (!result || typeof result.query !== 'string' || !Array.isArray(result.chunks)) { throw new Error('retriever result must include query string and chunks array.'); @@ -813,6 +1116,13 @@ function validateRetrieveResult(result: RetrieveResult): RetrieveResult { throw new Error(`retriever chunk at index ${index} is not a RetrievedChunk.`); } } + const chunkIds = new Set(); + for (const [index, chunk] of result.chunks.entries()) { + if (chunkIds.has(chunk.id)) { + throw new Error(`retriever chunk at index ${index} duplicates chunk id '${chunk.id}'.`); + } + chunkIds.add(chunk.id); + } return result; } @@ -825,6 +1135,10 @@ function isValidCitation(value: unknown): value is RagCitation { ); } +function isString(value: unknown): value is string { + return typeof value === 'string'; +} + export function tokenizeForRetrieval(value: string): ReadonlySet { return new Set( value diff --git a/packages/core/src/semantic-substrate.ts 
b/packages/core/src/semantic-substrate.ts index b4bca982..fb2f0758 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -13,7 +13,10 @@ import { type ClassSemanticFacts, collectClassSemanticFacts, collectRagSemanticFacts, + RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE, + RAG_MCP_RETRIEVE_OUTPUT_SHAPE, type RagSemanticFacts, + type RagSemanticMcpRetrievalFact, type SemanticViolation, validateClassSemantics, validateRagSemantics, @@ -79,6 +82,25 @@ export interface KernSemanticValidationSummary { readonly byRule: Readonly>; } +export type KernSemanticRagAnswerReviewStatus = 'ready' | 'incomplete' | 'invalid'; + +export interface KernSemanticRagAnswerReviewFact { + readonly pipelineName: string; + readonly retrieverName: string; + readonly prompt?: string; + readonly answer?: string; + readonly citationsRequired: boolean; + readonly groundingCount: number; + readonly evalCount: number; + readonly evalCaseCount: number; + readonly mcpRetrievalCount: number; + readonly compatibleMcpRetrievalCount: number; + readonly provenanceRequired: boolean; + readonly provenanceComplete: boolean; + readonly validationStatus: KernSemanticRagAnswerReviewStatus; + readonly issues: readonly string[]; +} + export interface KernSemanticSubstrate { readonly schemaVersion: 1; readonly generatedBy: 'kern-semantic-substrate'; @@ -98,6 +120,7 @@ export interface KernSemanticSubstrate { readonly classValidationSummary?: KernSemanticValidationSummary; readonly ragFacts?: RagSemanticFacts; readonly ragValidationSummary?: KernSemanticValidationSummary; + readonly ragAnswerReviewFacts?: readonly KernSemanticRagAnswerReviewFact[]; } export interface BuildKernSemanticSubstrateOptions { @@ -110,6 +133,7 @@ export interface BuildKernSemanticSubstrateOptions { } export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { + const ragFacts = options.documentRag ? 
collectRagSemanticFacts(options.documentRag) : undefined; const coreTypes = Object.values(CORE_TYPE_CONTRACTS.types).map((contract) => ({ id: `core.type.${contract.name}`, name: contract.name, @@ -159,7 +183,7 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp ...(options.documentClasses && options.includeClassValidationSummary ? { classValidationSummary: semanticValidationSummary(options.documentClasses) } : {}), - ...(options.documentRag ? { ragFacts: collectRagSemanticFacts(options.documentRag) } : {}), + ...(ragFacts ? { ragFacts, ragAnswerReviewFacts: buildRagAnswerReviewFacts(ragFacts) } : {}), ...(options.documentRag && options.includeRagValidationSummary ? { ragValidationSummary: ragValidationSummary(options.documentRag) } : {}), @@ -259,6 +283,116 @@ function summarizeSemanticViolations(violations: readonly SemanticViolation[]): return { total: violations.length, byRule }; } +function buildRagAnswerReviewFacts(facts: RagSemanticFacts): KernSemanticRagAnswerReviewFact[] { + const unresolvedRetrievers = new Set(facts.unresolvedRetrieverRefs); + return facts.pipelines.map((pipeline) => { + const citationsRequired = pipeline.citations || pipeline.groundings.some((grounding) => grounding.requireCitations); + const mcpRetrievals = facts.mcpRetrievals.filter( + (retrieval) => retrieval.targetKind === 'rag' && retrieval.targetName === pipeline.name, + ); + const compatibleMcpRetrievals = mcpRetrievals.filter((retrieval) => + isRagAnswerCompatibleMcpRetrieval(retrieval, citationsRequired), + ); + const evalCaseCount = pipeline.evals.reduce((count, evaluation) => count + (evaluation.caseCount ?? 
0), 0); + const issues = ragAnswerReviewIssues( + facts, + pipeline.name, + pipeline.retrieverName, + pipeline.prompt, + pipeline.answer, + citationsRequired, + pipeline.groundings.length, + pipeline.groundings.some((grounding) => grounding.requireCitations), + pipeline.evals.length, + evalCaseCount, + mcpRetrievals.length, + compatibleMcpRetrievals.length, + unresolvedRetrievers.has(pipeline.retrieverName), + ); + return { + pipelineName: pipeline.name, + retrieverName: pipeline.retrieverName, + ...optionalStringValue('prompt', pipeline.prompt), + ...optionalStringValue('answer', pipeline.answer), + citationsRequired, + groundingCount: pipeline.groundings.length, + evalCount: pipeline.evals.length, + evalCaseCount, + mcpRetrievalCount: mcpRetrievals.length, + compatibleMcpRetrievalCount: compatibleMcpRetrievals.length, + provenanceRequired: citationsRequired || mcpRetrievals.some((retrieval) => retrieval.requireGrounding), + provenanceComplete: mcpRetrievals.length === 0 || compatibleMcpRetrievals.length === mcpRetrievals.length, + validationStatus: ragAnswerReviewStatus(issues), + issues, + }; + }); +} + +function ragAnswerReviewIssues( + facts: RagSemanticFacts, + pipelineName: string, + retrieverName: string, + prompt: string | undefined, + answer: string | undefined, + citationsRequired: boolean, + groundingCount: number, + hasCitationGrounding: boolean, + evalCount: number, + evalCaseCount: number, + mcpRetrievalCount: number, + compatibleMcpRetrievalCount: number, + unresolvedRetriever: boolean, +): string[] { + const issues: string[] = []; + if (unresolvedRetriever) issues.push(`unresolved-retriever:${retrieverName}`); + if (!prompt && !answer) issues.push('missing-answer-surface'); + if (citationsRequired && groundingCount === 0) issues.push('missing-grounding'); + if (citationsRequired && !hasCitationGrounding) issues.push('missing-citation-grounding'); + if (evalCount === 0) issues.push('missing-eval'); + if (evalCount > 0 && evalCaseCount === 0) 
issues.push('missing-eval-case'); + if ( + facts.mcpRetrievals.some( + (retrieval) => + retrieval.targetKind === 'rag' && + retrieval.targetName === pipelineName && + retrieval.contractStatus === 'invalid', + ) + ) { + issues.push('invalid-mcp-retrieve-contract'); + } + if (mcpRetrievalCount > 0 && compatibleMcpRetrievalCount === 0) issues.push('missing-compatible-mcp-retrieve'); + return issues; +} + +function ragAnswerReviewStatus(issues: readonly string[]): KernSemanticRagAnswerReviewStatus { + if (issues.some((issue) => INVALID_RAG_ANSWER_REVIEW_ISSUES.has(issue.split(':', 1)[0] ?? ''))) return 'invalid'; + return issues.length === 0 ? 'ready' : 'incomplete'; +} + +function isRagAnswerCompatibleMcpRetrieval( + retrieval: RagSemanticMcpRetrievalFact, + citationsRequired: boolean, +): boolean { + if (retrieval.contractStatus !== 'valid') return false; + if (retrieval.outputShape !== RAG_MCP_RETRIEVE_OUTPUT_SHAPE) return false; + if (retrieval.outputItemShape !== undefined && retrieval.outputItemShape !== RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE) { + return false; + } + if (citationsRequired || retrieval.effectiveRequiresCitations) { + return ( + !!retrieval.citationField && + (!!retrieval.sourceField || retrieval.provenance === 'source' || retrieval.provenance === 'citation') + ); + } + return true; +} + +function optionalStringValue(key: string, value: string | undefined): Record { + return value === undefined ? 
{} : { [key]: value }; +} + +const INVALID_RAG_ANSWER_REVIEW_ISSUES = new Set(['unresolved-retriever', 'invalid-mcp-retrieve-contract']); + const KERN_PRIMITIVE_NAMES: Record = { 'collection.has': 'includes', 'collection.count': 'count', diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts index a39c5191..b0a1f6b6 100644 --- a/packages/core/tests/rag-runtime.test.ts +++ b/packages/core/tests/rag-runtime.test.ts @@ -2,6 +2,7 @@ import type { RagSemanticEvalFact } from '../src/index.js'; import { createInMemoryRetriever, createRagRuntimeProvenance, + evaluateRagAnswerContract, evaluateRagEvalContract, hashRetrievedChunkText, InMemoryRagCorpus, @@ -632,6 +633,240 @@ describe('RAG runtime provenance envelopes', () => { }); }); +describe('RAG answer runtime contracts', () => { + test('validates a grounded answer against retrieved chunks and provenance', () => { + const retrieval = withRagRuntimeProvenance( + { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds are allowed for thirty days.', + score: 0.95, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md', locator: 'L1-L2' }, + }, + ], + }, + { + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: 'AnswerDocs', + retrieveOptions: { topK: 1, minScore: 0.8 }, + citationsRequired: true, + startedAtMs: 100, + }, + ); + const answer = 'Refunds are allowed for thirty days.'; + + const result = evaluateRagAnswerContract({ + id: 'AnswerDocs:refunds', + ragName: 'AnswerDocs', + prompt: './answer.md', + query: retrieval.query, + answer, + retrieval, + requireCitations: true, + minGroundingCoverage: 1, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: ['refunds'], required: true }], + }); + + expect(result).toEqual( + expect.objectContaining({ + id: 'AnswerDocs:refunds', + ragName: 'AnswerDocs', + query: 'refund policy', + passed: true, + status: 'grounded', + groundingCoverage: 1, + citedChunkIds: ['refunds'], + sources: 
['docs/refunds.md'], + provenance: retrieval.provenance, + diagnostics: [], + }), + ); + expect(JSON.parse(JSON.stringify(result))).toEqual(result); + }); + + test('reports partial and ungrounded answer contract failures', () => { + const retrieval = { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds are allowed.', + score: 0.9, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }; + const answer = 'Refunds are allowed. Shipping is separate.'; + const partial = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + minGroundingCoverage: 0.9, + groundingSpans: [{ start: 0, end: 'Refunds are allowed.'.length, chunkIds: ['refunds'] }], + }); + const ungrounded = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + requireCitations: true, + groundingSpans: [], + }); + + expect(partial.passed).toBe(false); + expect(partial.status).toBe('partially_grounded'); + expect(partial.diagnostics).toEqual([expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' })]); + expect(ungrounded.passed).toBe(false); + expect(ungrounded.status).toBe('invalid'); + expect(ungrounded.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'CITATION_REQUIRED' }), + expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' }), + ]), + ); + }); + + test('reports invalid answer contracts for bad spans chunk refs and provenance mismatches', () => { + const retrieval = withRagRuntimeProvenance( + { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds are allowed.', + score: 0.9, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }, + { targetKind: 'rag', targetName: 'AnswerDocs' }, + ); + const answer = 'Refunds are allowed.'; + const staleChunk = retrieval.chunks[0]; + if (!staleChunk) throw new Error('missing answer contract fixture chunk'); + + const invalid = evaluateRagAnswerContract({ + query: 
retrieval.query, + answer, + retrieval, + provenance: { ...retrieval.provenance, query: 'other query' }, + groundingSpans: [ + { start: 0, end: answer.length + 1, chunkIds: ['refunds'] }, + { start: 0, end: answer.length, chunkIds: ['missing'], required: true }, + ], + }); + const queryMismatch = evaluateRagAnswerContract({ + query: 'shipping policy', + answer, + retrieval, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: ['refunds'] }], + }); + const staleProvenance = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval: { + ...retrieval, + chunks: [{ ...staleChunk, text: 'Different retrieved text.' }], + }, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: ['refunds'] }], + }); + const badAnswer = evaluateRagAnswerContract({ + query: retrieval.query, + answer: undefined as unknown as string, + retrieval, + }); + const badGroundingSpans = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + groundingSpans: {} as unknown as [], + }); + const emptyCitation = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval: { + ...retrieval, + chunks: [{ ...staleChunk, citation: { uri: '' } }], + }, + requireCitations: true, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: ['refunds'] }], + }); + const nonStringChunkRef = evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + groundingSpans: [{ start: 0, end: answer.length, chunkIds: [1 as unknown as string] }], + }); + + expect(invalid.passed).toBe(false); + expect(invalid.status).toBe('invalid'); + expect(invalid.diagnostics.map((diagnostic) => diagnostic.code)).toEqual( + expect.arrayContaining([ + 'PROVENANCE_MISMATCH', + 'SPAN_INVALID', + 'CHUNK_REF_UNKNOWN', + 'CITATION_REQUIRED', + 'GROUNDING_BELOW_THRESHOLD', + ]), + ); + expect(queryMismatch.diagnostics).toEqual([expect.objectContaining({ code: 'QUERY_MISMATCH' })]); + expect(staleProvenance.diagnostics).toEqual([expect.objectContaining({ 
code: 'PROVENANCE_MISMATCH' })]); + expect(badAnswer.diagnostics).toEqual([expect.objectContaining({ code: 'ANSWER_EMPTY' })]); + expect(badGroundingSpans.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'SPAN_INVALID' }), + expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' }), + ]), + ); + expect(emptyCitation.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'CITATION_REQUIRED' }), + expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' }), + ]), + ); + expect(nonStringChunkRef.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'SPAN_INVALID' }), + expect.objectContaining({ code: 'GROUNDING_BELOW_THRESHOLD' }), + ]), + ); + expect(() => + evaluateRagAnswerContract({ + query: retrieval.query, + answer, + retrieval, + minGroundingCoverage: 1.1, + }), + ).toThrow('minGroundingCoverage'); + expect(() => + createRagRuntimeProvenance({ + query: 'refund', + chunks: [ + { + id: 'dupe', + text: 'one', + score: 0.5, + source: 'docs/one.md', + citation: { uri: 'docs/one.md' }, + }, + { + id: 'dupe', + text: 'two', + score: 0.5, + source: 'docs/two.md', + citation: { uri: 'docs/two.md' }, + }, + ], + }), + ).toThrow('duplicates chunk id'); + }); +}); + function assertFact(kind: string, value: string | number | boolean) { return { kind, diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 87ee071a..82308957 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -65,6 +65,7 @@ describe('KERN semantic substrate', () => { expect(Object.hasOwn(substrate, 'classValidationSummary')).toBe(false); expect(Object.hasOwn(substrate, 'ragFacts')).toBe(false); expect(Object.hasOwn(substrate, 'ragValidationSummary')).toBe(false); + expect(Object.hasOwn(substrate, 'ragAnswerReviewFacts')).toBe(false); }); test('exports document class member inheritance and 
override facts when requested', () => { @@ -245,9 +246,10 @@ describe('KERN semantic substrate', () => { ' chunking source=manuals strategy=semantic maxTokens=600 overlap=80', 'embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine', 'retriever name=DocsSearch corpus=Docs embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72', - 'rag name=AnswerDocs retriever=DocsSearch citations=true', + 'rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" answer="grounded" citations=true', ' grounding requireCitations=true policy=strict maxContext=6000', - ' ragEval metric=faithfulness threshold=0.85', + ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', + ' ragCase name=refunds query="refund policy"', 'mcp name=Support', ' resource name=DocsResource uri="docs://manuals"', ' tool name=answerQuestion', @@ -285,6 +287,8 @@ describe('KERN semantic substrate', () => { expect.objectContaining({ name: 'AnswerDocs', retrieverName: 'DocsSearch', + prompt: './answer.md', + answer: 'grounded', citations: true, groundings: [expect.objectContaining({ requireCitations: true, policy: 'strict' })], evals: [expect.objectContaining({ metric: 'faithfulness', threshold: 0.85 })], @@ -317,12 +321,38 @@ describe('KERN semantic substrate', () => { resourceName: 'DocsResource', }), ]); + expect(substrate.ragAnswerReviewFacts).toEqual([ + { + pipelineName: 'AnswerDocs', + retrieverName: 'DocsSearch', + prompt: './answer.md', + answer: 'grounded', + citationsRequired: true, + groundingCount: 1, + evalCount: 1, + evalCaseCount: 1, + mcpRetrievalCount: 1, + compatibleMcpRetrievalCount: 1, + provenanceRequired: true, + provenanceComplete: true, + validationStatus: 'ready', + issues: [], + }, + ]); const invalidSubstrate = buildKernSemanticSubstrate({ documentRag: parseRoot('rag name=Broken retriever=Missing'), includeRagValidationSummary: true, }); expect(invalidSubstrate.ragValidationSummary?.byRule['rag-unknown-retriever']).toBe(1); + 
expect(invalidSubstrate.ragAnswerReviewFacts).toEqual([ + expect.objectContaining({ + pipelineName: 'Broken', + retrieverName: 'Missing', + validationStatus: 'invalid', + issues: expect.arrayContaining(['unresolved-retriever:Missing', 'missing-answer-surface', 'missing-eval']), + }), + ]); }); test('exports portable review primitives as stable query objects', () => { From 398bcac9a3439a243413d1c3e4fa2a1ab8839a2a Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 01:18:38 +0200 Subject: [PATCH 28/63] feat(core): add rag answer contract surface --- .../native-test/kernlang-rag-contracts.kern | 16 + .../kernlang-rag-contracts.test.kern | 10 + packages/core/src/codegen-core.ts | 4 + packages/core/src/decompiler.ts | 42 +++ packages/core/src/index.ts | 5 + packages/core/src/rag-runtime.ts | 37 ++ packages/core/src/schema.ts | 29 +- packages/core/src/semantic-substrate.ts | 7 +- packages/core/src/semantic-validator.ts | 320 +++++++++++++++++- packages/core/src/spec.ts | 2 + packages/core/tests/rag-runtime.test.ts | 53 ++- packages/core/tests/rag-semantics.test.ts | 83 +++++ packages/core/tests/schema-validation.test.ts | 11 + .../core/tests/semantic-substrate.test.ts | 10 + 14 files changed, 625 insertions(+), 4 deletions(-) create mode 100644 packages/core/native-test/kernlang-rag-contracts.kern create mode 100644 packages/core/native-test/kernlang-rag-contracts.test.kern diff --git a/packages/core/native-test/kernlang-rag-contracts.kern b/packages/core/native-test/kernlang-rag-contracts.kern new file mode 100644 index 00000000..35ce7940 --- /dev/null +++ b/packages/core/native-test/kernlang-rag-contracts.kern @@ -0,0 +1,16 @@ +corpus name=Docs title="Support docs" + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + chunking source=manuals strategy=semantic maxTokens=600 overlap=80 unit=tokens + +embed name=DocsEmbedding corpus=Docs model=text-embedding-3-small dims=1536 metric=cosine + +retriever name=DocsSearch corpus=Docs 
embed=DocsEmbedding mode=hybrid topK=8 minScore=0.72 + +rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true + grounding requireCitations=true policy=strict maxContext=6000 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="How do refunds work?" tags="smoke,policy" topK=4 minScore=0.72 chunkCount=1 sources="docs/refunds.md" + ragAssert kind=sourceGlob value="docs/refunds.md" required=true + ragAssert kind=citesRequired + ragAnswerContract name=RefundAnswer query="How do refunds work?" answer="Refunds follow the refund policy." requireCitations=true minGroundingCoverage=0.8 + answerSpan start=0 end=33 chunks=refunds required=true diff --git a/packages/core/native-test/kernlang-rag-contracts.test.kern b/packages/core/native-test/kernlang-rag-contracts.test.kern new file mode 100644 index 00000000..824bc443 --- /dev/null +++ b/packages/core/native-test/kernlang-rag-contracts.test.kern @@ -0,0 +1,10 @@ +test name="KERNlang RAG contract surface parity" target="./kernlang-rag-contracts.kern" coverage=false + it name="RAG contracts stay valid and roundtrip through native decompile" + expect no=schemaViolations + expect no=semanticViolations + expect decompile contains="corpus name=Docs title=\"Support docs\"" + expect decompile contains="retriever name=DocsSearch corpus=Docs" + expect decompile contains="rag name=AnswerDocs retriever=DocsSearch" + expect decompile contains="ragAnswerContract name=RefundAnswer" + expect decompile contains="answerSpan start=0 end=33 chunks=refunds required=true" + expect roundtrip=true diff --git a/packages/core/src/codegen-core.ts b/packages/core/src/codegen-core.ts index 5d3290d2..c1eda47d 100644 --- a/packages/core/src/codegen-core.ts +++ b/packages/core/src/codegen-core.ts @@ -732,6 +732,8 @@ export const CORE_NODE_TYPES = new Set([ 'ragEval', 'ragCase', 'ragAssert', + 'ragAnswerContract', + 'answerSpan', // Backend data layer (graduated nodes) 'model', 
'column', @@ -1034,6 +1036,8 @@ export function generateCoreNode(node: IRNode, target?: string, runtime?: KernRu case 'ragEval': case 'ragCase': case 'ragAssert': + case 'ragAnswerContract': + case 'answerSpan': return []; // Graduated nodes — backend data layer case 'model': diff --git a/packages/core/src/decompiler.ts b/packages/core/src/decompiler.ts index 4cb505a8..2b9efccf 100644 --- a/packages/core/src/decompiler.ts +++ b/packages/core/src/decompiler.ts @@ -106,6 +106,23 @@ export function decompile(root: IRNode): DecompileResult { renderIndexer(node, indent); return; } + if ( + node.type === 'corpus' || + node.type === 'source' || + node.type === 'chunking' || + node.type === 'embed' || + node.type === 'retriever' || + node.type === 'rag' || + node.type === 'grounding' || + node.type === 'ragEval' || + node.type === 'ragCase' || + node.type === 'ragAssert' || + node.type === 'ragAnswerContract' || + node.type === 'answerSpan' + ) { + renderRagNode(node, indent); + return; + } if (node.type === 'handler') { pushHandler(node, indent); return; @@ -359,6 +376,31 @@ export function decompile(root: IRNode): DecompileResult { lines.push(`${indent}${parts.join(' ')}`); } + function renderRagNode(node: IRNode, indent: string): void { + const propOrderByType: Record = { + corpus: ['name', 'title', 'tenant', 'refresh'], + source: ['name', 'kind', 'uri', 'resource', 'media', 'acl'], + chunking: ['name', 'corpus', 'source', 'strategy', 'maxTokens', 'overlap', 'unit'], + embed: ['name', 'corpus', 'model', 'dims', 'metric'], + retriever: ['name', 'corpus', 'embed', 'mode', 'topK', 'minScore', 'rerank'], + rag: ['name', 'retriever', 'prompt', 'answer', 'citations'], + grounding: ['name', 'rag', 'requireCitations', 'policy', 'maxContext'], + ragEval: ['name', 'rag', 'metric', 'threshold', 'mode'], + ragCase: ['name', 'query', 'tags', 'topK', 'minScore', 'chunkCount', 'sources'], + ragAssert: ['kind', 'value', 'threshold', 'count', 'valueMs', 'required'], + ragAnswerContract: 
['name', 'rag', 'query', 'answer', 'prompt', 'requireCitations', 'minGroundingCoverage'], + answerSpan: ['start', 'end', 'chunks', 'required'], + }; + const props = node.props || {}; + const quoted = node.__quotedProps ?? []; + const parts = [node.type]; + for (const propName of propOrderByType[node.type] ?? []) { + if (props[propName] !== undefined) parts.push(renderScalarProp(propName, props[propName], quoted)); + } + lines.push(`${indent}${parts.join(' ')}`); + for (const child of node.children || []) render(child, `${indent} `); + } + function renderClassLike(node: IRNode, indent: string): void { const props = node.props || {}; const quoted = node.__quotedProps ?? []; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 4f432b34..177e4d7d 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -450,6 +450,7 @@ export type { RagRuntimeProvenance, RagRuntimeProvenanceOptions, RagRuntimeProvenanceStatus, + RagSemanticAnswerContractOptions, RetrievedChunk, RetrieveOptions, RetrieveResult, @@ -459,9 +460,11 @@ export { createRagRuntimeProvenance, evaluateRagAnswerContract, evaluateRagEvalContract, + evaluateRagSemanticAnswerContract, hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, + ragAnswerContractFromSemanticFact, ragMcpRetrieveProvenanceMapping, retrieveFromInMemoryCorpus, tokenizeForRetrieval, @@ -506,6 +509,8 @@ export type { ClassSemanticMemberKind, ClassSemanticOverrideFact, ClassSemanticOverrideStatus, + RagSemanticAnswerContractFact, + RagSemanticAnswerSpanFact, RagSemanticChunkingFact, RagSemanticCorpusFact, RagSemanticEmbedFact, diff --git a/packages/core/src/rag-runtime.ts b/packages/core/src/rag-runtime.ts index fa2295f1..f7c99639 100644 --- a/packages/core/src/rag-runtime.ts +++ b/packages/core/src/rag-runtime.ts @@ -1,4 +1,5 @@ import type { + RagSemanticAnswerContractFact, RagSemanticEvalAssertFact, RagSemanticEvalCaseFact, RagSemanticEvalFact, @@ -188,6 +189,10 @@ export interface 
RagEvalContractResult { readonly cases: readonly RagEvalCaseResult[]; } +export interface RagSemanticAnswerContractOptions { + readonly provenance?: RagRuntimeProvenance; +} + interface StoredRagChunk { readonly chunk: RagChunkInput; readonly terms: ReadonlySet; @@ -490,6 +495,38 @@ export function evaluateRagAnswerContract(contract: RagAnswerContract): RagAnswe }; } +export function ragAnswerContractFromSemanticFact( + fact: RagSemanticAnswerContractFact, + retrieval: RetrieveResult | ProvenancedRetrieveResult, + options: RagSemanticAnswerContractOptions = {}, +): RagAnswerContract { + return { + id: fact.name, + ...optionalStringValue('ragName', fact.ragName), + ...optionalStringValue('prompt', fact.prompt), + query: fact.query, + answer: fact.answer, + retrieval, + ...(options.provenance ? { provenance: options.provenance } : {}), + groundingSpans: fact.spans.map((span) => ({ + start: span.start, + end: span.end, + chunkIds: [...span.chunkIds], + ...(span.required ? { required: true } : {}), + })), + requireCitations: fact.requireCitations, + ...optionalNumberValue('minGroundingCoverage', fact.minGroundingCoverage), + }; +} + +export function evaluateRagSemanticAnswerContract( + fact: RagSemanticAnswerContractFact, + retrieval: RetrieveResult | ProvenancedRetrieveResult, + options: RagSemanticAnswerContractOptions = {}, +): RagAnswerContractResult { + return evaluateRagAnswerContract(ragAnswerContractFromSemanticFact(fact, retrieval, options)); +} + export function evaluateRagEvalContract( evaluation: RagSemanticEvalFact, retriever: RagContractRetriever, diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 9e456045..4f33dedd 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -2491,7 +2491,7 @@ export const NODE_SCHEMAS: Record = { answer: { kind: 'string' }, citations: { kind: 'boolean' }, }, - allowedChildren: ['grounding', 'ragEval'], + allowedChildren: ['grounding', 'ragEval', 'ragAnswerContract'], }, 
grounding: { description: 'RAG grounding policy — declares citation and context constraints for a RAG pipeline.', @@ -2546,6 +2546,33 @@ export const NODE_SCHEMAS: Record = { }, allowedChildren: [], }, + ragAnswerContract: { + description: + 'RAG answer contract — declares the provider-free answer grounding shape evaluated by the core RAG runtime.', + example: + 'ragAnswerContract name=RefundAnswer query="How do refunds work?" answer="Refunds follow the refund policy." requireCitations=true minGroundingCoverage=0.8\n answerSpan start=0 end=34 chunks=refunds required=true', + props: { + name: { required: true, kind: 'identifier' }, + rag: { kind: 'identifier' }, + query: { required: true, kind: 'string' }, + answer: { required: true, kind: 'string' }, + prompt: { kind: 'string' }, + requireCitations: { kind: 'boolean' }, + minGroundingCoverage: { kind: 'number' }, + }, + allowedChildren: ['answerSpan'], + }, + answerSpan: { + description: 'RAG answer grounding span — maps answer text character ranges to retrieved chunk ids.', + example: 'answerSpan start=0 end=34 chunks="refunds,policy" required=true', + props: { + start: { required: true, kind: 'number' }, + end: { required: true, kind: 'number' }, + chunks: { required: true, kind: 'string' }, + required: { kind: 'boolean' }, + }, + allowedChildren: [], + }, // ── React / UI element nodes ────────────────────────────────────────── diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index fb2f0758..44892036 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -93,6 +93,7 @@ export interface KernSemanticRagAnswerReviewFact { readonly groundingCount: number; readonly evalCount: number; readonly evalCaseCount: number; + readonly answerContractCount: number; readonly mcpRetrievalCount: number; readonly compatibleMcpRetrievalCount: number; readonly provenanceRequired: boolean; @@ -294,12 +295,14 @@ function 
buildRagAnswerReviewFacts(facts: RagSemanticFacts): KernSemanticRagAnsw isRagAnswerCompatibleMcpRetrieval(retrieval, citationsRequired), ); const evalCaseCount = pipeline.evals.reduce((count, evaluation) => count + (evaluation.caseCount ?? 0), 0); + const answerContractCount = pipeline.answerContracts.length; const issues = ragAnswerReviewIssues( facts, pipeline.name, pipeline.retrieverName, pipeline.prompt, pipeline.answer, + answerContractCount, citationsRequired, pipeline.groundings.length, pipeline.groundings.some((grounding) => grounding.requireCitations), @@ -318,6 +321,7 @@ function buildRagAnswerReviewFacts(facts: RagSemanticFacts): KernSemanticRagAnsw groundingCount: pipeline.groundings.length, evalCount: pipeline.evals.length, evalCaseCount, + answerContractCount, mcpRetrievalCount: mcpRetrievals.length, compatibleMcpRetrievalCount: compatibleMcpRetrievals.length, provenanceRequired: citationsRequired || mcpRetrievals.some((retrieval) => retrieval.requireGrounding), @@ -334,6 +338,7 @@ function ragAnswerReviewIssues( retrieverName: string, prompt: string | undefined, answer: string | undefined, + answerContractCount: number, citationsRequired: boolean, groundingCount: number, hasCitationGrounding: boolean, @@ -345,7 +350,7 @@ function ragAnswerReviewIssues( ): string[] { const issues: string[] = []; if (unresolvedRetriever) issues.push(`unresolved-retriever:${retrieverName}`); - if (!prompt && !answer) issues.push('missing-answer-surface'); + if (!prompt && !answer && answerContractCount === 0) issues.push('missing-answer-surface'); if (citationsRequired && groundingCount === 0) issues.push('missing-grounding'); if (citationsRequired && !hasCitationGrounding) issues.push('missing-citation-grounding'); if (evalCount === 0) issues.push('missing-eval'); diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 4a59e838..78b09946 100644 --- a/packages/core/src/semantic-validator.ts +++ 
b/packages/core/src/semantic-validator.ts @@ -197,6 +197,26 @@ export interface RagSemanticEvalAssertFact { readonly loc?: RagSemanticLocation; } +export interface RagSemanticAnswerSpanFact { + readonly start: number; + readonly end: number; + readonly chunkIds: readonly string[]; + readonly required: boolean; + readonly loc?: RagSemanticLocation; +} + +export interface RagSemanticAnswerContractFact { + readonly name: string; + readonly ragName?: string; + readonly query: string; + readonly answer: string; + readonly prompt?: string; + readonly requireCitations: boolean; + readonly minGroundingCoverage?: number; + readonly spans: readonly RagSemanticAnswerSpanFact[]; + readonly loc?: RagSemanticLocation; +} + export interface RagSemanticPipelineFact { readonly name: string; readonly retrieverName: string; @@ -205,6 +225,7 @@ export interface RagSemanticPipelineFact { readonly citations: boolean; readonly groundings: readonly RagSemanticGroundingFact[]; readonly evals: readonly RagSemanticEvalFact[]; + readonly answerContracts: readonly RagSemanticAnswerContractFact[]; readonly loc?: RagSemanticLocation; } @@ -757,6 +778,21 @@ interface RagAssertInfo { caseBound: boolean; } +interface RagAnswerContractInfo { + node: IRNode; + rootIndex: number; + name?: string; + ragName?: string; +} + +interface RagAnswerSpanInfo { + node: IRNode; + rootIndex: number; + contractName?: string; + contractNode?: IRNode; + contractBound: boolean; +} + interface RagMcpContainerInfo { node: IRNode; rootIndex: number; @@ -789,6 +825,8 @@ interface RagInfos { evals: RagEvalInfo[]; cases: RagCaseInfo[]; asserts: RagAssertInfo[]; + answerContracts: RagAnswerContractInfo[]; + answerSpans: RagAnswerSpanInfo[]; mcpRetrievals: RagMcpRetrievalInfo[]; mcpResources: RagMcpSymbolInfo[]; mcpTools: RagMcpSymbolInfo[]; @@ -812,6 +850,8 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio infos.evals.length === 0 && infos.cases.length === 0 && infos.asserts.length === 0 && 
+ infos.answerContracts.length === 0 && + infos.answerSpans.length === 0 && infos.mcpRetrievals.length === 0 && infos.mcpResources.length === 0 && infos.mcpTools.length === 0 && @@ -862,6 +902,12 @@ function validateRagGraphRoots(roots: readonly IRNode[], violations: SemanticVio for (const assertion of infos.asserts) { validateRagAssert(assertion, citationRequiredRagNames, violations); } + for (const contract of infos.answerContracts) { + validateRagAnswerContract(contract, infos.answerSpans, ragByName, citationRequiredRagNames, violations); + } + for (const span of infos.answerSpans) { + validateRagAnswerSpan(span, violations); + } validateRagMcpRetrievalDuplicates(infos.mcpRetrievals, violations); for (const retrieval of infos.mcpRetrievals) { validateRagMcpRetrieval(retrieval, retrieverByName, ragByName, citationRequiredRagNames, violations); @@ -880,6 +926,8 @@ function collectRagInfosForRoots(roots: readonly IRNode[]): RagInfos { evals: [], cases: [], asserts: [], + answerContracts: [], + answerSpans: [], mcpRetrievals: [], mcpResources: [], mcpTools: [], @@ -902,6 +950,9 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { nearestRagCaseBound = false, nearestRagEvalNode?: IRNode, nearestRagCaseNode?: IRNode, + nearestRagAnswerContractName?: string, + nearestRagAnswerContractNode?: IRNode, + nearestRagAnswerContractBound = false, nearestMcpContainer?: RagMcpContainerInfo, nearestMcpName?: string, ): void { @@ -918,6 +969,10 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { const nextRagCaseBound = node.type === 'ragCase' || nearestRagCaseBound; const nextRagEvalNode = node.type === 'ragEval' ? node : nearestRagEvalNode; const nextRagCaseNode = node.type === 'ragCase' ? node : nearestRagCaseNode; + const nextRagAnswerContractName = + node.type === 'ragAnswerContract' ? stringProp(node, 'name') : nearestRagAnswerContractName; + const nextRagAnswerContractNode = node.type === 'ragAnswerContract' ? 
node : nearestRagAnswerContractNode; + const nextRagAnswerContractBound = node.type === 'ragAnswerContract' || nearestRagAnswerContractBound; const nextMcpName = node.type === 'mcp' ? stringProp(node, 'name') || '' : nearestMcpName; const nextMcpContainer = node.type === 'tool' || node.type === 'prompt' @@ -978,6 +1033,21 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { evalBound: nearestRagEvalBound, caseBound: nearestRagCaseBound, }); + } else if (node.type === 'ragAnswerContract') { + out.answerContracts.push({ + node, + rootIndex, + name: stringProp(node, 'name'), + ragName: stringProp(node, 'rag') || nearestRagName, + }); + } else if (node.type === 'answerSpan') { + out.answerSpans.push({ + node, + rootIndex, + contractName: nearestRagAnswerContractName, + contractNode: nearestRagAnswerContractNode, + contractBound: nearestRagAnswerContractBound, + }); } else if (node.type === 'retrieve') { out.mcpRetrievals.push({ node, rootIndex, container: nearestMcpContainer }); } else if ( @@ -1005,6 +1075,9 @@ function collectRagInfos(root: IRNode, rootIndex: number, out: RagInfos): void { nextRagCaseBound, nextRagEvalNode, nextRagCaseNode, + nextRagAnswerContractName, + nextRagAnswerContractNode, + nextRagAnswerContractBound, nextMcpContainer, nextMcpName, ); @@ -1052,6 +1125,7 @@ function validateRagUniqueNames(infos: RagInfos, violations: SemanticViolation[] validateRagUniqueNameSet('rag', infos.pipelines, violations); validateRagUniqueEvalNames(infos.evals, violations); validateRagUniqueCaseNames(infos.cases, violations); + validateRagUniqueAnswerContractNames(infos.answerContracts, violations); } function validateRagUniqueNameSet( @@ -1134,6 +1208,28 @@ function validateRagUniqueCaseNames(cases: readonly RagCaseInfo[], violations: S } } +function validateRagUniqueAnswerContractNames( + contracts: readonly RagAnswerContractInfo[], + violations: SemanticViolation[], +): void { + const seen = new Map(); + for (const contract of 
contracts) { + if (!contract.name || !contract.ragName) continue; + const key = `${contract.ragName}:${contract.name}`; + const prev = seen.get(key); + if (prev) { + pushRagViolation( + violations, + 'rag-duplicate-answer-contract-name', + contract.node, + `Duplicate RAG answer contract named '${contract.name}' in rag '${contract.ragName}' — first defined at line ${prev.loc?.line ?? '?'}.`, + ); + } else { + seen.set(key, contract.node); + } + } +} + function validateRagSource( source: RagSourceInfo, mcpResourcesByName: ReadonlyMap, @@ -1668,6 +1764,170 @@ function validateRagAssert( } } +function validateRagAnswerContract( + contract: RagAnswerContractInfo, + spans: readonly RagAnswerSpanInfo[], + ragByName: ReadonlyMap, + citationRequiredRagNames: ReadonlySet, + violations: SemanticViolation[], +): void { + if (!contract.ragName) { + pushRagViolation( + violations, + 'rag-answer-contract-missing-rag', + contract.node, + 'RAG answer contract must be nested under a rag pipeline or declare rag=.', + ); + } else if (!ragByName.has(contract.ragName)) { + pushRagViolation( + violations, + 'rag-answer-contract-unknown-rag', + contract.node, + `RAG answer contract references unknown rag '${contract.ragName}'.`, + ); + } + + if (!contract.name) { + pushRagViolation( + violations, + 'rag-answer-contract-name-required', + contract.node, + 'RAG answer contract requires name=.', + ); + } + if (!stringProp(contract.node, 'query')) { + pushRagViolation( + violations, + 'rag-answer-contract-query-required', + contract.node, + 'RAG answer contract requires query=.', + ); + } + if (!stringProp(contract.node, 'answer')) { + pushRagViolation( + violations, + 'rag-answer-contract-answer-required', + contract.node, + 'RAG answer contract requires answer=.', + ); + } + + const minGroundingCoverage = numberProp(contract.node, 'minGroundingCoverage'); + if ( + invalidNumberProp(contract.node, 'minGroundingCoverage') || + (minGroundingCoverage !== undefined && (minGroundingCoverage < 0 || 
minGroundingCoverage > 1)) + ) { + pushRagViolation( + violations, + 'rag-answer-contract-min-grounding-coverage-invalid', + contract.node, + 'RAG answer contract minGroundingCoverage must be between 0 and 1.', + ); + } + + validateRagAnswerContractCoverage(contract, spans, minGroundingCoverage, violations); + + if ( + ragBooleanProp(contract.node, 'requireCitations') && + (!contract.ragName || !citationRequiredRagNames.has(contract.ragName)) + ) { + pushRagViolation( + violations, + 'rag-answer-contract-citations-require-grounding', + contract.node, + 'RAG answer contract requireCitations=true requires a citation-grounded rag.', + ); + } +} + +function validateRagAnswerContractCoverage( + contract: RagAnswerContractInfo, + spans: readonly RagAnswerSpanInfo[], + minGroundingCoverage: number | undefined, + violations: SemanticViolation[], +): void { + const answer = stringProp(contract.node, 'answer'); + if (!answer) return; + + const contractSpans = spans.filter((span) => span.contractNode === contract.node); + const grounded = new Array(answer.length).fill(false) as boolean[]; + for (const span of contractSpans) { + const start = numberProp(span.node, 'start'); + const end = numberProp(span.node, 'end'); + if ( + start !== undefined && + end !== undefined && + Number.isInteger(start) && + Number.isInteger(end) && + start >= 0 && + end > start && + end <= answer.length + ) { + for (let index = start; index < end; index += 1) grounded[index] = true; + } else if (end !== undefined && end > answer.length) { + pushRagViolation( + violations, + 'rag-answer-span-range-invalid', + span.node, + 'RAG answer span end must not exceed the parent answer length.', + ); + } + } + + if (minGroundingCoverage === undefined || minGroundingCoverage < 0 || minGroundingCoverage > 1) return; + const answerChars = countRagAnswerChars(answer); + const groundedChars = countRagGroundedAnswerChars(answer, grounded); + const coverage = answerChars === 0 ? 
0 : groundedChars / answerChars; + if (answerChars > 0 && coverage < minGroundingCoverage) { + pushRagViolation( + violations, + 'rag-answer-contract-grounding-coverage-insufficient', + contract.node, + `RAG answer contract grounding coverage ${coverage.toFixed(3)} is below minGroundingCoverage ${minGroundingCoverage.toFixed(3)}.`, + ); + } +} + +function validateRagAnswerSpan(span: RagAnswerSpanInfo, violations: SemanticViolation[]): void { + if (!span.contractBound) { + pushRagViolation( + violations, + 'rag-answer-span-missing-contract', + span.node, + 'RAG answer span must be nested under ragAnswerContract.', + ); + } + + const start = numberProp(span.node, 'start'); + const end = numberProp(span.node, 'end'); + if ( + invalidNumberProp(span.node, 'start') || + invalidNumberProp(span.node, 'end') || + start === undefined || + end === undefined || + !Number.isInteger(start) || + !Number.isInteger(end) || + start < 0 || + end <= start + ) { + pushRagViolation( + violations, + 'rag-answer-span-range-invalid', + span.node, + 'RAG answer span start/end must be non-negative integers with start < end.', + ); + } + + if (splitRagList(stringProp(span.node, 'chunks')).length === 0) { + pushRagViolation( + violations, + 'rag-answer-span-chunks-required', + span.node, + 'RAG answer span requires chunks=.', + ); + } +} + function validateRagMcpRetrievalDuplicates( retrievals: readonly RagMcpRetrievalInfo[], violations: SemanticViolation[], @@ -1920,7 +2180,15 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe corpora: infos.corpora.map((info) => ragCorpusFact(info, infos)), retrievers: infos.retrievers.map(ragRetrieverFact), pipelines: infos.pipelines.map((info) => - ragPipelineFact(info, infos.groundings, infos.evals, infos.cases, infos.asserts), + ragPipelineFact( + info, + infos.groundings, + infos.evals, + infos.cases, + infos.asserts, + infos.answerContracts, + infos.answerSpans, + ), ), mcpRetrievals: infos.mcpRetrievals.map((info) => 
ragMcpRetrievalFact(info, citationRequiredRagNames)), resourceFeedsCorpora: infos.sources @@ -1950,6 +2218,7 @@ export function collectRagSemanticFacts(root: IRNode | readonly IRNode[]): RagSe [ ...infos.groundings.map((info) => info.ragName), ...infos.evals.map((info) => info.ragName), + ...infos.answerContracts.map((info) => info.ragName), ...infos.mcpRetrievals.map((info) => stringProp(info.node, 'rag')), ].filter((name): name is string => !!name && !ragNames.has(name)), ), @@ -2051,6 +2320,8 @@ function ragPipelineFact( evals: readonly RagEvalInfo[], cases: readonly RagCaseInfo[], asserts: readonly RagAssertInfo[], + answerContracts: readonly RagAnswerContractInfo[], + answerSpans: readonly RagAnswerSpanInfo[], ): RagSemanticPipelineFact { return { name: info.name, @@ -2062,6 +2333,9 @@ function ragPipelineFact( evals: evals .filter((evaluation) => evaluation.ragName === info.name) .map((evaluation) => ragEvalFact(evaluation, cases, asserts)), + answerContracts: answerContracts + .filter((contract) => contract.ragName === info.name) + .map((contract) => ragAnswerContractFact(contract, answerSpans)), ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), }; } @@ -2131,6 +2405,34 @@ function ragEvalAssertFact(info: RagAssertInfo): RagSemanticEvalAssertFact { }; } +function ragAnswerContractFact( + info: RagAnswerContractInfo, + spans: readonly RagAnswerSpanInfo[], +): RagSemanticAnswerContractFact { + const contractSpans = spans.filter((span) => span.contractNode === info.node); + return { + name: info.name ?? '', + ...optionalStringValue('ragName', info.ragName), + query: stringProp(info.node, 'query') ?? '', + answer: stringProp(info.node, 'answer') ?? '', + ...optionalStringFact(info.node, 'prompt', 'prompt'), + requireCitations: ragBooleanProp(info.node, 'requireCitations'), + ...optionalNumberFact(info.node, 'minGroundingCoverage', 'minGroundingCoverage'), + spans: contractSpans.map(ragAnswerSpanFact), + ...(info.node.loc ? 
{ loc: ragLocation(info.node) } : {}), + }; +} + +function ragAnswerSpanFact(info: RagAnswerSpanInfo): RagSemanticAnswerSpanFact { + return { + start: numberProp(info.node, 'start') ?? 0, + end: numberProp(info.node, 'end') ?? 0, + chunkIds: splitRagList(stringProp(info.node, 'chunks')), + required: ragBooleanProp(info.node, 'required'), + ...(info.node.loc ? { loc: ragLocation(info.node) } : {}), + }; +} + function ragAssertTarget(kind: string): RagSemanticEvalAssertFact['target'] { if (kind === 'uniqueSourcesGte' || kind === 'chunkCountEq') return 'retrieved-chunks'; if (kind === 'latencyLte') return 'latency'; @@ -2182,6 +2484,22 @@ function splitRagList(value: string | undefined): string[] { .filter((item) => item.length > 0); } +function countRagAnswerChars(answer: string): number { + let count = 0; + for (let index = 0; index < answer.length; index += 1) { + if (!/\s/u.test(answer[index] ?? '')) count += 1; + } + return count; +} + +function countRagGroundedAnswerChars(answer: string, grounded: readonly boolean[]): number { + let count = 0; + for (let index = 0; index < answer.length; index += 1) { + if (grounded[index] && !/\s/u.test(answer[index] ?? 
'')) count += 1; + } + return count; +} + function ragMcpRetrievalFact( info: RagMcpRetrievalInfo, citationRequiredRagNames: ReadonlySet, diff --git a/packages/core/src/spec.ts b/packages/core/src/spec.ts index 5bded53f..5bdaf48d 100644 --- a/packages/core/src/spec.ts +++ b/packages/core/src/spec.ts @@ -356,6 +356,8 @@ export const NODE_TYPES = [ 'ragEval', 'ragCase', 'ragAssert', + 'ragAnswerContract', + 'answerSpan', 'expression-v1', ] as const; diff --git a/packages/core/tests/rag-runtime.test.ts b/packages/core/tests/rag-runtime.test.ts index b0a1f6b6..1bf7ee30 100644 --- a/packages/core/tests/rag-runtime.test.ts +++ b/packages/core/tests/rag-runtime.test.ts @@ -1,12 +1,14 @@ -import type { RagSemanticEvalFact } from '../src/index.js'; +import type { RagSemanticAnswerContractFact, RagSemanticEvalFact } from '../src/index.js'; import { createInMemoryRetriever, createRagRuntimeProvenance, evaluateRagAnswerContract, evaluateRagEvalContract, + evaluateRagSemanticAnswerContract, hashRetrievedChunkText, InMemoryRagCorpus, MAX_IN_MEMORY_RAG_TOP_K, + ragAnswerContractFromSemanticFact, ragMcpRetrieveProvenanceMapping, retrieveFromInMemoryCorpus, tokenizeForRetrieval, @@ -688,6 +690,55 @@ describe('RAG answer runtime contracts', () => { expect(JSON.parse(JSON.stringify(result))).toEqual(result); }); + test('evaluates semantic answer contract facts through the runtime contract engine', () => { + const answer = 'Refunds follow the refund policy.'; + const fact: RagSemanticAnswerContractFact = { + name: 'RefundAnswer', + ragName: 'AnswerDocs', + query: 'refund policy', + answer, + prompt: './answer.md', + requireCitations: true, + minGroundingCoverage: 1, + spans: [{ start: 0, end: answer.length, chunkIds: ['refunds'], required: true }], + }; + const retrieval = { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds follow the refund policy.', + score: 1, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }; + + const 
contract = ragAnswerContractFromSemanticFact(fact, retrieval); + const result = evaluateRagSemanticAnswerContract(fact, retrieval); + + expect(contract).toEqual( + expect.objectContaining({ + id: 'RefundAnswer', + ragName: 'AnswerDocs', + prompt: './answer.md', + requireCitations: true, + minGroundingCoverage: 1, + }), + ); + expect(contract.groundingSpans).toEqual([{ start: 0, end: answer.length, chunkIds: ['refunds'], required: true }]); + expect(result).toEqual( + expect.objectContaining({ + id: 'RefundAnswer', + passed: true, + status: 'grounded', + citedChunkIds: ['refunds'], + sources: ['docs/refunds.md'], + }), + ); + }); + test('reports partial and ungrounded answer contract failures', () => { const retrieval = { query: 'refund policy', diff --git a/packages/core/tests/rag-semantics.test.ts b/packages/core/tests/rag-semantics.test.ts index 41e81290..b7b9c184 100644 --- a/packages/core/tests/rag-semantics.test.ts +++ b/packages/core/tests/rag-semantics.test.ts @@ -24,12 +24,28 @@ describe('RAG language semantics', () => { 'ragEval', 'ragCase', 'ragAssert', + 'ragAnswerContract', + 'answerSpan', ]) { expect(isCoreNode(type)).toBe(true); expect(generateCoreNode({ type, props: {} })).toEqual([]); } }); + test('parses RAG answer contract nodes without unknown-node diagnostics', () => { + const diagnostics = parseDocumentWithDiagnostics( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragAnswerContract name=RefundAnswer query="q" answer="a"', + ' answerSpan start=0 end=1 chunks=refunds', + ].join('\n'), + ).diagnostics; + + expect(diagnostics.filter((diagnostic) => diagnostic.code === 'UNKNOWN_NODE_TYPE')).toEqual([]); + }); + test('accepts a minimal grounded RAG declaration graph', () => { const source = [ 'corpus name=Docs title="Support docs"', @@ -219,6 +235,73 @@ describe('RAG language semantics', () => { ]); }); + test('collects RAG answer contracts as semantic facts', () => { + const 
facts = collectRagSemanticFacts( + parseRoot( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch citations=true', + ' grounding requireCitations=true', + ' ragAnswerContract name=RefundAnswer query="How do refunds work?" answer="Refunds follow the refund policy." prompt="./answer.md" requireCitations=true minGroundingCoverage=0.8', + ' answerSpan start=0 end=33 chunks="refunds,policy" required=true', + ].join('\n'), + ), + ); + + expect(facts.pipelines[0]?.answerContracts).toEqual([ + expect.objectContaining({ + name: 'RefundAnswer', + ragName: 'AnswerDocs', + query: 'How do refunds work?', + answer: 'Refunds follow the refund policy.', + prompt: './answer.md', + requireCitations: true, + minGroundingCoverage: 0.8, + spans: [ + expect.objectContaining({ + start: 0, + end: 33, + chunkIds: ['refunds', 'policy'], + required: true, + }), + ], + }), + ]); + }); + + test('reports invalid RAG answer contract declarations', () => { + const rules = rulesFor( + [ + 'corpus name=Docs', + 'retriever name=DocsSearch corpus=Docs', + 'rag name=AnswerDocs retriever=DocsSearch', + ' ragAnswerContract name=Bad query="" answer="" requireCitations=true minGroundingCoverage=1.5', + ' answerSpan start=4 end=4 chunks=""', + ' ragAnswerContract name=LowCoverage query="q" answer="abcd" minGroundingCoverage=1', + ' answerSpan start=0 end=2 chunks=half', + ' ragAnswerContract name=LongSpan query="q" answer="abcd"', + ' answerSpan start=0 end=10 chunks=tooLong', + 'ragAnswerContract name=Detached rag=Missing query="q" answer="a"', + 'answerSpan start=0 end=1 chunks=orphan', + ].join('\n'), + ); + + expect(rules).toEqual( + expect.arrayContaining([ + 'rag-answer-contract-query-required', + 'rag-answer-contract-answer-required', + 'rag-answer-contract-min-grounding-coverage-invalid', + 'rag-answer-contract-citations-require-grounding', + 'rag-answer-span-range-invalid', + 'rag-answer-span-chunks-required', + 
'rag-answer-contract-grounding-coverage-insufficient', + 'rag-answer-contract-unknown-rag', + 'rag-answer-span-missing-contract', + ]), + ); + }); + test('keeps RAG eval case facts scoped to their parent eval node', () => { const facts = collectRagSemanticFacts( parseRoot( diff --git a/packages/core/tests/schema-validation.test.ts b/packages/core/tests/schema-validation.test.ts index 46242a26..b259a609 100644 --- a/packages/core/tests/schema-validation.test.ts +++ b/packages/core/tests/schema-validation.test.ts @@ -108,6 +108,8 @@ describe('Schema Validation', () => { ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', ' ragCase name=refunds query="How do refunds work?"', ' ragAssert kind=scoreGte threshold=0.72', + ' ragAnswerContract name=RefundAnswer query="How do refunds work?" answer="Refunds follow policy." minGroundingCoverage=0.8', + ' answerSpan start=0 end=22 chunks=refunds required=true', ].join('\n'), ); expect(valid).toHaveLength(0); @@ -119,6 +121,8 @@ describe('Schema Validation', () => { 'embed name=NoCorpus', 'retriever name=NoCorpus', 'rag name=NoRetriever', + 'ragAnswerContract query="q"', + 'answerSpan start=0 end=1', ].join('\n'), ); expect(missing.some((violation) => violation.message.includes("'corpus' requires prop 'name'"))).toBe(true); @@ -126,6 +130,13 @@ describe('Schema Validation', () => { expect(missing.some((violation) => violation.message.includes("'embed' requires prop 'corpus'"))).toBe(true); expect(missing.some((violation) => violation.message.includes("'retriever' requires prop 'corpus'"))).toBe(true); expect(missing.some((violation) => violation.message.includes("'rag' requires prop 'retriever'"))).toBe(true); + expect(missing.some((violation) => violation.message.includes("'ragAnswerContract' requires prop 'name'"))).toBe( + true, + ); + expect( + missing.some((violation) => violation.message.includes("'ragAnswerContract' requires prop 'answer'")), + ).toBe(true); + expect(missing.some((violation) => 
violation.message.includes("'answerSpan' requires prop 'chunks'"))).toBe(true); const misplaced = validate( ['retriever name=DocsSearch corpus=Docs', ' grounding requireCitations=true'].join('\n'), diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 82308957..4850943c 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -250,6 +250,8 @@ describe('KERN semantic substrate', () => { ' grounding requireCitations=true policy=strict maxContext=6000', ' ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract', ' ragCase name=refunds query="refund policy"', + ' ragAnswerContract name=RefundAnswer query="refund policy" answer="Refunds are policy-backed." requireCitations=true minGroundingCoverage=0.8', + ' answerSpan start=0 end=26 chunks=refunds required=true', 'mcp name=Support', ' resource name=DocsResource uri="docs://manuals"', ' tool name=answerQuestion', @@ -292,6 +294,13 @@ describe('KERN semantic substrate', () => { citations: true, groundings: [expect.objectContaining({ requireCitations: true, policy: 'strict' })], evals: [expect.objectContaining({ metric: 'faithfulness', threshold: 0.85 })], + answerContracts: [ + expect.objectContaining({ + name: 'RefundAnswer', + query: 'refund policy', + spans: [expect.objectContaining({ chunkIds: ['refunds'] })], + }), + ], }), ]); expect(substrate.ragFacts?.mcpRetrievals).toEqual([ @@ -331,6 +340,7 @@ describe('KERN semantic substrate', () => { groundingCount: 1, evalCount: 1, evalCaseCount: 1, + answerContractCount: 1, mcpRetrievalCount: 1, compatibleMcpRetrievalCount: 1, provenanceRequired: true, From 3aa88134e40d02c61bbcef3683adb9f7b9eeba4d Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 01:33:41 +0200 Subject: [PATCH 29/63] test(core): add rag contract conformance fixtures --- packages/core/src/decompiler.ts | 5 + .../rag-answer-contracts/full-grounded.kern | 14 + 
.../multi-span-grounded.kern | 14 + .../rag-answer-contracts/unknown-chunk.kern | 12 + packages/core/tests/rag-conformance.test.ts | 270 ++++++++++++++++++ 5 files changed, 315 insertions(+) create mode 100644 packages/core/tests/fixtures/rag-answer-contracts/full-grounded.kern create mode 100644 packages/core/tests/fixtures/rag-answer-contracts/multi-span-grounded.kern create mode 100644 packages/core/tests/fixtures/rag-answer-contracts/unknown-chunk.kern create mode 100644 packages/core/tests/rag-conformance.test.ts diff --git a/packages/core/src/decompiler.ts b/packages/core/src/decompiler.ts index 2b9efccf..05f9afe2 100644 --- a/packages/core/src/decompiler.ts +++ b/packages/core/src/decompiler.ts @@ -64,6 +64,11 @@ export function decompile(root: IRNode): DecompileResult { } const props = node.props || {}; + if (node.type === 'document') { + for (const child of node.children || []) render(child, indent); + return; + } + // Canonical-grammar cases — emit re-parseable KERN. Other node types // still fall through to the debug-shape serializer below; make them // canonical in a follow-up PR. 
diff --git a/packages/core/tests/fixtures/rag-answer-contracts/full-grounded.kern b/packages/core/tests/fixtures/rag-answer-contracts/full-grounded.kern new file mode 100644 index 00000000..a1a07821 --- /dev/null +++ b/packages/core/tests/fixtures/rag-answer-contracts/full-grounded.kern @@ -0,0 +1,14 @@ +corpus name=Docs + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + chunking source=manuals strategy=semantic maxTokens=600 overlap=80 + +retriever name=DocsSearch corpus=Docs mode=hybrid topK=2 minScore=0.5 + +rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" citations=true + grounding requireCitations=true policy=strict maxContext=6000 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="refund policy" topK=2 minScore=0.5 chunkCount=1 sources="docs/refunds.md" + ragAssert kind=sourceGlob value="docs/refunds.md" required=true + ragAssert kind=citesRequired + ragAnswerContract name=RefundAnswer query="refund policy" answer="Refunds follow the refund policy." 
requireCitations=true minGroundingCoverage=1 + answerSpan start=0 end=33 chunks=refunds required=true diff --git a/packages/core/tests/fixtures/rag-answer-contracts/multi-span-grounded.kern b/packages/core/tests/fixtures/rag-answer-contracts/multi-span-grounded.kern new file mode 100644 index 00000000..7b4587ae --- /dev/null +++ b/packages/core/tests/fixtures/rag-answer-contracts/multi-span-grounded.kern @@ -0,0 +1,14 @@ +corpus name=Docs + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + +retriever name=DocsSearch corpus=Docs topK=2 + +rag name=AnswerDocs retriever=DocsSearch answer="grounded" citations=true + grounding requireCitations=true policy=strict + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="refund policy" chunkCount=2 sources="docs/refunds.md,docs/policies.md" + ragAssert kind=uniqueSourcesGte count=2 + ragAssert kind=citesRequired + ragAnswerContract name=MultiSpanAnswer query="refund policy" answer="Refunds follow policy. Shipping is separate." 
requireCitations=true minGroundingCoverage=1 + answerSpan start=0 end=22 chunks=refunds required=true + answerSpan start=23 end=44 chunks=shipping required=true diff --git a/packages/core/tests/fixtures/rag-answer-contracts/unknown-chunk.kern b/packages/core/tests/fixtures/rag-answer-contracts/unknown-chunk.kern new file mode 100644 index 00000000..8c3f9e29 --- /dev/null +++ b/packages/core/tests/fixtures/rag-answer-contracts/unknown-chunk.kern @@ -0,0 +1,12 @@ +corpus name=Docs + source name=manuals kind=local uri="./docs/**/*.md" media=markdown + +retriever name=DocsSearch corpus=Docs topK=1 + +rag name=AnswerDocs retriever=DocsSearch prompt="./answer.md" + grounding policy=strict maxContext=6000 + ragEval name=Faithfulness metric=faithfulness threshold=0.85 mode=contract + ragCase name=refunds query="refund policy" chunkCount=1 + ragAssert kind=contains value="refund" required=true + ragAnswerContract name=MissingChunkAnswer query="refund policy" answer="Refunds follow the refund policy." 
minGroundingCoverage=1 + answerSpan start=0 end=33 chunks=missing required=true diff --git a/packages/core/tests/rag-conformance.test.ts b/packages/core/tests/rag-conformance.test.ts new file mode 100644 index 00000000..8cf83361 --- /dev/null +++ b/packages/core/tests/rag-conformance.test.ts @@ -0,0 +1,270 @@ +import { readFileSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { + collectRagSemanticFacts, + decompile, + evaluateRagEvalContract, + evaluateRagSemanticAnswerContract, + generateCoreNode, + parseDocumentWithDiagnostics, + type RagSemanticFacts, + type RetrieveResult, + validateRagSemantics, + validateSchema, + withRagRuntimeProvenance, +} from '../src/index.js'; +import type { IRNode } from '../src/types.js'; + +interface RagConformanceFixture { + readonly file: string; + readonly answerContractName: string; + readonly evalContractName: string; + readonly expectedAnswerPassed: boolean; + readonly expectedAnswerStatus: string; + readonly expectedEvalPassed: boolean; + readonly retrieval: RetrieveResult; +} + +const FIXTURE_DIR = resolve(dirname(fileURLToPath(import.meta.url)), 'fixtures/rag-answer-contracts'); + +const FIXTURES: readonly RagConformanceFixture[] = [ + { + file: 'full-grounded.kern', + answerContractName: 'RefundAnswer', + evalContractName: 'Faithfulness', + expectedAnswerPassed: true, + expectedAnswerStatus: 'grounded', + expectedEvalPassed: true, + retrieval: { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'Refunds follow the refund policy.', + score: 1, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md', locator: 'L1-L2' }, + }, + ], + }, + }, + { + file: 'multi-span-grounded.kern', + answerContractName: 'MultiSpanAnswer', + evalContractName: 'Faithfulness', + expectedAnswerPassed: true, + expectedAnswerStatus: 'grounded', + expectedEvalPassed: true, + retrieval: { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 
'Refunds follow policy.', + score: 0.95, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + { + id: 'shipping', + text: 'Shipping is separate.', + score: 0.9, + source: 'docs/policies.md', + citation: { uri: 'docs/policies.md' }, + }, + ], + }, + }, + { + file: 'unknown-chunk.kern', + answerContractName: 'MissingChunkAnswer', + evalContractName: 'Faithfulness', + expectedAnswerPassed: false, + expectedAnswerStatus: 'invalid', + expectedEvalPassed: true, + retrieval: { + query: 'refund policy', + chunks: [ + { + id: 'refunds', + text: 'refund policy', + score: 1, + source: 'docs/refunds.md', + citation: { uri: 'docs/refunds.md' }, + }, + ], + }, + }, +]; + +describe('RAG eval and answer contract conformance', () => { + for (const fixture of FIXTURES) { + test(`${fixture.file} agrees across semantic, decompile, codegen, and runtime views`, () => { + const source = readFixture(fixture.file); + const original = parseValidRagSource(source, fixture.file); + const decompiled = decompile(original).code; + expect(decompiled, `${fixture.file} decompiled answer contract`).toContain( + `ragAnswerContract name=${fixture.answerContractName}`, + ); + expect(decompiled, `${fixture.file} decompiled answer span`).toContain('answerSpan '); + expect(decompiled, `${fixture.file} decompiled eval contract`).toContain( + `ragEval name=${fixture.evalContractName}`, + ); + const reparsed = parseValidRagSource(decompiled, `${fixture.file}:decompiled`); + const originalFacts = collectRagSemanticFacts(original); + const decompiledFacts = collectRagSemanticFacts(reparsed); + + expect(normalizedRagFacts(decompiledFacts)).toEqual(normalizedRagFacts(originalFacts)); + expect(generateCoreNode(original)).toEqual([]); + expect(generateCoreNode(reparsed)).toEqual([]); + + const originalResult = evaluateFixtureAnswerContract(originalFacts, fixture); + const decompiledResult = evaluateFixtureAnswerContract(decompiledFacts, fixture); + 
expect(normalizedAnswerResult(decompiledResult)).toEqual(normalizedAnswerResult(originalResult)); + expect(originalResult.passed).toBe(fixture.expectedAnswerPassed); + expect(originalResult.status).toBe(fixture.expectedAnswerStatus); + + const originalEval = evaluateFixtureEval(originalFacts, fixture); + const decompiledEval = evaluateFixtureEval(decompiledFacts, fixture); + expect(normalizedEvalResult(decompiledEval)).toEqual(normalizedEvalResult(originalEval)); + expect(originalEval.passed).toBe(fixture.expectedEvalPassed); + }); + } +}); + +function readFixture(file: string): string { + const path = resolve(FIXTURE_DIR, file); + try { + return readFileSync(path, 'utf-8'); + } catch (error) { + throw new Error(`failed to read RAG conformance fixture ${path}`, { cause: error }); + } +} + +function parseValidRagSource(source: string, label: string): IRNode { + const parsed = parseDocumentWithDiagnostics(source); + const parseErrors = parsed.diagnostics.filter((diagnostic) => diagnostic.severity === 'error'); + expect(parseErrors, `${label} parse errors`).toEqual([]); + expect(parsed.diagnostics.filter((diagnostic) => diagnostic.code === 'UNKNOWN_NODE_TYPE')).toEqual([]); + expect(validateSchema(parsed.root), `${label} schema violations`).toEqual([]); + expect(validateRagSemantics(parsed.root), `${label} RAG semantic violations`).toEqual([]); + return parsed.root; +} + +function evaluateFixtureAnswerContract(facts: RagSemanticFacts, fixture: RagConformanceFixture) { + const answerContract = facts.pipelines + .flatMap((pipeline) => pipeline.answerContracts) + .find((contract) => contract.name === fixture.answerContractName); + if (!answerContract) throw new Error(`missing answer contract ${fixture.answerContractName}`); + const retrieval = withRagRuntimeProvenance(fixture.retrieval, { + retrieverName: 'DocsSearch', + targetKind: 'rag', + targetName: answerContract.ragName, + citationsRequired: answerContract.requireCitations, + startedAtMs: 100, + durationMs: 5, + 
}); + return evaluateRagSemanticAnswerContract(answerContract, retrieval); +} + +function evaluateFixtureEval(facts: RagSemanticFacts, fixture: RagConformanceFixture) { + const evaluation = facts.pipelines + .flatMap((pipeline) => pipeline.evals) + .find((contract) => contract.name === fixture.evalContractName); + if (!evaluation) throw new Error(`missing eval ${fixture.evalContractName} in ${fixture.file}`); + return evaluateRagEvalContract(evaluation, () => fixture.retrieval, { now: fixedNow() }); +} + +function fixedNow(): () => number { + let now = 1000; + return () => { + now += 5; + return now; + }; +} + +function normalizedRagFacts(facts: RagSemanticFacts) { + return { + corpora: facts.corpora.map((corpus) => ({ + name: corpus.name, + sources: corpus.sources.map((source) => source.name), + chunking: corpus.chunking.length, + })), + retrievers: facts.retrievers.map((retriever) => ({ + name: retriever.name, + corpusName: retriever.corpusName, + topK: retriever.topK, + minScore: retriever.minScore, + })), + pipelines: facts.pipelines.map((pipeline) => ({ + name: pipeline.name, + retrieverName: pipeline.retrieverName, + citations: pipeline.citations, + groundingCount: pipeline.groundings.length, + evals: pipeline.evals.map((evaluation) => ({ + name: evaluation.name, + caseCount: evaluation.caseCount, + assertCount: evaluation.assertCount, + })), + answerContracts: pipeline.answerContracts.map((contract) => ({ + name: contract.name, + ragName: contract.ragName, + query: contract.query, + answer: contract.answer, + requireCitations: contract.requireCitations, + minGroundingCoverage: contract.minGroundingCoverage, + spans: contract.spans.map((span) => ({ + start: span.start, + end: span.end, + chunkIds: [...span.chunkIds], + required: span.required, + })), + })), + })), + }; +} + +function normalizedAnswerResult(result: ReturnType) { + return { + id: result.id, + ragName: result.ragName, + query: result.query, + passed: result.passed, + status: result.status, + 
groundingCoverage: result.groundingCoverage, + groundedChars: result.groundedChars, + answerChars: result.answerChars, + citedChunkIds: [...result.citedChunkIds], + sources: [...result.sources], + diagnostics: result.diagnostics.map((diagnostic) => ({ + code: diagnostic.code, + spanIndex: diagnostic.spanIndex, + chunkId: diagnostic.chunkId, + })), + }; +} + +function normalizedEvalResult(result: ReturnType) { + return { + passed: result.passed, + ragName: result.ragName, + evalName: result.evalName, + caseCount: result.caseCount, + passedCaseCount: result.passedCaseCount, + assertionCount: result.assertionCount, + passedAssertionCount: result.passedAssertionCount, + cases: result.cases.map((evaluationCase) => ({ + name: evaluationCase.name, + query: evaluationCase.query, + passed: evaluationCase.passed, + retrieveOptions: evaluationCase.retrieveOptions, + chunks: evaluationCase.chunks.map((chunk) => ({ id: chunk.id, source: chunk.source })), + assertions: evaluationCase.assertions.map((assertion) => ({ + kind: assertion.kind, + passed: assertion.passed, + code: assertion.code, + })), + })), + }; +} From 7d287b7ea2b409708cd9cedfedfc83c1d43fc29b Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 02:15:30 +0200 Subject: [PATCH 30/63] feat(core): add declared shape validators --- .../core/src/core-runtime/shape-validator.ts | 633 ++++++++++++++++++ packages/core/src/index.ts | 10 + packages/core/src/semantic-substrate.ts | 4 + .../core/tests/core-shape-runtime.test.ts | 291 ++++++++ .../core/tests/semantic-substrate.test.ts | 40 ++ 5 files changed, 978 insertions(+) create mode 100644 packages/core/src/core-runtime/shape-validator.ts create mode 100644 packages/core/tests/core-shape-runtime.test.ts diff --git a/packages/core/src/core-runtime/shape-validator.ts b/packages/core/src/core-runtime/shape-validator.ts new file mode 100644 index 00000000..670df190 --- /dev/null +++ b/packages/core/src/core-runtime/shape-validator.ts @@ -0,0 +1,633 @@ +import type { 
IRNode } from '../types.js'; +import type { KernInstanceValue, KernValue } from './index.js'; + +export type CoreShapeDiagnosticCode = + | 'shape-extends-cycle' + | 'shape-extends-unknown' + | 'shape-field-conflict' + | 'shape-field-duplicate' + | 'shape-field-missing' + | 'shape-field-type' + | 'shape-generic-unsupported' + | 'shape-indexer-key-unsupported' + | 'shape-interface-not-found' + | 'shape-object-expected' + | 'shape-type-reference-unknown' + | 'shape-type-unsupported' + | 'shape-unexpected-field' + | 'shape-value-cycle'; + +export interface CoreShapeDiagnostic { + readonly code: CoreShapeDiagnosticCode; + readonly message: string; + readonly interfaceName?: string; + readonly fieldName?: string; + readonly path?: string; + readonly expected?: string; + readonly actual?: string; +} + +export interface CoreShapeFieldFact { + readonly name: string; + readonly type?: string; + readonly optional: boolean; + readonly inheritedFrom?: string; +} + +export interface CoreShapeIndexerFact { + readonly keyName: string; + readonly keyType: string; + readonly type: string; + readonly readonly: boolean; +} + +export interface CoreShapeInterfaceFact { + readonly name: string; + readonly extends: readonly string[]; + readonly fields: readonly CoreShapeFieldFact[]; + readonly indexers: readonly CoreShapeIndexerFact[]; + readonly generic: boolean; + readonly validatorAvailable: boolean; + readonly unsupportedReasons: readonly string[]; +} + +export interface CoreShapeFacts { + readonly interfaces: readonly CoreShapeInterfaceFact[]; + readonly extendsEdges: readonly { + readonly from: string; + readonly to: string; + readonly resolved: boolean; + }[]; + readonly validationDiagnostics: readonly CoreShapeDiagnostic[]; +} + +export interface CoreShapeValidationResult { + readonly passed: boolean; + readonly interfaceName: string; + readonly diagnostics: readonly CoreShapeDiagnostic[]; +} + +interface ShapeInterface { + readonly name: string; + readonly extendsNames: readonly 
string[]; + readonly fields: readonly ShapeField[]; + readonly indexers: readonly ShapeIndexer[]; + readonly generic: boolean; +} + +interface ShapeField { + readonly name: string; + readonly type?: string; + readonly optional: boolean; + readonly inheritedFrom?: string; +} + +interface ShapeIndexer { + readonly keyName: string; + readonly keyType: string; + readonly type: string; + readonly readonly: boolean; +} + +interface ShapeRegistry { + readonly interfaces: ReadonlyMap; + readonly diagnostics: readonly CoreShapeDiagnostic[]; +} + +interface ResolvedShape { + readonly fields: readonly ShapeField[]; + readonly indexers: readonly ShapeIndexer[]; + readonly diagnostics: readonly CoreShapeDiagnostic[]; +} + +/** + * Validate a runtime record or class instance against a declared interface shape. + * V1 supports primitives, arrays, nested interfaces, extends, and indexers. + * Class instances are checked against initialized fields only; getters and + * methods are not invoked during validation. 
+ */ +export function validateCoreShape( + value: KernValue, + interfaceName: string, + rootOrNodes: IRNode | readonly IRNode[], +): CoreShapeValidationResult { + const registry = collectShapeRegistry(rootOrNodes); + const diagnostics: CoreShapeDiagnostic[] = []; + const shape = registry.interfaces.get(interfaceName); + if (!shape) { + diagnostics.push({ + code: 'shape-interface-not-found', + message: `KERN core shape '${interfaceName}' is not declared.`, + interfaceName, + }); + return { passed: false, interfaceName, diagnostics }; + } + diagnostics.push(...validateAgainstInterface(value, shape, registry, interfaceName, [], new WeakMap())); + return { passed: diagnostics.length === 0, interfaceName, diagnostics }; +} + +export function assertCoreShape( + value: KernValue, + interfaceName: string, + rootOrNodes: IRNode | readonly IRNode[], +): void { + const result = validateCoreShape(value, interfaceName, rootOrNodes); + if (result.passed) return; + throw new Error( + `KERN core shape validation failed for ${interfaceName}:\n${result.diagnostics + .map((diagnostic) => diagnostic.message) + .join('\n')}`, + ); +} + +/** + * Collect review/substrate facts for declared interface shapes without + * changing runtime behavior. The facts include effective inherited fields and + * indexers plus diagnostics for unsupported v1 contracts. 
+ */ +export function collectCoreShapeFacts(rootOrNodes: IRNode | readonly IRNode[]): CoreShapeFacts { + const registry = collectShapeRegistry(rootOrNodes); + const resolvedByName = new Map(); + const resolvedShape = (shape: ShapeInterface): ResolvedShape => { + const cached = resolvedByName.get(shape.name); + if (cached) return cached; + const resolved = resolveShape(shape, registry, []); + resolvedByName.set(shape.name, resolved); + return resolved; + }; + const interfaces = Array.from(registry.interfaces.values()).map((shape) => { + const resolved = resolvedShape(shape); + const unsupportedReasons = shapeUnsupportedReasons(shape, resolved, registry); + return { + name: shape.name, + extends: [...shape.extendsNames], + fields: resolved.fields.map((field) => ({ ...field })), + indexers: resolved.indexers.map((indexer) => ({ ...indexer })), + generic: shape.generic, + validatorAvailable: unsupportedReasons.length === 0, + unsupportedReasons, + }; + }); + return { + interfaces, + extendsEdges: Array.from(registry.interfaces.values()).flatMap((shape) => + shape.extendsNames.map((base) => ({ + from: shape.name, + to: base, + resolved: registry.interfaces.has(base) && !extendsEdgeParticipatesInCycle(shape.name, base, registry), + })), + ), + validationDiagnostics: dedupeDiagnostics([ + ...registry.diagnostics, + ...Array.from(registry.interfaces.values()).flatMap((shape) => resolvedShape(shape).diagnostics), + ]), + }; +} + +function collectShapeRegistry(rootOrNodes: IRNode | readonly IRNode[]): ShapeRegistry { + const diagnostics: CoreShapeDiagnostic[] = []; + const interfaces = new Map(); + for (const node of interfaceNodes(rootOrNodes)) { + if (node.type !== 'interface') continue; + const name = stringProp(node.props?.name); + if (!name) continue; + const shape: ShapeInterface = { + name, + extendsNames: splitExtends(node.props?.extends), + fields: (node.children ?? 
[]).filter((child) => child.type === 'field').map((field) => shapeField(field)), + indexers: (node.children ?? []) + .filter((child) => child.type === 'indexer') + .map((indexer) => shapeIndexer(indexer)), + generic: !!stringProp(node.props?.generics), + }; + if (shape.generic) { + diagnostics.push({ + code: 'shape-generic-unsupported', + message: `KERN core shape '${shape.name}' uses generics, which are not executable shape contracts in v1.`, + interfaceName: shape.name, + }); + } + interfaces.set(name, shape); + } + return { interfaces, diagnostics }; +} + +function validateAgainstInterface( + value: KernValue, + shape: ShapeInterface, + registry: ShapeRegistry, + path: string, + stack: readonly string[], + visited: WeakMap, Set>, +): CoreShapeDiagnostic[] { + const diagnostics: CoreShapeDiagnostic[] = []; + if (shape.generic) { + diagnostics.push({ + code: 'shape-generic-unsupported', + message: `KERN core shape '${shape.name}' uses generics, which are not executable shape contracts in v1.`, + interfaceName: shape.name, + path, + }); + } + const object = recordEntries(value); + if (!object) { + return [ + { + code: 'shape-object-expected', + message: `KERN core shape '${shape.name}' expected a record or instance at ${path}.`, + interfaceName: shape.name, + path, + expected: shape.name, + actual: value.kind, + }, + ]; + } + const activeForValue = visited.get(object) ?? 
new Set(); + if (activeForValue.has(shape.name)) { + return [ + { + code: 'shape-value-cycle', + message: `KERN core shape '${shape.name}' encountered a recursive value at ${path}.`, + interfaceName: shape.name, + path, + }, + ]; + } + activeForValue.add(shape.name); + visited.set(object, activeForValue); + const resolved = resolveShape(shape, registry, stack); + diagnostics.push(...resolved.diagnostics); + + try { + const declaredFieldNames = new Set(resolved.fields.map((field) => field.name)); + for (const field of resolved.fields) { + if (!Object.hasOwn(object, field.name)) { + if (!field.optional) { + diagnostics.push({ + code: 'shape-field-missing', + message: `KERN core shape '${shape.name}' missing required field ${fieldPath(path, field.name)}.`, + interfaceName: shape.name, + fieldName: field.name, + path: fieldPath(path, field.name), + expected: field.type, + }); + } + continue; + } + diagnostics.push( + ...validateType( + object[field.name] ?? kUndefinedValue(), + field.type, + registry, + fieldPath(path, field.name), + stack, + visited, + ), + ); + } + + for (const [key, entry] of Object.entries(object)) { + if (declaredFieldNames.has(key)) continue; + const matchingIndexers = resolved.indexers.filter((candidate) => keyMatchesIndexer(key, candidate)); + if (matchingIndexers.length === 0) { + diagnostics.push({ + code: 'shape-unexpected-field', + message: `KERN core shape '${shape.name}' does not declare field ${fieldPath(path, key)}.`, + interfaceName: shape.name, + fieldName: key, + path: fieldPath(path, key), + }); + continue; + } + for (const indexer of matchingIndexers) { + diagnostics.push(...validateType(entry, indexer.type, registry, fieldPath(path, key), stack, visited)); + } + } + } finally { + activeForValue.delete(shape.name); + if (activeForValue.size === 0) visited.delete(object); + } + + return diagnostics; +} + +function validateType( + value: KernValue, + rawType: string | undefined, + registry: ShapeRegistry, + path: string, + stack: 
readonly string[], + visited: WeakMap, Set>, +): CoreShapeDiagnostic[] { + const type = normalizeType(rawType); + if (!type || type === 'any' || type === 'unknown') return []; + if (type.endsWith('[]')) return validateArrayType(value, type.slice(0, -2), registry, path, stack, visited); + const arrayMatch = /^Array<(.+)>$/.exec(type); + if (arrayMatch) return validateArrayType(value, arrayMatch[1] ?? '', registry, path, stack, visited); + if (isPrimitiveType(type)) { + if (value.kind === type) return []; + return [ + { + code: 'shape-field-type', + message: `KERN core shape expected ${path} to be ${type}, got ${value.kind}.`, + path, + expected: type, + actual: value.kind, + }, + ]; + } + if (isSimpleIdentifier(type)) { + const nested = registry.interfaces.get(type); + if (!nested) { + return [ + { + code: 'shape-type-reference-unknown', + message: `KERN core shape field ${path} references unknown interface '${type}'.`, + path, + expected: type, + }, + ]; + } + return validateAgainstInterface(value, nested, registry, path, stack, visited); + } + return [ + { + code: 'shape-type-unsupported', + message: `KERN core shape field ${path} uses unsupported v1 type '${type}'.`, + path, + expected: type, + actual: value.kind, + }, + ]; +} + +function validateArrayType( + value: KernValue, + itemType: string, + registry: ShapeRegistry, + path: string, + stack: readonly string[], + visited: WeakMap, Set>, +): CoreShapeDiagnostic[] { + if (value.kind !== 'array') { + return [ + { + code: 'shape-field-type', + message: `KERN core shape expected ${path} to be array, got ${value.kind}.`, + path, + expected: `${itemType}[]`, + actual: value.kind, + }, + ]; + } + return value.items.flatMap((item, index) => + validateType(item, itemType, registry, `${path}[${index}]`, stack, visited), + ); +} + +function resolveShape(shape: ShapeInterface, registry: ShapeRegistry, stack: readonly string[]): ResolvedShape { + const diagnostics: CoreShapeDiagnostic[] = []; + if 
(stack.includes(shape.name)) { + return { + fields: [], + indexers: [], + diagnostics: [ + { + code: 'shape-extends-cycle', + message: `KERN core shape inheritance cycle: ${[...stack, shape.name].join(' -> ')}.`, + interfaceName: shape.name, + }, + ], + }; + } + const fields = new Map(); + const indexers: ShapeIndexer[] = []; + for (const baseName of shape.extendsNames) { + const base = registry.interfaces.get(baseName); + if (!base) { + diagnostics.push({ + code: 'shape-extends-unknown', + message: `KERN core shape '${shape.name}' extends unknown interface '${baseName}'.`, + interfaceName: shape.name, + expected: baseName, + }); + continue; + } + const resolved = resolveShape(base, registry, [...stack, shape.name]); + diagnostics.push(...resolved.diagnostics); + for (const field of resolved.fields) { + const inheritedField = { ...field, inheritedFrom: field.inheritedFrom ?? base.name }; + const existing = fields.get(field.name); + if (existing && !sameShapeField(existing, inheritedField)) { + diagnostics.push({ + code: 'shape-field-conflict', + message: `KERN core shape '${shape.name}' has conflicting inherited field '${field.name}'.`, + interfaceName: shape.name, + fieldName: field.name, + expected: existing.type, + actual: field.type, + }); + continue; + } + fields.set(field.name, inheritedField); + } + indexers.push(...resolved.indexers); + } + const ownFieldNames = new Set(); + for (const field of shape.fields) { + if (ownFieldNames.has(field.name)) { + diagnostics.push({ + code: 'shape-field-duplicate', + message: `KERN core shape '${shape.name}' declares duplicate field '${field.name}'.`, + interfaceName: shape.name, + fieldName: field.name, + expected: field.type, + actual: field.type, + }); + continue; + } + ownFieldNames.add(field.name); + const existing = fields.get(field.name); + if (existing && (existing.type !== field.type || existing.optional !== field.optional)) { + diagnostics.push({ + code: 'shape-field-conflict', + message: `KERN core shape 
'${shape.name}' conflicts with inherited field '${field.name}'.`, + interfaceName: shape.name, + fieldName: field.name, + expected: existing.type, + actual: field.type, + }); + } + fields.set(field.name, field); + } + indexers.push(...shape.indexers); + diagnostics.push(...indexers.flatMap((indexer) => validateIndexerShape(shape.name, indexer))); + return { fields: Array.from(fields.values()), indexers, diagnostics }; +} + +function validateIndexerShape(interfaceName: string, indexer: ShapeIndexer): CoreShapeDiagnostic[] { + if (indexer.keyType === 'string' || indexer.keyType === 'number') return []; + return [ + { + code: 'shape-indexer-key-unsupported', + message: `KERN core shape '${interfaceName}' indexer key type '${indexer.keyType}' is not supported in v1.`, + interfaceName, + expected: 'string|number', + actual: indexer.keyType, + }, + ]; +} + +function shapeUnsupportedReasons( + shape: ShapeInterface, + resolved: ResolvedShape, + registry: ShapeRegistry, +): readonly string[] { + const reasons = new Set(); + if (shape.generic) reasons.add('generic-interface'); + for (const diagnostic of resolved.diagnostics) reasons.add(diagnostic.code); + for (const field of resolved.fields) { + for (const issue of unsupportedTypeReasons(field.type, registry)) reasons.add(issue); + } + for (const indexer of resolved.indexers) { + if (indexer.keyType !== 'string' && indexer.keyType !== 'number') reasons.add('shape-indexer-key-unsupported'); + for (const issue of unsupportedTypeReasons(indexer.type, registry)) reasons.add(issue); + } + return [...reasons].sort(); +} + +function unsupportedTypeReasons(rawType: string | undefined, registry: ShapeRegistry): string[] { + const type = normalizeType(rawType); + if (!type || type === 'any' || type === 'unknown' || isPrimitiveType(type)) return []; + if (type.endsWith('[]')) return unsupportedTypeReasons(type.slice(0, -2), registry); + const arrayMatch = /^Array<(.+)>$/.exec(type); + if (arrayMatch) return 
unsupportedTypeReasons(arrayMatch[1], registry); + if (isSimpleIdentifier(type)) return registry.interfaces.has(type) ? [] : [`unknown-type:${type}`]; + return [`unsupported-type:${type}`]; +} + +function interfaceNodes(rootOrNodes: IRNode | readonly IRNode[]): readonly IRNode[] { + const found: IRNode[] = []; + for (const node of topLevelNodes(rootOrNodes)) visitInterfaceNodes(node, found); + return found; +} + +function topLevelNodes(rootOrNodes: IRNode | readonly IRNode[]): readonly IRNode[] { + return isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]; +} + +function visitInterfaceNodes(node: IRNode, found: IRNode[]): void { + if (node.type === 'interface') found.push(node); + for (const child of node.children ?? []) visitInterfaceNodes(child, found); +} + +function isIRNodeArray(value: IRNode | readonly IRNode[]): value is readonly IRNode[] { + return Array.isArray(value); +} + +function shapeField(node: IRNode): ShapeField { + return { + name: stringProp(node.props?.name) ?? '', + type: stringProp(node.props?.type), + optional: trueFlag(node.props?.optional), + }; +} + +function shapeIndexer(node: IRNode): ShapeIndexer { + return { + keyName: stringProp(node.props?.keyName) ?? 'key', + keyType: normalizeType(stringProp(node.props?.keyType)) ?? '', + type: normalizeType(stringProp(node.props?.type)) ?? 
'', + readonly: trueFlag(node.props?.readonly), + }; +} + +function recordEntries(value: KernValue): Record | undefined { + if (value.kind === 'record') return value.entries; + if (value.kind === 'instance') return instanceEntries(value); + return undefined; +} + +function instanceEntries(value: KernInstanceValue): Record { + return value.fields; +} + +function sameShapeField(left: ShapeField, right: ShapeField): boolean { + return left.type === right.type && left.optional === right.optional; +} + +function keyMatchesIndexer(key: string, indexer: ShapeIndexer): boolean { + if (indexer.keyType === 'string') return true; + return indexer.keyType === 'number' && /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(key); +} + +function fieldPath(path: string, field: string): string { + return `${path}.${field}`; +} + +function isPrimitiveType(type: string): type is KernValue['kind'] { + return type === 'string' || type === 'number' || type === 'boolean' || type === 'null' || type === 'undefined'; +} + +function isSimpleIdentifier(type: string): boolean { + return /^[A-Za-z_$][\w$]*$/.test(type); +} + +function normalizeType(value: string | undefined): string | undefined { + const trimmed = value?.trim(); + return trimmed ? 
trimmed : undefined; +} + +function splitExtends(value: unknown): string[] { + const raw = stringProp(value); + if (!raw) return []; + return raw + .split(',') + .map((part) => part.trim()) + .filter(Boolean); +} + +function extendsEdgeParticipatesInCycle(from: string, to: string, registry: ShapeRegistry): boolean { + return reachesInterface(to, from, registry, new Set()); +} + +function reachesInterface(current: string, target: string, registry: ShapeRegistry, seen: Set): boolean { + if (current === target) return true; + if (seen.has(current)) return false; + seen.add(current); + const shape = registry.interfaces.get(current); + if (!shape) return false; + return shape.extendsNames.some((base) => reachesInterface(base, target, registry, seen)); +} + +function dedupeDiagnostics(diagnostics: readonly CoreShapeDiagnostic[]): CoreShapeDiagnostic[] { + const seen = new Set(); + const unique: CoreShapeDiagnostic[] = []; + for (const diagnostic of diagnostics) { + const key = [ + diagnostic.code, + diagnostic.interfaceName, + diagnostic.fieldName, + diagnostic.path, + diagnostic.expected, + diagnostic.actual, + diagnostic.message, + ].join('\0'); + if (seen.has(key)) continue; + seen.add(key); + unique.push(diagnostic); + } + return unique; +} + +function stringProp(value: unknown): string | undefined { + return typeof value === 'string' ? 
value : undefined; +} + +function trueFlag(value: unknown): boolean { + return value === true || value === 'true'; +} + +function kUndefinedValue(): KernValue { + return { kind: 'undefined' }; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 177e4d7d..c200561e 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -205,6 +205,16 @@ export { runCoreRuntime, toHostValue, } from './core-runtime/index.js'; +export type { + CoreShapeDiagnostic, + CoreShapeDiagnosticCode, + CoreShapeFacts, + CoreShapeFieldFact, + CoreShapeIndexerFact, + CoreShapeInterfaceFact, + CoreShapeValidationResult, +} from './core-runtime/shape-validator.js'; +export { assertCoreShape, collectCoreShapeFacts, validateCoreShape } from './core-runtime/shape-validator.js'; export type { CoverageGap } from './coverage-gap.js'; // Coverage gap emitter (v3) export { collectCoverageGaps, readCoverageGaps, writeCoverageGaps } from './coverage-gap.js'; diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index 44892036..dd0a9e6f 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -7,6 +7,7 @@ import { type PortableLogicTarget, } from './codegen/portable-logic-primitives.js'; import { CORE_TYPE_CONTRACTS, type CoreOperationReturns, contractToGraphEdges } from './core-contracts/index.js'; +import { type CoreShapeFacts, collectCoreShapeFacts } from './core-runtime/shape-validator.js'; import type { NodeContract } from './ir/semantics/index.js'; import { snapshotRegistry } from './ir/semantics/index.js'; import { @@ -122,6 +123,7 @@ export interface KernSemanticSubstrate { readonly ragFacts?: RagSemanticFacts; readonly ragValidationSummary?: KernSemanticValidationSummary; readonly ragAnswerReviewFacts?: readonly KernSemanticRagAnswerReviewFact[]; + readonly coreShapeFacts?: CoreShapeFacts; } export interface BuildKernSemanticSubstrateOptions { @@ -131,6 +133,7 @@ export 
interface BuildKernSemanticSubstrateOptions { readonly includeClassValidationSummary?: boolean; readonly documentRag?: IRNode | readonly IRNode[]; readonly includeRagValidationSummary?: boolean; + readonly documentShapes?: IRNode | readonly IRNode[]; } export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { @@ -188,6 +191,7 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp ...(options.documentRag && options.includeRagValidationSummary ? { ragValidationSummary: ragValidationSummary(options.documentRag) } : {}), + ...(options.documentShapes ? { coreShapeFacts: collectCoreShapeFacts(options.documentShapes) } : {}), }; } diff --git a/packages/core/tests/core-shape-runtime.test.ts b/packages/core/tests/core-shape-runtime.test.ts new file mode 100644 index 00000000..1d2db4ac --- /dev/null +++ b/packages/core/tests/core-shape-runtime.test.ts @@ -0,0 +1,291 @@ +import { + assertCoreShape, + collectCoreShapeFacts, + createCoreRuntimeEnv, + evalCoreExpression, + fromHostValue, + type KernValue, + runCoreRuntime, + toHostValue, + validateCoreShape, +} from '../src/index.js'; +import { parse } from '../src/parser.js'; +import type { IRNode } from '../src/types.js'; + +function codes(result: ReturnType): string[] { + return result.diagnostics.map((diagnostic) => diagnostic.code); +} + +function classNodes(root: IRNode): IRNode[] { + return (root.children ?? 
[]).filter((child) => child.type === 'class'); +} + +function cyclicRecord(): KernValue { + const entries = Object.create(null) as Record; + const value = { kind: 'record' as const, entries }; + entries.id = fromHostValue('n1'); + entries.next = value; + return value; +} + +describe('KERN core declared shape validators', () => { + test('validates required, optional, array, nested, and inherited fields', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Profile', + ' field name=age type=number optional=true', + 'interface name=User extends=Entity', + ' field name=name type=string', + ' field name=active type=boolean', + ' field name=tags type="string[]"', + ' field name=profile type=Profile optional=true', + ].join('\n'), + ); + + const result = validateCoreShape( + fromHostValue({ id: 'u1', name: 'Ada', active: true, tags: ['admin'], profile: {} }), + 'User', + root, + ); + + expect(result).toEqual({ passed: true, interfaceName: 'User', diagnostics: [] }); + expect(() => + assertCoreShape(fromHostValue({ id: 'u1', name: 'Ada', active: true, tags: [] }), 'User', root), + ).not.toThrow(); + }); + + test('reports missing required and wrong primitive fields with stable paths', () => { + const root = parse( + ['interface name=User', ' field name=id type=string', ' field name=count type=number'].join('\n'), + ); + + const result = validateCoreShape(fromHostValue({ id: 7 }), 'User', root); + + expect(result.passed).toBe(false); + expect(result.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + code: 'shape-field-type', + path: 'User.id', + expected: 'string', + actual: 'number', + }), + expect.objectContaining({ + code: 'shape-field-missing', + path: 'User.count', + expected: 'number', + }), + ]), + ); + }); + + test('keeps runtime records open unless explicit shape validation is requested', () => { + const runtimeResult = runCoreRuntime({ + type: 'handler', + props: { lang: 'kern' }, 
+ children: [ + { type: 'let', props: { name: 'record', value: '{ id: "u1" }' } }, + { type: 'assign', props: { target: 'record.extra', value: '2' } }, + { type: 'return', props: { value: 'record.extra' } }, + ], + }); + expect(toHostValue(runtimeResult.completion.value)).toBe(2); + + const root = parse('interface name=User\n field name=id type=string'); + const explicit = validateCoreShape(fromHostValue({ id: 'u1', extra: 2 }), 'User', root); + expect(codes(explicit)).toContain('shape-unexpected-field'); + }); + + test('allows explicit extra fields only through compatible indexers', () => { + const root = parse( + ['interface name=Scores', ' field name=id type=string', ' indexer keyType=string type=number'].join('\n'), + ); + + expect(validateCoreShape(fromHostValue({ id: 'u1', math: 10 }), 'Scores', root).passed).toBe(true); + + const result = validateCoreShape(fromHostValue({ id: 'u1', math: 'A' }), 'Scores', root); + expect(result.passed).toBe(false); + expect(result.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + code: 'shape-field-type', + path: 'Scores.math', + expected: 'number', + actual: 'string', + }), + ]), + ); + }); + + test('matches number indexers against numeric record keys', () => { + const root = parse('interface name=NumericMap\n indexer keyType=number type=string'); + + expect( + validateCoreShape(fromHostValue({ '-1': 'left', '1.5': 'half', '2e3': 'large' }), 'NumericMap', root).passed, + ).toBe(true); + expect(codes(validateCoreShape(fromHostValue({ label: 'not numeric' }), 'NumericMap', root))).toContain( + 'shape-unexpected-field', + ); + }); + + test('validates numeric keys against both string and number indexers', () => { + const root = parse( + ['interface name=DualMap', ' indexer keyType=string type=unknown', ' indexer keyType=number type=number'].join( + '\n', + ), + ); + + expect(validateCoreShape(fromHostValue({ label: 'free-form', 1: 7 }), 'DualMap', root).passed).toBe(true); + 
expect(codes(validateCoreShape(fromHostValue({ 1: 'bad' }), 'DualMap', root))).toContain('shape-field-type'); + }); + + test('reports inherited field conflicts and unknown type references', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=User extends=Entity', + ' field name=id type=number', + ' field name=profile type=MissingProfile', + ].join('\n'), + ); + + const result = validateCoreShape(fromHostValue({ id: 1, profile: {} }), 'User', root); + + expect(result.passed).toBe(false); + expect(codes(result)).toEqual(expect.arrayContaining(['shape-field-conflict', 'shape-type-reference-unknown'])); + }); + + test('reports recursive values instead of recursing through self-referential shape fields', () => { + const root = parse( + ['interface name=Node', ' field name=id type=string', ' field name=next type=Node optional=true'].join('\n'), + ); + + const result = validateCoreShape(cyclicRecord(), 'Node', root); + + expect(result.passed).toBe(false); + expect(result.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + code: 'shape-value-cycle', + path: 'Node.next', + interfaceName: 'Node', + }), + ]), + ); + }); + + test('reports unsupported generic and complex type contracts instead of silently passing', () => { + const root = parse( + [ + 'interface name=Box generics=""', + ' field name=value type=T', + 'interface name=MaybeName', + ' field name=name type="string | null"', + ].join('\n'), + ); + + expect(codes(validateCoreShape(fromHostValue({ value: 'x' }), 'Box', root))).toContain('shape-generic-unsupported'); + const maybeNameCodes = codes(validateCoreShape(fromHostValue({ name: 'Ada' }), 'MaybeName', root)); + expect(maybeNameCodes).toContain('shape-type-unsupported'); + expect(maybeNameCodes).not.toContain('shape-generic-unsupported'); + }); + + test('accepts field-backed class instances through declared shape validation', () => { + const root = parse( + [ + 'interface 
name=UserLike', + ' field name=id type=string', + 'class name=User', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(classNodes(root), env); + + expect(validateCoreShape(evalCoreExpression('new User()', env), 'UserLike', root).passed).toBe(true); + }); + + test('exports shape facts for review and guard consumers', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=User extends=Entity', + ' field name=name type=string optional=true', + ' indexer keyType=string type=unknown', + ].join('\n'), + ); + + expect(collectCoreShapeFacts(root)).toEqual( + expect.objectContaining({ + extendsEdges: [{ from: 'User', to: 'Entity', resolved: true }], + validationDiagnostics: [], + interfaces: expect.arrayContaining([ + expect.objectContaining({ + name: 'User', + extends: ['Entity'], + validatorAvailable: true, + fields: expect.arrayContaining([ + expect.objectContaining({ name: 'id', type: 'string', inheritedFrom: 'Entity' }), + expect.objectContaining({ name: 'name', type: 'string', optional: true }), + ]), + }), + ]), + }), + ); + }); + + test('collects nested interface declarations and invalid graph diagnostics into facts', () => { + const root = parse( + [ + 'module name=Domain', + ' interface name=Nested', + ' field name=id type=string', + 'interface name=Left', + ' field name=id type=string', + 'interface name=Right', + ' field name=id type=number', + 'interface name=Joined extends=Left,Right', + 'interface name=Broken extends=Missing', + ' indexer keyType=symbol type=string', + 'interface name=Indexed', + ' indexer keyType=string type=unknown', + 'interface name=IndexedChild extends=Indexed', + 'interface name=Duplicate', + ' field name=id type=string', + ' field name=id type=string', + ].join('\n'), + ); + + const facts = collectCoreShapeFacts(root); + + expect(facts.interfaces.map((shape) => shape.name)).toEqual( + 
expect.arrayContaining(['Nested', 'Left', 'Right', 'Joined', 'Broken', 'Indexed', 'IndexedChild', 'Duplicate']), + ); + expect(facts.extendsEdges).toEqual( + expect.arrayContaining([ + { from: 'Joined', to: 'Left', resolved: true }, + { from: 'Joined', to: 'Right', resolved: true }, + { from: 'Broken', to: 'Missing', resolved: false }, + { from: 'IndexedChild', to: 'Indexed', resolved: true }, + ]), + ); + expect(facts.validationDiagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'shape-field-conflict', interfaceName: 'Joined' }), + expect.objectContaining({ code: 'shape-extends-unknown', interfaceName: 'Broken' }), + expect.objectContaining({ code: 'shape-indexer-key-unsupported', interfaceName: 'Broken' }), + expect.objectContaining({ code: 'shape-field-duplicate', interfaceName: 'Duplicate' }), + ]), + ); + expect(facts.interfaces.find((shape) => shape.name === 'Broken')?.unsupportedReasons).toEqual( + expect.arrayContaining(['shape-extends-unknown', 'shape-indexer-key-unsupported']), + ); + expect(facts.interfaces.find((shape) => shape.name === 'IndexedChild')?.indexers).toEqual([ + expect.objectContaining({ keyType: 'string', type: 'unknown' }), + ]); + }); +}); diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 4850943c..c35929ff 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -66,6 +66,7 @@ describe('KERN semantic substrate', () => { expect(Object.hasOwn(substrate, 'ragFacts')).toBe(false); expect(Object.hasOwn(substrate, 'ragValidationSummary')).toBe(false); expect(Object.hasOwn(substrate, 'ragAnswerReviewFacts')).toBe(false); + expect(Object.hasOwn(substrate, 'coreShapeFacts')).toBe(false); }); test('exports document class member inheritance and override facts when requested', () => { @@ -365,6 +366,45 @@ describe('KERN semantic substrate', () => { ]); }); + test('exports declared interface shape 
facts when requested', () => { + const root = parseRoot( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=User extends=Entity', + ' field name=name type=string optional=true', + ' indexer keyType=string type=unknown', + 'interface name=Box generics=""', + ' field name=value type=T', + ].join('\n'), + ); + + const substrate = buildKernSemanticSubstrate({ documentShapes: root }); + + expect(substrate.coreShapeFacts?.extendsEdges).toEqual([{ from: 'User', to: 'Entity', resolved: true }]); + expect(substrate.coreShapeFacts?.interfaces).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + name: 'User', + extends: ['Entity'], + generic: false, + validatorAvailable: true, + fields: expect.arrayContaining([ + expect.objectContaining({ name: 'id', type: 'string', inheritedFrom: 'Entity' }), + expect.objectContaining({ name: 'name', type: 'string', optional: true }), + ]), + indexers: [expect.objectContaining({ keyType: 'string', type: 'unknown' })], + }), + expect.objectContaining({ + name: 'Box', + generic: true, + validatorAvailable: false, + unsupportedReasons: expect.arrayContaining(['generic-interface', 'unknown-type:T']), + }), + ]), + ); + }); + test('exports portable review primitives as stable query objects', () => { const substrate = buildKernSemanticSubstrate(); const clamp = lookupSemanticPrimitive(substrate, 'number.clamp'); From 2a2059a8ad7e5861b50f42ff770cea3f7680da83 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 02:43:37 +0200 Subject: [PATCH 31/63] feat(core): harden declared shape validators --- .../core/src/core-runtime/shape-validator.ts | 103 +++++++++++++++--- .../core/tests/core-shape-runtime.test.ts | 76 ++++++++++++- 2 files changed, 158 insertions(+), 21 deletions(-) diff --git a/packages/core/src/core-runtime/shape-validator.ts b/packages/core/src/core-runtime/shape-validator.ts index 670df190..2d5618e8 100644 --- a/packages/core/src/core-runtime/shape-validator.ts +++ 
b/packages/core/src/core-runtime/shape-validator.ts @@ -10,6 +10,7 @@ export type CoreShapeDiagnosticCode = | 'shape-field-type' | 'shape-generic-unsupported' | 'shape-indexer-key-unsupported' + | 'shape-interface-duplicate' | 'shape-interface-not-found' | 'shape-object-expected' | 'shape-type-reference-unknown' @@ -114,6 +115,7 @@ export function validateCoreShape( const registry = collectShapeRegistry(rootOrNodes); const diagnostics: CoreShapeDiagnostic[] = []; const shape = registry.interfaces.get(interfaceName); + diagnostics.push(...interfaceDuplicateDiagnostics(registry, interfaceName)); if (!shape) { diagnostics.push({ code: 'shape-interface-not-found', @@ -123,7 +125,8 @@ export function validateCoreShape( return { passed: false, interfaceName, diagnostics }; } diagnostics.push(...validateAgainstInterface(value, shape, registry, interfaceName, [], new WeakMap())); - return { passed: diagnostics.length === 0, interfaceName, diagnostics }; + const uniqueDiagnostics = dedupeDiagnostics(diagnostics); + return { passed: uniqueDiagnostics.length === 0, interfaceName, diagnostics: uniqueDiagnostics }; } export function assertCoreShape( @@ -191,6 +194,13 @@ function collectShapeRegistry(rootOrNodes: IRNode | readonly IRNode[]): ShapeReg if (node.type !== 'interface') continue; const name = stringProp(node.props?.name); if (!name) continue; + if (interfaces.has(name)) { + diagnostics.push({ + code: 'shape-interface-duplicate', + message: `KERN core shape '${name}' is declared more than once; the last declaration is used.`, + interfaceName: name, + }); + } const shape: ShapeInterface = { name, extendsNames: splitExtends(node.props?.extends), @@ -274,16 +284,9 @@ function validateAgainstInterface( } continue; } - diagnostics.push( - ...validateType( - object[field.name] ?? kUndefinedValue(), - field.type, - registry, - fieldPath(path, field.name), - stack, - visited, - ), - ); + const value = object[field.name] ?? 
kUndefinedValue(); + if (field.optional && value.kind === 'undefined') continue; + diagnostics.push(...validateType(value, field.type, registry, fieldPath(path, field.name), stack, visited)); } for (const [key, entry] of Object.entries(object)) { @@ -348,7 +351,10 @@ function validateType( }, ]; } - return validateAgainstInterface(value, nested, registry, path, stack, visited); + return [ + ...interfaceDuplicateDiagnostics(registry, type), + ...validateAgainstInterface(value, nested, registry, path, stack, visited), + ]; } return [ { @@ -413,6 +419,11 @@ function resolveShape(shape: ShapeInterface, registry: ShapeRegistry, stack: rea }); continue; } + diagnostics.push( + ...registry.diagnostics.filter( + (diagnostic) => diagnostic.code === 'shape-interface-duplicate' && diagnostic.interfaceName === baseName, + ), + ); const resolved = resolveShape(base, registry, [...stack, shape.name]); diagnostics.push(...resolved.diagnostics); for (const field of resolved.fields) { @@ -485,6 +496,7 @@ function shapeUnsupportedReasons( ): readonly string[] { const reasons = new Set(); if (shape.generic) reasons.add('generic-interface'); + for (const diagnostic of interfaceDuplicateDiagnostics(registry, shape.name)) reasons.add(diagnostic.code); for (const diagnostic of resolved.diagnostics) reasons.add(diagnostic.code); for (const field of resolved.fields) { for (const issue of unsupportedTypeReasons(field.type, registry)) reasons.add(issue); @@ -496,13 +508,22 @@ function shapeUnsupportedReasons( return [...reasons].sort(); } +function interfaceDuplicateDiagnostics(registry: ShapeRegistry, interfaceName: string): readonly CoreShapeDiagnostic[] { + return registry.diagnostics.filter( + (diagnostic) => diagnostic.interfaceName === interfaceName && diagnostic.code === 'shape-interface-duplicate', + ); +} + function unsupportedTypeReasons(rawType: string | undefined, registry: ShapeRegistry): string[] { const type = normalizeType(rawType); if (!type || type === 'any' || type === 
'unknown' || isPrimitiveType(type)) return []; if (type.endsWith('[]')) return unsupportedTypeReasons(type.slice(0, -2), registry); const arrayMatch = /^Array<(.+)>$/.exec(type); if (arrayMatch) return unsupportedTypeReasons(arrayMatch[1], registry); - if (isSimpleIdentifier(type)) return registry.interfaces.has(type) ? [] : [`unknown-type:${type}`]; + if (isSimpleIdentifier(type)) { + if (!registry.interfaces.has(type)) return [`unknown-type:${type}`]; + return interfaceDuplicateDiagnostics(registry, type).map((diagnostic) => diagnostic.code); + } return [`unsupported-type:${type}`]; } @@ -558,7 +579,7 @@ function sameShapeField(left: ShapeField, right: ShapeField): boolean { function keyMatchesIndexer(key: string, indexer: ShapeIndexer): boolean { if (indexer.keyType === 'string') return true; - return indexer.keyType === 'number' && /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(key); + return indexer.keyType === 'number' && /^-?(?:0|[1-9]\d*)$/.test(key); } function fieldPath(path: string, field: string): string { @@ -581,10 +602,56 @@ function normalizeType(value: string | undefined): string | undefined { function splitExtends(value: unknown): string[] { const raw = stringProp(value); if (!raw) return []; - return raw - .split(',') - .map((part) => part.trim()) - .filter(Boolean); + return splitTopLevelCommas(raw); +} + +function splitTopLevelCommas(value: string): string[] { + const parts: string[] = []; + let current = ''; + let angleDepth = 0; + let parenDepth = 0; + let bracketDepth = 0; + let braceDepth = 0; + let quote: '"' | "'" | '`' | undefined; + let escaped = false; + for (const char of value) { + if (quote) { + current += char; + if (escaped) { + escaped = false; + continue; + } + if (char === '\\') { + escaped = true; + continue; + } + if (char === quote) quote = undefined; + continue; + } + if (char === '"' || char === "'" || char === '`') { + quote = char; + current += char; + continue; + } + if (char === '<') angleDepth += 1; + else if 
(char === '>' && angleDepth > 0) angleDepth -= 1; + else if (char === '(') parenDepth += 1; + else if (char === ')' && parenDepth > 0) parenDepth -= 1; + else if (char === '[') bracketDepth += 1; + else if (char === ']' && bracketDepth > 0) bracketDepth -= 1; + else if (char === '{') braceDepth += 1; + else if (char === '}' && braceDepth > 0) braceDepth -= 1; + if (char === ',' && angleDepth === 0 && parenDepth === 0 && bracketDepth === 0 && braceDepth === 0) { + const part = current.trim(); + if (part) parts.push(part); + current = ''; + continue; + } + current += char; + } + const part = current.trim(); + if (part) parts.push(part); + return parts; } function extendsEdgeParticipatesInCycle(from: string, to: string, registry: ShapeRegistry): boolean { diff --git a/packages/core/tests/core-shape-runtime.test.ts b/packages/core/tests/core-shape-runtime.test.ts index 1d2db4ac..d9db8a40 100644 --- a/packages/core/tests/core-shape-runtime.test.ts +++ b/packages/core/tests/core-shape-runtime.test.ts @@ -123,11 +123,14 @@ describe('KERN core declared shape validators', () => { const root = parse('interface name=NumericMap\n indexer keyType=number type=string'); expect( - validateCoreShape(fromHostValue({ '-1': 'left', '1.5': 'half', '2e3': 'large' }), 'NumericMap', root).passed, + validateCoreShape(fromHostValue({ '-1': 'left', '0': 'zero', '42': 'answer' }), 'NumericMap', root).passed, ).toBe(true); expect(codes(validateCoreShape(fromHostValue({ label: 'not numeric' }), 'NumericMap', root))).toContain( 'shape-unexpected-field', ); + expect(codes(validateCoreShape(fromHostValue({ '1.5': 'half', '2e3': 'large' }), 'NumericMap', root))).toEqual( + expect.arrayContaining(['shape-unexpected-field', 'shape-unexpected-field']), + ); }); test('validates numeric keys against both string and number indexers', () => { @@ -158,6 +161,17 @@ describe('KERN core declared shape validators', () => { expect(codes(result)).toEqual(expect.arrayContaining(['shape-field-conflict', 
'shape-type-reference-unknown'])); }); + test('allows explicit undefined for optional fields', () => { + const root = parse( + ['interface name=User', ' field name=id type=string', ' field name=nickname type=string optional=true'].join( + '\n', + ), + ); + + expect(validateCoreShape(fromHostValue({ id: 'u1', nickname: undefined }), 'User', root).passed).toBe(true); + expect(codes(validateCoreShape(fromHostValue({ id: undefined }), 'User', root))).toContain('shape-field-type'); + }); + test('reports recursive values instead of recursing through self-referential shape fields', () => { const root = parse( ['interface name=Node', ' field name=id type=string', ' field name=next type=Node optional=true'].join('\n'), @@ -187,7 +201,9 @@ describe('KERN core declared shape validators', () => { ].join('\n'), ); - expect(codes(validateCoreShape(fromHostValue({ value: 'x' }), 'Box', root))).toContain('shape-generic-unsupported'); + const boxCodes = codes(validateCoreShape(fromHostValue({ value: 'x' }), 'Box', root)); + expect(boxCodes).toContain('shape-generic-unsupported'); + expect(boxCodes.filter((code) => code === 'shape-generic-unsupported')).toHaveLength(1); const maybeNameCodes = codes(validateCoreShape(fromHostValue({ name: 'Ada' }), 'MaybeName', root)); expect(maybeNameCodes).toContain('shape-type-unsupported'); expect(maybeNameCodes).not.toContain('shape-generic-unsupported'); @@ -249,6 +265,8 @@ describe('KERN core declared shape validators', () => { 'interface name=Right', ' field name=id type=number', 'interface name=Joined extends=Left,Right', + 'interface name=GenericChild extends="Pair,Left"', + 'interface name=ObjectGenericChild extends="Wrapper<{ a: string, b: number }>,Left"', 'interface name=Broken extends=Missing', ' indexer keyType=symbol type=string', 'interface name=Indexed', @@ -257,28 +275,57 @@ describe('KERN core declared shape validators', () => { 'interface name=Duplicate', ' field name=id type=string', ' field name=id type=string', + 'interface 
name=Shadowed', + ' field name=id type=string', + 'interface name=Shadowed', + ' field name=id type=number', + 'interface name=ShadowedChild extends=Shadowed', + 'interface name=UsesShadowed', + ' field name=child type=Shadowed', ].join('\n'), ); const facts = collectCoreShapeFacts(root); expect(facts.interfaces.map((shape) => shape.name)).toEqual( - expect.arrayContaining(['Nested', 'Left', 'Right', 'Joined', 'Broken', 'Indexed', 'IndexedChild', 'Duplicate']), + expect.arrayContaining([ + 'Nested', + 'Left', + 'Right', + 'Joined', + 'GenericChild', + 'ObjectGenericChild', + 'Broken', + 'Indexed', + 'IndexedChild', + 'Duplicate', + 'Shadowed', + 'ShadowedChild', + 'UsesShadowed', + ]), ); expect(facts.extendsEdges).toEqual( expect.arrayContaining([ { from: 'Joined', to: 'Left', resolved: true }, { from: 'Joined', to: 'Right', resolved: true }, + { from: 'GenericChild', to: 'Pair', resolved: false }, + { from: 'GenericChild', to: 'Left', resolved: true }, + { from: 'ObjectGenericChild', to: 'Wrapper<{ a: string, b: number }>', resolved: false }, + { from: 'ObjectGenericChild', to: 'Left', resolved: true }, { from: 'Broken', to: 'Missing', resolved: false }, { from: 'IndexedChild', to: 'Indexed', resolved: true }, + { from: 'ShadowedChild', to: 'Shadowed', resolved: true }, ]), ); expect(facts.validationDiagnostics).toEqual( expect.arrayContaining([ expect.objectContaining({ code: 'shape-field-conflict', interfaceName: 'Joined' }), expect.objectContaining({ code: 'shape-extends-unknown', interfaceName: 'Broken' }), + expect.objectContaining({ code: 'shape-extends-unknown', interfaceName: 'GenericChild' }), + expect.objectContaining({ code: 'shape-extends-unknown', interfaceName: 'ObjectGenericChild' }), expect.objectContaining({ code: 'shape-indexer-key-unsupported', interfaceName: 'Broken' }), expect.objectContaining({ code: 'shape-field-duplicate', interfaceName: 'Duplicate' }), + expect.objectContaining({ code: 'shape-interface-duplicate', interfaceName: 
'Shadowed' }), ]), ); expect(facts.interfaces.find((shape) => shape.name === 'Broken')?.unsupportedReasons).toEqual( @@ -287,5 +334,28 @@ describe('KERN core declared shape validators', () => { expect(facts.interfaces.find((shape) => shape.name === 'IndexedChild')?.indexers).toEqual([ expect.objectContaining({ keyType: 'string', type: 'unknown' }), ]); + expect(facts.interfaces.find((shape) => shape.name === 'Shadowed')).toEqual( + expect.objectContaining({ + validatorAvailable: false, + unsupportedReasons: expect.arrayContaining(['shape-interface-duplicate']), + }), + ); + expect(facts.interfaces.find((shape) => shape.name === 'UsesShadowed')).toEqual( + expect.objectContaining({ + validatorAvailable: false, + unsupportedReasons: expect.arrayContaining(['shape-interface-duplicate']), + }), + ); + const shadowed = validateCoreShape(fromHostValue({ id: 1 }), 'Shadowed', root); + expect(shadowed.passed).toBe(false); + expect(shadowed.diagnostics).toEqual( + expect.arrayContaining([expect.objectContaining({ code: 'shape-interface-duplicate' })]), + ); + expect(codes(validateCoreShape(fromHostValue({ id: 1 }), 'ShadowedChild', root))).toContain( + 'shape-interface-duplicate', + ); + expect(codes(validateCoreShape(fromHostValue({ child: { id: 1 } }), 'UsesShadowed', root))).toContain( + 'shape-interface-duplicate', + ); }); }); From 36db5f0e6a2b62bcee405dddcae5d40b679a0cee Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 02:59:26 +0200 Subject: [PATCH 32/63] feat(core): expose effective class member facts --- packages/core/src/semantic-validator.ts | 81 +++++++++++++++-- .../core/tests/semantic-substrate.test.ts | 88 ++++++++++++++++++- 2 files changed, 163 insertions(+), 6 deletions(-) diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 78b09946..809e5311 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -49,6 +49,7 @@ export interface ClassSemanticMemberFact 
{ readonly arity: number; readonly readable: boolean; readonly writable: boolean; + readonly inheritedFrom?: string; readonly loc?: ClassSemanticLocation; } @@ -58,6 +59,7 @@ export interface ClassSemanticClassFact { readonly hasConstructor: boolean; readonly constructorCount: number; readonly members: readonly ClassSemanticMemberFact[]; + readonly effectiveMembers: readonly ClassSemanticMemberFact[]; readonly loc?: ClassSemanticLocation; } @@ -2755,7 +2757,7 @@ export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): Cla } return { - classes: classes.map(classSemanticFact), + classes: classes.map((info) => classSemanticFact(info, classByName)), inheritanceEdges, overrides: collectClassOverrideFacts(classes, classByName), unresolvedBases: [...unresolvedBases].sort(), @@ -2763,20 +2765,25 @@ export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): Cla }; } -function classSemanticFact(info: ClassInfo): ClassSemanticClassFact { +function classSemanticFact(info: ClassInfo, classByName: ReadonlyMap): ClassSemanticClassFact { return { name: info.name, ...(info.baseName ? { baseName: info.baseName } : {}), hasConstructor: info.constructors.length > 0, constructorCount: info.constructors.length, - members: info.members.map(classMemberSemanticFact), + members: info.members.map((member) => classMemberSemanticFact(member)), + effectiveMembers: effectiveClassMemberFacts(info, classByName), ...(info.node.loc ? 
{ loc: semanticLocation(info.node) } : {}), }; } -function classMemberSemanticFact(member: ClassMemberInfo): ClassSemanticMemberFact { +function classMemberSemanticFact( + member: ClassMemberInfo, + className = member.owner, + inheritedFrom?: string, +): ClassSemanticMemberFact { return { - className: member.owner, + className, owner: member.owner, name: member.name, kind: member.kind, @@ -2784,10 +2791,74 @@ function classMemberSemanticFact(member: ClassMemberInfo): ClassSemanticMemberFa arity: member.arity, readable: member.kind === 'field' || member.kind === 'getter' || member.kind === 'method', writable: member.kind === 'field' || member.kind === 'setter', + ...(inheritedFrom ? { inheritedFrom } : {}), ...(member.node.loc ? { loc: semanticLocation(member.node) } : {}), }; } +function effectiveClassMemberFacts( + info: ClassInfo, + classByName: ReadonlyMap, + seen: ReadonlySet = new Set(), +): ClassSemanticMemberFact[] { + const effective = new Map(); + if (seen.has(info.name) || classInfoParticipatesInCycle(info, classByName)) { + return info.members.map((member) => classMemberSemanticFact(member, info.name)); + } + const nextSeen = new Set(seen); + nextSeen.add(info.name); + const base = info.baseName ? classByName.get(info.baseName) : undefined; + if (base) { + for (const member of effectiveClassMemberFacts(base, classByName, nextSeen)) { + effective.set(classMemberEffectiveKey(member), { + ...member, + className: info.name, + inheritedFrom: member.inheritedFrom ?? member.owner, + }); + } + } + const ownGroups = new Map(); + for (const member of info.members) { + const group = ownGroups.get(classMemberShapeKey(member)) ?? []; + group.push(member); + ownGroups.set(classMemberShapeKey(member), group); + } + for (const [shapeKey, members] of ownGroups) { + const first = members[0]; + if (!first) continue; + for (const key of [...effective.keys()]) { + if (classMemberShapeKey(effective.get(key) ?? 
first) === shapeKey) effective.delete(key); + } + for (const member of members) { + effective.set(classMemberEffectiveKey(member), classMemberSemanticFact(member, info.name)); + } + } + return [...effective.values()]; +} + +function classMemberShapeKey(member: { readonly static: boolean; readonly name: string }): string { + return `${member.static ? 'static' : 'instance'}:${member.name}`; +} + +function classMemberEffectiveKey(member: { + readonly static: boolean; + readonly name: string; + readonly kind: ClassSemanticMemberKind | ClassMemberKind; +}): string { + return `${classMemberShapeKey(member)}:${member.kind}`; +} + +function classInfoParticipatesInCycle(info: ClassInfo, classByName: ReadonlyMap): boolean { + const seen = new Set(); + let current: ClassInfo | undefined = info; + while (current) { + if (seen.has(current.name)) return true; + seen.add(current.name); + current = current.baseName ? classByName.get(current.baseName) : undefined; + } + return false; +} + function collectClassOverrideFacts( classes: readonly ClassInfo[], classByName: ReadonlyMap, diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index c35929ff..f7f0f290 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -74,9 +74,13 @@ describe('KERN semantic substrate', () => { [ 'class name=Base', ' field name=id type=string', + ' field name=version type=number static=true', ' method name=load returns=string', ' param name=id type=string', ' getter name=label returns=string', + ' getter name=status returns=string', + ' setter name=status', + ' param name=value type=string', 'class name=Derived extends=Base', ' constructor', ' handler lang=kern', @@ -128,6 +132,79 @@ describe('KERN semantic substrate', () => { }), ]), ); + expect(derived?.effectiveMembers).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + inheritedFrom: 
'Base', + name: 'id', + kind: 'field', + static: false, + readable: true, + writable: true, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + inheritedFrom: 'Base', + name: 'status', + kind: 'getter', + static: false, + readable: true, + writable: false, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + inheritedFrom: 'Base', + name: 'status', + kind: 'setter', + static: false, + readable: false, + writable: true, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + inheritedFrom: 'Base', + name: 'version', + kind: 'field', + static: true, + readable: true, + writable: true, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Derived', + name: 'load', + kind: 'method', + arity: 2, + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Derived', + name: 'label', + kind: 'setter', + readable: false, + writable: true, + }), + ]), + ); + expect(derived?.effectiveMembers).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + name: 'load', + }), + expect.objectContaining({ + className: 'Derived', + owner: 'Base', + name: 'label', + }), + ]), + ); expect(substrate.classFacts?.overrides).toEqual( expect.arrayContaining([ @@ -156,7 +233,13 @@ describe('KERN semantic substrate', () => { test('reports unresolved bases and inheritance cycles as class facts', () => { const facts = collectClassSemanticFacts( parseRoot( - ['class name=UsesExternal extends=ExternalBase', 'class name=A extends=B', 'class name=B extends=A'].join('\n'), + [ + 'class name=UsesExternal extends=ExternalBase', + 'class name=A extends=B', + ' field name=onlyA type=string', + 'class name=B extends=A', + ' field name=onlyB type=string', + ].join('\n'), ), ); @@ -168,6 +251,9 @@ describe('KERN semantic substrate', () => { ]), ); expect(facts.cycles).toEqual([['A', 'B', 'A']]); + expect(facts.classes.find((candidate) => candidate.name === 
'A')?.effectiveMembers).toEqual([ + expect.objectContaining({ className: 'A', owner: 'A', name: 'onlyA' }), + ]); }); test('resolves imported and cross-root class bases consistently with validation', () => { From fc06188e01b93189202cc0a3db8fe19be6e68b3b Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 03:37:33 +0200 Subject: [PATCH 33/63] feat(core): add class implements conformance facts --- .../kernlang-typescript-surface.test.kern | 1 + packages/core/src/index.ts | 3 + packages/core/src/semantic-validator.ts | 425 ++++++++++++++++++ packages/core/tests/class-semantics.test.ts | 110 +++++ .../core/tests/semantic-substrate.test.ts | 72 +++ 5 files changed, 611 insertions(+) diff --git a/packages/core/native-test/kernlang-typescript-surface.test.kern b/packages/core/native-test/kernlang-typescript-surface.test.kern index 2f8cf122..7069878d 100644 --- a/packages/core/native-test/kernlang-typescript-surface.test.kern +++ b/packages/core/native-test/kernlang-typescript-surface.test.kern @@ -66,4 +66,5 @@ test name="KERNlang TypeScript surface parity" target="./kernlang-typescript-sur expect decompile contains="indexer keyType=string type=number" expect decompile contains="fn name=add" expect decompile contains="overload params=\"a:number,b:number\" returns=number" + expect decompile contains="field name=role type=string" expect roundtrip=true diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index c200561e..6b657d35 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -513,12 +513,15 @@ export { export type { ClassSemanticClassFact, ClassSemanticFacts, + ClassSemanticImplementsEdge, ClassSemanticInheritanceEdge, ClassSemanticLocation, ClassSemanticMemberFact, ClassSemanticMemberKind, ClassSemanticOverrideFact, ClassSemanticOverrideStatus, + ClassSemanticProtocolConformanceFact, + ClassSemanticProtocolStatus, RagSemanticAnswerContractFact, RagSemanticAnswerSpanFact, RagSemanticChunkingFact, diff --git 
a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 809e5311..81f3ba6e 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -15,6 +15,11 @@ * symbols that the resolver proved exist. */ +import { + type CoreShapeDiagnostic, + type CoreShapeInterfaceFact, + collectCoreShapeFacts, +} from './core-runtime/shape-validator.js'; import { collectExternalImportSymbols, type ExternalImportSymbolTable } from './external-symbols.js'; import { importRegistryOf } from './import-metadata.js'; import { parseExpression } from './parser-expression.js'; @@ -46,6 +51,8 @@ export interface ClassSemanticMemberFact { readonly name: string; readonly kind: ClassSemanticMemberKind; readonly static: boolean; + readonly type?: string; + readonly returns?: string; readonly arity: number; readonly readable: boolean; readonly writable: boolean; @@ -71,6 +78,14 @@ export interface ClassSemanticInheritanceEdge { readonly builtin: boolean; } +export interface ClassSemanticImplementsEdge { + readonly from: string; + readonly to: string; + readonly relation: 'implements'; + readonly resolved: boolean; + readonly external: boolean; +} + export interface ClassSemanticOverrideFact { readonly className: string; readonly memberName: string; @@ -84,11 +99,33 @@ export interface ClassSemanticOverrideFact { readonly loc?: ClassSemanticLocation; } +export type ClassSemanticProtocolStatus = + | 'satisfied' + | 'missing-members' + | 'external' + | 'unknown-interface' + | 'invalid-interface' + | 'unsupported-protocol'; + +export interface ClassSemanticProtocolConformanceFact { + readonly className: string; + readonly interfaceName: string; + readonly status: ClassSemanticProtocolStatus; + readonly missingMembers: readonly string[]; + readonly satisfiedMembers: readonly string[]; + readonly diagnostics?: readonly string[]; + readonly unsupportedReasons?: readonly string[]; + readonly loc?: ClassSemanticLocation; +} + export 
interface ClassSemanticFacts { readonly classes: readonly ClassSemanticClassFact[]; readonly inheritanceEdges: readonly ClassSemanticInheritanceEdge[]; + readonly implementsEdges: readonly ClassSemanticImplementsEdge[]; readonly overrides: readonly ClassSemanticOverrideFact[]; readonly unresolvedBases: readonly string[]; + readonly unresolvedImplements: readonly string[]; + readonly protocolConformance: readonly ClassSemanticProtocolConformanceFact[]; readonly cycles: readonly (readonly string[])[]; } @@ -2625,6 +2662,8 @@ interface ClassInfo { rootIndex: number; name: string; baseName?: string; + implementsNames: string[]; + implementsMalformed: boolean; members: ClassMemberInfo[]; constructors: IRNode[]; } @@ -2635,9 +2674,38 @@ interface ClassMemberInfo { name: string; kind: ClassMemberKind; static: boolean; + type?: string; + returns?: string; arity: number; } +interface InterfaceInfo { + node: IRNode; + rootIndex: number; + name: string; + extendsNames: string[]; + fields: InterfaceFieldInfo[]; +} + +interface InterfaceFieldInfo { + name: string; + type?: string; + optional: boolean; +} + +interface ClassProtocolShapeContext { + shapeByName: ReadonlyMap; + diagnosticsByName: ReadonlyMap; +} + +interface ClassInterfaceConformanceResult { + status: Exclude; + missingMembers: string[]; + satisfiedMembers: string[]; + diagnostics: string[]; + unsupportedReasons: string[]; +} + const BUILTIN_CLASS_BASES = new Set(['Error']); export const RAG_MCP_RETRIEVE_OUTPUT_SHAPE = 'RetrievedChunk[]'; export const RAG_MCP_RETRIEVE_OUTPUT_ITEM_SHAPE = 'RetrievedChunk'; @@ -2667,6 +2735,12 @@ function validateClassGraphRoots(roots: readonly IRNode[], violations: SemanticV const classes = classesByRoot.flat(); if (classes.length === 0) return; + const interfaces = roots.flatMap((root, rootIndex) => collectInterfaceInfos(root, rootIndex)); + const interfaceByName = new Map(); + for (const info of interfaces) { + if (!interfaceByName.has(info.name)) interfaceByName.set(info.name, 
info); + } + const protocolShapeContext = collectClassProtocolShapeContext(roots); const classByName = new Map(); const declaredClassNames = new Set(); for (const info of classes) { @@ -2681,9 +2755,18 @@ function validateClassGraphRoots(roots: readonly IRNode[], violations: SemanticV for (const className of declaredClassNames) visibleNames.add(className); return visibleNames; }); + const visibleProtocolNamesByRoot = roots.map((root) => collectVisibleProtocolNames(root)); for (const info of classes) { validateClassBaseReference(info, visibleNamesByRoot[info.rootIndex] ?? declaredClassNames, violations); + validateClassImplements( + info, + interfaceByName, + visibleProtocolNamesByRoot[info.rootIndex] ?? new Set(), + protocolShapeContext, + classByName, + violations, + ); validateClassConstructors(info, violations); validateClassMemberConflicts(info, violations); validateClassSuperUsage(info, violations); @@ -2705,6 +2788,8 @@ function collectClassInfos(root: IRNode, rootIndex = 0): ClassInfo[] { rootIndex, name, baseName: classBaseName(node.props?.extends), + implementsNames: classReferenceNames(node.props?.implements, 'class implements='), + implementsMalformed: classReferenceListMalformed(node.props?.implements, 'class implements='), members: collectClassMembers(node, name), constructors: (node.children ?? 
[]).filter((child) => child.type === 'constructor'), }); @@ -2712,6 +2797,52 @@ function collectClassInfos(root: IRNode, rootIndex = 0): ClassInfo[] { return out; } +function collectInterfaceInfos(root: IRNode, rootIndex = 0): InterfaceInfo[] { + const out: InterfaceInfo[] = []; + walkSemanticTree(root, (node) => { + if (node.type !== 'interface') return; + const name = stringProp(node, 'name'); + if (!name) return; + out.push({ + node, + rootIndex, + name, + extendsNames: classReferenceNames(node.props?.extends, 'interface extends='), + fields: collectInterfaceFields(node), + }); + }); + return out; +} + +function collectInterfaceFields(node: IRNode): InterfaceFieldInfo[] { + const fields: InterfaceFieldInfo[] = []; + for (const child of node.children ?? []) { + if (child.type !== 'field') continue; + const name = stringProp(child, 'name'); + if (!name) continue; + fields.push({ + name, + ...(stringProp(child, 'type') ? { type: stringProp(child, 'type') } : {}), + optional: isTrueFlag(child.props?.optional), + }); + } + return fields; +} + +function collectClassProtocolShapeContext(roots: readonly IRNode[]): ClassProtocolShapeContext { + const facts = collectCoreShapeFacts(roots); + const shapeByName = new Map(); + for (const shape of facts.interfaces) shapeByName.set(shape.name, shape); + const diagnosticsByName = new Map(); + for (const diagnostic of facts.validationDiagnostics) { + if (!diagnostic.interfaceName) continue; + const diagnostics = diagnosticsByName.get(diagnostic.interfaceName) ?? []; + diagnostics.push(diagnostic); + diagnosticsByName.set(diagnostic.interfaceName, diagnostics); + } + return { shapeByName, diagnosticsByName }; +} + function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { const members: ClassMemberInfo[] = []; for (const child of node.children ?? 
[]) { @@ -2724,6 +2855,8 @@ function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { name, kind: child.type, static: isTrueFlag(child.props?.static), + ...(stringProp(child, 'type') ? { type: stringProp(child, 'type') } : {}), + ...(stringProp(child, 'returns') ? { returns: stringProp(child, 'returns') } : {}), arity: memberArity(child), }); } @@ -2733,11 +2866,18 @@ function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): ClassSemanticFacts { const roots = Array.isArray(root) ? root : [root]; const classes = roots.flatMap((candidate, rootIndex) => collectClassInfos(candidate, rootIndex)); + const interfaces = roots.flatMap((candidate, rootIndex) => collectInterfaceInfos(candidate, rootIndex)); const classByName = new Map(); for (const info of classes) { if (!classByName.has(info.name)) classByName.set(info.name, info); } + const interfaceByName = new Map(); + for (const info of interfaces) { + if (!interfaceByName.has(info.name)) interfaceByName.set(info.name, info); + } + const protocolShapeContext = collectClassProtocolShapeContext(roots); const visibleNamesByRoot = roots.map((candidate) => collectVisibleClassBaseNames(candidate)); + const visibleProtocolNamesByRoot = roots.map((candidate) => collectVisibleProtocolNames(candidate)); const inheritanceEdges: ClassSemanticInheritanceEdge[] = []; const unresolvedBases = new Set(); @@ -2756,11 +2896,39 @@ export function collectClassSemanticFacts(root: IRNode | readonly IRNode[]): Cla if (!resolved) unresolvedBases.add(info.baseName); } + const implementsEdges: ClassSemanticImplementsEdge[] = []; + const unresolvedImplements = new Set(); + for (const info of classes) { + for (const interfaceName of info.implementsNames) { + const external = + !interfaceByName.has(interfaceName) && + (visibleProtocolNamesByRoot[info.rootIndex] ?? 
new Set()).has(interfaceName); + const resolved = interfaceByName.has(interfaceName) || external; + implementsEdges.push({ + from: info.name, + to: interfaceName, + relation: 'implements', + resolved, + external, + }); + if (!resolved) unresolvedImplements.add(interfaceName); + } + } + return { classes: classes.map((info) => classSemanticFact(info, classByName)), inheritanceEdges, + implementsEdges, overrides: collectClassOverrideFacts(classes, classByName), unresolvedBases: [...unresolvedBases].sort(), + unresolvedImplements: [...unresolvedImplements].sort(), + protocolConformance: collectClassProtocolConformanceFacts( + classes, + interfaceByName, + visibleProtocolNamesByRoot, + protocolShapeContext, + classByName, + ), cycles: collectClassCycleFacts(classes, classByName), }; } @@ -2788,6 +2956,8 @@ function classMemberSemanticFact( name: member.name, kind: member.kind, static: member.static, + ...(member.type ? { type: member.type } : {}), + ...(member.returns ? { returns: member.returns } : {}), arity: member.arity, readable: member.kind === 'field' || member.kind === 'getter' || member.kind === 'method', writable: member.kind === 'field' || member.kind === 'setter', @@ -2922,6 +3092,111 @@ function collectClassCycleFacts( return cycles; } +function collectClassProtocolConformanceFacts( + classes: readonly ClassInfo[], + interfaceByName: ReadonlyMap, + visibleProtocolNamesByRoot: readonly ReadonlySet[], + protocolShapeContext: ClassProtocolShapeContext, + classByName: ReadonlyMap, +): ClassSemanticProtocolConformanceFact[] { + const facts: ClassSemanticProtocolConformanceFact[] = []; + for (const info of classes) { + for (const interfaceName of info.implementsNames) { + const protocol = interfaceByName.get(interfaceName); + if (!protocol) { + const visible = (visibleProtocolNamesByRoot[info.rootIndex] ?? new Set()).has(interfaceName); + facts.push({ + className: info.name, + interfaceName, + status: visible ? 
'external' : 'unknown-interface', + missingMembers: [], + satisfiedMembers: [], + ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), + }); + continue; + } + const result = classInterfaceConformance(info, protocol, protocolShapeContext, classByName); + facts.push({ + className: info.name, + interfaceName, + status: result.status, + missingMembers: result.missingMembers, + satisfiedMembers: result.satisfiedMembers, + ...(result.diagnostics.length > 0 ? { diagnostics: result.diagnostics } : {}), + ...(result.unsupportedReasons.length > 0 ? { unsupportedReasons: result.unsupportedReasons } : {}), + ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), + }); + } + } + return facts; +} + +function classInterfaceConformance( + info: ClassInfo, + protocol: InterfaceInfo, + protocolShapeContext: ClassProtocolShapeContext, + classByName: ReadonlyMap, +): ClassInterfaceConformanceResult { + const shape = protocolShapeContext.shapeByName.get(protocol.name); + const diagnostics = (protocolShapeContext.diagnosticsByName.get(protocol.name) ?? []).map( + (diagnostic) => diagnostic.code, + ); + if (diagnostics.length > 0) { + return { + status: 'invalid-interface', + missingMembers: [], + satisfiedMembers: [], + diagnostics: sortedUnique(diagnostics), + unsupportedReasons: [], + }; + } + if (shape && (shape.indexers.length > 0 || !shape.validatorAvailable)) { + return { + status: 'unsupported-protocol', + missingMembers: [], + satisfiedMembers: [], + diagnostics: [], + unsupportedReasons: sortedUnique([ + ...shape.unsupportedReasons, + ...(shape.indexers.length > 0 ? ['indexer'] : []), + ]), + }; + } + const effectiveMembers = effectiveClassMemberFacts(info, classByName); + const fields = shape?.fields ?? 
protocol.fields; + const requiredFields = fields.filter((field) => !field.optional); + const missingMembers: string[] = []; + const satisfiedMembers: string[] = []; + for (const field of requiredFields) { + if (classHasReadableInstanceMember(effectiveMembers, field)) { + satisfiedMembers.push(field.name); + } else { + missingMembers.push(field.name); + } + } + const missing = sortedUnique(missingMembers); + const satisfied = sortedUnique(satisfiedMembers); + return { + status: missing.length > 0 ? 'missing-members' : 'satisfied', + missingMembers: missing, + satisfiedMembers: satisfied, + diagnostics: [], + unsupportedReasons: [], + }; +} + +function classHasReadableInstanceMember( + members: readonly ClassSemanticMemberFact[], + field: { readonly name: string; readonly type?: string }, +): boolean { + return members.some((member) => { + if (member.name !== field.name || member.static) return false; + if (member.kind !== 'field' && member.kind !== 'getter') return false; + const actualType = member.kind === 'getter' ? member.returns : member.type; + return !field.type || actualType === field.type; + }); +} + function semanticLocation(node: IRNode): ClassSemanticLocation | undefined { return node.loc ? { line: node.loc.line, col: node.loc.col } : undefined; } @@ -2946,6 +3221,69 @@ function validateClassBaseReference( }); } +function validateClassImplements( + info: ClassInfo, + interfaceByName: ReadonlyMap, + visibleProtocolNames: ReadonlySet, + protocolShapeContext: ClassProtocolShapeContext, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (info.implementsMalformed) { + violations.push({ + rule: 'class-implements-invalid-reference-list', + nodeType: 'class', + message: `Class '${info.name}' has an invalid implements= reference list. 
Use a comma-separated list of interface names.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + } + for (const interfaceName of info.implementsNames) { + const protocol = interfaceByName.get(interfaceName); + if (!protocol) { + if (!visibleProtocolNames.has(interfaceName)) { + violations.push({ + rule: 'class-implements-unknown', + nodeType: 'class', + message: `Class '${info.name}' implements unknown interface '${interfaceName}'. Declare or import the interface before implementing it.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + } + continue; + } + const conformance = classInterfaceConformance(info, protocol, protocolShapeContext, classByName); + if (conformance.status === 'invalid-interface') { + violations.push({ + rule: 'class-implements-invalid-interface', + nodeType: 'class', + message: `Class '${info.name}' implements invalid interface '${interfaceName}' (${conformance.diagnostics.join(', ')}). Fix the interface shape before relying on protocol conformance.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + continue; + } + if (conformance.status === 'unsupported-protocol') { + violations.push({ + rule: 'class-implements-unsupported-protocol', + nodeType: 'class', + message: `Class '${info.name}' implements interface '${interfaceName}' whose shape is not class-satisfiable in protocol v1 (${conformance.unsupportedReasons.join(', ')}).`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + continue; + } + if (conformance.missingMembers.length === 0) continue; + violations.push({ + rule: 'class-implements-missing-member', + nodeType: 'class', + message: `Class '${info.name}' does not satisfy interface '${interfaceName}'. 
Missing readable instance member(s): ${conformance.missingMembers.join(', ')}.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + } +} + function validateClassConstructors(info: ClassInfo, violations: SemanticViolation[]): void { if (info.constructors.length <= 1) return; for (const extra of info.constructors.slice(1)) { @@ -3514,6 +3852,27 @@ function collectVisibleClassBaseNames(root: IRNode): Set { return names; } +function collectVisibleProtocolNames(root: IRNode): Set { + const names = new Set(); + walkSemanticTree(root, (node) => { + const name = stringProp(node, 'name'); + if (name && node.type === 'interface') names.add(name); + if (node.type === 'import') { + for (const binding of importLocalBindings(node)) names.add(binding.name); + } + if (node.type === 'use') { + for (const child of node.children ?? []) { + if (child.type !== 'from') continue; + const kind = stringProp(child, 'kind'); + if (kind && kind !== 'interface' && kind !== 'type') continue; + const localName = stringProp(child, 'as') ?? 
stringProp(child, 'name'); + if (localName) names.add(localName); + } + } + }); + return names; +} + function isVisibleClassBaseDeclaration(nodeType: string): boolean { return nodeType === 'class' || nodeType === 'error'; } @@ -3635,6 +3994,72 @@ function classBaseName(value: unknown): string | undefined { return match?.[1]; } +function classReferenceNames(value: unknown, propName: string): string[] { + if (typeof value !== 'string' || !value.trim()) return []; + let parts: string[]; + try { + parts = splitClassReferenceList(value, propName); + } catch { + parts = []; + } + const names = new Set(); + for (const part of parts) { + const name = classBaseName(part); + if (name) names.add(name); + } + return [...names]; +} + +function classReferenceListMalformed(value: unknown, propName: string): boolean { + if (typeof value !== 'string' || !value.trim()) return false; + try { + splitClassReferenceList(value, propName); + return false; + } catch { + return true; + } +} + +function splitClassReferenceList(raw: string, propName: string): string[] { + const out: string[] = []; + let current = ''; + let depth = 0; + let angleDepth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < raw.length; index++) { + const ch = raw[index]; + if (quote !== null) { + current += ch; + if (ch === '\\' && index + 1 < raw.length) current += raw[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + current += ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth++; + else if (ch === ')' || ch === ']' || ch === '}') depth--; + else if (ch === '<') angleDepth++; + else if (ch === '>' && angleDepth > 0) angleDepth--; + if (depth < 0 || angleDepth < 0) throw new Error(`${propName} has unbalanced delimiters.`); + if (ch === ',' && depth === 0 && angleDepth === 0) { + const part = current.trim(); + if (part.length === 0) throw new Error(`${propName} contains an empty 
reference.`); + out.push(part); + current = ''; + continue; + } + current += ch; + } + if (quote !== null || depth !== 0 || angleDepth !== 0) throw new Error(`${propName} has unbalanced delimiters.`); + const tail = current.trim(); + if (tail.length === 0 && raw.trim().endsWith(',')) throw new Error(`${propName} contains an empty reference.`); + if (tail.length > 0) out.push(tail); + return out; +} + function stringProp(node: IRNode, prop: string): string | undefined; function stringProp(props: IRNode['props'] | undefined, prop: string): string | undefined; function stringProp(nodeOrProps: IRNode | IRNode['props'] | undefined, prop: string): string | undefined { diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 50cb7d0b..31e5f544 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -54,6 +54,116 @@ describe('semantic-validator — class object model', () => { expect(rulesFor(source)).not.toContain('class-extends-unknown'); }); + test('accepts class implements when effective readable instance members satisfy local interfaces', () => { + const source = [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Named extends=Entity', + ' field name=name type=string', + ' field name=nickname type=string optional=true', + 'class name=Base', + ' field name=id type=string', + 'class name=User extends=Base implements=Named', + ' getter name=name returns=string', + ' handler lang=kern', + ' return value="this.id"', + ].join('\n'); + + const rules = rulesFor(source); + expect(rules).not.toContain('class-implements-unknown'); + expect(rules).not.toContain('class-implements-missing-member'); + }); + + test('reports unknown class implements targets unless imported', () => { + const localRules = rulesFor('class name=User implements=MissingProtocol'); + expect(localRules).toContain('class-implements-unknown'); + + const importedRules = rulesFor( + 
['import from="./protocols" names=ExternalProtocol', 'class name=User implements=ExternalProtocol'].join('\n'), + ); + expect(importedRules).not.toContain('class-implements-unknown'); + }); + + test('reports malformed class implements reference lists', () => { + const rules = rulesFor('class name=User implements="Known,"'); + + expect(rules).toContain('class-implements-invalid-reference-list'); + }); + + test('reports missing required readable instance members for class implements', () => { + const violations = violationsFor( + [ + 'interface name=RoleBearing', + ' field name=role type=string', + ' field name=status type=string optional=true', + 'class name=Account implements=RoleBearing', + ' field name=role type=string static=true', + ].join('\n'), + ); + + expect(violations).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + rule: 'class-implements-missing-member', + message: expect.stringContaining('role'), + }), + ]), + ); + }); + + test('does not satisfy interface fields with methods or mismatched field types', () => { + const rules = rulesFor( + [ + 'interface name=RoleBearing', + ' field name=role type=string', + 'class name=MethodRole implements=RoleBearing', + ' method name=role returns=string', + ' handler lang=kern', + ' return value="\'admin\'"', + 'class name=NumberRole implements=RoleBearing', + ' field name=role type=number', + ].join('\n'), + ); + + expect(rules.filter((rule) => rule === 'class-implements-missing-member')).toHaveLength(2); + }); + + test('reports invalid interface shapes before class implements conformance', () => { + const unknownBaseRules = rulesFor( + [ + 'interface name=Protocol extends=MissingProtocol', + ' field name=id type=string', + 'class name=User implements=Protocol', + ' field name=id type=string', + ].join('\n'), + ); + expect(unknownBaseRules).toContain('class-implements-invalid-interface'); + + const optionalityConflictRules = rulesFor( + [ + 'interface name=BaseProtocol', + ' field name=id 
type=string', + 'interface name=Protocol extends=BaseProtocol', + ' field name=id type=string optional=true', + 'class name=User implements=Protocol', + ' field name=id type=string', + ].join('\n'), + ); + expect(optionalityConflictRules).toContain('class-implements-invalid-interface'); + }); + + test('reports interface indexers as unsupported class implements protocols in v1', () => { + const rules = rulesFor( + [ + 'interface name=DictionaryProtocol', + ' indexer keyName=key keyType=string type=number', + 'class name=Dictionary implements=DictionaryProtocol', + ].join('\n'), + ); + + expect(rules).toContain('class-implements-unsupported-protocol'); + }); + test('reports unknown base class names', () => { const violations = violationsFor('class name=User extends=MissingBase'); diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index f7f0f290..03f68a86 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -293,6 +293,78 @@ describe('KERN semantic substrate', () => { expect(invalidSubstrate.classValidationSummary?.byRule['class-extends-unknown']).toBe(1); }); + test('exports class implements edges and protocol conformance facts', () => { + const facts = collectClassSemanticFacts( + parseRoot( + [ + 'import from="./protocols" names=ExternalProtocol', + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Named extends=Entity', + ' field name=name type=string', + 'interface name=BrokenProtocol extends=MissingBaseProtocol', + ' field name=id type=string', + 'interface name=DictionaryProtocol', + ' indexer keyName=key keyType=string type=number', + 'class name=Base', + ' field name=id type=string', + 'class name=User extends=Base implements="Named,ExternalProtocol,MissingProtocol"', + ' getter name=name returns=string', + ' handler lang=kern', + ' return value="this.id"', + 'class name=Broken implements=Named', + ' field name=id 
type=string', + 'class name=Invalid implements=BrokenProtocol', + ' field name=id type=string', + 'class name=Dictionary implements=DictionaryProtocol', + ].join('\n'), + ), + ); + + expect(facts.implementsEdges).toEqual( + expect.arrayContaining([ + { from: 'User', to: 'Named', relation: 'implements', resolved: true, external: false }, + { from: 'User', to: 'ExternalProtocol', relation: 'implements', resolved: true, external: true }, + { from: 'User', to: 'MissingProtocol', relation: 'implements', resolved: false, external: false }, + ]), + ); + expect(facts.unresolvedImplements).toEqual(['MissingProtocol']); + expect(facts.protocolConformance).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'User', + interfaceName: 'Named', + status: 'satisfied', + satisfiedMembers: ['id', 'name'], + missingMembers: [], + }), + expect.objectContaining({ + className: 'User', + interfaceName: 'ExternalProtocol', + status: 'external', + }), + expect.objectContaining({ + className: 'Broken', + interfaceName: 'Named', + status: 'missing-members', + missingMembers: ['name'], + }), + expect.objectContaining({ + className: 'Invalid', + interfaceName: 'BrokenProtocol', + status: 'invalid-interface', + diagnostics: ['shape-extends-unknown'], + }), + expect.objectContaining({ + className: 'Dictionary', + interfaceName: 'DictionaryProtocol', + status: 'unsupported-protocol', + unsupportedReasons: ['indexer'], + }), + ]), + ); + }); + test('can summarize class validation rules alongside class facts', () => { const root = parseRoot( [ From 189122193de8b615937ea37fc95a5ffedb411b13 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 04:21:45 +0200 Subject: [PATCH 34/63] feat(core): add constructor discipline facts --- packages/core/src/index.ts | 2 + packages/core/src/semantic-validator.ts | 266 ++++++++++++++++++ .../core/tests/semantic-substrate.test.ts | 225 +++++++++++++++ 3 files changed, 493 insertions(+) diff --git a/packages/core/src/index.ts 
b/packages/core/src/index.ts index 6b657d35..a83c3795 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -512,6 +512,8 @@ export { // Semantic validation export type { ClassSemanticClassFact, + ClassSemanticConstructorFact, + ClassSemanticConstructorSuperStatus, ClassSemanticFacts, ClassSemanticImplementsEdge, ClassSemanticInheritanceEdge, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 81f3ba6e..b9cef9f5 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -70,6 +70,30 @@ export interface ClassSemanticClassFact { readonly loc?: ClassSemanticLocation; } +export type ClassSemanticConstructorSuperStatus = + | 'not-required' + | 'satisfied' + | 'missing' + | 'conditional' + | 'double' + | 'this-before-super'; + +export interface ClassSemanticConstructorFact { + readonly className: string; + readonly hasConstructor: boolean; + readonly constructorCount: number; + readonly hasBase: boolean; + readonly requiresSuper: boolean; + readonly superStatus: ClassSemanticConstructorSuperStatus; + readonly superCallCount: number; + readonly thisBeforeSuper: boolean; + readonly declaredFields: readonly string[]; + readonly initializedFields: readonly string[]; + readonly uninitializedRequiredFields: readonly string[]; + readonly provenance: 'static-analysis'; + readonly loc?: ClassSemanticLocation; +} + export interface ClassSemanticInheritanceEdge { readonly from: string; readonly to: string; @@ -120,6 +144,7 @@ export interface ClassSemanticProtocolConformanceFact { export interface ClassSemanticFacts { readonly classes: readonly ClassSemanticClassFact[]; + readonly constructorFacts: readonly ClassSemanticConstructorFact[]; readonly inheritanceEdges: readonly ClassSemanticInheritanceEdge[]; readonly implementsEdges: readonly ClassSemanticImplementsEdge[]; readonly overrides: readonly ClassSemanticOverrideFact[]; @@ -2917,6 +2942,7 @@ export function 
collectClassSemanticFacts(root: IRNode | readonly IRNode[]): Cla return { classes: classes.map((info) => classSemanticFact(info, classByName)), + constructorFacts: classes.map((info) => classConstructorSemanticFact(info)), inheritanceEdges, implementsEdges, overrides: collectClassOverrideFacts(classes, classByName), @@ -2945,6 +2971,37 @@ function classSemanticFact(info: ClassInfo, classByName: ReadonlyMap + count + constructorBodyStatements(ctor).reduce((sum, statement) => sum + superCallCountInNode(statement), 0), + 0, + ); + const declaredFields = declaredInstanceFieldNames(info); + const declaredFieldSet = new Set(declaredFields); + const initializedFields = sortedUnique( + [...fieldInitializerNames(info), ...constructorThisAssignmentNames(info)].filter((name) => + declaredFieldSet.has(name), + ), + ); + return { + className: info.name, + hasConstructor: info.constructors.length > 0, + constructorCount: info.constructors.length, + hasBase: Boolean(info.baseName), + requiresSuper: Boolean(info.baseName) && info.constructors.length > 0, + superStatus: constructorSuperStatus(info, superDiagnostics), + superCallCount, + thisBeforeSuper: superDiagnostics.some((diagnostic) => diagnostic.rule === 'class-constructor-this-before-super'), + declaredFields, + initializedFields, + uninitializedRequiredFields: uninitializedRequiredFieldNames(info, initializedFields), + provenance: 'static-analysis', + ...(info.node.loc ? 
{ loc: semanticLocation(info.node) } : {}), + }; +} + function classMemberSemanticFact( member: ClassMemberInfo, className = member.owner, @@ -2966,6 +3023,215 @@ function classMemberSemanticFact( }; } +function constructorSuperDiagnostics(info: ClassInfo): SemanticViolation[] { + const violations: SemanticViolation[] = []; + if (!info.baseName) return violations; + for (const ctor of info.constructors) validateDerivedConstructorDiscipline(info, ctor, violations); + return violations; +} + +function constructorSuperStatus( + info: ClassInfo, + diagnostics: readonly SemanticViolation[], +): ClassSemanticConstructorSuperStatus { + if (!info.baseName || info.constructors.length === 0) return 'not-required'; + const rules = new Set(diagnostics.map((diagnostic) => diagnostic.rule)); + if (rules.has('class-constructor-this-before-super')) return 'this-before-super'; + if (rules.has('class-constructor-double-super')) return 'double'; + if (rules.has('class-constructor-conditional-super')) return 'conditional'; + if (rules.has('class-constructor-missing-super')) return 'missing'; + return 'satisfied'; +} + +function declaredInstanceFieldNames(info: ClassInfo): string[] { + return sortedUnique( + info.members.filter((member) => member.kind === 'field' && !member.static).map((member) => member.name), + ); +} + +function requiredInstanceFieldNames(info: ClassInfo): string[] { + return sortedUnique( + info.members + .filter((member) => member.kind === 'field' && !member.static && !isTrueFlag(member.node.props?.optional)) + .map((member) => member.name), + ); +} + +function fieldInitializerNames(info: ClassInfo): string[] { + return sortedUnique( + info.members + .filter( + (member) => + member.kind === 'field' && + !member.static && + (Object.hasOwn(member.node.props ?? {}, 'value') || Object.hasOwn(member.node.props ?? 
{}, 'default')), + ) + .map((member) => member.name), + ); +} + +function constructorThisAssignmentNames(info: ClassInfo): string[] { + if (info.constructors.length === 0) return []; + const constructorAssignments: string[][] = []; + for (const ctor of info.constructors) { + constructorAssignments.push([...definiteThisAssignmentsInStatements(constructorBodyStatements(ctor))]); + } + const [first = [], ...rest] = constructorAssignments; + return sortedUnique([...rest.reduce((common, names) => setIntersection(common, new Set(names)), new Set(first))]); +} + +interface ThisAssignmentPathStates { + readonly continuing: Set[]; + readonly exited: Set[]; +} + +function definiteThisAssignmentsInStatements(statements: readonly IRNode[], initial = new Set()): Set { + const states = thisAssignmentPathStatesInStatements(statements, [new Set(initial)]); + const [first = new Set(), ...rest] = states.exited.concat(states.continuing); + return rest.reduce((common, names) => setIntersection(common, names), new Set(first)); +} + +function thisAssignmentPathStatesInStatements( + statements: readonly IRNode[], + initialStates: readonly ReadonlySet[], +): ThisAssignmentPathStates { + let continuing = initialStates.map((state) => new Set(state)); + const exited: Set[] = []; + for (let index = 0; index < statements.length; index += 1) { + const statement = statements[index]; + if (statement.type === 'else') continue; + const nextContinuing: Set[] = []; + if (statement.type === 'if') { + const maybeElse = statements[index + 1]?.type === 'else' ? statements[index + 1] : undefined; + for (const state of continuing) { + const thenStates = thisAssignmentPathStatesInStatements(statement.children ?? [], [state]); + const elseStates = maybeElse + ? thisAssignmentPathStatesInStatements(maybeElse.children ?? 
[], [state]) + : { continuing: [new Set(state)], exited: [] }; + nextContinuing.push(...thenStates.continuing, ...elseStates.continuing); + exited.push(...thenStates.exited, ...elseStates.exited); + } + continuing = nextContinuing; + if (maybeElse) index += 1; + continue; + } + if (statement.type === 'try') { + const tryStates = thisAssignmentTryPathStates(statement, continuing); + continuing = tryStates.continuing; + exited.push(...tryStates.exited); + continue; + } + if (statement.type === 'return') { + exited.push(...continuing.map((state) => new Set(state))); + continuing = []; + continue; + } + if (statement.type === 'throw' || statement.type === 'break' || statement.type === 'continue') { + continuing = []; + continue; + } + if (statement.type === 'while' || statement.type === 'for' || statement.type === 'each') { + continue; + } + for (const state of continuing) { + const next = new Set(state); + const directName = + statement.type === 'assign' && isSimpleAssignment(statement) + ? thisMemberName(expressionPropText(statement.props?.target)) + : undefined; + if (directName) next.add(directName); + nextContinuing.push(next); + } + continuing = nextContinuing; + } + return { continuing, exited }; +} + +function thisAssignmentTryPathStates( + statement: IRNode, + initialStates: readonly ReadonlySet[], +): ThisAssignmentPathStates { + const children = statement.children ?? []; + const catchNode = children.find((child) => child.type === 'catch'); + const finallyNode = children.find((child) => child.type === 'finally'); + const tryChildren = children.filter((child) => child.type !== 'catch' && child.type !== 'finally'); + const tryStates = thisAssignmentPathStatesInStatements(tryChildren, initialStates); + const catchStates = catchNode + ? thisAssignmentPathStatesInStatements(catchNode.children ?? 
[], initialStates) + : { continuing: [], exited: [] }; + const continuing = [...tryStates.continuing, ...catchStates.continuing]; + const exited = [...tryStates.exited, ...catchStates.exited]; + if (!finallyNode) return { continuing, exited }; + + const continuingAfterFinally = thisAssignmentPathStatesInStatements(finallyNode.children ?? [], continuing); + const exitingAfterFinally = thisAssignmentPathStatesInStatements(finallyNode.children ?? [], exited); + return { + continuing: continuingAfterFinally.continuing, + exited: [...continuingAfterFinally.exited, ...exitingAfterFinally.continuing, ...exitingAfterFinally.exited], + }; +} + +function isSimpleAssignment(statement: IRNode): boolean { + const op = statement.props?.op; + return op === undefined || op === null || op === '' || op === '='; +} + +function setIntersection(left: ReadonlySet, right: ReadonlySet): Set { + const out = new Set(); + for (const value of left) { + if (right.has(value)) out.add(value); + } + return out; +} + +function uninitializedRequiredFieldNames(info: ClassInfo, initializedFields: readonly string[]): string[] { + const initialized = new Set(initializedFields); + return requiredInstanceFieldNames(info).filter((name) => !initialized.has(name)); +} + +function thisMemberName(text: string | undefined): string | undefined { + if (!text) return undefined; + try { + const value = parseExpression(text); + if (value.kind === 'member' && value.object.kind === 'ident' && value.object.name === 'this') { + return value.property; + } + if ( + value.kind === 'index' && + value.object.kind === 'ident' && + value.object.name === 'this' && + value.index.kind === 'strLit' + ) { + return value.index.value; + } + return undefined; + } catch { + return undefined; + } +} + +function superCallCountInNode(node: IRNode): number { + let count = 0; + walkSemanticTreeUntil(node, (candidate) => { + if (candidate !== node && candidate.type === 'class') return 'stop'; + for (const prop of BODY_EXPRESSION_PROPS) { + 
const text = expressionPropText(candidate.props?.[prop]); + if (!text) continue; + try { + count += valueIRSuperConstructorCallCount(parseExpression(text)); + } catch {} + } + return 'continue'; + }); + return count; +} + +function valueIRSuperConstructorCallCount(value: ValueIR): number { + if (value.kind === 'lambda') return 0; + const own = value.kind === 'call' && value.callee.kind === 'ident' && value.callee.name === 'super' ? 1 : 0; + return own + valueIRChildren(value).reduce((count, child) => count + valueIRSuperConstructorCallCount(child), 0); +} + function effectiveClassMemberFacts( info: ClassInfo, classByName: ReadonlyMap, diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 03f68a86..fc8a1649 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -230,6 +230,231 @@ describe('KERN semantic substrate', () => { ); }); + test('exports constructor discipline and field initialization facts', () => { + const facts = collectClassSemanticFacts( + parseRoot( + [ + 'class name=Base', + ' field name=id type=string', + 'class name=Good extends=Base', + ' field name=name type=string', + ' constructor', + ' param name=id type=string', + ' param name=name type=string', + ' handler lang=kern', + ' do value="super(id)"', + ' assign target="this.name" value="name"', + 'class name=MissingSuper extends=Base', + ' constructor', + ' handler lang=kern', + ' do value=1', + 'class name=DoubleSuper extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' do value="super()"', + 'class name=ConditionalSuper extends=Base', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + 'class name=ThisBeforeSuper extends=Base', + ' constructor', + ' handler lang=kern', + ' assign target="this.name" value="\'Ada\'"', + ' do value="super()"', + 'class name=DelayedSuper 
extends=Base', + ' constructor', + ' handler lang=kern', + ' do value="(() => super())"', + 'class name=BranchInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' assign target="this.name" value="\'Ada\'"', + 'class name=CompleteBranchInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' assign target="this.name" value="\'Ada\'"', + ' else', + ' assign target="this.name" value="\'Grace\'"', + 'class name=CompoundInit', + ' field name=count type=number', + ' constructor', + ' handler lang=kern', + ' assign target="this.count" op="+=" value=1', + 'class name=IndexInit', + ' field name=name type=string', + ' constructor', + ' handler lang=kern', + ' assign target="this[\'name\']" value="\'Ada\'"', + 'class name=UndeclaredInit', + ' field name=name type=string', + ' constructor', + ' handler lang=kern', + ' assign target="this.extra" value="\'ignored\'"', + ' assign target="this.name" value="\'Ada\'"', + 'class name=EarlyReturnInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' return value=undefined', + ' assign target="this.name" value="\'Ada\'"', + 'class name=TryCatchInit', + ' field name=name type=string', + ' constructor', + ' handler lang=kern', + ' try', + ' assign target="this.name" value="\'Ada\'"', + ' catch name=err', + ' assign target="this.name" value="\'Grace\'"', + 'class name=FinallyReturnInit', + ' field name=name type=string', + ' constructor', + ' handler lang=kern', + ' try', + ' return value=undefined', + ' finally', + ' assign target="this.name" value="\'Ada\'"', + 'class name=LoopInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' while cond=ready', + ' assign target="this.name" value="\'Ada\'"', + 'class 
name=ThrowBranchInit', + ' field name=name type=string', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' throw value="new Error(\'stop\')"', + ' assign target="this.name" value="\'Ada\'"', + 'class name=Defaults', + ' field name=ready type=boolean value=true', + ' field name=optionalName type=string optional=true', + ' field name=missing type=string', + ].join('\n'), + ), + ); + + expect(facts.constructorFacts).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + className: 'Base', + requiresSuper: false, + superStatus: 'not-required', + declaredFields: ['id'], + initializedFields: [], + uninitializedRequiredFields: ['id'], + provenance: 'static-analysis', + }), + expect.objectContaining({ + className: 'Good', + requiresSuper: true, + superStatus: 'satisfied', + superCallCount: 1, + thisBeforeSuper: false, + declaredFields: ['name'], + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'MissingSuper', + superStatus: 'missing', + superCallCount: 0, + }), + expect.objectContaining({ + className: 'DoubleSuper', + superStatus: 'double', + superCallCount: 2, + }), + expect.objectContaining({ + className: 'ConditionalSuper', + superStatus: 'conditional', + superCallCount: 1, + }), + expect.objectContaining({ + className: 'ThisBeforeSuper', + superStatus: 'this-before-super', + thisBeforeSuper: true, + }), + expect.objectContaining({ + className: 'DelayedSuper', + superStatus: 'missing', + superCallCount: 0, + }), + expect.objectContaining({ + className: 'BranchInit', + initializedFields: [], + uninitializedRequiredFields: ['name'], + }), + expect.objectContaining({ + className: 'CompleteBranchInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'CompoundInit', + initializedFields: [], + uninitializedRequiredFields: ['count'], + }), + expect.objectContaining({ + className: 
'IndexInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'UndeclaredInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'EarlyReturnInit', + initializedFields: [], + uninitializedRequiredFields: ['name'], + }), + expect.objectContaining({ + className: 'TryCatchInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'FinallyReturnInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'LoopInit', + initializedFields: [], + uninitializedRequiredFields: ['name'], + }), + expect.objectContaining({ + className: 'ThrowBranchInit', + initializedFields: ['name'], + uninitializedRequiredFields: [], + }), + expect.objectContaining({ + className: 'Defaults', + requiresSuper: false, + superStatus: 'not-required', + declaredFields: ['missing', 'optionalName', 'ready'], + initializedFields: ['ready'], + uninitializedRequiredFields: ['missing'], + }), + ]), + ); + }); + test('reports unresolved bases and inheritance cycles as class facts', () => { const facts = collectClassSemanticFacts( parseRoot( From fb1febf0477eb575224ba1511884d23ded06fdce Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 05:14:34 +0200 Subject: [PATCH 35/63] feat(core): enforce class implements at runtime --- packages/core/src/core-runtime/index.ts | 174 ++++++++++++++- packages/core/tests/core-runtime.test.ts | 273 +++++++++++++++++++++++ 2 files changed, 446 insertions(+), 1 deletion(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 0f68395f..fc6b7863 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -13,6 +13,7 @@ import { coreFixtureValueToKernValue, kernValueToCoreFixtureValue, } from './contract-adapter.js'; +import { 
collectCoreShapeFacts, validateCoreShape } from './shape-validator.js'; import { brandValue, KERN_VALUE_BRAND } from './value-brand.js'; const INTEGER_INDEX_RE = /^(0|[1-9]\d*)$/; @@ -55,6 +56,7 @@ export interface KernClassValue { node: IRNode; env: CoreRuntimeEnv; staticFields: Record; + runtimeRootContext?: IRNode | readonly IRNode[]; } export interface KernInstanceValue { @@ -104,6 +106,7 @@ export interface CreateCoreRuntimeEnvOptions { export class CoreRuntimeEnv { private readonly bindings = new Map(); + private runtimeRootContext?: IRNode | readonly IRNode[]; constructor(readonly parent?: CoreRuntimeEnv) {} @@ -135,6 +138,14 @@ export class CoreRuntimeEnv { child(): CoreRuntimeEnv { return new CoreRuntimeEnv(this); } + + setRuntimeRootContext(root: IRNode | readonly IRNode[]): void { + this.runtimeRootContext = root; + } + + getRuntimeRootContext(): IRNode | readonly IRNode[] | undefined { + return this.runtimeRootContext ?? this.parent?.getRuntimeRootContext(); + } } export const kNull = (): KernValue => brandValue({ kind: 'null' }); @@ -247,6 +258,7 @@ export function runCoreRuntime( nodeOrNodes: IRNode | readonly IRNode[], env = createCoreRuntimeEnv(), ): CoreRuntimeResult { + env.setRuntimeRootContext(nodeOrNodes); const nodes: readonly IRNode[] = isIRNodeArray(nodeOrNodes) ? nodeOrNodes : runtimeChildren(nodeOrNodes); return { completion: executeSequence(nodes, env), env }; } @@ -279,6 +291,10 @@ function executeSequence(nodes: readonly IRNode[], env: CoreRuntimeEnv): CoreCom function executeNode(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { switch (node.type) { + case 'interface': + case 'import': + case 'use': + return { kind: 'normal', value: kUndefined() }; case 'handler': case '__block': return executeSequence(node.children ?? [], env); @@ -723,6 +739,7 @@ function makeClass(node: IRNode, env: CoreRuntimeEnv): KernClassValue { node, env, staticFields: createRecordEntries(), + ...(env.getRuntimeRootContext() ? 
{ runtimeRootContext: env.getRuntimeRootContext() } : {}), }); } @@ -746,9 +763,102 @@ function constructClassValue(klass: KernClassValue, args: readonly KernValue[]): initializedClasses: new Set(), }); initializeClassLayer(instance, klass, args, true); + validateImplementedClassProtocols(instance, klass); return instance; } +function validateImplementedClassProtocols(instance: KernInstanceValue, klass: KernClassValue): void { + const factsByRoot = new Map>(); + for (const layer of classHierarchyFromBase(klass)) { + const root = layer.runtimeRootContext ?? layer.env.getRuntimeRootContext(); + if (!root) continue; + const facts = factsByRoot.get(root) ?? collectCoreShapeFacts(root); + factsByRoot.set(root, facts); + const shapeByName = new Map(facts.interfaces.map((shape) => [shape.name, shape])); + const importedProtocolNames = runtimeImportedProtocolNames(root); + for (const interfaceName of runtimeClassReferenceNames(layer.node.props?.implements)) { + const shape = shapeByName.get(interfaceName); + if (!shape) { + if (importedProtocolNames.has(interfaceName)) continue; + throw new Error(`KERN core runtime class '${klass.name}' implements unknown interface '${interfaceName}'.`); + } + if (!shape.validatorAvailable || shape.indexers.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' implements interface '${interfaceName}' that is not executable as a class protocol in v1.`, + ); + } + const projection = classProtocolProjection( + instance, + shape.fields.map((field) => field.name), + ); + const result = validateCoreShape(projection, interfaceName, root); + if (result.passed) continue; + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}':\n${result.diagnostics + .map((diagnostic) => diagnostic.message) + .join('\n')}`, + ); + } + } +} + +function classProtocolProjection(instance: KernInstanceValue, fieldNames: readonly string[]): KernValue { + const entries = createRecordEntries(); + 
for (const fieldName of fieldNames) { + if (Object.hasOwn(instance.fields, fieldName)) { + entries[fieldName] = instance.fields[fieldName] ?? kUndefined(); + continue; + } + const member = findReadableClassShapeMember(instance.classValue, fieldName, false); + if (member?.kind !== 'getter') continue; + entries[fieldName] = evalInstanceMember(instance, fieldName); + } + return brandValue({ kind: 'record', entries }); +} + +function classHierarchyFromBase(klass: KernClassValue): KernClassValue[] { + const base = resolveBaseClass(klass); + return base ? [...classHierarchyFromBase(base), klass] : [klass]; +} + +function runtimeImportedProtocolNames(rootOrNodes: IRNode | readonly IRNode[]): Set { + const names = new Set(); + const visit = (node: IRNode): void => { + if (node.type === 'import') { + for (const name of runtimeImportLocalNames(node)) names.add(name); + } + if (node.type === 'use') { + for (const child of node.children ?? []) { + if (child.type !== 'from') continue; + const kind = runtimeStringProp(child.props?.kind); + if (kind && kind !== 'interface' && kind !== 'type') continue; + const localName = runtimeStringProp(child.props?.as) ?? runtimeStringProp(child.props?.name); + if (localName) names.add(localName); + } + } + for (const child of node.children ?? []) visit(child); + }; + for (const node of isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]) visit(node); + return names; +} + +function runtimeImportLocalNames(node: IRNode): string[] { + const names: string[] = []; + const props = node.props ?? {}; + const defaultName = runtimeStringProp(props.default); + if (defaultName && defaultName !== 'true') names.push(defaultName); + const rawNames = runtimeStringProp(props.names); + if (rawNames) { + for (const raw of rawNames.split(',')) { + const name = raw.trim(); + const aliasMatch = /^([A-Za-z_$][\w$]*)(?:\s+as\s+([A-Za-z_$][\w$]*))?$/u.exec(name); + if (aliasMatch) names.push(aliasMatch[2] ?? 
aliasMatch[1]); + else if (/^[A-Za-z_$][\w$]*$/u.test(name)) names.push(name); + } + } + return names; +} + function initializeClassLayer( instance: KernInstanceValue, klass: KernClassValue, @@ -1217,6 +1327,68 @@ function classBaseName(value: unknown): string | undefined { return match?.[1]; } +function runtimeStringProp(value: unknown): string | undefined { + return typeof value === 'string' && value.length > 0 ? value : undefined; +} + +function runtimeClassReferenceNames(value: unknown): string[] { + if (typeof value !== 'string' || !value.trim()) return []; + const parts = splitRuntimeClassReferenceList(value); + const names = new Set(); + for (const part of parts) { + const name = runtimeClassReferenceName(part); + if (!name) throw new Error(`implements= contains an invalid reference: ${part}.`); + names.add(name); + } + return [...names]; +} + +function runtimeClassReferenceName(value: string): string | undefined { + const trimmed = value.trim(); + const match = /^([A-Za-z_$][\w$]*)(?:\s*<[\s\S]*>)?$/u.exec(trimmed); + return match?.[1]; +} + +function splitRuntimeClassReferenceList(raw: string): string[] { + const out: string[] = []; + let current = ''; + let depth = 0; + let angleDepth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + current += ch; + if (ch === '\\' && index + 1 < raw.length) current += raw[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + current += ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if (ch === ')' || ch === ']' || ch === '}') depth -= 1; + else if (ch === '<') angleDepth += 1; + else if (ch === '>' && angleDepth > 0) angleDepth -= 1; + if (depth < 0 || angleDepth < 0) throw new Error('implements= has unbalanced delimiters.'); + if (ch === ',' && depth === 0 && angleDepth === 0) { + const part = 
current.trim(); + if (part.length === 0) throw new Error('implements= contains an empty reference.'); + out.push(part); + current = ''; + continue; + } + current += ch; + } + if (quote !== null || depth !== 0 || angleDepth !== 0) throw new Error('implements= has unbalanced delimiters.'); + const tail = current.trim(); + if (tail.length === 0 && raw.trim().endsWith(',')) throw new Error('implements= contains an empty reference.'); + if (tail.length > 0) out.push(tail); + return out; +} + function classThisEnv(klass: KernClassValue, receiver: KernInstanceValue): CoreRuntimeEnv { const env = klass.env.child(); env.define('this', receiver); @@ -1510,7 +1682,7 @@ function isKernValueShape(value: unknown, seen: WeakSet): value is KernV ); case 'class': return ( - hasOnlyKeys(value, ['kind', 'name', 'node', 'env', 'staticFields']) && + hasOnlyKeys(value, ['kind', 'name', 'node', 'env', 'staticFields'], ['runtimeRootContext']) && typeof value.name === 'string' && isPlainRecord(value.node) && value.env instanceof CoreRuntimeEnv && diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 8c9cbd17..10d7e6bd 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -359,6 +359,279 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('make()', env))).toBe(6); }); + test('enforces implemented interface fields after class construction', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements=Named', + ' field name=id type=string value="unset"', + ' field name=name type=string value="Ada"', + ' constructor', + ' param name=id type=string', + ' handler', + ' assign target="this.id" value="id"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User("u1").id', env))).toBe('u1'); + 
expect(toHostValue(evalCoreExpression('new User("u1").name', env))).toBe('Ada'); + }); + + test('rejects constructed classes that miss implemented interface fields', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=name type=string', + 'class name=User implements=Named', + ' constructor', + ' handler', + ' do value="1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow("class 'User' violates implemented interface 'Named'"); + expect(() => evalCoreExpression('new User()', env)).toThrow('missing required field Named.name'); + }); + + test('rejects constructed classes with wrong implemented interface field types', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements=Named', + ' field name=id type=number value={{ 1 }}', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('expected Named.id to be string, got number'); + }); + + test('enforces inherited interface fields for implemented protocols', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Named extends=Entity', + ' field name=name type=string', + 'class name=User implements=Named', + ' field name=id type=string value="u1"', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('u1'); + expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); + }); + + test('rejects classes missing inherited implemented interface fields', () => { + const root = parse( + [ + 'interface name=Entity', + ' field name=id type=string', + 'interface name=Named extends=Entity', + ' field name=name type=string', + 
'class name=User implements=Named', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('missing required field Named.id'); + }); + + test('validates getter-backed implemented interface fields', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=name type=string', + 'class name=User implements=Named', + ' field name=first type=string value="Ada"', + ' getter name=name returns=string', + ' handler', + ' return value="this.first"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); + }); + + test('enforces base class implemented protocols on derived instances', () => { + const root = parse( + [ + 'interface name=EntityLike', + ' field name=id type=string', + 'class name=Entity implements=EntityLike', + ' field name=id type=string value="base"', + 'class name=User extends=Entity', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('base'); + expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); + }); + + test('rejects derived instances when a base implemented protocol is unsatisfied', () => { + const root = parse( + [ + 'interface name=EntityLike', + ' field name=id type=string', + 'class name=Entity implements=EntityLike', + 'class name=User extends=Entity', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow( + "class 'User' violates implemented interface 'EntityLike'", + ); + }); + + test('class implements validation uses the declaration root context', () => { + 
const firstRoot = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements=Named', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const secondRoot = parse(['interface name=Named', ' field name=id type=number'].join('\n')); + const env = createCoreRuntimeEnv(); + runCoreRuntime(firstRoot, env); + runCoreRuntime(secondRoot, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('u1'); + }); + + test('base implemented protocols use the base declaration root context', () => { + const firstRoot = parse( + [ + 'interface name=EntityLike', + ' field name=id type=string', + 'class name=Entity implements=EntityLike', + ' field name=id type=string value="base"', + ].join('\n'), + ); + const secondRoot = parse( + [ + 'interface name=EntityLike', + ' field name=id type=number', + 'class name=User extends=Entity', + ' field name=name type=string value="Ada"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(firstRoot, env); + runCoreRuntime(secondRoot, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('base'); + }); + + test('runtime class protocols reject unsupported indexer interfaces', () => { + const root = parse( + [ + 'interface name=Dictionary', + ' indexer keyType=string type=string', + 'class name=User implements=Dictionary', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow( + "implements interface 'Dictionary' that is not executable as a class protocol in v1", + ); + }); + + test('malformed runtime implements lists fail instead of skipping validation', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements="Named,"', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + 
runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('implements= contains an empty reference'); + }); + + test('invalid runtime implements entries fail instead of being ignored', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements="123"', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow('implements= contains an invalid reference: 123'); + }); + + test('runtime implements entries reject trailing junk', () => { + const root = parse( + [ + 'interface name=Named', + ' field name=id type=string', + 'class name=User implements="Named junk"', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow( + 'implements= contains an invalid reference: Named junk', + ); + }); + + test('unknown local runtime implements targets fail instead of being ignored', () => { + const root = parse(['class name=User implements=MissingProtocol'].join('\n')); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new User()', env)).toThrow( + "class 'User' implements unknown interface 'MissingProtocol'", + ); + }); + + test('imported runtime implements targets are treated as external protocols', () => { + const root = parse( + [ + 'import from="./protocols" names=ExternalProtocol', + 'class name=User implements=ExternalProtocol', + ' field name=id type=string value="u1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new User().id', env))).toBe('u1'); + }); + test('executes inherited fields getters methods and overrides', () => { const root = parse( [ From 
4d85443411da95b6c117ffd0249ed836a13f5f72 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 06:47:19 +0200 Subject: [PATCH 36/63] feat(core): enforce interface method protocols --- packages/core/src/codegen/type-system.ts | 25 ++ packages/core/src/core-runtime/index.ts | 233 ++++++++++++++- packages/core/src/schema.ts | 3 +- packages/core/src/semantic-validator.ts | 281 +++++++++++++++++- packages/core/tests/class-semantics.test.ts | 216 ++++++++++++++ packages/core/tests/codegen-core.test.ts | 33 ++ packages/core/tests/core-runtime.test.ts | 269 +++++++++++++++++ packages/core/tests/schema-validation.test.ts | 9 +- .../core/tests/semantic-substrate.test.ts | 8 +- 9 files changed, 1061 insertions(+), 16 deletions(-) diff --git a/packages/core/src/codegen/type-system.ts b/packages/core/src/codegen/type-system.ts index 8d812e9f..318e4f64 100644 --- a/packages/core/src/codegen/type-system.ts +++ b/packages/core/src/codegen/type-system.ts @@ -92,6 +92,14 @@ export function generateInterface(node: IRNode): string[] { const opt = fp.optional === 'true' || fp.optional === true ? '?' : ''; lines.push(` ${fieldName}${opt}: ${emitTypeAnnotation(fp.type, 'unknown', field)};`); } + for (const method of kids(node, 'method')) { + const mp = propsOf<'method'>(method); + const methodName = emitIdentifier(mp.name, 'method', method); + const generics = mp.generics ? emitTypeAnnotation(mp.generics, '', method) : ''; + const params = emitParamList(method, { stripDefaults: true }); + const returns = interfaceMethodReturnType(method, mp); + lines.push(` ${methodName}${generics}(${params}): ${returns};`); + } for (const idx of kids(node, 'indexer')) { const ip = propsOf<'indexer'>(idx); // `||` (not `??`) so an empty-string keyName also falls back to 'key'. 
@@ -105,6 +113,23 @@ export function generateInterface(node: IRNode): string[] { return lines; } +function interfaceMethodReturnType( + node: IRNode, + props: { returns?: string; async?: unknown; stream?: unknown; generator?: unknown }, +): string { + const isAsync = props.async === 'true' || props.async === true; + const isStream = props.stream === 'true' || props.stream === true; + const isGenerator = props.generator === 'true' || props.generator === true; + const returns = props.returns ? emitTypeAnnotation(props.returns, 'unknown', node) : ''; + const generatorPrefix = isAsync ? 'AsyncGenerator<' : 'Generator<'; + if (isStream) return returns.startsWith('AsyncGenerator<') ? returns : `AsyncGenerator<${returns || 'unknown'}>`; + if (isGenerator) { + if (returns.startsWith('Generator<') || returns.startsWith('AsyncGenerator<')) return returns; + return `${generatorPrefix}${returns || 'unknown'}>`; + } + return returns || 'void'; +} + // ── Discriminated Union ────────────────────────────────────────────────── export function generateUnion(node: IRNode): string[] { diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index fc6b7863..44d921cd 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -792,12 +792,21 @@ function validateImplementedClassProtocols(instance: KernInstanceValue, klass: K shape.fields.map((field) => field.name), ); const result = validateCoreShape(projection, interfaceName, root); - if (result.passed) continue; - throw new Error( - `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}':\n${result.diagnostics - .map((diagnostic) => diagnostic.message) - .join('\n')}`, - ); + if (!result.passed) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}':\n${result.diagnostics + .map((diagnostic) => diagnostic.message) + .join('\n')}`, + ); + } + const missingMethods = 
runtimeInterfaceProtocolMethods(root, interfaceName) + .filter((method) => !classHasRuntimeProtocolMethod(instance.classValue, method)) + .map((method) => method.name); + if (missingMethods.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}': missing or incompatible method(s): ${missingMethods.join(', ')}.`, + ); + } } } } @@ -816,6 +825,147 @@ function classProtocolProjection(instance: KernInstanceValue, fieldNames: readon return brandValue({ kind: 'record', entries }); } +interface RuntimeInterfaceProtocolMethod { + readonly name: string; + readonly arity: number; + readonly paramTypes: readonly string[]; + readonly async: boolean; + readonly stream: boolean; + readonly generator: boolean; + readonly returns?: string; +} + +function runtimeInterfaceProtocolMethods( + rootOrNodes: IRNode | readonly IRNode[], + interfaceName: string, +): RuntimeInterfaceProtocolMethod[] { + const interfaceByName = new Map(); + const visit = (node: IRNode): void => { + if (node.type === 'interface') { + const name = runtimeStringProp(node.props?.name); + if (name && !interfaceByName.has(name)) interfaceByName.set(name, node); + } + for (const child of node.children ?? []) visit(child); + }; + for (const node of isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]) visit(node); + + const resolve = (name: string, seen: ReadonlySet): RuntimeInterfaceProtocolMethod[] => { + if (seen.has(name)) return []; + const node = interfaceByName.get(name); + if (!node) return []; + const nextSeen = new Set(seen); + nextSeen.add(name); + const methods = new Map(); + for (const baseName of runtimeClassReferenceNames(node.props?.extends)) { + for (const method of resolve(baseName, nextSeen)) methods.set(method.name, method); + } + for (const child of node.children ?? 
[]) { + if (child.type !== 'method') continue; + const name = runtimeStringProp(child.props?.name); + if (!name) continue; + methods.set(name, { + name, + arity: runtimeParams(child).length, + paramTypes: runtimeParams(child).map((param) => param.type ?? ''), + async: runtimeBooleanProp(child.props?.async), + stream: runtimeBooleanProp(child.props?.stream), + generator: runtimeBooleanProp(child.props?.generator), + ...(runtimeStringProp(child.props?.returns) ? { returns: runtimeStringProp(child.props?.returns) } : {}), + }); + } + return [...methods.values()]; + }; + + return resolve(interfaceName, new Set()); +} + +function classHasRuntimeProtocolMethod(klass: KernClassValue, method: RuntimeInterfaceProtocolMethod): boolean { + const member = findReadableClassShapeMember(klass, method.name, false); + if (member?.kind !== 'method') return false; + if (runtimeBooleanProp(member.node.props?.private)) return false; + const params = runtimeParams(member.node); + if (params.length !== method.arity) return false; + if ( + !runtimeProtocolParamTypesCompatible( + params.map((param) => param.type ?? 
''), + method.paramTypes, + ) + ) + return false; + if (runtimeBooleanProp(member.node.props?.async) !== method.async) return false; + if (runtimeBooleanProp(member.node.props?.stream) !== method.stream) return false; + if (runtimeBooleanProp(member.node.props?.generator) !== method.generator) return false; + const returns = runtimeStringProp(member.node.props?.returns); + return runtimeProtocolReturnTypesCompatible( + returns, + { + async: runtimeBooleanProp(member.node.props?.async), + stream: runtimeBooleanProp(member.node.props?.stream), + generator: runtimeBooleanProp(member.node.props?.generator), + }, + method.returns, + method, + ); +} + +function runtimeProtocolParamTypesCompatible(actual: readonly string[], expected: readonly string[]): boolean { + return expected.every( + (type, index) => !type || normalizeRuntimeProtocolType(actual[index]) === normalizeRuntimeProtocolType(type), + ); +} + +function normalizeRuntimeProtocolType(type: string | undefined): string { + return compactRuntimeProtocolTypeWhitespace(type); +} + +function compactRuntimeProtocolTypeWhitespace(type: string | undefined): string { + let out = ''; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < (type ?? '').length; index += 1) { + const ch = (type ?? '')[index]; + if (quote !== null) { + out += ch; + if (ch === '\\' && index + 1 < (type ?? '').length) out += (type ?? 
'')[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + out += ch; + continue; + } + if (!/\s/.test(ch)) out += ch; + } + return out; +} + +function runtimeProtocolReturnTypesCompatible( + actual: string | undefined, + actualFlags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, + expected: string | undefined, + expectedFlags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, +): boolean { + return ( + normalizeRuntimeProtocolReturnType(actual, actualFlags) === + normalizeRuntimeProtocolReturnType(expected, expectedFlags) + ); +} + +function normalizeRuntimeProtocolReturnType( + returns: string | undefined, + flags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, +): string { + if (flags.stream) { + if (returns?.startsWith('AsyncGenerator<')) return returns; + return `AsyncGenerator<${returns || 'unknown'}>`; + } + if (flags.generator) { + if (returns?.startsWith('Generator<') || returns?.startsWith('AsyncGenerator<')) return returns; + return `${flags.async ? 'AsyncGenerator' : 'Generator'}<${returns || 'unknown'}>`; + } + return !returns || returns === 'void' ? 'void' : returns; +} + function classHierarchyFromBase(klass: KernClassValue): KernClassValue[] { const base = resolveBaseClass(klass); return base ? [...classHierarchyFromBase(base), klass] : [klass]; @@ -1331,6 +1481,10 @@ function runtimeStringProp(value: unknown): string | undefined { return typeof value === 'string' && value.length > 0 ? 
value : undefined; } +function runtimeBooleanProp(value: unknown): boolean { + return value === true || (typeof value === 'string' && value.trim().toLowerCase() === 'true'); +} + function runtimeClassReferenceNames(value: unknown): string[] { if (typeof value !== 'string' || !value.trim()) return []; const parts = splitRuntimeClassReferenceList(value); @@ -1389,6 +1543,28 @@ function splitRuntimeClassReferenceList(raw: string): string[] { return out; } +function runtimeAngleClosesBeforeNextTopLevelComma(raw: string, start: number): boolean { + let depth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = start; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + if (ch === '\\' && index + 1 < raw.length) index += 1; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if ((ch === ')' || ch === ']' || ch === '}') && depth > 0) depth -= 1; + else if (ch === '>' && depth === 0) return true; + else if (ch === ',' && depth === 0) return false; + } + return false; +} + function classThisEnv(klass: KernClassValue, receiver: KernInstanceValue): CoreRuntimeEnv { const env = klass.env.child(); env.define('this', receiver); @@ -1540,7 +1716,7 @@ function runtimeParams(node: IRNode): RuntimeParam[] { const raw = typeof node.props?.params === 'string' ? node.props.params : ''; if (!raw.trim()) return []; - return splitPortableExpressionList(raw, 'fn params=').map((part) => { + return splitRuntimeParamList(raw, 'fn params=').map((part) => { const defaultIndex = findRuntimeDefaultSeparator(part); const beforeDefault = defaultIndex >= 0 ? part.slice(0, defaultIndex) : part; const defaultExpr = defaultIndex >= 0 ? 
part.slice(defaultIndex + 1).trim() : undefined; @@ -1555,6 +1731,49 @@ function runtimeParams(node: IRNode): RuntimeParam[] { }); } +function splitRuntimeParamList(raw: string, propName: string): string[] { + const out: string[] = []; + let current = ''; + let depth = 0; + let angleDepth = 0; + let inDefault = false; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + current += ch; + if (ch === '\\' && index + 1 < raw.length) current += raw[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + current += ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if (ch === ')' || ch === ']' || ch === '}') depth -= 1; + else if (ch === '=' && depth === 0 && angleDepth === 0 && raw[index + 1] !== '>') inDefault = true; + else if (ch === '<' && (!inDefault || runtimeAngleClosesBeforeNextTopLevelComma(raw, index + 1))) angleDepth += 1; + else if (ch === '>' && angleDepth > 0) angleDepth -= 1; + if (depth < 0 || angleDepth < 0) throw new Error(`${propName} has unbalanced delimiters.`); + if (ch === ',' && depth === 0 && angleDepth === 0) { + const part = current.trim(); + if (part.length === 0) throw new Error(`${propName} contains an empty expression.`); + out.push(part); + current = ''; + inDefault = false; + continue; + } + current += ch; + } + if (quote !== null || depth !== 0 || angleDepth !== 0) throw new Error(`${propName} has unbalanced delimiters.`); + const tail = current.trim(); + if (tail.length === 0 && raw.trim().endsWith(',')) throw new Error(`${propName} contains an empty expression.`); + if (tail.length > 0) out.push(tail); + return out; +} + function runtimeParamDefaultExpr(node: IRNode): string | undefined { const propName = Object.hasOwn(node.props ?? {}, 'value') ? 'value' : 'default'; const rawValue = propName === 'value' ? 
node.props?.value : node.props?.default; diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 4f33dedd..01471c37 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -96,7 +96,7 @@ export const NODE_SCHEMAS: Record = { generics: { kind: 'rawExpr' }, export: { kind: 'boolean' }, }, - allowedChildren: ['field', 'indexer'], + allowedChildren: ['field', 'indexer', 'method'], }, indexer: { description: 'Index signature for an interface — [keyName: keyType]: type', @@ -219,6 +219,7 @@ export const NODE_SCHEMAS: Record = { returns: { kind: 'typeAnnotation' }, async: { kind: 'boolean' }, stream: { kind: 'boolean' }, + generator: { kind: 'boolean' }, private: { kind: 'boolean' }, static: { kind: 'boolean' }, generics: { kind: 'rawExpr' }, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index b9cef9f5..27b523ee 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -23,7 +23,6 @@ import { import { collectExternalImportSymbols, type ExternalImportSymbolTable } from './external-symbols.js'; import { importRegistryOf } from './import-metadata.js'; import { parseExpression } from './parser-expression.js'; -import { splitPortableExpressionList } from './portable-expression-list.js'; import { RAG_ASSERTION_KIND_SET, RAG_ASSERTION_KINDS } from './rag-assertions.js'; import type { IRNode } from './types.js'; import type { ValueIR } from './value-ir.js'; @@ -51,8 +50,13 @@ export interface ClassSemanticMemberFact { readonly name: string; readonly kind: ClassSemanticMemberKind; readonly static: boolean; + readonly private?: boolean; + readonly async?: boolean; + readonly stream?: boolean; + readonly generator?: boolean; readonly type?: string; readonly returns?: string; + readonly paramTypes?: readonly string[]; readonly arity: number; readonly readable: boolean; readonly writable: boolean; @@ -2699,8 +2703,13 @@ interface ClassMemberInfo { 
name: string; kind: ClassMemberKind; static: boolean; + private: boolean; + async: boolean; + stream: boolean; + generator: boolean; type?: string; returns?: string; + paramTypes: readonly string[]; arity: number; } @@ -2710,6 +2719,7 @@ interface InterfaceInfo { name: string; extendsNames: string[]; fields: InterfaceFieldInfo[]; + methods: InterfaceMethodInfo[]; } interface InterfaceFieldInfo { @@ -2718,6 +2728,16 @@ interface InterfaceFieldInfo { optional: boolean; } +interface InterfaceMethodInfo { + name: string; + returns?: string; + paramTypes: readonly string[]; + arity: number; + async: boolean; + stream: boolean; + generator: boolean; +} + interface ClassProtocolShapeContext { shapeByName: ReadonlyMap; diagnosticsByName: ReadonlyMap; @@ -2834,6 +2854,7 @@ function collectInterfaceInfos(root: IRNode, rootIndex = 0): InterfaceInfo[] { name, extendsNames: classReferenceNames(node.props?.extends, 'interface extends='), fields: collectInterfaceFields(node), + methods: collectInterfaceMethods(node), }); }); return out; @@ -2854,6 +2875,25 @@ function collectInterfaceFields(node: IRNode): InterfaceFieldInfo[] { return fields; } +function collectInterfaceMethods(node: IRNode): InterfaceMethodInfo[] { + const methods: InterfaceMethodInfo[] = []; + for (const child of node.children ?? []) { + if (child.type !== 'method') continue; + const name = stringProp(child, 'name'); + if (!name) continue; + methods.push({ + name, + ...(stringProp(child, 'returns') ? 
{ returns: stringProp(child, 'returns') } : {}), + paramTypes: memberParamTypes(child), + arity: memberArity(child), + async: isTrueFlag(child.props?.async), + stream: isTrueFlag(child.props?.stream), + generator: isTrueFlag(child.props?.generator), + }); + } + return methods; +} + function collectClassProtocolShapeContext(roots: readonly IRNode[]): ClassProtocolShapeContext { const facts = collectCoreShapeFacts(roots); const shapeByName = new Map(); @@ -2880,8 +2920,13 @@ function collectClassMembers(node: IRNode, owner: string): ClassMemberInfo[] { name, kind: child.type, static: isTrueFlag(child.props?.static), + private: isTrueFlag(child.props?.private), + async: isTrueFlag(child.props?.async), + stream: isTrueFlag(child.props?.stream), + generator: isTrueFlag(child.props?.generator), ...(stringProp(child, 'type') ? { type: stringProp(child, 'type') } : {}), ...(stringProp(child, 'returns') ? { returns: stringProp(child, 'returns') } : {}), + paramTypes: memberParamTypes(child), arity: memberArity(child), }); } @@ -3013,8 +3058,13 @@ function classMemberSemanticFact( name: member.name, kind: member.kind, static: member.static, + ...(member.private ? { private: true } : {}), + ...(member.async ? { async: true } : {}), + ...(member.stream ? { stream: true } : {}), + ...(member.generator ? { generator: true } : {}), ...(member.type ? { type: member.type } : {}), ...(member.returns ? { returns: member.returns } : {}), + ...(member.kind === 'method' && member.paramTypes.length > 0 ? 
{ paramTypes: member.paramTypes } : {}), arity: member.arity, readable: member.kind === 'field' || member.kind === 'getter' || member.kind === 'method', writable: member.kind === 'field' || member.kind === 'setter', @@ -3381,7 +3431,7 @@ function collectClassProtocolConformanceFacts( }); continue; } - const result = classInterfaceConformance(info, protocol, protocolShapeContext, classByName); + const result = classInterfaceConformance(info, protocol, protocolShapeContext, classByName, interfaceByName); facts.push({ className: info.name, interfaceName, @@ -3402,6 +3452,7 @@ function classInterfaceConformance( protocol: InterfaceInfo, protocolShapeContext: ClassProtocolShapeContext, classByName: ReadonlyMap, + interfaceByName: ReadonlyMap, ): ClassInterfaceConformanceResult { const shape = protocolShapeContext.shapeByName.get(protocol.name); const diagnostics = (protocolShapeContext.diagnosticsByName.get(protocol.name) ?? []).map( @@ -3431,6 +3482,7 @@ function classInterfaceConformance( const effectiveMembers = effectiveClassMemberFacts(info, classByName); const fields = shape?.fields ?? 
protocol.fields; const requiredFields = fields.filter((field) => !field.optional); + const requiredMethods = effectiveInterfaceMethods(protocol, interfaceByName); const missingMembers: string[] = []; const satisfiedMembers: string[] = []; for (const field of requiredFields) { @@ -3440,6 +3492,13 @@ function classInterfaceConformance( missingMembers.push(field.name); } } + for (const method of requiredMethods) { + if (classHasCallableInstanceMethod(effectiveMembers, method)) { + satisfiedMembers.push(method.name); + } else { + missingMembers.push(method.name); + } + } const missing = sortedUnique(missingMembers); const satisfied = sortedUnique(satisfiedMembers); return { @@ -3451,6 +3510,24 @@ function classInterfaceConformance( }; } +function effectiveInterfaceMethods( + protocol: InterfaceInfo, + interfaceByName: ReadonlyMap, + seen: ReadonlySet = new Set(), +): InterfaceMethodInfo[] { + if (seen.has(protocol.name)) return []; + const nextSeen = new Set(seen); + nextSeen.add(protocol.name); + const methods = new Map(); + for (const baseName of protocol.extendsNames) { + const base = interfaceByName.get(baseName); + if (!base) continue; + for (const method of effectiveInterfaceMethods(base, interfaceByName, nextSeen)) methods.set(method.name, method); + } + for (const method of protocol.methods) methods.set(method.name, method); + return [...methods.values()]; +} + function classHasReadableInstanceMember( members: readonly ClassSemanticMemberFact[], field: { readonly name: string; readonly type?: string }, @@ -3463,6 +3540,83 @@ function classHasReadableInstanceMember( }); } +function classHasCallableInstanceMethod( + members: readonly ClassSemanticMemberFact[], + method: InterfaceMethodInfo, +): boolean { + return members.some((member) => { + if (member.name !== method.name || member.static || member.private || member.kind !== 'method') return false; + if (member.arity !== method.arity) return false; + if (!methodParamTypesCompatible(member.paramTypes ?? 
[], method.paramTypes)) return false; + if ((member.async === true) !== method.async) return false; + if ((member.stream === true) !== method.stream) return false; + if ((member.generator === true) !== method.generator) return false; + return methodReturnTypesCompatible( + member.returns, + { + async: member.async === true, + stream: member.stream === true, + generator: member.generator === true, + }, + method.returns, + method, + ); + }); +} + +function methodParamTypesCompatible(actual: readonly string[], expected: readonly string[]): boolean { + return expected.every((type, index) => !type || normalizeProtocolType(actual[index]) === normalizeProtocolType(type)); +} + +function normalizeProtocolType(type: string | undefined): string { + return compactProtocolTypeWhitespace(type); +} + +function compactProtocolTypeWhitespace(type: string | undefined): string { + let out = ''; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < (type ?? '').length; index += 1) { + const ch = (type ?? '')[index]; + if (quote !== null) { + out += ch; + if (ch === '\\' && index + 1 < (type ?? '').length) out += (type ?? 
'')[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + out += ch; + continue; + } + if (!/\s/.test(ch)) out += ch; + } + return out; +} + +function methodReturnTypesCompatible( + actual: string | undefined, + actualFlags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, + expected: string | undefined, + expectedFlags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, +): boolean { + return normalizeMethodReturnType(actual, actualFlags) === normalizeMethodReturnType(expected, expectedFlags); +} + +function normalizeMethodReturnType( + returns: string | undefined, + flags: { readonly async: boolean; readonly stream: boolean; readonly generator: boolean }, +): string { + if (flags.stream) { + if (returns?.startsWith('AsyncGenerator<')) return returns; + return `AsyncGenerator<${returns || 'unknown'}>`; + } + if (flags.generator) { + if (returns?.startsWith('Generator<') || returns?.startsWith('AsyncGenerator<')) return returns; + return `${flags.async ? 'AsyncGenerator' : 'Generator'}<${returns || 'unknown'}>`; + } + return !returns || returns === 'void' ? 'void' : returns; +} + function semanticLocation(node: IRNode): ClassSemanticLocation | undefined { return node.loc ? 
{ line: node.loc.line, col: node.loc.col } : undefined; } @@ -3518,7 +3672,7 @@ function validateClassImplements( } continue; } - const conformance = classInterfaceConformance(info, protocol, protocolShapeContext, classByName); + const conformance = classInterfaceConformance(info, protocol, protocolShapeContext, classByName, interfaceByName); if (conformance.status === 'invalid-interface') { violations.push({ rule: 'class-implements-invalid-interface', @@ -4154,12 +4308,131 @@ function memberArity(node: IRNode): number { const params = node.props?.params; if (typeof params !== 'string' || !params.trim()) return 0; try { - return splitPortableExpressionList(params, `${node.type} params=`).length; + return splitSemanticParamList(params, `${node.type} params=`).length; } catch { return 0; } } +function memberParamTypes(node: IRNode): string[] { + const childParams = node.children?.filter((child) => child.type === 'param') ?? []; + if (childParams.length > 0) { + return childParams.map((param) => stringProp(param, 'type') ?? ''); + } + const params = node.props?.params; + if (typeof params !== 'string' || !params.trim()) return []; + try { + return splitSemanticParamList(params, `${node.type} params=`).map((part) => { + const typeIndex = part.indexOf(':'); + if (typeIndex < 0) return ''; + const typeAndMaybeDefault = part.slice(typeIndex + 1); + const defaultIndex = paramDefaultSeparatorIndex(typeAndMaybeDefault); + return (defaultIndex >= 0 ? 
typeAndMaybeDefault.slice(0, defaultIndex) : typeAndMaybeDefault).trim(); + }); + } catch { + return []; + } +} + +function paramDefaultSeparatorIndex(value: string): number { + let depth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < value.length; index += 1) { + const ch = value[index]; + if (quote !== null) { + if (ch === '\\' && index + 1 < value.length) index += 1; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + continue; + } + if (ch === '<' || ch === '(' || ch === '{' || ch === '[') depth += 1; + else if ((ch === '>' || ch === ')' || ch === '}' || ch === ']') && depth > 0) depth -= 1; + else if (ch === '=' && depth === 0) { + if ( + value[index + 1] === '>' || + value[index + 1] === '=' || + value[index - 1] === '=' || + value[index - 1] === '<' || + value[index - 1] === '>' || + value[index - 1] === '!' + ) { + continue; + } + return index; + } + } + return -1; +} + +function splitSemanticParamList(raw: string, propName: string): string[] { + const out: string[] = []; + let current = ''; + let depth = 0; + let angleDepth = 0; + let inDefault = false; + let quote: '"' | "'" | '`' | null = null; + for (let index = 0; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + current += ch; + if (ch === '\\' && index + 1 < raw.length) current += raw[++index]; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + current += ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if (ch === ')' || ch === ']' || ch === '}') depth -= 1; + else if (ch === '=' && depth === 0 && angleDepth === 0 && raw[index + 1] !== '>') inDefault = true; + else if (ch === '<' && (!inDefault || angleClosesBeforeNextTopLevelComma(raw, index + 1))) angleDepth += 1; + else if (!inDefault && ch === '>' && angleDepth > 0) angleDepth -= 1; + else if (inDefault && ch === 
'>' && angleDepth > 0) angleDepth -= 1; + if (depth < 0 || angleDepth < 0) throw new Error(`${propName} has unbalanced delimiters.`); + if (ch === ',' && depth === 0 && angleDepth === 0) { + const part = current.trim(); + if (part.length === 0) throw new Error(`${propName} contains an empty expression.`); + out.push(part); + current = ''; + inDefault = false; + continue; + } + current += ch; + } + if (quote !== null || depth !== 0 || angleDepth !== 0) throw new Error(`${propName} has unbalanced delimiters.`); + const tail = current.trim(); + if (tail.length === 0 && raw.trim().endsWith(',')) throw new Error(`${propName} contains an empty expression.`); + if (tail.length > 0) out.push(tail); + return out; +} + +function angleClosesBeforeNextTopLevelComma(raw: string, start: number): boolean { + let depth = 0; + let quote: '"' | "'" | '`' | null = null; + for (let index = start; index < raw.length; index += 1) { + const ch = raw[index]; + if (quote !== null) { + if (ch === '\\' && index + 1 < raw.length) index += 1; + else if (ch === quote) quote = null; + continue; + } + if (ch === '"' || ch === "'" || ch === '`') { + quote = ch; + continue; + } + if (ch === '(' || ch === '[' || ch === '{') depth += 1; + else if ((ch === ')' || ch === ']' || ch === '}') && depth > 0) depth -= 1; + else if (ch === '>' && depth === 0) return true; + else if (ch === ',' && depth === 0) return false; + } + return false; +} + function nodeBodyUsesSuper(node: IRNode): boolean { return nodeBodyExpressions(node).some((expr) => { try { diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 31e5f544..74c8dcaf 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -74,6 +74,195 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-implements-missing-member'); }); + test('accepts class implements when instance methods satisfy interface 
methods', () => { + const rules = rulesFor( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Base', + ' method name=run params="input:string" returns=number', + ' handler lang=kern', + ' return value="input.length"', + 'class name=Job extends=Base implements=Runnable', + ].join('\n'), + ); + + expect(rules).not.toContain('class-implements-missing-member'); + }); + + test('reports missing and incompatible interface methods for class implements', () => { + const violations = violationsFor( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + ' method name=stop returns=void', + 'class name=Job implements=Runnable', + ' method name=run returns=number', + ' handler lang=kern', + ' return value="1"', + ' getter name=stop returns=void', + ' handler lang=kern', + ' return value="undefined"', + ].join('\n'), + ); + + const violation = violations.find((candidate) => candidate.rule === 'class-implements-missing-member'); + expect(violation?.message).toContain('run'); + expect(violation?.message).toContain('stop'); + }); + + test('checks interface method parameter types and accepts implicit void returns', () => { + const acceptedRules = rulesFor( + [ + 'interface name=Lifecycle', + ' method name=close returns=void', + 'class name=Socket implements=Lifecycle', + ' method name=close', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + expect(acceptedRules).not.toContain('class-implements-missing-member'); + + const rejectedRules = rulesFor( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' method name=run params="input:number" returns=number', + ' handler lang=kern', + ' return value="input"', + ].join('\n'), + ); + expect(rejectedRules).toContain('class-implements-missing-member'); + }); + + test('requires stream interface methods to be implemented as stream methods', () => 
{ + const acceptedRules = rulesFor( + [ + 'interface name=Events', + ' method name=read returns=Event stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event stream=true', + ' handler lang=kern', + ' return value="undefined"', + ].join('\n'), + ); + expect(acceptedRules).not.toContain('class-implements-missing-member'); + + const rejectedRules = rulesFor( + [ + 'interface name=Events', + ' method name=read returns=Event stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event', + ' handler lang=kern', + ' return value="undefined"', + ].join('\n'), + ); + expect(rejectedRules).toContain('class-implements-missing-member'); + }); + + test('normalizes streamed method returns and generic parameter types for class implements', () => { + const streamedRules = rulesFor( + [ + 'interface name=Events', + ' method name=read returns="AsyncGenerator" stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event stream=true', + ' handler lang=kern', + ' return value="undefined"', + ].join('\n'), + ); + expect(streamedRules).not.toContain('class-implements-missing-member'); + + const genericParamRules = rulesFor( + [ + 'interface name=Sink', + ' method name=write params="item:Record" returns=void', + 'class name=BadSink implements=Sink', + ' method name=write params="item:Record" returns=void', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + expect(genericParamRules).toContain('class-implements-missing-member'); + + const literalWhitespaceRules = rulesFor( + [ + 'interface name=Sink', + ' method name=write params="item:\'a b\'" returns=void', + 'class name=BadSink implements=Sink', + ' method name=write params="item:\'ab\'" returns=void', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + expect(literalWhitespaceRules).toContain('class-implements-missing-member'); + }); + + test('rejects private protocol methods and tolerates 
whitespace/default comparison params', () => { + const privateRules = rulesFor( + [ + 'interface name=Runnable', + ' method name=run returns=number', + 'class name=Job implements=Runnable', + ' method name=run private=true returns=number', + ' handler lang=kern', + ' return value="1"', + ].join('\n'), + ); + expect(privateRules).toContain('class-implements-missing-member'); + + const whitespaceRules = rulesFor( + [ + 'interface name=Sink', + ' method name=write params="item:Record" returns=void', + 'class name=GoodSink implements=Sink', + ' method name=write params="item:Record" returns=void', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + expect(whitespaceRules).not.toContain('class-implements-missing-member'); + + const defaultComparisonRules = rulesFor( + [ + 'interface name=Calculator', + ' method name=calc params="value:number=1 < 2,unit:string" returns=number', + 'class name=DefaultCalc implements=Calculator', + ' method name=calc params="value:number=1 < 2,unit:string" returns=number', + ' handler lang=kern', + ' return value="value"', + ].join('\n'), + ); + expect(defaultComparisonRules).not.toContain('class-implements-missing-member'); + + const defaultEqualityRules = rulesFor( + [ + 'interface name=Comparator', + ' method name=cmp params="value:number=a==b,unit:string" returns=number', + 'class name=DefaultCmp implements=Comparator', + ' method name=cmp params="value:number=a==b,unit:string" returns=number', + ' handler lang=kern', + ' return value="value"', + ].join('\n'), + ); + expect(defaultEqualityRules).not.toContain('class-implements-missing-member'); + + const genericDefaultRules = rulesFor( + [ + 'interface name=Formatter', + ' method name=format params="value:Map=make>(),unit:string" returns=number', + 'class name=DefaultFormatter implements=Formatter', + ' method name=format params="value:Map=make>(),unit:string" returns=number', + ' handler lang=kern', + ' return value="1"', + ].join('\n'), + ); + 
expect(genericDefaultRules).not.toContain('class-implements-missing-member'); + }); + test('reports unknown class implements targets unless imported', () => { const localRules = rulesFor('class name=User implements=MissingProtocol'); expect(localRules).toContain('class-implements-unknown'); @@ -90,6 +279,15 @@ describe('semantic-validator — class object model', () => { expect(rules).toContain('class-implements-invalid-reference-list'); }); + test('parses generic implements references with default types containing commas', () => { + const rules = rulesFor( + ['interface name=Protocol', 'class name=User implements="Protocol>"'].join('\n'), + ); + + expect(rules).not.toContain('class-implements-invalid-reference-list'); + expect(rules).not.toContain('class-implements-unknown'); + }); + test('reports missing required readable instance members for class implements', () => { const violations = violationsFor( [ @@ -152,6 +350,24 @@ describe('semantic-validator — class object model', () => { expect(optionalityConflictRules).toContain('class-implements-invalid-interface'); }); + test('reports cyclic method protocols as invalid interfaces', () => { + const rules = rulesFor( + [ + 'interface name=A extends=B', + ' method name=a returns=void', + 'interface name=B extends=A', + ' method name=b returns=void', + 'class name=CycleImpl implements=A', + ' method name=a returns=void', + ' handler lang=kern', + ' do value="undefined"', + ].join('\n'), + ); + + expect(rules).toContain('class-implements-invalid-interface'); + expect(rules).not.toContain('class-implements-missing-member'); + }); + test('reports interface indexers as unsupported class implements protocols in v1', () => { const rules = rulesFor( [ diff --git a/packages/core/tests/codegen-core.test.ts b/packages/core/tests/codegen-core.test.ts index 3cf70c23..b58771d1 100644 --- a/packages/core/tests/codegen-core.test.ts +++ b/packages/core/tests/codegen-core.test.ts @@ -562,6 +562,39 @@ describe('Core Language Codegen', () 
=> { }); }); + describe('interface', () => { + it('generates method signatures', () => { + const code = gen( + [ + 'interface name=Formatter', + ' field name=id type=string', + ' method name=format params="value:string,count:number" returns=string', + ].join('\n'), + ); + + expect(code).toContain('export interface Formatter {'); + expect(code).toContain('id: string;'); + expect(code).toContain('format(value: string, count: number): string;'); + }); + + it('strips defaults from interface method signatures', () => { + const code = gen( + ['interface name=Formatter', ' method name=format params="value:string,count:number=1" returns=string'].join( + '\n', + ), + ); + + expect(code).toContain('format(value: string, count: number): string;'); + expect(code).not.toContain('count: number = 1'); + }); + + it('generates streamed interface method signatures', () => { + const code = gen('interface name=Events\n method name=read returns=Event stream=true'); + + expect(code).toContain('read(): AsyncGenerator;'); + }); + }); + // ── Gap 1: Service (class) ── describe('service', () => { diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 10d7e6bd..fb30cd58 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -467,6 +467,275 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); }); + test('validates implemented interface methods without invoking them', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' field name=count type=number value={{ 0 }}', + ' method name=run params="input:string" returns=number', + ' handler', + ' assign target="this.count" value="this.count + 1"', + ' return value="input.length"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + 
+ expect(toHostValue(evalCoreExpression('new Job().count', env))).toBe(0); + expect(toHostValue(evalCoreExpression('new Job().run("abc")', env))).toBe(3); + }); + + test('rejects missing implemented interface methods', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' field name=id type=string value="j1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Job()', env)).toThrow('missing or incompatible method(s): run'); + }); + + test('rejects incompatible implemented interface method signatures', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' method name=run returns=number', + ' handler', + ' return value="1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Job()', env)).toThrow('missing or incompatible method(s): run'); + }); + + test('rejects implemented interface methods with incompatible parameter types', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'class name=Job implements=Runnable', + ' method name=run params="input:number" returns=number', + ' handler', + ' return value="input"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Job()', env)).toThrow('missing or incompatible method(s): run'); + }); + + test('accepts implicit void methods for explicit void interface methods', () => { + const root = parse( + [ + 'interface name=Lifecycle', + ' method name=close returns=void', + 'class name=Socket implements=Lifecycle', + ' method name=close', + ' handler', + ' do value="undefined"', + ].join('\n'), + ); + const env 
= createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Socket().close()', env))).toBeUndefined(); + }); + + test('rejects non-stream methods for stream interface methods', () => { + const root = parse( + [ + 'interface name=Events', + ' method name=read returns=Event stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event', + ' handler', + ' return value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Reader()', env)).toThrow('missing or incompatible method(s): read'); + }); + + test('normalizes streamed method returns for implemented interface methods', () => { + const root = parse( + [ + 'interface name=Events', + ' method name=read returns="AsyncGenerator" stream=true', + 'class name=Reader implements=Events', + ' method name=read returns=Event stream=true', + ' handler', + ' return value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Reader()', env)).not.toThrow(); + }); + + test('rejects generic parameter type mismatches in implemented interface methods', () => { + const root = parse( + [ + 'interface name=Sink', + ' method name=write params="item:Record" returns=void', + 'class name=BadSink implements=Sink', + ' method name=write params="item:Record" returns=void', + ' handler', + ' do value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new BadSink()', env)).toThrow('missing or incompatible method(s): write'); + }); + + test('preserves quoted whitespace in implemented interface method parameter types', () => { + const root = parse( + [ + 'interface name=Sink', + ' method name=write params="item:\'a b\'" returns=void', + 'class name=BadSink implements=Sink', + ' method name=write params="item:\'ab\'" 
returns=void', + ' handler', + ' do value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new BadSink()', env)).toThrow('missing or incompatible method(s): write'); + }); + + test('rejects private methods for implemented interface methods', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run returns=number', + 'class name=Job implements=Runnable', + ' method name=run private=true returns=number', + ' handler', + ' return value="1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new Job()', env)).toThrow('missing or incompatible method(s): run'); + }); + + test('normalizes whitespace in implemented interface method parameter types', () => { + const root = parse( + [ + 'interface name=Sink', + ' method name=write params="item:Record" returns=void', + 'class name=GoodSink implements=Sink', + ' method name=write params="item:Record" returns=void', + ' handler', + ' do value="undefined"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new GoodSink()', env)).not.toThrow(); + }); + + test('parses default comparison expressions in implemented interface method params', () => { + const root = parse( + [ + 'interface name=Calculator', + ' method name=calc params="value:number=1 < 2,unit:string=\'m\'" returns=number', + 'class name=DefaultCalc implements=Calculator', + ' method name=calc params="value:number=1 < 2,unit:string=\'m\'" returns=number', + ' handler', + ' return value="value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new DefaultCalc().calc()', env))).toBe(true); + }); + + test('parses default equality expressions in implemented interface method params', () => { + const root = parse( + [ + 'interface 
name=Comparator', + ' method name=cmp params="value:number=1==1,unit:string=\'m\'" returns=number', + 'class name=DefaultCmp implements=Comparator', + ' method name=cmp params="value:number=1==1,unit:string=\'m\'" returns=number', + ' handler', + ' return value="value"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new DefaultCmp().cmp()', env))).toBe(true); + }); + + test('parses generic default expressions in implemented interface method params', () => { + const root = parse( + [ + 'interface name=Formatter', + ' method name=format params="value:Map=make>(),unit:string" returns=number', + 'class name=DefaultFormatter implements=Formatter', + ' method name=format params="value:Map=make>(),unit:string" returns=number', + ' handler', + ' return value="1"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new DefaultFormatter()', env)).not.toThrow(); + }); + + test('enforces inherited interface methods for implemented protocols', () => { + const root = parse( + [ + 'interface name=Runnable', + ' method name=run params="input:string" returns=number', + 'interface name=NamedRunnable extends=Runnable', + ' field name=name type=string', + 'class name=Job implements=NamedRunnable', + ' field name=name type=string value="job"', + ' method name=run params="input:string" returns=number', + ' handler', + ' return value="input.length"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Job().run("abcd")', env))).toBe(4); + }); + + test('parses generic implements references with default types containing commas', () => { + const root = parse( + ['interface name=Protocol', 'class name=User implements="Protocol>"'].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(() => evalCoreExpression('new 
User()', env)).not.toThrow(); + }); + test('enforces base class implemented protocols on derived instances', () => { const root = parse( [ diff --git a/packages/core/tests/schema-validation.test.ts b/packages/core/tests/schema-validation.test.ts index b259a609..40012e58 100644 --- a/packages/core/tests/schema-validation.test.ts +++ b/packages/core/tests/schema-validation.test.ts @@ -437,8 +437,8 @@ describe('Schema Validation', () => { describe('allowed children', () => { it('flags wrong child type in interface', () => { - const v = validate(['interface name=User', ' method name=foo'].join('\n')); - expect(v.some((v) => v.message.includes("does not allow child type 'method'"))).toBe(true); + const v = validate(['interface name=User', ' const name=foo value=1'].join('\n')); + expect(v.some((v) => v.message.includes("does not allow child type 'const'"))).toBe(true); }); it('allows field in interface', () => { @@ -446,6 +446,11 @@ describe('Schema Validation', () => { expect(v).toHaveLength(0); }); + it('allows method in interface', () => { + const v = validate(['interface name=User', ' method name=displayName returns=string'].join('\n')); + expect(v).toHaveLength(0); + }); + it('allows handler as universal child', () => { // handler is a universal child allowed everywhere const v = validate(['fn name=foo', ' handler <<>>'].join('\n')); diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index fc8a1649..7f29874b 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -525,6 +525,7 @@ describe('KERN semantic substrate', () => { 'import from="./protocols" names=ExternalProtocol', 'interface name=Entity', ' field name=id type=string', + ' method name=load params="id:string" returns=string', 'interface name=Named extends=Entity', ' field name=name type=string', 'interface name=BrokenProtocol extends=MissingBaseProtocol', @@ -537,6 +538,9 @@ describe('KERN 
semantic substrate', () => { ' getter name=name returns=string', ' handler lang=kern', ' return value="this.id"', + ' method name=load params="id:string" returns=string', + ' handler lang=kern', + ' return value="id"', 'class name=Broken implements=Named', ' field name=id type=string', 'class name=Invalid implements=BrokenProtocol', @@ -560,7 +564,7 @@ describe('KERN semantic substrate', () => { className: 'User', interfaceName: 'Named', status: 'satisfied', - satisfiedMembers: ['id', 'name'], + satisfiedMembers: ['id', 'load', 'name'], missingMembers: [], }), expect.objectContaining({ @@ -572,7 +576,7 @@ describe('KERN semantic substrate', () => { className: 'Broken', interfaceName: 'Named', status: 'missing-members', - missingMembers: ['name'], + missingMembers: ['load', 'name'], }), expect.objectContaining({ className: 'Invalid', From f02dd28f947ab2b6a30a88e031c0eb0d7ee01752 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 07:33:18 +0200 Subject: [PATCH 37/63] feat(core): enforce static interface member protocols --- packages/core/src/core-runtime/index.ts | 184 ++++++++++++++++-- .../core/src/core-runtime/shape-validator.ts | 4 +- packages/core/src/semantic-validator.ts | 105 ++++++++-- packages/core/tests/class-semantics.test.ts | 64 ++++++ packages/core/tests/core-runtime.test.ts | 130 +++++++++++-- .../core/tests/semantic-substrate.test.ts | 17 ++ 6 files changed, 462 insertions(+), 42 deletions(-) diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 44d921cd..8344936c 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -320,6 +320,7 @@ function executeNode(node: IRNode, env: CoreRuntimeEnv): CoreCompletion { const klass = makeClass(node, env); env.define(klass.name, klass); initializeClassStaticFields(klass); + validateImplementedClassStaticProtocols(klass); return { kind: 'normal', value: kUndefined() }; } case 'assign': @@ -811,6 +812,57 @@ function 
validateImplementedClassProtocols(instance: KernInstanceValue, klass: K } } +function validateImplementedClassStaticProtocols(klass: KernClassValue): void { + const root = klass.runtimeRootContext ?? klass.env.getRuntimeRootContext(); + if (!root) return; + const facts = collectCoreShapeFacts(root); + const shapeByName = new Map(facts.interfaces.map((shape) => [shape.name, shape])); + const importedProtocolNames = runtimeImportedProtocolNames(root); + for (const interfaceName of runtimeClassReferenceNames(klass.node.props?.implements)) { + const shape = shapeByName.get(interfaceName); + if (!shape) { + if (importedProtocolNames.has(interfaceName)) continue; + throw new Error(`KERN core runtime class '${klass.name}' implements unknown interface '${interfaceName}'.`); + } + if (!shape.validatorAvailable || shape.indexers.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' implements interface '${interfaceName}' that is not executable as a class protocol in v1.`, + ); + } + const staticFields = runtimeInterfaceProtocolFields(root, interfaceName, true); + if (staticFields.length > 0) { + const missingFields = staticFields.filter((field) => !classHasRuntimeProtocolField(klass, field)); + if (missingFields.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}': missing or incompatible static member(s): ${missingFields + .map((field) => field.name) + .join(', ')}.`, + ); + } + const fieldBackedFields = staticFields.filter( + (field) => findReadableClassShapeMember(klass, field.name, true)?.kind === 'field', + ); + const projection = classStaticProtocolProjection(klass, fieldBackedFields); + const result = validateProjectedProtocolFields(projection, interfaceName, fieldBackedFields, root); + if (!result.passed) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}' static field contract:\n${result.diagnostics + 
.map((diagnostic) => diagnostic.message) + .join('\n')}`, + ); + } + } + const missingMethods = runtimeInterfaceProtocolMethods(root, interfaceName, true) + .filter((method) => !classHasRuntimeProtocolMethod(klass, method, true)) + .map((method) => method.name); + if (missingMethods.length > 0) { + throw new Error( + `KERN core runtime class '${klass.name}' violates implemented interface '${interfaceName}': missing or incompatible static member(s): ${missingMethods.join(', ')}.`, + ); + } + } +} + function classProtocolProjection(instance: KernInstanceValue, fieldNames: readonly string[]): KernValue { const entries = createRecordEntries(); for (const fieldName of fieldNames) { @@ -825,6 +877,49 @@ function classProtocolProjection(instance: KernInstanceValue, fieldNames: readon return brandValue({ kind: 'record', entries }); } +function classStaticProtocolProjection( + klass: KernClassValue, + fields: readonly RuntimeInterfaceProtocolField[], +): KernValue { + const entries = createRecordEntries(); + for (const field of fields) { + const member = findReadableClassShapeMember(klass, field.name, true); + if (member?.kind !== 'field') continue; + entries[field.name] = evalClassMember(klass, field.name); + } + return brandValue({ kind: 'record', entries }); +} + +function validateProjectedProtocolFields( + projection: KernValue, + interfaceName: string, + fields: readonly RuntimeInterfaceProtocolField[], + rootOrNodes: IRNode | readonly IRNode[], +): ReturnType { + const syntheticName = `__KernStaticProtocol_${interfaceName}`; + const syntheticInterface: IRNode = { + type: 'interface', + props: { name: syntheticName }, + children: fields.map((field) => ({ + type: 'field', + props: { + name: field.name, + optional: field.optional, + ...(field.type ? { type: field.type } : {}), + }, + })), + }; + const roots = [...(isIRNodeArray(rootOrNodes) ? 
rootOrNodes : [rootOrNodes]), syntheticInterface]; + return validateCoreShape(projection, syntheticName, roots); +} + +interface RuntimeInterfaceProtocolField { + readonly name: string; + readonly type?: string; + readonly optional: boolean; + readonly static: boolean; +} + interface RuntimeInterfaceProtocolMethod { readonly name: string; readonly arity: number; @@ -832,23 +927,50 @@ interface RuntimeInterfaceProtocolMethod { readonly async: boolean; readonly stream: boolean; readonly generator: boolean; + readonly static: boolean; readonly returns?: string; } -function runtimeInterfaceProtocolMethods( +function runtimeInterfaceProtocolFields( rootOrNodes: IRNode | readonly IRNode[], interfaceName: string, -): RuntimeInterfaceProtocolMethod[] { - const interfaceByName = new Map(); - const visit = (node: IRNode): void => { - if (node.type === 'interface') { - const name = runtimeStringProp(node.props?.name); - if (name && !interfaceByName.has(name)) interfaceByName.set(name, node); + staticOnly: boolean, +): RuntimeInterfaceProtocolField[] { + const interfaceByName = runtimeInterfaceNodesByName(rootOrNodes); + const resolve = (name: string, seen: ReadonlySet): RuntimeInterfaceProtocolField[] => { + if (seen.has(name)) return []; + const node = interfaceByName.get(name); + if (!node) return []; + const nextSeen = new Set(seen); + nextSeen.add(name); + const fields = new Map(); + for (const baseName of runtimeClassReferenceNames(node.props?.extends)) { + for (const field of resolve(baseName, nextSeen)) fields.set(runtimeInterfaceMemberShapeKey(field), field); } - for (const child of node.children ?? []) visit(child); + for (const child of node.children ?? 
[]) { + if (child.type !== 'field') continue; + const name = runtimeStringProp(child.props?.name); + if (!name) continue; + const isStatic = runtimeBooleanProp(child.props?.static); + if (isStatic !== staticOnly) continue; + fields.set(runtimeInterfaceMemberShapeKey({ name, static: isStatic }), { + name, + optional: runtimeBooleanProp(child.props?.optional), + static: isStatic, + ...(runtimeStringProp(child.props?.type) ? { type: runtimeStringProp(child.props?.type) } : {}), + }); + } + return [...fields.values()]; }; - for (const node of isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]) visit(node); + return resolve(interfaceName, new Set()); +} +function runtimeInterfaceProtocolMethods( + rootOrNodes: IRNode | readonly IRNode[], + interfaceName: string, + staticOnly = false, +): RuntimeInterfaceProtocolMethod[] { + const interfaceByName = runtimeInterfaceNodesByName(rootOrNodes); const resolve = (name: string, seen: ReadonlySet): RuntimeInterfaceProtocolMethod[] => { if (seen.has(name)) return []; const node = interfaceByName.get(name); @@ -857,19 +979,22 @@ function runtimeInterfaceProtocolMethods( nextSeen.add(name); const methods = new Map(); for (const baseName of runtimeClassReferenceNames(node.props?.extends)) { - for (const method of resolve(baseName, nextSeen)) methods.set(method.name, method); + for (const method of resolve(baseName, nextSeen)) methods.set(runtimeInterfaceMemberShapeKey(method), method); } for (const child of node.children ?? []) { if (child.type !== 'method') continue; const name = runtimeStringProp(child.props?.name); if (!name) continue; - methods.set(name, { + const isStatic = runtimeBooleanProp(child.props?.static); + if (isStatic !== staticOnly) continue; + methods.set(runtimeInterfaceMemberShapeKey({ name, static: isStatic }), { name, arity: runtimeParams(child).length, paramTypes: runtimeParams(child).map((param) => param.type ?? 
''), async: runtimeBooleanProp(child.props?.async), stream: runtimeBooleanProp(child.props?.stream), generator: runtimeBooleanProp(child.props?.generator), + static: isStatic, ...(runtimeStringProp(child.props?.returns) ? { returns: runtimeStringProp(child.props?.returns) } : {}), }); } @@ -879,8 +1004,29 @@ function runtimeInterfaceProtocolMethods( return resolve(interfaceName, new Set()); } -function classHasRuntimeProtocolMethod(klass: KernClassValue, method: RuntimeInterfaceProtocolMethod): boolean { - const member = findReadableClassShapeMember(klass, method.name, false); +function runtimeInterfaceNodesByName(rootOrNodes: IRNode | readonly IRNode[]): Map { + const interfaceByName = new Map(); + const visit = (node: IRNode): void => { + if (node.type === 'interface') { + const name = runtimeStringProp(node.props?.name); + if (name && !interfaceByName.has(name)) interfaceByName.set(name, node); + } + for (const child of node.children ?? []) visit(child); + }; + for (const node of isIRNodeArray(rootOrNodes) ? rootOrNodes : [rootOrNodes]) visit(node); + return interfaceByName; +} + +function runtimeInterfaceMemberShapeKey(member: { readonly name: string; readonly static: boolean }): string { + return `${member.static ? 
'static' : 'instance'}:${member.name}`; +} + +function classHasRuntimeProtocolMethod( + klass: KernClassValue, + method: RuntimeInterfaceProtocolMethod, + staticOnly = false, +): boolean { + const member = findReadableClassShapeMember(klass, method.name, staticOnly); if (member?.kind !== 'method') return false; if (runtimeBooleanProp(member.node.props?.private)) return false; const params = runtimeParams(member.node); @@ -908,6 +1054,18 @@ function classHasRuntimeProtocolMethod(klass: KernClassValue, method: RuntimeInt ); } +function classHasRuntimeProtocolField(klass: KernClassValue, field: RuntimeInterfaceProtocolField): boolean { + const member = findReadableClassShapeMember(klass, field.name, true); + if (!member) return field.optional; + if (member.kind !== 'field' && member.kind !== 'getter') return false; + if (runtimeBooleanProp(member.node.props?.private)) return false; + const actualType = + member.kind === 'getter' + ? runtimeStringProp(member.node.props?.returns) + : runtimeStringProp(member.node.props?.type); + return !field.type || normalizeRuntimeProtocolType(actualType) === normalizeRuntimeProtocolType(field.type); +} + function runtimeProtocolParamTypesCompatible(actual: readonly string[], expected: readonly string[]): boolean { return expected.every( (type, index) => !type || normalizeRuntimeProtocolType(actual[index]) === normalizeRuntimeProtocolType(type), diff --git a/packages/core/src/core-runtime/shape-validator.ts b/packages/core/src/core-runtime/shape-validator.ts index 2d5618e8..88d35d35 100644 --- a/packages/core/src/core-runtime/shape-validator.ts +++ b/packages/core/src/core-runtime/shape-validator.ts @@ -204,7 +204,9 @@ function collectShapeRegistry(rootOrNodes: IRNode | readonly IRNode[]): ShapeReg const shape: ShapeInterface = { name, extendsNames: splitExtends(node.props?.extends), - fields: (node.children ?? []).filter((child) => child.type === 'field').map((field) => shapeField(field)), + fields: (node.children ?? 
[]) + .filter((child) => child.type === 'field' && !trueFlag(child.props?.static)) + .map((field) => shapeField(field)), indexers: (node.children ?? []) .filter((child) => child.type === 'indexer') .map((indexer) => shapeIndexer(indexer)), diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 27b523ee..bd45943d 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -141,6 +141,8 @@ export interface ClassSemanticProtocolConformanceFact { readonly status: ClassSemanticProtocolStatus; readonly missingMembers: readonly string[]; readonly satisfiedMembers: readonly string[]; + readonly missingStaticMembers: readonly string[]; + readonly satisfiedStaticMembers: readonly string[]; readonly diagnostics?: readonly string[]; readonly unsupportedReasons?: readonly string[]; readonly loc?: ClassSemanticLocation; @@ -2726,6 +2728,7 @@ interface InterfaceFieldInfo { name: string; type?: string; optional: boolean; + static: boolean; } interface InterfaceMethodInfo { @@ -2736,6 +2739,7 @@ interface InterfaceMethodInfo { async: boolean; stream: boolean; generator: boolean; + static: boolean; } interface ClassProtocolShapeContext { @@ -2747,6 +2751,8 @@ interface ClassInterfaceConformanceResult { status: Exclude; missingMembers: string[]; satisfiedMembers: string[]; + missingStaticMembers: string[]; + satisfiedStaticMembers: string[]; diagnostics: string[]; unsupportedReasons: string[]; } @@ -2870,6 +2876,7 @@ function collectInterfaceFields(node: IRNode): InterfaceFieldInfo[] { name, ...(stringProp(child, 'type') ? 
{ type: stringProp(child, 'type') } : {}), optional: isTrueFlag(child.props?.optional), + static: isTrueFlag(child.props?.static), }); } return fields; @@ -2889,6 +2896,7 @@ function collectInterfaceMethods(node: IRNode): InterfaceMethodInfo[] { async: isTrueFlag(child.props?.async), stream: isTrueFlag(child.props?.stream), generator: isTrueFlag(child.props?.generator), + static: isTrueFlag(child.props?.static), }); } return methods; @@ -3427,6 +3435,8 @@ function collectClassProtocolConformanceFacts( status: visible ? 'external' : 'unknown-interface', missingMembers: [], satisfiedMembers: [], + missingStaticMembers: [], + satisfiedStaticMembers: [], ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), }); continue; @@ -3438,6 +3448,8 @@ function collectClassProtocolConformanceFacts( status: result.status, missingMembers: result.missingMembers, satisfiedMembers: result.satisfiedMembers, + missingStaticMembers: result.missingStaticMembers, + satisfiedStaticMembers: result.satisfiedStaticMembers, ...(result.diagnostics.length > 0 ? { diagnostics: result.diagnostics } : {}), ...(result.unsupportedReasons.length > 0 ? { unsupportedReasons: result.unsupportedReasons } : {}), ...(info.node.loc ? { loc: semanticLocation(info.node) } : {}), @@ -3463,6 +3475,8 @@ function classInterfaceConformance( status: 'invalid-interface', missingMembers: [], satisfiedMembers: [], + missingStaticMembers: [], + satisfiedStaticMembers: [], diagnostics: sortedUnique(diagnostics), unsupportedReasons: [], }; @@ -3472,6 +3486,8 @@ function classInterfaceConformance( status: 'unsupported-protocol', missingMembers: [], satisfiedMembers: [], + missingStaticMembers: [], + satisfiedStaticMembers: [], diagnostics: [], unsupportedReasons: sortedUnique([ ...shape.unsupportedReasons, @@ -3480,36 +3496,79 @@ function classInterfaceConformance( }; } const effectiveMembers = effectiveClassMemberFacts(info, classByName); - const fields = shape?.fields ?? 
protocol.fields; - const requiredFields = fields.filter((field) => !field.optional); + const fields = effectiveInterfaceFields(protocol, interfaceByName); + const requiredFields = fields.filter((field) => !field.optional && !field.static); + const requiredStaticFields = fields.filter((field) => !field.optional && field.static); const requiredMethods = effectiveInterfaceMethods(protocol, interfaceByName); + const requiredInstanceMethods = requiredMethods.filter((method) => !method.static); + const requiredStaticMethods = requiredMethods.filter((method) => method.static); const missingMembers: string[] = []; const satisfiedMembers: string[] = []; + const missingStaticMembers: string[] = []; + const satisfiedStaticMembers: string[] = []; for (const field of requiredFields) { - if (classHasReadableInstanceMember(effectiveMembers, field)) { + if (classHasReadableMember(effectiveMembers, field, false)) { satisfiedMembers.push(field.name); } else { missingMembers.push(field.name); } } - for (const method of requiredMethods) { - if (classHasCallableInstanceMethod(effectiveMembers, method)) { + for (const field of requiredStaticFields) { + if (classHasReadableMember(effectiveMembers, field, true)) { + satisfiedStaticMembers.push(field.name); + } else { + missingStaticMembers.push(field.name); + } + } + for (const method of requiredInstanceMethods) { + if (classHasCallableMethod(effectiveMembers, method, false)) { satisfiedMembers.push(method.name); } else { missingMembers.push(method.name); } } + for (const method of requiredStaticMethods) { + if (classHasCallableMethod(effectiveMembers, method, true)) { + satisfiedStaticMembers.push(method.name); + } else { + missingStaticMembers.push(method.name); + } + } const missing = sortedUnique(missingMembers); const satisfied = sortedUnique(satisfiedMembers); + const missingStatic = sortedUnique(missingStaticMembers); + const satisfiedStatic = sortedUnique(satisfiedStaticMembers); return { - status: missing.length > 0 ? 
'missing-members' : 'satisfied', + status: missing.length > 0 || missingStatic.length > 0 ? 'missing-members' : 'satisfied', missingMembers: missing, satisfiedMembers: satisfied, + missingStaticMembers: missingStatic, + satisfiedStaticMembers: satisfiedStatic, diagnostics: [], unsupportedReasons: [], }; } +function effectiveInterfaceFields( + protocol: InterfaceInfo, + interfaceByName: ReadonlyMap, + seen: ReadonlySet = new Set(), +): InterfaceFieldInfo[] { + if (seen.has(protocol.name)) return []; + const nextSeen = new Set(seen); + nextSeen.add(protocol.name); + const fields = new Map(); + for (const baseName of protocol.extendsNames) { + const base = interfaceByName.get(baseName); + if (!base) continue; + for (const field of effectiveInterfaceFields(base, interfaceByName, nextSeen)) { + fields.set(interfaceMemberShapeKey(field), field); + } + } + for (const field of protocol.fields) fields.set(interfaceMemberShapeKey(field), field); + return [...fields.values()]; +} + function effectiveInterfaceMethods( protocol: InterfaceInfo, interfaceByName: ReadonlyMap, @@ -3522,30 +3581,40 @@ function effectiveInterfaceMethods( for (const baseName of protocol.extendsNames) { const base = interfaceByName.get(baseName); if (!base) continue; - for (const method of effectiveInterfaceMethods(base, interfaceByName, nextSeen)) methods.set(method.name, method); + for (const method of effectiveInterfaceMethods(base, interfaceByName, nextSeen)) { + methods.set(interfaceMemberShapeKey(method), method); + } } - for (const method of protocol.methods) methods.set(method.name, method); + for (const method of protocol.methods) methods.set(interfaceMemberShapeKey(method), method); return [...methods.values()]; } -function classHasReadableInstanceMember( +function interfaceMemberShapeKey(member: { readonly name: string; readonly static: boolean }): string { + return `${member.static ? 
'static' : 'instance'}:${member.name}`; +} + +function classHasReadableMember( members: readonly ClassSemanticMemberFact[], field: { readonly name: string; readonly type?: string }, + staticOnly: boolean, ): boolean { return members.some((member) => { - if (member.name !== field.name || member.static) return false; + if (member.name !== field.name || member.static !== staticOnly || member.private) return false; if (member.kind !== 'field' && member.kind !== 'getter') return false; const actualType = member.kind === 'getter' ? member.returns : member.type; return !field.type || actualType === field.type; }); } -function classHasCallableInstanceMethod( +function classHasCallableMethod( members: readonly ClassSemanticMemberFact[], method: InterfaceMethodInfo, + staticOnly: boolean, ): boolean { return members.some((member) => { - if (member.name !== method.name || member.static || member.private || member.kind !== 'method') return false; + if (member.name !== method.name || member.static !== staticOnly || member.private || member.kind !== 'method') { + return false; + } if (member.arity !== method.arity) return false; if (!methodParamTypesCompatible(member.paramTypes ?? [], method.paramTypes)) return false; if ((member.async === true) !== method.async) return false; @@ -3693,11 +3762,19 @@ function validateClassImplements( }); continue; } - if (conformance.missingMembers.length === 0) continue; + if (conformance.missingMembers.length === 0 && conformance.missingStaticMembers.length === 0) continue; + const missingParts = [ + ...(conformance.missingMembers.length > 0 + ? [`instance member(s): ${conformance.missingMembers.join(', ')}`] + : []), + ...(conformance.missingStaticMembers.length > 0 + ? [`static member(s): ${conformance.missingStaticMembers.join(', ')}`] + : []), + ]; violations.push({ rule: 'class-implements-missing-member', nodeType: 'class', - message: `Class '${info.name}' does not satisfy interface '${interfaceName}'. 
Missing readable instance member(s): ${conformance.missingMembers.join(', ')}.`, + message: `Class '${info.name}' does not satisfy interface '${interfaceName}'. Missing readable ${missingParts.join('; ')}.`, line: info.node.loc?.line, col: info.node.loc?.col, }); diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 74c8dcaf..81e27cce 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -74,6 +74,20 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-implements-missing-member'); }); + test('rejects private instance fields as protocol members', () => { + const violations = violationsFor( + [ + 'interface name=Named', + ' field name=name type=string', + 'class name=User implements=Named', + ' field name=name type=string private=true', + ].join('\n'), + ); + + const violation = violations.find((candidate) => candidate.rule === 'class-implements-missing-member'); + expect(violation?.message).toContain('instance member(s): name'); + }); + test('accepts class implements when instance methods satisfy interface methods', () => { const rules = rulesFor( [ @@ -90,6 +104,56 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-implements-missing-member'); }); + test('accepts static fields and inherited static methods for class implements', () => { + const rules = rulesFor( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + ' method name=create params="id:string" returns=string static=true', + 'class name=Base', + ' method name=create params="id:string" returns=string static=true', + ' handler lang=kern', + ' return value="id"', + 'class name=UserFactory extends=Base implements=Factory', + ' field name=kind type=string static=true', + ].join('\n'), + ); + + expect(rules).not.toContain('class-implements-missing-member'); + }); + + test('rejects 
private static fields as protocol members', () => { + const violations = violationsFor( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=UserFactory implements=Factory', + ' field name=kind type=string static=true private=true', + ].join('\n'), + ); + + const violation = violations.find((candidate) => candidate.rule === 'class-implements-missing-member'); + expect(violation?.message).toContain('static member(s): kind'); + }); + + test('rejects static protocol members satisfied only by instance members', () => { + const violations = violationsFor( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + ' method name=create params="id:string" returns=string static=true', + 'class name=Confused implements=Factory', + ' field name=kind type=string', + ' method name=create params="id:string" returns=string', + ' handler lang=kern', + ' return value="id"', + ].join('\n'), + ); + + const violation = violations.find((candidate) => candidate.rule === 'class-implements-missing-member'); + expect(violation?.message).toContain('static member(s): create, kind'); + }); + test('reports missing and incompatible interface methods for class implements', () => { const violations = violationsFor( [ diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index fb30cd58..918e6e87 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -467,6 +467,117 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('new User().name', env))).toBe('Ada'); }); + test('validates static implemented interface fields at class definition', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=UserFactory implements=Factory', + ' field name=kind type=string static=true value="user"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + 
expect(() => runCoreRuntime(root, env)).not.toThrow(); + expect(toHostValue(evalCoreExpression('UserFactory.kind', env))).toBe('user'); + }); + + test('accepts missing optional static implemented interface fields', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true optional=true', + 'class name=UserFactory implements=Factory', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + expect(() => runCoreRuntime(root, env)).not.toThrow(); + }); + + test('rejects private static implemented interface fields', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=UserFactory implements=Factory', + ' field name=kind type=string static=true private=true value="user"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + expect(() => runCoreRuntime(root, env)).toThrow('missing or incompatible static member(s): kind'); + }); + + test('rejects static implemented interface field type mismatches', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=BadFactory implements=Factory', + ' field name=kind type=number static=true value=1', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + expect(() => runCoreRuntime(root, env)).toThrow('missing or incompatible static member(s): kind'); + }); + + test('rejects static implemented interface members satisfied only by instance members', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + ' method name=create params="id:string" returns=string static=true', + 'class name=Confused implements=Factory', + ' field name=kind type=string value="user"', + ' method name=create params="id:string" returns=string', + ' handler', + ' return value="id"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + + expect(() => runCoreRuntime(root, env)).toThrow('missing or 
incompatible static member(s): kind'); + }); + + test('does not invoke static getters while validating implemented interface fields', () => { + const root = parse( + [ + 'interface name=Factory', + ' field name=kind type=string static=true', + 'class name=UserFactory implements=Factory', + ' getter name=kind returns=string static=true', + ' handler', + ' return value="Later.kind"', + 'class name=Later', + ' field name=kind type=string static=true value="user"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('UserFactory.kind', env))).toBe('user'); + }); + + test('validates inherited static methods for implemented interfaces', () => { + const root = parse( + [ + 'interface name=Factory', + ' method name=create params="id:string" returns=string static=true', + 'class name=BaseFactory', + ' method name=create params="id:string" returns=string static=true', + ' handler', + ' return value="id"', + 'class name=UserFactory extends=BaseFactory implements=Factory', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('UserFactory.create("u1")', env))).toBe('u1'); + }); + test('validates implemented interface methods without invoking them', () => { const root = parse( [ @@ -823,9 +934,8 @@ describe('KERN core runtime statements', () => { ].join('\n'), ); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow( + expect(() => runCoreRuntime(root, env)).toThrow( "implements interface 'Dictionary' that is not executable as a class protocol in v1", ); }); @@ -840,9 +950,8 @@ describe('KERN core runtime statements', () => { ].join('\n'), ); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow('implements= contains an empty reference'); + expect(() => runCoreRuntime(root, 
env)).toThrow('implements= contains an empty reference'); }); test('invalid runtime implements entries fail instead of being ignored', () => { @@ -855,9 +964,8 @@ describe('KERN core runtime statements', () => { ].join('\n'), ); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow('implements= contains an invalid reference: 123'); + expect(() => runCoreRuntime(root, env)).toThrow('implements= contains an invalid reference: 123'); }); test('runtime implements entries reject trailing junk', () => { @@ -870,21 +978,15 @@ describe('KERN core runtime statements', () => { ].join('\n'), ); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow( - 'implements= contains an invalid reference: Named junk', - ); + expect(() => runCoreRuntime(root, env)).toThrow('implements= contains an invalid reference: Named junk'); }); test('unknown local runtime implements targets fail instead of being ignored', () => { const root = parse(['class name=User implements=MissingProtocol'].join('\n')); const env = createCoreRuntimeEnv(); - runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User()', env)).toThrow( - "class 'User' implements unknown interface 'MissingProtocol'", - ); + expect(() => runCoreRuntime(root, env)).toThrow("class 'User' implements unknown interface 'MissingProtocol'"); }); test('imported runtime implements targets are treated as external protocols', () => { diff --git a/packages/core/tests/semantic-substrate.test.ts b/packages/core/tests/semantic-substrate.test.ts index 7f29874b..435da00c 100644 --- a/packages/core/tests/semantic-substrate.test.ts +++ b/packages/core/tests/semantic-substrate.test.ts @@ -532,8 +532,15 @@ describe('KERN semantic substrate', () => { ' field name=id type=string', 'interface name=DictionaryProtocol', ' indexer keyName=key keyType=string type=number', + 'interface 
name=FactoryProtocol', + ' field name=kind type=string static=true', + ' method name=create params="id:string" returns=string static=true', 'class name=Base', ' field name=id type=string', + 'class name=BaseFactory', + ' method name=create params="id:string" returns=string static=true', + ' handler lang=kern', + ' return value="id"', 'class name=User extends=Base implements="Named,ExternalProtocol,MissingProtocol"', ' getter name=name returns=string', ' handler lang=kern', @@ -546,6 +553,8 @@ describe('KERN semantic substrate', () => { 'class name=Invalid implements=BrokenProtocol', ' field name=id type=string', 'class name=Dictionary implements=DictionaryProtocol', + 'class name=FactoryImpl extends=BaseFactory implements=FactoryProtocol', + ' field name=kind type=string static=true', ].join('\n'), ), ); @@ -555,6 +564,7 @@ describe('KERN semantic substrate', () => { { from: 'User', to: 'Named', relation: 'implements', resolved: true, external: false }, { from: 'User', to: 'ExternalProtocol', relation: 'implements', resolved: true, external: true }, { from: 'User', to: 'MissingProtocol', relation: 'implements', resolved: false, external: false }, + { from: 'FactoryImpl', to: 'FactoryProtocol', relation: 'implements', resolved: true, external: false }, ]), ); expect(facts.unresolvedImplements).toEqual(['MissingProtocol']); @@ -590,6 +600,13 @@ describe('KERN semantic substrate', () => { status: 'unsupported-protocol', unsupportedReasons: ['indexer'], }), + expect.objectContaining({ + className: 'FactoryImpl', + interfaceName: 'FactoryProtocol', + status: 'satisfied', + satisfiedStaticMembers: ['create', 'kind'], + missingStaticMembers: [], + }), ]), ); }); From 863f9b85f8d2eb6c7924139f2e2e5fd1f23b3d71 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 07:33:18 +0200 Subject: [PATCH 38/63] fix(core): pin core contract element type to satisfy strict tsc --- packages/core/src/semantic-substrate.ts | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 
deletions(-) diff --git a/packages/core/src/semantic-substrate.ts b/packages/core/src/semantic-substrate.ts index dd0a9e6f..891cd89a 100644 --- a/packages/core/src/semantic-substrate.ts +++ b/packages/core/src/semantic-substrate.ts @@ -6,7 +6,12 @@ import { type PortableLogicSupport, type PortableLogicTarget, } from './codegen/portable-logic-primitives.js'; -import { CORE_TYPE_CONTRACTS, type CoreOperationReturns, contractToGraphEdges } from './core-contracts/index.js'; +import { + CORE_TYPE_CONTRACTS, + type CoreOperationReturns, + type CoreTypeContract, + contractToGraphEdges, +} from './core-contracts/index.js'; import { type CoreShapeFacts, collectCoreShapeFacts } from './core-runtime/shape-validator.js'; import type { NodeContract } from './ir/semantics/index.js'; import { snapshotRegistry } from './ir/semantics/index.js'; @@ -138,7 +143,10 @@ export interface BuildKernSemanticSubstrateOptions { export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOptions = {}): KernSemanticSubstrate { const ragFacts = options.documentRag ? collectRagSemanticFacts(options.documentRag) : undefined; - const coreTypes = Object.values(CORE_TYPE_CONTRACTS.types).map((contract) => ({ + // The registry guarantees every value is a CoreTypeContract; pin the element type so + // Object.values does not widen to unknown/any under stricter tsconfig settings (ts18046). + const coreContracts = Object.values(CORE_TYPE_CONTRACTS.types) as readonly CoreTypeContract[]; + const coreTypes = coreContracts.map((contract) => ({ id: `core.type.${contract.name}`, name: contract.name, kind: contract.kind, @@ -160,7 +168,7 @@ export function buildKernSemanticSubstrate(options: BuildKernSemanticSubstrateOp generatedBy: 'kern-semantic-substrate', source: options.source ?? 
'codegen-from-ts', coreTypes, - coreGraphEdges: Object.values(CORE_TYPE_CONTRACTS.types).flatMap((contract) => contractToGraphEdges(contract)), + coreGraphEdges: coreContracts.flatMap((contract) => contractToGraphEdges(contract)), portablePrimitives: PORTABLE_LOGIC_PRIMITIVE_IDS.map((id) => { const primitive = PORTABLE_LOGIC_PRIMITIVES[id]; return { From 04a1e06b11b382fec9dbf8d9f56127c5f04266c8 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 08:10:30 +0200 Subject: [PATCH 39/63] fix(core): clear kern-guard findings in class semantic validator --- packages/core/src/semantic-validator.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index bd45943d..53c8669f 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -3130,10 +3130,9 @@ function fieldInitializerNames(info: ClassInfo): string[] { function constructorThisAssignmentNames(info: ClassInfo): string[] { if (info.constructors.length === 0) return []; - const constructorAssignments: string[][] = []; - for (const ctor of info.constructors) { - constructorAssignments.push([...definiteThisAssignmentsInStatements(constructorBodyStatements(ctor))]); - } + const constructorAssignments = info.constructors.map((ctor) => [ + ...definiteThisAssignmentsInStatements(constructorBodyStatements(ctor)), + ]); const [first = [], ...rest] = constructorAssignments; return sortedUnique([...rest.reduce((common, names) => setIntersection(common, new Set(names)), new Set(first))]); } @@ -3277,7 +3276,9 @@ function superCallCountInNode(node: IRNode): number { if (!text) continue; try { count += valueIRSuperConstructorCallCount(parseExpression(text)); - } catch {} + } catch { + // Unparseable expression text contributes no super() calls. 
+ } } return 'continue'; }); From 750fd932f8b6efed66f99e530335e127a0fa7a74 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 09:50:10 +0200 Subject: [PATCH 40/63] feat(python): lower KERN classes to pure Python --- packages/python/src/codegen-body-python.ts | 18 +++ packages/python/src/codegen-python.ts | 3 + packages/python/src/generators/data.ts | 126 ++++++++++++++++++++- packages/python/src/targets/python.ts | 12 ++ 4 files changed, 155 insertions(+), 4 deletions(-) diff --git a/packages/python/src/codegen-body-python.ts b/packages/python/src/codegen-body-python.ts index 4fb213fa..4266a8ac 100644 --- a/packages/python/src/codegen-body-python.ts +++ b/packages/python/src/codegen-body-python.ts @@ -74,6 +74,15 @@ export interface BodyEmitOptions { * the KERN-form `userId` resolves to the snake_cased Python parameter. * Identifiers not in the map pass through unchanged. */ symbolMap?: Record; + /** When true, the handler is a class member body: identifier `super` + * lowers to Python `super()` (so `super.m()` -> `super().m()`) and a + * direct `super(...)` call lowers to `super().__init__(...)`. Paired with + * a `symbolMap` entry `this -> self` by the class generator. */ + inClassBody?: boolean; + /** When true, the handler is specifically a constructor body, so a direct + * `super(...)` call lowers to `super().__init__(...)`. Outside a constructor + * `super(...)` is not a parent-constructor call and is left untouched. */ + inConstructor?: boolean; /** Slice 4a review fix (Gemini #5) — how to lower the `?` propagation * hoist's err-branch return: * - 'value' (default for `fn`): `return __k_tN` so the caller sees @@ -137,6 +146,8 @@ interface BodyEmitContext { * `each` pair-mode). Consumer emits each entry at module scope. 
*/ helpers: Set; symbolMap: Record; + inClassBody: boolean; + inConstructor: boolean; shadowedSymbols: Set; localScopes: Array>; regexScopes: Array | null>>; @@ -171,6 +182,8 @@ function freshCtx(options?: BodyEmitOptions): BodyEmitContext { imports: new Set(), helpers: new Set(), symbolMap: options?.symbolMap ?? {}, + inClassBody: options?.inClassBody ?? false, + inConstructor: options?.inConstructor ?? false, shadowedSymbols: new Set(), localScopes: [], regexScopes: [], @@ -1695,6 +1708,7 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { // Python-form `user_id`. Identifiers not in the map (locals, globals, // module names) pass through unchanged. if (ctx.shadowedSymbols.has(node.name)) return node.name; + if (ctx.inClassBody && node.name === 'super') return 'super()'; return ctx.symbolMap[node.name] ?? node.name; } case 'member': @@ -2101,6 +2115,10 @@ function lowerChain(node: ChainNode, ctx: BodyEmitContext): GuardedExpr { if (regex !== null) return { guard: null, expr: regex }; const stdlib = applyStdlibLoweringPython(node, ctx); if (stdlib !== null) return { guard: null, expr: stdlib }; + if (ctx.inConstructor && node.callee.kind === 'ident' && node.callee.name === 'super') { + const superArgs = node.args.map((arg) => emitPyExprCtx(arg, ctx)).join(', '); + return { guard: null, expr: `super().__init__(${superArgs})` }; + } if (node.callee.kind === 'ident' && node.callee.name === 'String') { if (node.args.length !== 1) { throw new Error('String() portable coercion expects exactly one argument on Python target.'); diff --git a/packages/python/src/codegen-python.ts b/packages/python/src/codegen-python.ts index a3950b84..0fd5a782 100644 --- a/packages/python/src/codegen-python.ts +++ b/packages/python/src/codegen-python.ts @@ -27,6 +27,7 @@ import { // Data layer generators (model, repository, cache, dependency, service, union) import { generatePythonCache, + generatePythonClass, generatePythonDependency, generatePythonModel, 
generatePythonRepository, @@ -180,6 +181,8 @@ export function generatePythonCoreNode(node: IRNode, options: PythonCodegenOptio return generatePythonDependency(node); case 'service': return generatePythonService(node); + case 'class': + return generatePythonClass(node); case 'union': return generatePythonUnion(node); // Backend infrastructure diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index cf88b89d..516d9d86 100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -22,7 +22,10 @@ import { mapTsTypeToPython, toSnakeCase } from '../type-map.js'; * * When the handler is legacy raw, returns `{ code: handlerCode(method), * imports: empty }`. */ -function methodBodyCodePython(method: IRNode): { code: string; imports: Set; helpers: Set } { +function methodBodyCodePython( + method: IRNode, + opts?: { classBody?: boolean; isConstructor?: boolean }, +): { code: string; imports: Set; helpers: Set } { const handler = getFirstChild(method, 'handler'); if (!handler || getProps(handler).lang !== 'kern') { return { code: handlerCode(method), imports: new Set(), helpers: new Set() }; @@ -54,7 +57,14 @@ function methodBodyCodePython(method: IRNode): { code: string; imports: Set`self`, `super.m()`->`super().m()`, `new X()`->`X()`) is the next +// sub-problem the differential class fixtures will drive. +export function generatePythonClass(node: IRNode): string[] { + const props = p(node); + const name = emitIdentifier(props.name as string, 'UnknownClass', node); + const baseRaw = typeof props.extends === 'string' ? (props.extends as string) : ''; + const base = baseRaw ? emitIdentifier(baseRaw, 'object', node) : ''; + const header = base ? 
`class ${name}(${base}):` : `class ${name}:`; + + const isStatic = (n: IRNode): boolean => { + const np = p(n); + return np.static === 'true' || np.static === true; + }; + + const fields = kids(node, 'field'); + const staticFields = fields.filter(isStatic); + const methods = kids(node, 'method'); + const getters = kids(node, 'getter'); + const setters = kids(node, 'setter'); + const ctor = firstChild(node, 'constructor'); + + const body: string[] = []; + + // Static fields -> class-level attributes. + for (const f of staticFields) { + const fp = p(f); + const fname = toSnakeCase((fp.name as string) || 'field'); + const ftype = fp.type ? mapTsTypeToPython(fp.type as string) : 'Any'; + const raw = typeof fp.value === 'string' ? (fp.value as string).replace(/\bnew\s+/g, '') : undefined; + const value = raw !== undefined ? formatPythonDefault(raw, (fp.type as string) || '') : 'None'; + body.push(` ${fname}: ${ftype} = ${value}`); + } + if (staticFields.length > 0) body.push(''); + + // Constructor -> __init__. + if (ctor) { + body.push(` def __init__(${buildPythonParamList(ctor, { selfPrefix: true })}):`); + body.push(...methodBodyLinesPython(ctor, { classBody: true, isConstructor: true })); + body.push(''); + } + + // Methods (instance + static). + for (const m of methods) { + const mp = p(m); + const mname = toSnakeCase((mp.name as string) || 'method'); + const asyncKw = mp.async === 'true' || mp.async === true ? 'async ' : ''; + const returns = mp.returns ? ` -> ${mapTsTypeToPython(mp.returns as string)}` : ''; + if (isStatic(m)) { + body.push(' @staticmethod'); + body.push(` ${asyncKw}def ${mname}(${buildPythonParamList(m, { selfPrefix: false })})${returns}:`); + } else { + body.push(` ${asyncKw}def ${mname}(${buildPythonParamList(m, { selfPrefix: true })})${returns}:`); + } + body.push(...methodBodyLinesPython(m, { classBody: !isStatic(m) })); + body.push(''); + } + + // Getters -> @property. 
Static accessors need a metaclass/classmethod-property + // and are a follow-up; skip them with a marker rather than emit broken code. + const instanceGetterNames = new Set(); + for (const g of getters) { + const gp = p(g); + const gname = toSnakeCase((gp.name as string) || 'prop'); + if (isStatic(g)) { + body.push(` # static getter '${gname}' is not yet supported on the Python target`); + continue; + } + instanceGetterNames.add(gname); + const returns = gp.returns ? ` -> ${mapTsTypeToPython(gp.returns as string)}` : ''; + body.push(' @property'); + body.push(` def ${gname}(self)${returns}:`); + body.push(...methodBodyLinesPython(g, { classBody: true })); + body.push(''); + } + // Setters -> @.setter. Python requires a property to exist before its + // `.setter`; KERN allows setter-only properties, so synthesize a getter when + // none was declared (write-only -> returns None, matching a TS getter-less read). + for (const s of setters) { + const sp = p(s); + const sname = toSnakeCase((sp.name as string) || 'prop'); + if (isStatic(s)) { + body.push(` # static setter '${sname}' is not yet supported on the Python target`); + continue; + } + if (!instanceGetterNames.has(sname)) { + body.push(' @property'); + body.push(` def ${sname}(self): # write-only property (no getter declared in KERN)`); + body.push(' return None'); + body.push(''); + instanceGetterNames.add(sname); + } + body.push(` @${sname}.setter`); + body.push(` def ${sname}(${buildPythonParamList(s, { selfPrefix: true })}):`); + body.push(...methodBodyLinesPython(s, { classBody: true })); + body.push(''); + } + + if (body.length === 0) body.push(' pass'); + + return [header, ...body]; +} + // ── Union (Pydantic Discriminated Union) ──────────────────────────────── // union name=ContentSegment discriminant=type // variant name=prose diff --git a/packages/python/src/targets/python.ts b/packages/python/src/targets/python.ts index 35d1593a..be642a7a 100644 --- a/packages/python/src/targets/python.ts +++ 
b/packages/python/src/targets/python.ts @@ -5,6 +5,7 @@ import { emitModels } from '../core/emit-models.js'; import { collectFenceDiagnostics } from '../core/fence-diagnostics.js'; import { emitPureHandlers } from '../core/handlers/index.js'; import { findServerNode } from '../fastapi-utils.js'; +import { generatePythonClass } from '../generators/data.js'; /** * The PyDotDict / _DotList shim, emitted at the top of every `--emit=backend` @@ -132,6 +133,11 @@ export function transpilePython(root: IRNode, config?: ResolvedKernConfig): Tran target: 'python', }); + // 3b. Class declarations -> pure Python classes. Additive: files without + // `class` nodes (e.g. the models-only byte-invariance corpus) are untouched. + const classNodes = (root.children ?? []).filter((child) => child.type === 'class'); + const classesCode = classNodes.map((node) => generatePythonClass(node).join('\n')).join('\n\n'); + const lines: string[] = []; // Sort and print imports @@ -166,6 +172,12 @@ export function transpilePython(root: IRNode, config?: ResolvedKernConfig): Tran lines.push(modelsCode); } + // Class definitions (pure Python — not FastAPI/Pydantic). + if (classesCode.trim().length > 0) { + lines.push(''); + lines.push(classesCode); + } + // Pure handlers (additive) if (handlersCode) { lines.push(''); From 377a8ee4096a463b75e3b8a54e3c0a0a4f0f83d0 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 09:50:10 +0200 Subject: [PATCH 41/63] test(python): lock single-source class codegen --- packages/python/tests/class-python.test.ts | 121 +++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 packages/python/tests/class-python.test.ts diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts new file mode 100644 index 00000000..53ce5d1e --- /dev/null +++ b/packages/python/tests/class-python.test.ts @@ -0,0 +1,121 @@ +/** Single-source class slice — Python target. 
+ * + * KERN `class` nodes lower to pure Python (NOT FastAPI/Pydantic) via + * `generatePythonClass`. Class member bodies translate through the shared + * Python body emitter with `inClassBody`/`inConstructor`: + * - `this` -> `self` (symbol map) + * - `super(args)` -> `super().__init__(args)` (constructor only) + * - `super.m()` / `super.x` -> `super().m()` / `super().x` (any member) + * + * Behaviour locked here was driven by an Agon review of the slice + * (setter-only synthesis + static-accessor skip closed two blocking findings). + */ + +import type { IRNode } from '@kernlang/core'; +import { generatePythonClass } from '../src/generators/data.js'; + +function handler(children: IRNode[]): IRNode { + return { type: 'handler', props: { lang: 'kern' }, children }; +} +function param(name: string, type?: string): IRNode { + return { type: 'param', props: type ? { name, type } : { name }, children: [] }; +} + +describe('Python class codegen (single-source class slice)', () => { + test('emits a pure-Python class: __init__, this->self, instance method, getter', () => { + const animal: IRNode = { + type: 'class', + props: { name: 'Animal' }, + children: [ + { + type: 'constructor', + props: {}, + children: [ + param('name', 'string'), + param('legs', 'number'), + handler([ + { type: 'assign', props: { target: 'this.name', value: 'name' }, children: [] }, + { type: 'assign', props: { target: 'this.legs', value: 'legs' }, children: [] }, + ]), + ], + }, + { + type: 'getter', + props: { name: 'legCount', returns: 'number' }, + children: [handler([{ type: 'return', props: { value: 'this.legs' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(animal).join('\n'); + expect(code).toContain('class Animal:'); + expect(code).toContain('def __init__(self, name: str, legs: float):'); + expect(code).toContain('self.name = name'); + expect(code).toContain('@property'); + expect(code).toContain('def leg_count(self) -> float:'); + expect(code).toContain('return 
self.legs'); + expect(code).not.toContain('this.'); // no JS-ism leaks + }); + + test('inheritance: super(...) -> super().__init__ in constructor, super.m() -> super().m()', () => { + const dog: IRNode = { + type: 'class', + props: { name: 'Dog', extends: 'Animal' }, + children: [ + { + type: 'constructor', + props: {}, + children: [ + param('name', 'string'), + handler([{ type: 'do', props: { value: 'super(name, 4)' }, children: [] }]), + ], + }, + { + type: 'method', + props: { name: 'summary', returns: 'string' }, + children: [handler([{ type: 'return', props: { value: '`${super.describe()}`' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(dog).join('\n'); + expect(code).toContain('class Dog(Animal):'); + expect(code).toContain('super().__init__(name, 4)'); + expect(code).toContain('super().describe()'); + }); + + test('setter-only property synthesizes a write-only getter (valid Python, no NameError)', () => { + const box: IRNode = { + type: 'class', + props: { name: 'Box' }, + children: [ + { + type: 'setter', + props: { name: 'items' }, + children: [ + param('next', 'object[]'), + handler([{ type: 'assign', props: { target: 'this.store', value: 'next' }, children: [] }]), + ], + }, + ], + }; + const code = generatePythonClass(box).join('\n'); + expect(code).toContain('def items(self):'); // synthesized getter precedes the setter + expect(code).toContain('@items.setter'); + }); + + test('static accessors are skipped (not emitted as broken instance @property)', () => { + const reg: IRNode = { + type: 'class', + props: { name: 'Reg' }, + children: [ + { + type: 'getter', + props: { name: 'label', static: 'true', returns: 'string' }, + children: [handler([{ type: 'return', props: { value: '"x"' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(reg).join('\n'); + expect(code).not.toContain('def label(self)'); + expect(code).toContain("static getter 'label'"); + }); +}); From a2459ec09d294c55b89476d4b781aa2066b0bb7f Mon 
Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 10:21:35 +0200 Subject: [PATCH 42/63] feat(python): per-instance field defaults and static field values --- packages/python/src/generators/data.ts | 59 +++++++++++++++++++++++--- packages/python/src/targets/python.ts | 2 +- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index 516d9d86..d620922f 100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -119,6 +119,25 @@ export function formatPythonDefault(value: string, kernType: string): string { return trimmed; } +/** Lower a field's default to a Python expression, or undefined when none. + * A `value={{ }}` block parses to `{ __expr: true, code: '' }`; + * a bare `default=...` is a raw string. `new X(...)` -> `X(...)`; literals go + * through formatPythonDefault (true/false/null/number/string handling). */ +function fieldDefaultPython(field: IRNode): string | undefined { + const fp = p(field); + const v = fp.value as unknown; + let code: string | undefined; + if (v && typeof v === 'object' && (v as { __expr?: boolean }).__expr) { + code = (v as { code?: string }).code; + } else if (typeof v === 'string') { + code = v; + } else if (typeof fp.default === 'string') { + code = fp.default as string; + } + if (code === undefined) return undefined; + return formatPythonDefault(code.replace(/\bnew\s+/g, ''), (fp.type as string) || ''); +} + // SQLModel column override: pydantic validator types -> plain DB types for column declarations const SQLMODEL_COLUMN_OVERRIDE: Record = { Email: 'str', @@ -494,21 +513,49 @@ export function generatePythonClass(node: IRNode): string[] { const body: string[] = []; - // Static fields -> class-level attributes. + // Static fields -> class-level attributes (shared across instances, like TS statics). 
for (const f of staticFields) { const fp = p(f); const fname = toSnakeCase((fp.name as string) || 'field'); const ftype = fp.type ? mapTsTypeToPython(fp.type as string) : 'Any'; - const raw = typeof fp.value === 'string' ? (fp.value as string).replace(/\bnew\s+/g, '') : undefined; - const value = raw !== undefined ? formatPythonDefault(raw, (fp.type as string) || '') : 'None'; - body.push(` ${fname}: ${ftype} = ${value}`); + body.push(` ${fname}: ${ftype} = ${fieldDefaultPython(f) ?? 'None'}`); } if (staticFields.length > 0) body.push(''); - // Constructor -> __init__. + // Constructor -> __init__. Instance-field defaults are emitted INSIDE __init__ + // (never as class-level attributes) so each instance gets a fresh value — + // matching TS per-instance field initialization and avoiding Python's + // shared-mutable-default trap (a class-level `items = []` would be shared by + // every instance). Defaults precede the constructor body, which may reassign + // them (TS field-init-then-constructor order). + const instanceDefaults = fields.filter((f) => !isStatic(f) && fieldDefaultPython(f) !== undefined); + const defaultLines = instanceDefaults.map( + (f) => ` self.${toSnakeCase((p(f).name as string) || 'field')} = ${fieldDefaultPython(f)}`, + ); if (ctor) { body.push(` def __init__(${buildPythonParamList(ctor, { selfPrefix: true })}):`); - body.push(...methodBodyLinesPython(ctor, { classBody: true, isConstructor: true })); + const ctorLines = methodBodyLinesPython(ctor, { classBody: true, isConstructor: true }); + // Field initializers run AFTER super().__init__() (TS field-init-after-super + // order), so inject defaults right after the super call when present, else at + // the top of the constructor body. 
+ const superIdx = ctorLines.findIndex((line) => line.includes('super().__init__')); + if (superIdx >= 0) { + body.push(...ctorLines.slice(0, superIdx + 1), ...defaultLines, ...ctorLines.slice(superIdx + 1)); + } else { + body.push(...defaultLines, ...ctorLines); + } + body.push(''); + } else if (instanceDefaults.length > 0) { + // No explicit constructor. A derived class still forwards to its base + // initializer (TS subclasses without a constructor auto-forward args), then + // applies its own field defaults. + if (base) { + body.push(' def __init__(self, *args, **kwargs):'); + body.push(' super().__init__(*args, **kwargs)'); + } else { + body.push(' def __init__(self):'); + } + body.push(...defaultLines); body.push(''); } diff --git a/packages/python/src/targets/python.ts b/packages/python/src/targets/python.ts index be642a7a..ab921d9b 100644 --- a/packages/python/src/targets/python.ts +++ b/packages/python/src/targets/python.ts @@ -135,7 +135,7 @@ export function transpilePython(root: IRNode, config?: ResolvedKernConfig): Tran // 3b. Class declarations -> pure Python classes. Additive: files without // `class` nodes (e.g. the models-only byte-invariance corpus) are untouched. - const classNodes = (root.children ?? []).filter((child) => child.type === 'class'); + const classNodes = root.type === 'class' ? [root] : (root.children ?? 
[]).filter((child) => child.type === 'class'); const classesCode = classNodes.map((node) => generatePythonClass(node).join('\n')).join('\n\n'); const lines: string[] = []; From c0e7fe6675afe80d16bf4c6dc9fcc357df3b57ac Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 10:21:35 +0200 Subject: [PATCH 43/63] test(python): cover field defaults and super-ordering --- packages/python/tests/class-python.test.ts | 83 ++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts index 53ce5d1e..c3cc0b3b 100644 --- a/packages/python/tests/class-python.test.ts +++ b/packages/python/tests/class-python.test.ts @@ -118,4 +118,87 @@ describe('Python class codegen (single-source class slice)', () => { expect(code).not.toContain('def label(self)'); expect(code).toContain("static getter 'label'"); }); + + test('instance-field defaults emit in __init__, never as a shared class attr', () => { + const bag: IRNode = { + type: 'class', + props: { name: 'Bag' }, + children: [ + { + type: 'field', + props: { name: 'items', type: 'object[]', value: { __expr: true, code: '[]' } }, + children: [], + }, + { + type: 'field', + props: { name: 'tag', type: 'string', value: { __expr: true, code: '"empty"' } }, + children: [], + }, + ], + }; + const code = generatePythonClass(bag).join('\n'); + expect(code).toContain('def __init__(self):'); + expect(code).toContain('self.items = []'); + expect(code).toContain('self.tag = "empty"'); + // Shared-mutable-default trap: instance fields must NOT become class-level attrs. 
+ expect(code).not.toMatch(/^ {4}items\s*[:=]/m); + }); + + test('static field values are extracted from value={{...}} (not None)', () => { + const reg: IRNode = { + type: 'class', + props: { name: 'Reg' }, + children: [ + { + type: 'field', + props: { name: 'kind', type: 'string', static: 'true', value: { __expr: true, code: '"audited"' } }, + children: [], + }, + ], + }; + const code = generatePythonClass(reg).join('\n'); + expect(code).toContain('kind: str = "audited"'); + expect(code).not.toContain('kind: str = None'); + }); + + test('derived class without a constructor forwards to base init, then applies defaults', () => { + const dog: IRNode = { + type: 'class', + props: { name: 'Dog', extends: 'Animal' }, + children: [ + { + type: 'field', + props: { name: 'tricks', type: 'object[]', value: { __expr: true, code: '[]' } }, + children: [], + }, + ], + }; + const code = generatePythonClass(dog).join('\n'); + expect(code).toContain('def __init__(self, *args, **kwargs):'); + expect(code).toContain('super().__init__(*args, **kwargs)'); + expect(code).toContain('self.tricks = []'); + expect(code.indexOf('super().__init__')).toBeLessThan(code.indexOf('self.tricks = []')); + }); + + test('field defaults run AFTER super() inside an explicit derived constructor', () => { + const dog: IRNode = { + type: 'class', + props: { name: 'Dog', extends: 'Animal' }, + children: [ + { + type: 'field', + props: { name: 'tricks', type: 'object[]', value: { __expr: true, code: '[]' } }, + children: [], + }, + { + type: 'constructor', + props: {}, + children: [param('name', 'string'), handler([{ type: 'do', props: { value: 'super(name)' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(dog).join('\n'); + expect(code).toContain('super().__init__(name)'); + expect(code.indexOf('super().__init__(name)')).toBeLessThan(code.indexOf('self.tricks = []')); + }); }); From fcd4b7b0d4507f1b6d4fd54a04429dbf616fd93d Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 
10:32:19 +0200 Subject: [PATCH 44/63] test(conformance): CI-enforce class TS<->Python parity --- package.json | 7 +- scripts/class-conformance.mjs | 190 ++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+), 3 deletions(-) create mode 100644 scripts/class-conformance.mjs diff --git a/package.json b/package.json index f8f118e4..1952fa1a 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "private": true, "type": "module", "packageManager": "pnpm@10.32.1", - "description": "KERN — backend structure and portable route logic for TypeScript/Express and Python/FastAPI parity.", + "description": "KERN \u2014 backend structure and portable route logic for TypeScript/Express and Python/FastAPI parity.", "author": "cukas", "repository": { "type": "git", @@ -18,7 +18,7 @@ "test:non-semantics": "pnpm -r --filter '!kern-monorepo' --filter '!@kernlang/review-python' test --testPathIgnorePatterns=ir-semantics && pnpm test:prepush && pnpm check:rule-coverage", "check:rule-coverage": "node ./scripts/check-rule-coverage.mjs", "check:python-codegen": "pnpm --filter @kernlang/core --filter @kernlang/python build && node ./scripts/lift-rate-python.mjs --check", - "check:conformance": "pnpm --filter @kernlang/core --filter @kernlang/python --filter @kernlang/express build && node ./scripts/conformance.mjs", + "check:conformance": "pnpm --filter @kernlang/core --filter @kernlang/python --filter @kernlang/express build && node ./scripts/conformance.mjs && node ./scripts/class-conformance.mjs", "docs:contracts": "pnpm --filter @kernlang/core build && node ./scripts/generate-ir-semantics-docs.mjs --format=markdown --out=-", "docs:contracts:json": "pnpm --filter @kernlang/core build && node ./scripts/generate-ir-semantics-docs.mjs --format=json --out=generated/contracts/registry.json", "docs:contracts:check": "pnpm --filter @kernlang/core build && node ./scripts/check-contract-docs.mjs", @@ -32,7 +32,8 @@ "lint:fix": "biome check --fix", "format": "biome format 
--write", "prepush": "node ./scripts/pre-push.mjs", - "prepare": "node ./scripts/install-git-hooks.mjs" + "prepare": "node ./scripts/install-git-hooks.mjs", + "check:class-conformance": "pnpm --filter @kernlang/core --filter @kernlang/python build && node ./scripts/class-conformance.mjs" }, "pnpm": { "onlyBuiltDependencies": [ diff --git a/scripts/class-conformance.mjs b/scripts/class-conformance.mjs new file mode 100644 index 00000000..e8a2e7de --- /dev/null +++ b/scripts/class-conformance.mjs @@ -0,0 +1,190 @@ +/** + * Class differential conformance — KERN single-source class parity. + * + * Each fixture is a self-contained KERN module: a class (or class hierarchy) + * plus a zero-arg `fn probe` that exercises it. The module is compiled through + * BOTH codegen paths (core -> TypeScript, python -> pure Python), each driver + * calls `probe()` and prints its JSON-normalized return, and we assert + * ts == python == expected. This proves class behavior is identical across + * targets BY CONSTRUCTION (both derive from one definition), not by hand-diffing + * two emitters. + * + * Scope: portable probes only (number/string ops). List mutation needs a + * portable list-append lowering and is exercised separately (unit tests prove + * the instance-field-default isolation directly). 
+ * + * Run: node scripts/class-conformance.mjs (or via `pnpm check:class-conformance`) + */ + +import { execFileSync } from 'node:child_process'; +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const REPO = dirname(dirname(fileURLToPath(import.meta.url))); +const { parse, generateCoreNode } = await import(join(REPO, 'packages/core/dist/index.js')); +const { generatePythonCoreNode } = await import(join(REPO, 'packages/python/dist/codegen-python.js')); +const tsCompiler = await import('typescript'); + +const FIXTURES = [ + { + name: 'construction + fields + method', + kern: `class name=Point export=true + field name=x type=number + field name=y type=number + constructor + param name=x type=number + param name=y type=number + handler + assign target="this.x" value="x" + assign target="this.y" value="y" + method name=sum returns=number + handler + return value="this.x + this.y" +fn name=probe returns=number + handler + return value="new Point(3, 4).sum()"`, + expected: 7, + }, + { + name: 'single inheritance + super constructor + super method', + kern: `class name=Animal export=true + field name=name type=string + constructor + param name=name type=string + handler + assign target="this.name" value="name" + method name=describe returns=string + handler + return value="\`\${this.name} is an animal\`" +class name=Dog extends=Animal export=true + constructor + param name=name type=string + handler + do value="super(name)" + method name=describe returns=string + handler + return value="\`\${super.describe()} (a dog)\`" +fn name=probe returns=string + handler + return value="new Dog(\\"Rex\\").describe()"`, + expected: 'Rex is an animal (a dog)', + }, + { + name: 'instance getter', + kern: `class name=Person export=true + field name=first type=string + field name=last type=string + constructor + param name=first type=string + param 
name=last type=string + handler + assign target="this.first" value="first" + assign target="this.last" value="last" + getter name=full returns=string + handler + return value="\`\${this.first} \${this.last}\`" +fn name=probe returns=string + handler + return value="new Person(\\"Ada\\", \\"Lovelace\\").full"`, + expected: 'Ada Lovelace', + }, + { + name: 'static method', + kern: `class name=MathBox export=true + method name=double static=true returns=number + param name=n type=number + handler + return value="n * 2" +fn name=probe returns=number + handler + return value="MathBox.double(21)"`, + expected: 42, + }, + { + name: 'instance field default (read, no constructor)', + kern: `class name=Config export=true + field name=mode type=string value={{ "dev" }} +fn name=probe returns=string + handler + return value="new Config().mode"`, + expected: 'dev', + }, + { + name: 'getter + setter + field default round-trip', + kern: `class name=Cell export=true + field name=v type=number value={{ 0 }} + getter name=value returns=number + handler + return value="this.v" + setter name=value + param name=next type=number + handler + assign target="this.v" value="next" +fn name=probe returns=number + handler + let name=c value="new Cell()" + assign target="c.value" value="9" + return value="c.value"`, + expected: 9, + }, +]; + +const canon = (v) => JSON.stringify(v); + +const dir = mkdtempSync(join(tmpdir(), 'kern-class-conf-')); +process.on('exit', () => { + try { + rmSync(dir, { recursive: true, force: true }); + } catch { + // best-effort tmp cleanup — never fail the run on it + } +}); + +let pass = 0; +const failures = []; + +for (let i = 0; i < FIXTURES.length; i++) { + const fx = FIXTURES[i]; + try { + const root = parse(fx.kern); + // A single top-level decl parses as the node itself; multiple decls wrap in a root. + const topNodes = root.type === 'class' || root.type === 'fn' ? [root] : (root.children ?? 
[]); + + // TypeScript module + const tsSource = `${topNodes.map((n) => generateCoreNode(n).join('\n')).join('\n\n')}\nconsole.log(JSON.stringify(probe()));`; + const tsFile = join(dir, `mod-${i}.mjs`); + writeFileSync( + tsFile, + tsCompiler.transpileModule(tsSource, { + compilerOptions: { module: tsCompiler.ModuleKind.ESNext, target: tsCompiler.ScriptTarget.ES2022 }, + }).outputText, + ); + + // Python module + const pySource = `import json\n${topNodes.map((n) => generatePythonCoreNode(n).join('\n')).join('\n\n')}\nprint(json.dumps(probe()))`; + const pyFile = join(dir, `mod-${i}.py`); + writeFileSync(pyFile, pySource); + + const opts = { encoding: 'utf8', timeout: 10_000 }; + const tsOut = JSON.parse(execFileSync('node', [tsFile], opts).trim()); + const pyOut = JSON.parse(execFileSync('python3', [pyFile], opts).trim()); + + if (canon(tsOut) === canon(fx.expected) && canon(pyOut) === canon(fx.expected)) { + pass++; + } else { + failures.push({ name: fx.name, expected: fx.expected, ts: tsOut, py: pyOut }); + } + } catch (err) { + failures.push({ name: fx.name, error: err?.stderr?.toString?.() || err?.message || String(err) }); + } +} + +console.log(`Class conformance: ${pass}/${FIXTURES.length} fixtures passed (ts == python == expected)`); +for (const f of failures) { + if (f.error) console.error(` FAIL ${f.name}: ${f.error}`); + else console.error(` FAIL ${f.name}: expected ${canon(f.expected)} | ts ${canon(f.ts)} | py ${canon(f.py)}`); +} +if (failures.length > 0) process.exit(1); +console.log('All passed.'); From bba9db8105a5f7e2abb881a0a36a9c20131e14eb Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 11:13:47 +0200 Subject: [PATCH 45/63] feat(python): static accessors via per-class metaclass (with chaining) --- packages/python/src/generators/data.ts | 70 ++++++++++++++++++++------ 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index d620922f..242da074 
100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -24,7 +24,7 @@ import { mapTsTypeToPython, toSnakeCase } from '../type-map.js'; * imports: empty }`. */ function methodBodyCodePython( method: IRNode, - opts?: { classBody?: boolean; isConstructor?: boolean }, + opts?: { classBody?: boolean; isConstructor?: boolean; staticReceiver?: boolean }, ): { code: string; imports: Set; helpers: Set } { const handler = getFirstChild(method, 'handler'); if (!handler || getProps(handler).lang !== 'kern') { @@ -59,7 +59,8 @@ function methodBodyCodePython( } // Class member bodies: `this` resolves to `self`, and `super(...)`/`super.x` // lower to `super().__init__(...)`/`super().x` via the inClassBody flag. - if (opts?.classBody) symbolMap.this = 'self'; + // In a static accessor (metaclass property) body `this` is the class -> `cls`. + if (opts?.classBody) symbolMap.this = opts?.staticReceiver ? 'cls' : 'self'; const { code, imports, helpers } = emitNativeKernBodyPythonWithImports(handler, { symbolMap, inClassBody: opts?.classBody ?? false, @@ -74,7 +75,10 @@ function methodBodyCodePython( * scope absorbs them, and Python caches modules after first import. * Returns the indented lines (4-space prefix) ready to push into the * enclosing class definition. Empty body yields a single `pass`. */ -function methodBodyLinesPython(method: IRNode, opts?: { classBody?: boolean; isConstructor?: boolean }): string[] { +function methodBodyLinesPython( + method: IRNode, + opts?: { classBody?: boolean; isConstructor?: boolean; staticReceiver?: boolean }, +): string[] { const { code, imports, helpers } = methodBodyCodePython(method, opts); const lines: string[] = []; for (const mod of [...imports].sort()) { @@ -497,7 +501,6 @@ export function generatePythonClass(node: IRNode): string[] { const name = emitIdentifier(props.name as string, 'UnknownClass', node); const baseRaw = typeof props.extends === 'string' ? 
(props.extends as string) : ''; const base = baseRaw ? emitIdentifier(baseRaw, 'object', node) : ''; - const header = base ? `class ${name}(${base}):` : `class ${name}:`; const isStatic = (n: IRNode): boolean => { const np = p(n); @@ -511,6 +514,49 @@ export function generatePythonClass(node: IRNode): string[] { const setters = kids(node, 'setter'); const ctor = firstChild(node, 'constructor'); + // Static accessors (static get/set) lower to a per-class metaclass: both + // `Box.label` reads and `Box.label = x` writes dispatch through the metaclass + // @property/.setter (a plain descriptor would be shadowed on assignment). The + // static backing field stays a class attribute. The metaclass extends + // `type()` so that when the base ALSO has static accessors the derived + // metaclass subclasses the base metaclass (no `metaclass conflict`, and the + // base's static accessors are inherited); when the base has none, `type()` + // is just `type`. + const staticGetters = getters.filter(isStatic); + const staticSetters = setters.filter(isStatic); + const metaName = `_${name}Meta`; + const metaLines: string[] = []; + if (staticGetters.length + staticSetters.length > 0) { + metaLines.push(`class ${metaName}(${base ? `type(${base})` : 'type'}):`); + const metaGetterNames = new Set(); + for (const g of staticGetters) { + const gp = p(g); + const gname = toSnakeCase((gp.name as string) || 'prop'); + const returns = gp.returns ? 
` -> ${mapTsTypeToPython(gp.returns as string)}` : ''; + metaGetterNames.add(gname); + metaLines.push(' @property'); + metaLines.push(` def ${gname}(cls)${returns}:`); + metaLines.push(...methodBodyLinesPython(g, { classBody: true, staticReceiver: true })); + metaLines.push(''); + } + for (const s of staticSetters) { + const sname = toSnakeCase((p(s).name as string) || 'prop'); + if (!metaGetterNames.has(sname)) { + metaLines.push(' @property'); + metaLines.push(` def ${sname}(cls): # write-only static property`); + metaLines.push(' return None'); + metaLines.push(''); + metaGetterNames.add(sname); + } + metaLines.push(` @${sname}.setter`); + metaLines.push(` def ${sname}(cls, ${buildPythonParamList(s, { selfPrefix: false })}):`); + metaLines.push(...methodBodyLinesPython(s, { classBody: true, staticReceiver: true })); + metaLines.push(''); + } + } + const baseParts = [base, metaLines.length > 0 ? `metaclass=${metaName}` : ''].filter(Boolean); + const header = baseParts.length > 0 ? `class ${name}(${baseParts.join(', ')}):` : `class ${name}:`; + const body: string[] = []; // Static fields -> class-level attributes (shared across instances, like TS statics). @@ -575,16 +621,12 @@ export function generatePythonClass(node: IRNode): string[] { body.push(''); } - // Getters -> @property. Static accessors need a metaclass/classmethod-property - // and are a follow-up; skip them with a marker rather than emit broken code. + // Getters -> @property. Static getters were already emitted on the metaclass. const instanceGetterNames = new Set(); for (const g of getters) { + if (isStatic(g)) continue; const gp = p(g); const gname = toSnakeCase((gp.name as string) || 'prop'); - if (isStatic(g)) { - body.push(` # static getter '${gname}' is not yet supported on the Python target`); - continue; - } instanceGetterNames.add(gname); const returns = gp.returns ? 
` -> ${mapTsTypeToPython(gp.returns as string)}` : ''; body.push(' @property'); @@ -596,12 +638,9 @@ export function generatePythonClass(node: IRNode): string[] { // `.setter`; KERN allows setter-only properties, so synthesize a getter when // none was declared (write-only -> returns None, matching a TS getter-less read). for (const s of setters) { + if (isStatic(s)) continue; // static setters were already emitted on the metaclass const sp = p(s); const sname = toSnakeCase((sp.name as string) || 'prop'); - if (isStatic(s)) { - body.push(` # static setter '${sname}' is not yet supported on the Python target`); - continue; - } if (!instanceGetterNames.has(sname)) { body.push(' @property'); body.push(` def ${sname}(self): # write-only property (no getter declared in KERN)`); @@ -617,7 +656,8 @@ export function generatePythonClass(node: IRNode): string[] { if (body.length === 0) body.push(' pass'); - return [header, ...body]; + // Metaclass (if any) must be defined before the class that references it. + return metaLines.length > 0 ? 
[...metaLines, header, ...body] : [header, ...body]; } // ── Union (Pydantic Discriminated Union) ──────────────────────────────── From 9f8615d32166453c5016b043f8224091c3fbdbe3 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 11:13:47 +0200 Subject: [PATCH 46/63] test(python): static-accessor metaclass + inheritance conformance --- packages/python/tests/class-python.test.ts | 18 +++++++-- scripts/class-conformance.mjs | 43 ++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts index c3cc0b3b..9b54e0e3 100644 --- a/packages/python/tests/class-python.test.ts +++ b/packages/python/tests/class-python.test.ts @@ -102,7 +102,7 @@ describe('Python class codegen (single-source class slice)', () => { expect(code).toContain('@items.setter'); }); - test('static accessors are skipped (not emitted as broken instance @property)', () => { + test('static accessors lower to a per-class metaclass property (this -> cls)', () => { const reg: IRNode = { type: 'class', props: { name: 'Reg' }, @@ -110,13 +110,25 @@ describe('Python class codegen (single-source class slice)', () => { { type: 'getter', props: { name: 'label', static: 'true', returns: 'string' }, - children: [handler([{ type: 'return', props: { value: '"x"' }, children: [] }])], + children: [handler([{ type: 'return', props: { value: 'this.store' }, children: [] }])], + }, + { + type: 'setter', + props: { name: 'label', static: 'true' }, + children: [ + param('v', 'string'), + handler([{ type: 'assign', props: { target: 'this.store', value: 'v' }, children: [] }]), + ], }, ], }; const code = generatePythonClass(reg).join('\n'); + expect(code).toContain('class _RegMeta(type):'); + expect(code).toContain('class Reg(metaclass=_RegMeta):'); + expect(code).toContain('def label(cls) -> str:'); + expect(code).toContain('return cls.store'); // this -> cls inside a static accessor + 
expect(code).toContain('@label.setter'); expect(code).not.toContain('def label(self)'); - expect(code).toContain("static getter 'label'"); }); test('instance-field defaults emit in __init__, never as a shared class attr', () => { diff --git a/scripts/class-conformance.mjs b/scripts/class-conformance.mjs index e8a2e7de..36e93643 100644 --- a/scripts/class-conformance.mjs +++ b/scripts/class-conformance.mjs @@ -129,6 +129,49 @@ fn name=probe returns=number return value="c.value"`, expected: 9, }, + { + name: 'static accessor read + write round-trip', + kern: `class name=Counter export=true + field name=_count type=number static=true value={{ 0 }} + getter name=count static=true returns=number + handler + return value="this._count" + setter name=count static=true + param name=v type=number + handler + assign target="this._count" value="v" +fn name=probe returns=number + handler + assign target="Counter.count" value="Counter.count + 5" + assign target="Counter.count" value="Counter.count + 5" + return value="Counter.count"`, + expected: 10, + }, + { + name: 'inherited + overridden static accessor (metaclass chaining)', + kern: `class name=Base export=true + field name=_val type=number static=true value={{ 0 }} + getter name=val static=true returns=number + handler + return value="this._val" + setter name=val static=true + param name=v type=number + handler + assign target="this._val" value="v" +class name=Derived extends=Base export=true + getter name=val static=true returns=number + handler + return value="this._val * 2" + setter name=val static=true + param name=v type=number + handler + assign target="this._val" value="v + 1" +fn name=probe returns=number + handler + assign target="Derived.val" value="5" + return value="Derived.val"`, + expected: 12, + }, ]; const canon = (v) => JSON.stringify(v); From dfb80800d56daa0df402f2abdca7fc5d57d983c0 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 13:25:24 +0200 Subject: [PATCH 47/63] fix(python): reject 
static-accessor params that shadow the cls receiver A static setter whose param snake-cased to `cls` emitted invalid Python (`def label(cls, cls):`). The body collision guard reserved only `self`, never the injected metaclass receiver. Reserve the actual receiver (`cls` for static accessors, `self` for instance members) and fail codegen early with a clear message instead of producing a SyntaxError. Closes the codex (0.96) finding from the PR 2.1 static-accessor agon review. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python/src/generators/data.ts | 15 ++++++++++++++- packages/python/tests/class-python.test.ts | 18 ++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index 242da074..83d2fb73 100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -31,10 +31,23 @@ function methodBodyCodePython( return { code: handlerCode(method), imports: new Set(), helpers: new Set() }; } const symbolMap: Record = {}; - const claimedSnake = new Set(['self']); + // The implicit receiver occupies the first parameter slot: `self` for an + // instance member, `cls` for a static accessor (metaclass property). A user + // parameter that snake-cases to the receiver name would emit invalid Python + // (e.g. `def label(cls, cls):`), so reserve it and fail codegen early with a + // clear message rather than generate a SyntaxError. + const receiver = opts?.staticReceiver ? 'cls' : 'self'; + const claimedSnake = new Set([receiver]); const recordParam = (rawName: string): void => { if (!rawName) return; const snake = toSnakeCase(rawName); + if (snake === receiver) { + throw new Error( + `KERN-Python codegen: parameter '${rawName}' snake-cases to '${snake}', the implicit ` + + `${opts?.staticReceiver ? 'static-accessor receiver (cls)' : 'method receiver (self)'}. 
` + + 'Rename the parameter to avoid shadowing the receiver.', + ); + } if (claimedSnake.has(snake)) { throw new Error( `KERN-Python codegen: method param '${rawName}' snake-cases to '${snake}', which collides with another param on this method. ` + diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts index 9b54e0e3..ad4eac61 100644 --- a/packages/python/tests/class-python.test.ts +++ b/packages/python/tests/class-python.test.ts @@ -131,6 +131,24 @@ describe('Python class codegen (single-source class slice)', () => { expect(code).not.toContain('def label(self)'); }); + test('static setter param shadowing the cls receiver fails codegen (no `def label(cls, cls):`)', () => { + const reg: IRNode = { + type: 'class', + props: { name: 'Reg' }, + children: [ + { + type: 'setter', + props: { name: 'label', static: 'true' }, + children: [ + param('cls', 'string'), // snake-cases to the implicit metaclass receiver + handler([{ type: 'assign', props: { target: 'this.store', value: 'cls' }, children: [] }]), + ], + }, + ], + }; + expect(() => generatePythonClass(reg)).toThrow(/receiver/); + }); + test('instance-field defaults emit in __init__, never as a shared class attr', () => { const bag: IRNode = { type: 'class', From bd884b53b471f0efa4e9a6a98ca88d993328baf8 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 13:58:27 +0200 Subject: [PATCH 48/63] feat(python): lower list push in class methods via shared list-ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Class-method bodies emitted `this.items.push(x)` verbatim — invalid Python, since lists have no `.push` — while the route emitter already lowered it. The two Python emitters had drifted: each open-coded its own array dispatch. 
Extract the portable-array lowering into one shared module (core/expr/list-ops.ts) that BOTH the route emitter and the class-method body emitter delegate to, so `arr.push(x)` lowers identically (TS native; Python `(recv.append(x) or len(recv))`, matching JS push's new-length return) wherever it appears — parity by construction, no per-path drift. The class-body hook lowers only a single-arg call on a pure, guard-free receiver; the shim names the receiver twice, so an impure receiver such as `makeBag().items.push(x)` would run its effects twice on Python and break JS parity — those fall through unchanged. Agon-planned (6-engine brainstorm, unanimous Approach C) and agon-reviewed (codex 0.97 receiver double-evaluation closed via the isReceiverChainPure gate; the method-name set is kept private behind a predicate). Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python/src/codegen-body-python.ts | 40 ++++++++++++++ packages/python/src/core/expr/index.ts | 10 ++-- packages/python/src/core/expr/list-ops.ts | 61 ++++++++++++++++++++++ 3 files changed, 107 insertions(+), 4 deletions(-) create mode 100644 packages/python/src/core/expr/list-ops.ts diff --git a/packages/python/src/codegen-body-python.ts b/packages/python/src/codegen-body-python.ts index 4266a8ac..d84d2795 100644 --- a/packages/python/src/codegen-body-python.ts +++ b/packages/python/src/codegen-body-python.ts @@ -62,6 +62,7 @@ import { KERN_PAIR_HELPERS_PY, KERN_TMOD_HELPER_PY, } from './core/expr/index.js'; +import { isSharedPortableArrayMethod, lowerPortableArrayMethodPy } from './core/expr/list-ops.js'; import { mapTsTypeToPython } from './type-map.js'; /** Slice 3e — caller-provided options for the Python body emitter. 
@@ -2115,6 +2116,11 @@ function lowerChain(node: ChainNode, ctx: BodyEmitContext): GuardedExpr { if (regex !== null) return { guard: null, expr: regex }; const stdlib = applyStdlibLoweringPython(node, ctx); if (stdlib !== null) return { guard: null, expr: stdlib }; + // Portable array methods (e.g. `arr.push(x)`) lower through the SAME shared + // helper the route emitter uses, so a class method's `this.items.push(x)` + // matches a route handler's `arr.push(x)` by construction (no per-path drift). + const portableArray = lowerPortableArrayCallPython(node, ctx); + if (portableArray !== null) return { guard: null, expr: portableArray }; if (ctx.inConstructor && node.callee.kind === 'ident' && node.callee.name === 'super') { const superArgs = node.args.map((arg) => emitPyExprCtx(arg, ctx)).join(', '); return { guard: null, expr: `super().__init__(${superArgs})` }; @@ -2137,6 +2143,40 @@ function lowerChain(node: ChainNode, ctx: BodyEmitContext): GuardedExpr { return { guard: inner.guard, expr: `${inner.expr}(${args})` }; } +/** + * Lower a portable Array *method call* (e.g. `arr.push(x)`) through the shared + * `list-ops` module, so a class-method body and a route handler lower the same + * portable subset to identical Python. Returns `null` — and the caller falls + * through to the generic call emission — for anything that is not a bare, + * non-optional member call of a shared portable method on a guard-free + * receiver. Mirrors the peek-then-emit shape of `lowerRegexCallPython`. + */ +function lowerPortableArrayCallPython(call: Extract, ctx: BodyEmitContext): string | null { + const callee = call.callee; + if (callee.kind !== 'member' || callee.optional) return null; + // Gate on method name + arity BEFORE emitting receiver/args, so a non-shared + // or malformed call falls through without any duplicated emission. 
The only + // shared method (push) is single-arg; a 0-/2-arg push is left to the generic + // path (an unsupported case, pre-existing on the route emitter too). + if (!isSharedPortableArrayMethod(callee.property)) return null; + if (call.args.length !== 1) return null; + const recvNode = callee.object; + // The shim names the receiver twice (`(recv.append(x) or len(recv))`), so a + // side-effectful receiver — `makeBag().items.push(x)`, `bags[idx()].push(x)` — + // would run those effects twice on Python and break JS parity. Lower only a + // provably-pure receiver; let impure ones fall through unchanged. + if (!isReceiverChainPure(recvNode)) return null; + const recv: GuardedExpr = + recvNode.kind === 'member' || recvNode.kind === 'call' || recvNode.kind === 'index' + ? lowerChain(recvNode, ctx) + : { guard: null, expr: emitPyExprCtx(recvNode, ctx) }; + // A pure receiver can still be an optional chain (`a?.b`), which carries a + // None-guard the flat shim can't honor — fall through for those too. 
+ if (recv.guard !== null) return null; + const args = call.args.map((a) => emitPyExprCtx(a, ctx)); + return lowerPortableArrayMethodPy(recv.expr, callee.property, args); +} + function lowerRegexCallPython(call: Extract, ctx: BodyEmitContext): string | null { const callee = call.callee; if (callee.kind !== 'member') return null; diff --git a/packages/python/src/core/expr/index.ts b/packages/python/src/core/expr/index.ts index 0866f63e..e7f46d9f 100644 --- a/packages/python/src/core/expr/index.ts +++ b/packages/python/src/core/expr/index.ts @@ -11,6 +11,7 @@ import { KERN_JS_STRING_HELPERS_PY, KERN_TMOD_HELPER_PY, } from './helpers.js'; +import { lowerPortableArrayMethodPy } from './list-ops.js'; export { KERN_FMT_HELPER_PY, @@ -344,10 +345,11 @@ function lowerJsArrayMethods(expr: string, ctx: ExprRewriteContext): string { lowered = `(next((__i for __i, __v in enumerate(${receiver}) if __v == ${needle}), -1))`; } } else if (method === 'push') { - // JS Array.push mutates AND returns the new length. Python list.append - // returns None, so emit `(recv.append(x) or len(recv))` for exact parity - // (mutate + length). Single-arg only; varargs push left unsupported. - if (args.length === 1) lowered = `(${receiver}.append(${args[0]}) or len(${receiver}))`; + // Delegate to the single shared push lowering (also used by the + // class-method body emitter) so routes and class methods can't drift. + // Single-arg only; varargs push left unsupported. + const portable = lowerPortableArrayMethodPy(receiver, 'push', args); + if (portable !== null) lowered = portable; } else if (method === 'reverse') { // JS Array.reverse mutates AND returns the (same, reversed) array; Python // list.reverse returns None -> `(recv.reverse() or recv)` mutates + returns it. 
diff --git a/packages/python/src/core/expr/list-ops.ts b/packages/python/src/core/expr/list-ops.ts new file mode 100644 index 00000000..dcef700e --- /dev/null +++ b/packages/python/src/core/expr/list-ops.ts @@ -0,0 +1,61 @@ +/** + * Portable Array → Python lowering — the SINGLE source shared by both Python + * emission paths: + * - the route/expression emitter (`core/expr/index.ts`), and + * - the class-method body emitter (`codegen-body-python.ts`). + * + * A route handler's `arr.push(x)` and a class method's `arr.push(x)` MUST lower + * to the same Python. Before this module the two emitters open-coded their own + * dispatch tables and drifted — the route path lowered `.push`, the class path + * did not, so `this.items.push(x)` inside a class method emitted invalid + * `self.items.push(x)`. Routing both paths through one function makes the + * portable lowering identical by construction (the parity invariant KERN + * exists to enforce), so the two can never drift again. + * + * The TypeScript target keeps the native host syntax (`arr.push(x)` already + * returns the new length); only Python needs a shim, so this module is + * Python-only. + * + * Scope note: the lambda-taking methods (`.map`/`.filter`) are NOT shared here. + * They operate on representation-specific inputs (the route path rewrites arrow + * *strings*; the class path lowers `ValueIR` *lambdas*), so a single + * string-based helper cannot express them cleanly — each path keeps its own. + * The remaining scalar methods (`.length`, `.slice`, `.concat`, …) are a + * tracked follow-up; today they are route-only (no class-path counterpart, so + * no drift). This module owns the methods that are actually shared. + */ + +/** + * Method names this module lowers. 
Kept module-private (reached only through the + * `isSharedPortableArrayMethod` predicate) so the gate cannot be mutated by a + * consumer — exporting the `Set` itself would be a runtime footgun, since a + * `ReadonlySet` type does not freeze the underlying `Set`. + */ +const SHARED_PORTABLE_ARRAY_METHODS: ReadonlySet = new Set(['push']); + +/** + * True when `method` is a portable Array method this module lowers. A peek-style + * caller (the class-method body emitter) gates on this BEFORE emitting receiver + * and argument strings, avoiding duplicated emission when the method isn't ours. + */ +export function isSharedPortableArrayMethod(method: string): boolean { + return SHARED_PORTABLE_ARRAY_METHODS.has(method); +} + +/** + * Lower a portable Array *method call* to its Python form, operating purely on + * already-emitted receiver/argument strings so both call sites (which hold + * different input representations) can delegate to it. Returns `null` when the + * method is not a shared portable method, so callers fall through to their + * existing handling. + */ +export function lowerPortableArrayMethodPy(receiver: string, method: string, args: string[]): string | null { + if (method === 'push' && args.length === 1) { + // JS `Array.push` mutates AND returns the new length; Python `list.append` + // mutates but returns `None`. `(recv.append(x) or len(recv))` reproduces + // both effects: append runs, then the falsy `None` yields to `len(recv)`, + // which is always >= 1 after an append — exact parity with the JS return. 
+ return `(${receiver}.append(${args[0]}) or len(${receiver}))`; + } + return null; +} From b23f0282f746be21914aabbc23184468606bdcae Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 13:58:35 +0200 Subject: [PATCH 49/63] test(python): cover class-method push lowering + per-instance isolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - class-conformance.mjs: differential fixture proving per-instance list isolation AND push return-parity — two Bags, append twice to one, the other's first push returns 1 (not 3). Kills shared-mutable-default, unlowered push (Python crash, ts != py), and push without JS-return parity (None != 1). Also refresh the stale "list mutation excluded" scope comment. - class-python.test.ts: pure-receiver push lowers to the shared append+len shim; impure-receiver push does NOT lower (locks the codex double-eval fix). Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python/tests/class-python.test.ts | 55 ++++++++++++++++++++++ scripts/class-conformance.mjs | 29 ++++++++++-- 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts index ad4eac61..86bdbd47 100644 --- a/packages/python/tests/class-python.test.ts +++ b/packages/python/tests/class-python.test.ts @@ -231,4 +231,59 @@ describe('Python class codegen (single-source class slice)', () => { expect(code).toContain('super().__init__(name)'); expect(code.indexOf('super().__init__(name)')).toBeLessThan(code.indexOf('self.tricks = []')); }); + + test('list push on a pure receiver lowers to the shared append+len shim', () => { + const bag: IRNode = { + type: 'class', + props: { name: 'Bag' }, + children: [ + { + type: 'field', + props: { name: 'items', type: 'number[]', value: { __expr: true, code: '[]' } }, + children: [], + }, + { + type: 'method', + props: { name: 'add', returns: 'number' }, + children: [ + param('x', 'number'), + 
handler([{ type: 'return', props: { value: 'this.items.push(x)' }, children: [] }]), + ], + }, + ], + }; + const code = generatePythonClass(bag).join('\n'); + // Same lowering the route emitter uses — JS push's new-length return parity. + expect(code).toContain('(self.items.append(x) or len(self.items))'); + expect(code).not.toContain('self.items.push'); // no JS-ism leaks + }); + + test('list push on an IMPURE receiver does NOT lower (no double-evaluation)', () => { + // The shim names the receiver twice; a side-effectful receiver would run its + // effects twice on Python and break parity. It must fall through unchanged. + const box: IRNode = { + type: 'class', + props: { name: 'Box' }, + children: [ + { + type: 'field', + props: { name: 'items', type: 'number[]', value: { __expr: true, code: '[]' } }, + children: [], + }, + { + type: 'method', + props: { name: 'fresh', returns: 'number[]' }, + children: [handler([{ type: 'return', props: { value: 'this.items' }, children: [] }])], + }, + { + type: 'method', + props: { name: 'danger', returns: 'number' }, + children: [handler([{ type: 'return', props: { value: 'this.fresh().push(9)' }, children: [] }])], + }, + ], + }; + const code = generatePythonClass(box).join('\n'); + expect(code).not.toContain('.append(9)'); // shim NOT applied to the impure receiver + expect(code).toContain('self.fresh().push(9)'); // receiver named exactly once + }); }); diff --git a/scripts/class-conformance.mjs b/scripts/class-conformance.mjs index 36e93643..b5a39153 100644 --- a/scripts/class-conformance.mjs +++ b/scripts/class-conformance.mjs @@ -9,9 +9,11 @@ * targets BY CONSTRUCTION (both derive from one definition), not by hand-diffing * two emitters. * - * Scope: portable probes only (number/string ops). List mutation needs a - * portable list-append lowering and is exercised separately (unit tests prove - * the instance-field-default isolation directly). 
+ * Scope: portable probes (number/string ops) plus portable list mutation — + * `arr.push(x)` lowers identically in class methods and route handlers via the + * shared `core/expr/list-ops` module, so per-instance list isolation is proven + * differentially here (not only in unit tests). Other list ops (`.length`, + * `.slice`, …) are a tracked follow-up; `.map`/`.filter` stay per-path. * * Run: node scripts/class-conformance.mjs (or via `pnpm check:class-conformance`) */ @@ -147,6 +149,27 @@ fn name=probe returns=number return value="Counter.count"`, expected: 10, }, + { + name: 'portable list mutation: per-instance isolation + push return parity', + kern: `class name=Bag export=true + field name=items type=number[] value={{ [] }} + method name=add returns=number + param name=x type=number + handler + return value="this.items.push(x)" +fn name=probe returns=number + handler + let name=a value="new Bag()" + let name=b value="new Bag()" + do value="a.add(10)" + do value="a.add(20)" + return value="b.add(99)"`, + // Discriminating: b is a SEPARATE instance, so b.add returns 1 — not 3. + // Kills (a) shared-mutable-default (items aliased -> b.add returns 3), + // (b) push not lowered (Python `list.push` -> AttributeError, ts != py), + // (c) push without JS return parity (`append` returns None -> b.add != 1). + expected: 1, + }, { name: 'inherited + overridden static accessor (metaclass chaining)', kern: `class name=Base export=true From 66411147ad6f8145c1bc53fb1b68a57593fc0a98 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 15:23:34 +0200 Subject: [PATCH 50/63] feat(class): erase abstract + implements at codegen with symmetric fail-fast stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KERN classes can be `abstract` and `implements` an interface, but Python codegen dropped both and TS only carried the keywords. 
Define KERN's semantics and emit them with TS<->Python parity by construction (Approach B+, agon-chosen): - `abstract` is ERASED at runtime on both targets (a plain, instantiable class) — matching how TS `abstract` is erased in compiled JS. A handler-less member under an abstract class lowers to a fail-fast body: `raise NotImplementedError` (Python) / `throw new Error` (TS), so an un-overridden abstract member fails identically on both. TS keeps the class-level `abstract` keyword (tsc still rejects `new X()`); Python emits a plain class (no ABC/metaclass). Covers instance methods, getters, setters, AND static accessors (the metaclass path). - `implements` is erased on Python (the semantic validator owns conformance) and left as a `# implements: Y` marker comment; the single-inheritance base is unchanged. TS keeps its existing `implements` emission. Abstract enforcement (reject `new Abstract()`, require concrete overrides) lives in KERN's validator, not the host type-checker — a deferred follow-up. Agon-planned (6-engine brainstorm, Approach B+) and agon-reviewed (codex 0.99: abstract static accessors must raise too, now fixed). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/codegen/type-system.ts | 19 +++++-- .../__snapshots__/golden-codegen.test.ts.snap | 1 + packages/python/src/generators/data.ts | 51 ++++++++++++++++--- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/packages/core/src/codegen/type-system.ts b/packages/core/src/codegen/type-system.ts index 318e4f64..4a4bb3f4 100644 --- a/packages/core/src/codegen/type-system.ts +++ b/packages/core/src/codegen/type-system.ts @@ -243,6 +243,17 @@ function emitSingletons(node: IRNode, lines: string[], className: string, exp: s } function emitClassBody(node: IRNode, lines: string[]): void { + // Abstract members — handler-less methods/getters/setters under an + // `abstract=true` class — emit a fail-fast `throw` body, identical to the + // Python `raise`, so an un-overridden abstract member fails the same way on + // both targets. The class-level `abstract` keyword stays (tsc still rejects + // `new X()`); only the member BODY is synthesized, since TS forbids a body on + // an `abstract` method. + const className = emitIdentifier(p(node).name as string | undefined, 'Unknown', node); + const isAbstractClass = p(node).abstract === 'true' || p(node).abstract === true; + const isHandlerless = (m: IRNode): boolean => firstChild(m, 'handler') === undefined; + const abstractThrow = (kind: string, memberName: string): string => + `throw new Error("abstract ${kind} ${className}.${memberName} not implemented");`; // Fields for (const field of kids(node, 'field')) { const fp = propsOf<'field'>(field); @@ -310,7 +321,7 @@ function emitClassBody(node: IRNode, lines: string[]): void { const staticKw = isStatic ? 'static ' : ''; const star = isStream || isGenerator ? '*' : ''; const asyncKw = isAsync || isStream ? 'async ' : ''; - const mcode = methodBodyCode(method); + const mcode = isAbstractClass && isHandlerless(method) ? 
abstractThrow('method', mname) : methodBodyCode(method); // stream=true → AsyncGenerator, generator=true → Generator/AsyncGenerator // If user already declared full Generator<...>/AsyncGenerator<...>, use as-is @@ -345,7 +356,8 @@ function emitClassBody(node: IRNode, lines: string[]): void { const gvis = gp.private === 'true' || gp.private === true ? 'private ' : ''; const gstatic = gp.static === 'true' || gp.static === true ? 'static ' : ''; const greturns = gp.returns ? `: ${emitTypeAnnotation(gp.returns, 'unknown', getter)}` : ''; - const gcode = classMemberBodyCode(getter); + const gcode = + isAbstractClass && isHandlerless(getter) ? abstractThrow('getter', gname) : classMemberBodyCode(getter); lines.push(''); lines.push(` ${gvis}${gstatic}get ${gname}()${greturns} {`); if (gcode) { @@ -363,7 +375,8 @@ function emitClassBody(node: IRNode, lines: string[]): void { const svis = sp.private === 'true' || sp.private === true ? 'private ' : ''; const sstatic = sp.static === 'true' || sp.static === true ? 'static ' : ''; const sparams = emitParamList(setter, { fallback: 'value: unknown' }); - const scode = classMemberBodyCode(setter); + const scode = + isAbstractClass && isHandlerless(setter) ? 
abstractThrow('setter', sname) : classMemberBodyCode(setter); lines.push(''); lines.push(` ${svis}${sstatic}set ${sname}(${sparams}) {`); if (scode) { diff --git a/packages/core/tests/__snapshots__/golden-codegen.test.ts.snap b/packages/core/tests/__snapshots__/golden-codegen.test.ts.snap index 0ed7790c..65361ac5 100644 --- a/packages/core/tests/__snapshots__/golden-codegen.test.ts.snap +++ b/packages/core/tests/__snapshots__/golden-codegen.test.ts.snap @@ -57,6 +57,7 @@ exports[`golden: class abstract class 1`] = ` private area: number; render(): void { + throw new Error("abstract method Shape.render not implemented"); } }" `; diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index 83d2fb73..19a06485 100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -520,6 +520,19 @@ export function generatePythonClass(node: IRNode): string[] { return np.static === 'true' || np.static === true; }; + // `abstract` is ERASED at codegen on both targets (a plain, instantiable + // class — matching TS, where `abstract` is compile-time-only and gone from + // emitted JS). An abstract member is a handler-less method/getter/setter under + // an abstract class; it lowers to a fail-fast `raise`, so an un-overridden + // abstract member fails identically on TS (throw) and Python (raise) — parity + // by construction. `implements` is likewise erased (the semantic validator + // owns conformance); only a human-readable marker comment is emitted. + const isAbstractClass = props.abstract === 'true' || props.abstract === true; + const implementsRaw = typeof props.implements === 'string' ? 
(props.implements as string) : ''; + const isAbstractMember = (m: IRNode): boolean => isAbstractClass && firstChild(m, 'handler') === undefined; + const abstractRaise = (kind: string, memberName: string): string => + ` raise NotImplementedError("abstract ${kind} ${name}.${memberName} not implemented")`; + const fields = kids(node, 'field'); const staticFields = fields.filter(isStatic); const methods = kids(node, 'method'); @@ -549,7 +562,14 @@ export function generatePythonClass(node: IRNode): string[] { metaGetterNames.add(gname); metaLines.push(' @property'); metaLines.push(` def ${gname}(cls)${returns}:`); - metaLines.push(...methodBodyLinesPython(g, { classBody: true, staticReceiver: true })); + // Abstract static accessors fail-fast like instance ones, so an + // un-overridden abstract static getter raises on Python the same way it + // throws on TS (was silently `pass` -> None before). + if (isAbstractMember(g)) { + metaLines.push(abstractRaise('getter', gname)); + } else { + metaLines.push(...methodBodyLinesPython(g, { classBody: true, staticReceiver: true })); + } metaLines.push(''); } for (const s of staticSetters) { @@ -563,7 +583,11 @@ export function generatePythonClass(node: IRNode): string[] { } metaLines.push(` @${sname}.setter`); metaLines.push(` def ${sname}(cls, ${buildPythonParamList(s, { selfPrefix: false })}):`); - metaLines.push(...methodBodyLinesPython(s, { classBody: true, staticReceiver: true })); + if (isAbstractMember(s)) { + metaLines.push(abstractRaise('setter', sname)); + } else { + metaLines.push(...methodBodyLinesPython(s, { classBody: true, staticReceiver: true })); + } metaLines.push(''); } } @@ -630,7 +654,11 @@ export function generatePythonClass(node: IRNode): string[] { } else { body.push(` ${asyncKw}def ${mname}(${buildPythonParamList(m, { selfPrefix: true })})${returns}:`); } - body.push(...methodBodyLinesPython(m, { classBody: !isStatic(m) })); + if (isAbstractMember(m)) { + body.push(abstractRaise('method', mname)); + } 
else { + body.push(...methodBodyLinesPython(m, { classBody: !isStatic(m) })); + } body.push(''); } @@ -644,7 +672,11 @@ export function generatePythonClass(node: IRNode): string[] { const returns = gp.returns ? ` -> ${mapTsTypeToPython(gp.returns as string)}` : ''; body.push(' @property'); body.push(` def ${gname}(self)${returns}:`); - body.push(...methodBodyLinesPython(g, { classBody: true })); + if (isAbstractMember(g)) { + body.push(abstractRaise('getter', gname)); + } else { + body.push(...methodBodyLinesPython(g, { classBody: true })); + } body.push(''); } // Setters -> @.setter. Python requires a property to exist before its @@ -663,14 +695,21 @@ export function generatePythonClass(node: IRNode): string[] { } body.push(` @${sname}.setter`); body.push(` def ${sname}(${buildPythonParamList(s, { selfPrefix: true })}):`); - body.push(...methodBodyLinesPython(s, { classBody: true })); + if (isAbstractMember(s)) { + body.push(abstractRaise('setter', sname)); + } else { + body.push(...methodBodyLinesPython(s, { classBody: true })); + } body.push(''); } if (body.length === 0) body.push(' pass'); + // `implements` is erased at codegen (the validator owns conformance); emit a + // human-readable marker so the relationship survives in the generated source. + const headerLines = implementsRaw ? [`# implements: ${implementsRaw}`, header] : [header]; // Metaclass (if any) must be defined before the class that references it. - return metaLines.length > 0 ? [...metaLines, header, ...body] : [header, ...body]; + return metaLines.length > 0 ? 
[...metaLines, ...headerLines, ...body] : [...headerLines, ...body]; } // ── Union (Pydantic Discriminated Union) ──────────────────────────────── From 845c214e6fb8db9e709e43c3d4726790c984eaf4 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 15:23:34 +0200 Subject: [PATCH 51/63] test(class): abstract + interface parity fixtures and codegen guards - class-conformance.mjs (now 12/12): abstract polymorphic dispatch (Shape/Square), abstract template method reading an inherited field default (Formatter), and an abstract static accessor override dispatched through a chained metaclass. - class-python.test.ts: abstract instance method + abstract static accessor each emit the fail-fast `raise` (not a silent body); `implements` is erased to a marker comment with no runtime base. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python/tests/class-python.test.ts | 37 +++++++++++++ scripts/class-conformance.mjs | 63 ++++++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts index 86bdbd47..92efb548 100644 --- a/packages/python/tests/class-python.test.ts +++ b/packages/python/tests/class-python.test.ts @@ -232,6 +232,43 @@ describe('Python class codegen (single-source class slice)', () => { expect(code.indexOf('super().__init__(name)')).toBeLessThan(code.indexOf('self.tricks = []')); }); + test('abstract instance method (handler-less, under abstract class) emits a fail-fast raise', () => { + const shape: IRNode = { + type: 'class', + props: { name: 'Shape', abstract: 'true' }, + children: [{ type: 'method', props: { name: 'area', returns: 'number' }, children: [] }], // no handler -> abstract + }; + const code = generatePythonClass(shape).join('\n'); + expect(code).toContain('class Shape:'); // abstract erased -> plain instantiable class (no ABC/metaclass) + expect(code).toContain('raise NotImplementedError("abstract method Shape.area not implemented")'); + }); 
+ + test('abstract STATIC accessor emits a fail-fast raise (not a silent metaclass pass)', () => { + const base: IRNode = { + type: 'class', + props: { name: 'Base', abstract: 'true' }, + children: [{ type: 'getter', props: { name: 'tag', static: 'true', returns: 'string' }, children: [] }], + }; + const code = generatePythonClass(base).join('\n'); + // The metaclass static getter must raise, matching the TS throw — not `pass`/None. + expect(code).toContain('raise NotImplementedError("abstract getter Base.tag not implemented")'); + expect(code).not.toMatch(/def tag\(cls\)[^\n]*:\n\s*pass\b/); + }); + + test('implements is erased on the Python target, left as a marker comment', () => { + const user: IRNode = { + type: 'class', + props: { name: 'User', implements: 'Serializable' }, + children: [ + { type: 'field', props: { name: 'id', type: 'string', value: { __expr: true, code: '"x"' } }, children: [] }, + ], + }; + const code = generatePythonClass(user).join('\n'); + expect(code).toContain('# implements: Serializable'); + expect(code).toContain('class User:'); // no Protocol/ABC base injected + expect(code).not.toContain('Serializable)'); // implements is NOT a runtime base + }); + test('list push on a pure receiver lowers to the shared append+len shim', () => { const bag: IRNode = { type: 'class', diff --git a/scripts/class-conformance.mjs b/scripts/class-conformance.mjs index b5a39153..53d8d019 100644 --- a/scripts/class-conformance.mjs +++ b/scripts/class-conformance.mjs @@ -195,6 +195,69 @@ fn name=probe returns=number return value="Derived.val"`, expected: 12, }, + { + name: 'abstract class: erased at codegen, polymorphic dispatch to override', + kern: `class name=Shape abstract=true export=true + method name=area returns=number +class name=Square extends=Shape export=true + field name=side type=number value={{ 3 }} + method name=area returns=number + handler + return value="this.side * this.side" +fn name=measure returns=number + param name=shape type=Shape + 
handler + return value="shape.area()" +fn name=probe returns=number + handler + return value="measure(new Square())"`, + // `abstract` is erased on both targets (plain instantiable class), so a + // Shape-typed reference dispatches to Square.area on TS AND Python. + // Kills: Python dropping the override (AttributeError), the abstract base + // stub running instead of the override (NotImplementedError != 9), and any + // ABC/metaclass lowering that would make `new Square()` diverge. + expected: 9, + }, + { + name: 'abstract method: template method calls override + inherited field default', + kern: `class name=Formatter abstract=true export=true + field name=prefix type=string value={{ "[" }} + method name=suffix returns=string + method name=format returns=string + param name=input type=string + handler + return value="\`\${this.prefix}\${input}\${this.suffix()}\`" +class name=BracketFormatter extends=Formatter export=true + method name=suffix returns=string + handler + return value="\\"]\\"" +fn name=probe returns=string + handler + return value="new BracketFormatter().format(\\"test\\")"`, + // The concrete `format` (inherited) reads the inherited field default + // `prefix` and calls the abstract `suffix`, which dispatches to the override. + // Kills: dropped inherited field default, dropped inherited concrete method, + // and the abstract stub running instead of the BracketFormatter override. + expected: '[test]', + }, + { + name: 'abstract static accessor: override dispatches through chained metaclass', + kern: `class name=Base abstract=true export=true + getter name=tag static=true returns=string +class name=Impl extends=Base export=true + getter name=tag static=true returns=string + handler + return value="\\"impl\\"" +fn name=probe returns=string + handler + return value="Impl.tag"`, + // Abstract static getter on Base (fail-fast raise stub) + override on Impl, + // dispatched through the chained metaclass _ImplMeta(type(Base)). 
Reading + // Impl.tag resolves to the override on BOTH targets. Kills a Python lowering + // where the abstract static stub is `pass` (returns None) instead of a raise, + // or where the chained metaclass drops the override. + expected: 'impl', + }, ]; const canon = (v) => JSON.stringify(v); From b8184b07cf6cfb4d368ab058995f4f4106eacd0d Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 15:40:18 +0200 Subject: [PATCH 52/63] feat(class): inject implicit super() in derived constructors (KERN ctor semantics) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A derived class whose explicit constructor touches `this`/`self` without calling super() crashed on TS ("must call super constructor before accessing this") but worked on Python — the JS "super-before-this" rule leaking into KERN as a parity break. KERN now OWNS the rule: a derived constructor that omits super() gets an implicit no-arg super() / super().__init__() injected as the FIRST statement on BOTH targets (Java/Python ergonomics), threaded so field defaults and the body still run after it. The author writes explicit super(args) only to pass args up. Agon-planned (6-engine brainstorm, unanimous Option C). This is C's codegen arm (implicit injection). C's safety arm — a validator diagnostic when the base constructor REQUIRES args and a derived ctor omits super(args) — is a follow-up in the consolidated validator-enforcement slice (with the deferred abstract concrete-must-override check); until then that edge injects a no-arg super() that fails identically on both targets (parity preserved, just without a friendly compile-time error). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/codegen/type-system.ts | 9 +++++++++ packages/python/src/generators/data.ts | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/packages/core/src/codegen/type-system.ts b/packages/core/src/codegen/type-system.ts index 4a4bb3f4..e7f42138 100644 --- a/packages/core/src/codegen/type-system.ts +++ b/packages/core/src/codegen/type-system.ts @@ -299,6 +299,15 @@ function emitClassBody(node: IRNode, lines: string[]): void { const ctorCode = classMemberBodyCode(ctorNode); lines.push(''); lines.push(` constructor${generics}(${ctorParams}) {`); + // KERN constructor semantic: a DERIVED constructor that omits super() gets an + // implicit no-arg super() injected FIRST, so `this`/field access is legal (JS + // forbids touching `this` before super in a derived ctor). Mirrors the Python + // side's injected base-init; class-field initializers then run after super per + // JS semantics. The author writes explicit `super(args)` only to pass args up. 
+ const isDerived = typeof p(node).extends === 'string' && p(node).extends !== ''; + if (isDerived && !/\bsuper\s*\(/.test(ctorCode)) { + lines.push(' super();'); + } if (ctorCode) { for (const line of ctorCode.split('\n')) { lines.push(` ${line}`); diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index 19a06485..15513776 100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -624,6 +624,14 @@ export function generatePythonClass(node: IRNode): string[] { const superIdx = ctorLines.findIndex((line) => line.includes('super().__init__')); if (superIdx >= 0) { body.push(...ctorLines.slice(0, superIdx + 1), ...defaultLines, ...ctorLines.slice(superIdx + 1)); + } else if (base) { + // KERN constructor semantic: a DERIVED constructor that omits super() + // gets an implicit no-arg base-init injected FIRST, then field defaults, + // then the body — so `this`/field access is always legal (TS requires + // super-before-this; Python is lax, but we emit identically for parity). + // The author writes explicit `super(args)` only to pass args up; when the + // base constructor REQUIRES args, that's a validator concern (a follow-up). + body.push(' super().__init__()', ...defaultLines, ...ctorLines); } else { body.push(...defaultLines, ...ctorLines); } From e7c25102e32bc12becd0a39078f42dab3b15f1b5 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 15:40:18 +0200 Subject: [PATCH 53/63] test(class): derived-constructor implicit super() parity + injection guards - class-conformance.mjs (now 13/13): a derived class whose ctor omits super() and reads an inherited field default -> get() == 7 + 1 identically on both targets. Kills no-super-injected (TS crash), super-after-this (TS crash), base-init skipped (this.tag undefined -> NaN/AttributeError), field-defaults-before-super. 
- class-python.test.ts: derived ctor injects super().__init__() before field defaults and body; a non-derived ctor gets none. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python/tests/class-python.test.ts | 42 ++++++++++++++++++++++ scripts/class-conformance.mjs | 24 +++++++++++++ 2 files changed, 66 insertions(+) diff --git a/packages/python/tests/class-python.test.ts b/packages/python/tests/class-python.test.ts index 92efb548..f0a22bab 100644 --- a/packages/python/tests/class-python.test.ts +++ b/packages/python/tests/class-python.test.ts @@ -323,4 +323,46 @@ describe('Python class codegen (single-source class slice)', () => { expect(code).not.toContain('.append(9)'); // shim NOT applied to the impure receiver expect(code).toContain('self.fresh().push(9)'); // receiver named exactly once }); + + test('derived constructor omitting super() gets an implicit super().__init__() first', () => { + const box: IRNode = { + type: 'class', + props: { name: 'Box', extends: 'Base' }, + children: [ + { type: 'field', props: { name: 'x', type: 'number', value: { __expr: true, code: '0' } }, children: [] }, + { + type: 'constructor', + props: {}, + children: [ + param('v', 'number'), + handler([{ type: 'assign', props: { target: 'this.x', value: 'v' }, children: [] }]), + ], + }, + ], + }; + const code = generatePythonClass(box).join('\n'); + expect(code).toContain('super().__init__()'); + // Order must be: implicit super -> field default -> constructor body. 
+ expect(code.indexOf('super().__init__()')).toBeLessThan(code.indexOf('self.x = 0')); + expect(code.indexOf('self.x = 0')).toBeLessThan(code.lastIndexOf('self.x = v')); + }); + + test('non-derived constructor gets NO implicit super (only derived classes base-init)', () => { + const box: IRNode = { + type: 'class', + props: { name: 'Box' }, + children: [ + { + type: 'constructor', + props: {}, + children: [ + param('v', 'number'), + handler([{ type: 'assign', props: { target: 'this.x', value: 'v' }, children: [] }]), + ], + }, + ], + }; + const code = generatePythonClass(box).join('\n'); + expect(code).not.toContain('super().__init__'); + }); }); diff --git a/scripts/class-conformance.mjs b/scripts/class-conformance.mjs index 53d8d019..c79cbb34 100644 --- a/scripts/class-conformance.mjs +++ b/scripts/class-conformance.mjs @@ -258,6 +258,30 @@ fn name=probe returns=string // or where the chained metaclass drops the override. expected: 'impl', }, + { + name: 'derived constructor without super(): implicit base-init injected', + kern: `class name=Base export=true + field name=tag type=number value={{ 1 }} +class name=Box extends=Base export=true + field name=x type=number value={{ 0 }} + constructor + param name=v type=number + handler + assign target="this.x" value="v" + method name=get returns=number + handler + return value="this.x + this.tag" +fn name=probe returns=number + handler + return value="new Box(7).get()"`, + // Box's constructor touches `this.x` but never calls super(). KERN injects an + // implicit super() FIRST on both targets, so (a) TS doesn't crash with "must + // call super before this", and (b) the base's `tag=1` default runs via that + // super, giving get() = 7 + 1. Kills: no super injected (TS crash); super + // injected AFTER this.x (TS crash); base init skipped (this.tag undefined -> + // NaN on TS / AttributeError on Python); field defaults before super. 
+ expected: 8, + }, ]; const canon = (v) => JSON.stringify(v); From 944c3cdece822841c456e97d0ff1b8066b2687a6 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 16:33:50 +0200 Subject: [PATCH 54/63] feat(class): reconcile derived-ctor super semantics across validator, runtime, codegen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR4 Part 1 made codegen inject an implicit super() for derived constructors that omit it, but the semantic validator and the in-process KERN runtime still enforced the old "explicit super is mandatory" rule — so a program legal in generated TS/Python was rejected by the validator and threw in the interpreter (a target-vs-runtime split, flagged blocking by review). This lands Option C as one coherent semantic across all four layers, behind a single source of truth — a new canonical predicate `hasDirectSuperCtorCall(ctor)` (packages/core/src/constructor-super.ts). It answers structurally, from the IR, the one question every layer must answer identically: does the constructor make a direct super(...) call (counting if-branches, not lambdas, not super.member)? This replaces three divergent textual heuristics — the validator's own walk plus the codegens' emitted-text scans (`/\bsuper\s*\(/` and `"super().__init__"`) that 5/6 reviewers flagged as false-matching `super(` inside strings/comments. - Validator: a derived ctor with no direct super is now LEGAL (implicit base init); the only error is class-constructor-implicit-super-needs-args, raised when the base ctor requires arguments an arg-less implicit super can't supply. A direct super keeps the full discipline (double / conditional / this-before-super). class-constructor-missing-super is retired from the user path. The descriptive superStatus substrate fact is left untouched (still reports 'missing' etc.). 
- Runtime: initializeClassLayer initializes the base FIRST, then derived field defaults, then the body for implicit-mode ctors (frame pre-marked superCalled so this/super access is unguarded; a stray late super still trips double-init). - TS + Python codegen: the inject/splice decision now consumes the shared predicate instead of scanning emitted text. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/codegen/type-system.ts | 17 ++-- packages/core/src/constructor-super.ts | 120 +++++++++++++++++++++++ packages/core/src/core-runtime/index.ts | 29 +++++- packages/core/src/index.ts | 3 + packages/core/src/semantic-validator.ts | 87 +++++++++++++++- packages/python/src/generators/data.ts | 41 +++++--- 6 files changed, 272 insertions(+), 25 deletions(-) create mode 100644 packages/core/src/constructor-super.ts diff --git a/packages/core/src/codegen/type-system.ts b/packages/core/src/codegen/type-system.ts index e7f42138..20b4b5e1 100644 --- a/packages/core/src/codegen/type-system.ts +++ b/packages/core/src/codegen/type-system.ts @@ -5,6 +5,7 @@ */ import { emitExpression } from '../codegen-expression.js'; +import { hasDirectSuperCtorCall } from '../constructor-super.js'; import { propsOf } from '../node-props.js'; import { parseExpression } from '../parser-expression.js'; import { type IRNode, isExprObject } from '../types.js'; @@ -299,13 +300,17 @@ function emitClassBody(node: IRNode, lines: string[]): void { const ctorCode = classMemberBodyCode(ctorNode); lines.push(''); lines.push(` constructor${generics}(${ctorParams}) {`); - // KERN constructor semantic: a DERIVED constructor that omits super() gets an - // implicit no-arg super() injected FIRST, so `this`/field access is legal (JS - // forbids touching `this` before super in a derived ctor). Mirrors the Python - // side's injected base-init; class-field initializers then run after super per - // JS semantics. The author writes explicit `super(args)` only to pass args up. 
+ // KERN constructor semantic: a DERIVED constructor that omits a direct + // super(...) call gets an implicit no-arg super() injected FIRST, so + // `this`/field access is legal (JS forbids touching `this` before super in a + // derived ctor). Mirrors the Python side's injected base-init; class-field + // initializers then run after super per JS semantics. The author writes + // explicit `super(args)` only to pass args up. The inject decision uses the + // canonical structural predicate (shared with the validator, runtime, and + // Python target) rather than scanning emitted text — a `super(` inside a + // string literal or comment no longer suppresses injection. const isDerived = typeof p(node).extends === 'string' && p(node).extends !== ''; - if (isDerived && !/\bsuper\s*\(/.test(ctorCode)) { + if (isDerived && !hasDirectSuperCtorCall(ctorNode)) { lines.push(' super();'); } if (ctorCode) { diff --git a/packages/core/src/constructor-super.ts b/packages/core/src/constructor-super.ts new file mode 100644 index 00000000..d7705c17 --- /dev/null +++ b/packages/core/src/constructor-super.ts @@ -0,0 +1,120 @@ +/** + * Canonical constructor-super analysis — the SINGLE source of truth for the one + * question every KERN layer must answer the same way: does a constructor contain + * a direct `super(...)` constructor call? + * + * KERN's constructor semantic (Option C): a derived constructor MAY omit + * `super(...)`. When it does, KERN implicitly initializes the base first; when it + * writes an explicit `super(...)`, the author owns its placement and the strict + * discipline (no double/conditional super, no `this` before super) applies. 
The + * fork between those two modes is decided by exactly this predicate, and it MUST + * be decided identically by the semantic validator, the in-process core runtime, + * and BOTH codegen targets (TS + Python) — otherwise a program is legal in one + * layer and rejected/divergent in another (the precise bug this module exists to + * prevent). Previously each layer answered it differently: the validator walked + * the IR, while both codegens scanned EMITTED text (`/\bsuper\s*\(/` / + * `"super().__init__"`), which false-matched `super(` inside string literals and + * comments. One structural predicate, consumed everywhere, removes that drift. + * + * "Direct" mirrors the validator's long-standing rule precisely: + * - a `super(...)` call where the callee is the bare `super` identifier counts; + * - `super.method()` (a super MEMBER call) does NOT — it never initializes base; + * - a `super(...)` inside a lambda/arrow body does NOT — it never runs at + * construction time; + * - calls inside `if`/`else` branches DO count (the call is structurally present; + * whether it runs on every path is a separate discipline concern). + */ + +import { parseExpression } from './parser-expression.js'; +import type { IRNode } from './types.js'; + +// Props on a body statement whose value is an expression we must scan. Kept in +// sync with the validator's BODY_EXPRESSION_PROPS — a `super(...)` can appear in +// a `do value=...`, a `return value=...`, an `if cond=...`, etc. +const SUPER_SCAN_PROPS = [ + 'value', + 'expr', + 'target', + 'cond', + 'on', + 'in', + 'from', + 'to', + 'initial', + 'source', + 'sources', + 'cleanup', + 'min', + 'max', +] as const; + +/** True when `value` is the parser's wrapped-expression object `{__expr:true, code}`. 
*/ +function expressionCode(value: unknown): string | undefined { + if (typeof value === 'string') return value; + if ( + typeof value === 'object' && + value !== null && + (value as { readonly __expr?: unknown }).__expr === true && + typeof (value as { readonly code?: unknown }).code === 'string' + ) { + return (value as { readonly code: string }).code; + } + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + return undefined; +} + +/** + * Structural recursion over a parsed expression looking for a direct `super(...)` + * constructor call. Equivalent to the validator's `valueIRCallsSuperConstructor` + * (super-ident callee => yes; lambda => stop, never descend; else recurse), but + * self-contained so this module depends only on the parser + node types. + */ +function valueContainsSuperCtorCall(value: unknown): boolean { + if (!value || typeof value !== 'object') return false; + const node = value as { kind?: string; callee?: { kind?: string; name?: string } }; + // A lambda body that calls super never runs during construction — do not descend. + if (node.kind === 'lambda') return false; + if (node.kind === 'call' && node.callee?.kind === 'ident' && node.callee.name === 'super') { + return true; + } + for (const child of Object.values(value as Record)) { + if (Array.isArray(child)) { + if (child.some(valueContainsSuperCtorCall)) return true; + } else if (child && typeof child === 'object') { + if (valueContainsSuperCtorCall(child)) return true; + } + } + return false; +} + +/** The constructor's executable statements (handler body, minus params/decorators). */ +function constructorBodyStatements(ctor: IRNode): readonly IRNode[] { + const handler = ctor.children?.find((child) => child.type === 'handler'); + const body = handler ? (handler.children ?? []) : (ctor.children ?? 
[]); + return body.filter((child) => child.type !== 'param' && child.type !== 'decorator'); +} + +/** Walk a statement subtree, stopping at a nested `class` (its super belongs to it). */ +function statementContainsSuperCtorCall(node: IRNode, isRoot: boolean): boolean { + if (!isRoot && node.type === 'class') return false; + for (const prop of SUPER_SCAN_PROPS) { + const code = expressionCode(node.props?.[prop]); + if (code === undefined) continue; + try { + if (valueContainsSuperCtorCall(parseExpression(code))) return true; + } catch { + // Unparseable expression text can't be a structural super call — ignore. + } + } + return (node.children ?? []).some((child) => statementContainsSuperCtorCall(child, false)); +} + +/** + * Does this constructor contain a direct `super(...)` constructor call anywhere + * in its body (including inside `if`/`else` branches, but not inside lambdas or + * nested classes)? `true` => explicit-super mode (author owns placement, strict + * discipline applies). `false` => implicit-super mode (KERN injects base init). + */ +export function hasDirectSuperCtorCall(ctor: IRNode): boolean { + return constructorBodyStatements(ctor).some((stmt) => statementContainsSuperCtorCall(stmt, true)); +} diff --git a/packages/core/src/core-runtime/index.ts b/packages/core/src/core-runtime/index.ts index 8344936c..6f5a0347 100644 --- a/packages/core/src/core-runtime/index.ts +++ b/packages/core/src/core-runtime/index.ts @@ -1,3 +1,4 @@ +import { hasDirectSuperCtorCall } from '../constructor-super.js'; import { CORE_TYPE_CONTRACTS, CoreContractEvaluationError, @@ -1187,8 +1188,25 @@ function initializeClassLayer( instance.initializedClasses.add(klass.name); return; } + if (base && !hasDirectSuperCtorCall(ctor)) { + // Implicit-super mode (KERN Option C): a derived constructor that omits a + // direct super(...) 
gets base init injected FIRST, then its own field + // defaults, then its body — identical to what both codegen targets emit, so + // the interpreter and generated TS/Python agree. The frame starts with + // superCalled=true so this/super access inside the body is unguarded; an + // unexpected late super(...) would still trip the double-init guard. The same + // `hasDirectSuperCtorCall` predicate decides this mode in the validator and + // both codegens, so all four layers classify the constructor identically. + initializeClassLayer(instance, base, [], false); + initializeClassFields(instance, klass); + withConstructionFrame(instance, klass, true, () => { + callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; + }); + instance.initializedClasses.add(klass.name); + return; + } if (base) { - withConstructionFrame(instance, klass, () => { + withConstructionFrame(instance, klass, false, () => { callClassMemberBody(ctor, klass, instance, receivesConstructorArgs ? args : []).value; }); } else { @@ -1432,9 +1450,14 @@ function callSuperConstructor(value: KernSuperValue, args: readonly KernValue[]) return value.receiver; } -function withConstructionFrame(instance: KernInstanceValue, ownerClass: KernClassValue, run: () => void): void { +function withConstructionFrame( + instance: KernInstanceValue, + ownerClass: KernClassValue, + initialSuperCalled: boolean, + run: () => void, +): void { const stack = ACTIVE_CONSTRUCTORS.get(instance) ?? 
[]; - const frame: RuntimeConstructionFrame = { ownerClass, superCalled: false }; + const frame: RuntimeConstructionFrame = { ownerClass, superCalled: initialSuperCalled }; stack.push(frame); ACTIVE_CONSTRUCTORS.set(instance, stack); try { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index a83c3795..e76e7847 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -140,6 +140,9 @@ export { VALID_STRUCTURES, VALID_TARGETS, } from './config.js'; +// Canonical constructor-super predicate — single source of truth shared by the +// validator, runtime, and both codegen targets (TS here + Python via this export). +export { hasDirectSuperCtorCall } from './constructor-super.js'; export type { CoreFixture, CoreFixtureError, diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 53c8669f..ecb077b0 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -15,6 +15,7 @@ * symbols that the resolver proved exist. 
*/ +import { hasDirectSuperCtorCall } from './constructor-super.js'; import { type CoreShapeDiagnostic, type CoreShapeInterfaceFact, @@ -2820,7 +2821,7 @@ function validateClassGraphRoots(roots: readonly IRNode[], violations: SemanticV ); validateClassConstructors(info, violations); validateClassMemberConflicts(info, violations); - validateClassSuperUsage(info, violations); + validateClassSuperUsage(info, classByName, violations); } validateClassInheritanceCycles(classes, classByName, violations); @@ -3817,11 +3818,16 @@ function validateClassMemberConflicts(info: ClassInfo, violations: SemanticViola } } -function validateClassSuperUsage(info: ClassInfo, violations: SemanticViolation[]): void { +function validateClassSuperUsage( + info: ClassInfo, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { const hasBase = Boolean(info.baseName); + const baseRequiresArgs = hasBase && baseConstructorRequiresArgs(info, classByName); for (const ctor of info.constructors) { if (hasBase) { - validateDerivedConstructorDiscipline(info, ctor, violations); + validateDerivedConstructorSuper(info, ctor, baseRequiresArgs, violations); } if (!hasBase && nodeBodyUsesSuper(ctor)) { violations.push({ @@ -3862,6 +3868,12 @@ interface ConstructorAnalysis { sawSuper: boolean; } +// DESCRIPTIVE analyzer — feeds the `superStatus` substrate fact (via +// `constructorSuperDiagnostics`), NOT user-facing violations. It still classifies +// an omitted super as `missing` and a pre-super `this` access as `this-before-super` +// so the FACT keeps describing the constructor's structure faithfully. The +// user-facing legality judgment lives in `validateDerivedConstructorSuper`, which +// applies KERN's Option-C semantics on top of this description. 
function validateDerivedConstructorDiscipline(info: ClassInfo, ctor: IRNode, violations: SemanticViolation[]): void { const ctx: ConstructorDisciplineContext = { info, @@ -3885,6 +3897,75 @@ function validateDerivedConstructorDiscipline(info: ClassInfo, ctor: IRNode, vio } } +/** + * User-facing derived-constructor validation under KERN's Option-C super + * semantics. The mode is decided by the canonical `hasDirectSuperCtorCall` + * predicate — shared verbatim with the runtime and both codegen targets so all + * four layers agree on whether a constructor opted into explicit-super mode: + * + * - No direct `super(...)` call (implicit mode): KERN injects base init at + * constructor entry, so omitting super is LEGAL and `this`/super-member access + * is always safe. The only error is when the base constructor REQUIRES + * arguments — an arg-less implicit super cannot satisfy it. + * - A direct `super(...)` call exists (explicit mode): the author owns its + * placement, so the full discipline applies — reject double-super, + * conditional-super (not on every path), and `this`/super before super. + * + * `class-constructor-missing-super` is intentionally unreachable here: an omitted + * super is no longer an error, and an explicit super means a direct call exists. + */ +function validateDerivedConstructorSuper( + info: ClassInfo, + ctor: IRNode, + baseRequiresArgs: boolean, + violations: SemanticViolation[], +): void { + if (!hasDirectSuperCtorCall(ctor)) { + if (baseRequiresArgs) { + violations.push({ + rule: 'class-constructor-implicit-super-needs-args', + nodeType: 'constructor', + message: `Class '${info.name}' omits \`super(...)\` but base class '${info.baseName}' has a constructor that requires arguments. Call \`super(...)\` explicitly to pass them.`, + line: ctor.loc?.line, + col: ctor.loc?.col, + }); + } + return; + } + // Explicit-super mode: replay the discipline analysis. 
Its walk emits + // double-super / this-before-super as side effects; the tail covers "super + // present but not on every path" (conditional-super). + const ctx: ConstructorDisciplineContext = { + info, + violations, + sawSuper: false, + emittedConditionalSuper: false, + }; + const analysis = analyzeConstructorStatements(constructorBodyStatements(ctor), 'uninit', ctx); + if (analysis.state !== 'init') emitConstructorConditionalSuper(ctx, ctor); +} + +/** + * True when the base class's own constructor declares at least one required + * (no-default) parameter — i.e. an implicit no-arg `super()` would fail at + * runtime. Mirrors the runtime's required-arg rule (a param is required unless it + * carries a `value`/`default`). A base with no own constructor, or an unresolved + * base, is treated as requiring no args (the implicit super forwards safely); + * required args inherited transitively through a constructor-less base are a + * deliberate follow-up, not caught here. + */ +function baseConstructorRequiresArgs(info: ClassInfo, classByName: ReadonlyMap<string, ClassInfo>): boolean { + const base = info.baseName ? classByName.get(info.baseName) : undefined; + const baseCtor = base?.constructors[0]; + if (!baseCtor) return false; + return (baseCtor.children ?? []).some( + (child) => + child.type === 'param' && + !Object.hasOwn(child.props ?? {}, 'value') && + !Object.hasOwn(child.props ?? 
{}, 'default'), + ); +} + function analyzeConstructorStatements( statements: readonly IRNode[], initialState: ConstructorSuperState, diff --git a/packages/python/src/generators/data.ts b/packages/python/src/generators/data.ts index 15513776..ed97d260 100644 --- a/packages/python/src/generators/data.ts +++ b/packages/python/src/generators/data.ts @@ -4,7 +4,15 @@ */ import type { IRNode } from '@kernlang/core'; -import { emitIdentifier, getFirstChild, getProps, handlerCode, mapSemanticType, propsOf } from '@kernlang/core'; +import { + emitIdentifier, + getFirstChild, + getProps, + handlerCode, + hasDirectSuperCtorCall, + mapSemanticType, + propsOf, +} from '@kernlang/core'; import { emitNativeKernBodyPythonWithImports } from '../codegen-body-python.js'; import { buildPythonParamList, firstChild, kids, p, parseLegacyParamParts } from '../codegen-helpers.js'; import { mapTsTypeToPython, toSnakeCase } from '../type-map.js'; @@ -618,19 +626,26 @@ export function generatePythonClass(node: IRNode): string[] { if (ctor) { body.push(` def __init__(${buildPythonParamList(ctor, { selfPrefix: true })}):`); const ctorLines = methodBodyLinesPython(ctor, { classBody: true, isConstructor: true }); - // Field initializers run AFTER super().__init__() (TS field-init-after-super - // order), so inject defaults right after the super call when present, else at - // the top of the constructor body. - const superIdx = ctorLines.findIndex((line) => line.includes('super().__init__')); - if (superIdx >= 0) { - body.push(...ctorLines.slice(0, superIdx + 1), ...defaultLines, ...ctorLines.slice(superIdx + 1)); + // Whether the constructor already calls super(...) is decided by the canonical + // structural predicate (shared with the validator, runtime, and TS target) — + // NEVER by scanning emitted text, so a `super().__init__` substring inside a + // string literal or comment can't change codegen. Field initializers run AFTER + // super (TS field-init-after-super order). 
+ if (hasDirectSuperCtorCall(ctor)) { + // Explicit-super mode: position the field defaults right after the emitted + // super line. The predicate already proved a direct super exists, so this + // locates the real call (a native super(...) lowers to `super().__init__(...)`); + // the index is used only for placement, not the inject decision. + const superIdx = ctorLines.findIndex((line) => line.includes('super().__init__')); + const splice = superIdx >= 0 ? superIdx + 1 : 0; + body.push(...ctorLines.slice(0, splice), ...defaultLines, ...ctorLines.slice(splice)); } else if (base) { - // KERN constructor semantic: a DERIVED constructor that omits super() - // gets an implicit no-arg base-init injected FIRST, then field defaults, - // then the body — so `this`/field access is always legal (TS requires - // super-before-this; Python is lax, but we emit identically for parity). - // The author writes explicit `super(args)` only to pass args up; when the - // base constructor REQUIRES args, that's a validator concern (a follow-up). + // Implicit-super mode (KERN Option C): a DERIVED constructor that omits + // super() gets an implicit no-arg base-init injected FIRST, then field + // defaults, then the body — so `this`/field access is always legal (TS + // requires super-before-this; Python is lax, but we emit identically for + // parity). The author writes explicit `super(args)` only to pass args up; + // when the base constructor REQUIRES args, the validator flags it. 
body.push(' super().__init__()', ...defaultLines, ...ctorLines); } else { body.push(...defaultLines, ...ctorLines); From 7ceb28615b22d9af9701328f151cac9e256145bd Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 16:33:58 +0200 Subject: [PATCH 55/63] test(class): cover Option-C super reconciliation + canonical predicate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - constructor-super.test.ts: pins hasDirectSuperCtorCall on the canonical cases (omit, straight-line super, super-in-if, lambda-only, super.member, double) so all four layers can't drift from the single source of truth. - class-semantics: flips the omitted-super and lambda-only cases to legal; the super.kind() (member, no ctor super) case drops to implicit-mode legal (this-before-super 2 -> 1); adds coverage for the new class-constructor-implicit-super-needs-args arity diagnostic and that an explicit super(args) satisfies an arg-requiring base. - core-runtime: a lambda-only super now attempts implicit base init (fails an arg-requiring base with "missing required argument" — proving the lambda super isn't counted); adds the positive implicit-base-init case mirroring the class-conformance Box/Base fixture inside the interpreter. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/tests/class-semantics.test.ts | 72 ++++++++++++-- packages/core/tests/constructor-super.test.ts | 96 +++++++++++++++++++ packages/core/tests/core-runtime.test.ts | 36 ++++++- 3 files changed, 194 insertions(+), 10 deletions(-) create mode 100644 packages/core/tests/constructor-super.test.ts diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 81e27cce..714fe34e 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -543,8 +543,11 @@ describe('semantic-validator — class object model', () => { expect(violations.map((violation) => violation.rule)).toContain('class-member-conflict'); }); - test('reports derived constructors that omit super', () => { - const violations = violationsFor( + test('accepts a derived constructor that omits super (implicit base init)', () => { + // KERN Option C: a derived constructor may omit super(); KERN injects an + // implicit no-arg base init at entry, so omitting it is legal when the base + // constructor needs no arguments. No missing-super / needs-args violation. + const rules = rulesFor( [ 'class name=Entity', 'class name=User extends=Entity', @@ -554,11 +557,15 @@ describe('semantic-validator — class object model', () => { ].join('\n'), ); - expect(violations.map((violation) => violation.rule)).toContain('class-constructor-missing-super'); + expect(rules).not.toContain('class-constructor-missing-super'); + expect(rules).not.toContain('class-constructor-implicit-super-needs-args'); }); - test('does not accept delayed super calls inside constructor lambdas', () => { - const violations = violationsFor( + test('treats a lambda-only super as no effective super (implicit base init)', () => { + // A super() that only appears inside a lambda never runs at construction, so + // it is not an effective super call. 
Under Option C the constructor falls + // into implicit mode and is legal (base needs no args) — no missing-super. + const rules = rulesFor( [ 'class name=Entity', 'class name=User extends=Entity', @@ -568,10 +575,59 @@ describe('semantic-validator — class object model', () => { ].join('\n'), ); - expect(violations.map((violation) => violation.rule)).toContain('class-constructor-missing-super'); + expect(rules).not.toContain('class-constructor-missing-super'); + }); + + test('flags an omitted super when the base constructor requires arguments', () => { + // Implicit no-arg super() cannot satisfy a base whose constructor needs `id`, + // so KERN raises the arity-specific diagnostic (NOT the retired missing-super). + const rules = rulesFor( + [ + 'class name=Entity', + ' field name=id type=string', + ' constructor', + ' param name=id type=string', + ' handler lang=kern', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' field name=label type=string', + ' constructor', + ' param name=label type=string', + ' handler lang=kern', + ' assign target="this.label" value="label"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-implicit-super-needs-args'); + expect(rules).not.toContain('class-constructor-missing-super'); + }); + + test('accepts an explicit super(args) when the base constructor requires arguments', () => { + const rules = rulesFor( + [ + 'class name=Entity', + ' field name=id type=string', + ' constructor', + ' param name=id type=string', + ' handler lang=kern', + ' assign target="this.id" value="id"', + 'class name=User extends=Entity', + ' constructor', + ' param name=id type=string', + ' handler lang=kern', + ' do value="super(id)"', + ].join('\n'), + ); + + expect(rules).not.toContain('class-constructor-implicit-super-needs-args'); + expect(rules).not.toContain('class-constructor-missing-super'); }); - test('reports this and super member access before constructor super', () => { + test('reports this access 
before an explicit super, but allows super.member in implicit mode', () => { + // User writes an explicit super() AFTER touching `this` -> this-before-super. + // Admin only reads super.kind() (a super MEMBER call, not a super constructor + // call), so it has no explicit super and runs in implicit mode, where base + // init happens at entry and super.kind() is legal. Only User is flagged. const rules = rulesFor( [ 'class name=Entity', @@ -590,7 +646,7 @@ describe('semantic-validator — class object model', () => { ].join('\n'), ); - expect(rules.filter((rule) => rule === 'class-constructor-this-before-super')).toHaveLength(2); + expect(rules.filter((rule) => rule === 'class-constructor-this-before-super')).toHaveLength(1); }); test('reports double constructor super calls', () => { diff --git a/packages/core/tests/constructor-super.test.ts b/packages/core/tests/constructor-super.test.ts new file mode 100644 index 00000000..627cf72b --- /dev/null +++ b/packages/core/tests/constructor-super.test.ts @@ -0,0 +1,96 @@ +import { hasDirectSuperCtorCall } from '../src/constructor-super.js'; +import { parse } from '../src/parser.js'; +import type { IRNode } from '../src/types.js'; + +// Extract the (single) class's constructor node from a parsed KERN module. The +// canonical predicate operates on a constructor IR node, so these tests pin the +// ONE classification every layer (validator, runtime, TS + Python codegen) relies +// on — if this drifts, all four drift together, which is exactly what the shared +// predicate exists to prevent. +function ctorOf(source: string): IRNode { + const root = parse(source); + const cls = root.type === 'class' ? root : (root.children ?? []).find((c) => c.type === 'class'); + if (!cls) throw new Error('test fixture parsed no class'); + const ctor = (cls.children ?? 
[]).find((c) => c.type === 'constructor'); + if (!ctor) throw new Error('test fixture parsed no constructor'); + return ctor; +} + +describe('hasDirectSuperCtorCall — canonical constructor-super predicate', () => { + test('false when the constructor omits super entirely (implicit mode)', () => { + const ctor = ctorOf( + [ + 'class name=Box extends=Base', + ' constructor', + ' param name=v type=number', + ' handler lang=kern', + ' assign target="this.x" value="v"', + ].join('\n'), + ); + expect(hasDirectSuperCtorCall(ctor)).toBe(false); + }); + + test('true for a straight-line direct super(...) call (explicit mode)', () => { + const ctor = ctorOf( + [ + 'class name=Dog extends=Animal', + ' constructor', + ' param name=name type=string', + ' handler lang=kern', + ' do value="super(name)"', + ].join('\n'), + ); + expect(hasDirectSuperCtorCall(ctor)).toBe(true); + }); + + test('true for a super() inside an if branch — presence, not satisfaction', () => { + const ctor = ctorOf( + [ + 'class name=User extends=Entity', + ' constructor', + ' param name=ready type=boolean', + ' handler lang=kern', + ' if cond=ready', + ' do value="super()"', + ].join('\n'), + ); + expect(hasDirectSuperCtorCall(ctor)).toBe(true); + }); + + test('false for a super() that only appears inside a lambda (never runs at construction)', () => { + const ctor = ctorOf( + [ + 'class name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="(() => super())"', + ].join('\n'), + ); + expect(hasDirectSuperCtorCall(ctor)).toBe(false); + }); + + test('false for a super MEMBER call (super.method), which never initializes the base', () => { + const ctor = ctorOf( + [ + 'class name=Admin extends=Entity', + ' constructor', + ' handler lang=kern', + ' return value="super.kind()"', + ].join('\n'), + ); + expect(hasDirectSuperCtorCall(ctor)).toBe(false); + }); + + test('true for a double super (both calls are direct, structurally present)', () => { + const ctor = ctorOf( + [ + 'class 
name=User extends=Entity', + ' constructor', + ' handler lang=kern', + ' do value="super()"', + ' do value="super()"', + ].join('\n'), + ); + expect(hasDirectSuperCtorCall(ctor)).toBe(true); + }); +}); diff --git a/packages/core/tests/core-runtime.test.ts b/packages/core/tests/core-runtime.test.ts index 918e6e87..e7dc2453 100644 --- a/packages/core/tests/core-runtime.test.ts +++ b/packages/core/tests/core-runtime.test.ts @@ -1529,7 +1529,12 @@ describe('KERN core runtime statements', () => { expect(toHostValue(evalCoreExpression('setChain()', env))).toBe(10); }); - test('does not count delayed lambda super calls as constructor initialization', () => { + test('a lambda-only super is not effective: implicit base init runs and fails an arg-requiring base', () => { + // The only super(id) sits inside a lambda, so it never runs at construction. + // Under Option C the derived constructor is in implicit mode: KERN attempts a + // no-arg base init FIRST, which fails because Entity's constructor requires + // `id`. The 'missing required argument: id' error (not a lambda error) proves + // the lambda super was NOT counted AND implicit base init was attempted. const root = parse( [ 'class name=Entity', @@ -1547,7 +1552,34 @@ describe('KERN core runtime statements', () => { const env = createCoreRuntimeEnv(); runCoreRuntime(root, env); - expect(() => evalCoreExpression('new User("u1")', env)).toThrow('lambda expressions are not supported'); + expect(() => evalCoreExpression('new User("u1")', env)).toThrow('missing required argument: id'); + }); + + test('derived constructor that omits super gets implicit base init (Option C, parity with codegen)', () => { + // Mirrors the class-conformance Box/Base fixture inside the interpreter: Box's + // constructor touches this.x but never calls super(). KERN injects base init + // FIRST (so Base.tag=1 default is present), then derived field defaults, then + // the body — get() = x(7) + tag(1) = 8. 
Proves the runtime now agrees with + // generated TS/Python instead of throwing "must call super(...)". + const root = parse( + [ + 'class name=Base', + ' field name=tag type=number value={{ 1 }}', + 'class name=Box extends=Base', + ' field name=x type=number value={{ 0 }}', + ' constructor', + ' param name=v type=number', + ' handler', + ' assign target="this.x" value="v"', + ' method name=get returns=number', + ' handler', + ' return value="this.x + this.tag"', + ].join('\n'), + ); + const env = createCoreRuntimeEnv(); + runCoreRuntime(root, env); + + expect(toHostValue(evalCoreExpression('new Box(7).get()', env))).toBe(8); }); }); From f0d9d1c3c65e034f9b9c9616c8278f3e4b6e9950 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 16:45:19 +0200 Subject: [PATCH 56/63] fix(class): resolve effective base ctor transitively for implicit-super arity check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review (codex, confidence 1.0, verified repro) found baseConstructorRequiresArgs only inspected the immediate base's own constructor. For `C extends B extends A` where B has no constructor and A requires arguments, the validator accepted C's omitted super() while the runtime threw `A.constructor missing required argument` — re-opening the exact validator/runtime split this reconciliation set out to close (initializeClassLayer forwards [] through constructor-less bases to A). Walk up the inheritance chain through constructor-less bases to the first ancestor that actually declares a constructor — the one implicit init reaches with no args — and test ITS required params (with a cycle guard). This matches the runtime's forwarding precisely. Adds regression coverage for the transitive arg-requiring base (flagged) and the transitive no-arg base (accepted). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/semantic-validator.ts | 45 +++++++++++++------- packages/core/tests/class-semantics.test.ts | 46 +++++++++++++++++++++ 2 files changed, 75 insertions(+), 16 deletions(-) diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index ecb077b0..95a39bf4 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -3946,24 +3946,37 @@ function validateDerivedConstructorSuper( } /** - * True when the base class's own constructor declares at least one required - * (no-default) parameter — i.e. an implicit no-arg `super()` would fail at - * runtime. Mirrors the runtime's required-arg rule (a param is required unless it - * carries a `value`/`default`). A base with no own constructor, or an unresolved - * base, is treated as requiring no args (the implicit super forwards safely); - * required args inherited transitively through a constructor-less base are a - * deliberate follow-up, not caught here. + * True when the EFFECTIVE base constructor reached by an implicit no-arg + * `super()` declares at least one required (no-default) parameter — i.e. that + * implicit init would fail at runtime. The effective base ctor is found by walking + * up the inheritance chain through constructor-less bases, exactly as the runtime + * does: `initializeClassLayer` forwards `[]` through a base that has no + * constructor (`base && !ctor`) to ITS base, so the first ancestor that actually + * declares a constructor is the one invoked with no args. Checking only the + * immediate base would let `C extends B extends A` (B ctor-less, A arg-requiring) + * pass validation yet throw at runtime — re-creating the validator/runtime split + * this reconciliation closes. Mirrors the runtime's required-arg rule (a param is + * required unless it carries a `value`/`default`); a chain with no constructor + * anywhere (or an unresolved base) requires no args. 
*/ function baseConstructorRequiresArgs(info: ClassInfo, classByName: ReadonlyMap<string, ClassInfo>): boolean { - const base = info.baseName ? classByName.get(info.baseName) : undefined; - const baseCtor = base?.constructors[0]; - if (!baseCtor) return false; - return (baseCtor.children ?? []).some( - (child) => - child.type === 'param' && - !Object.hasOwn(child.props ?? {}, 'value') && - !Object.hasOwn(child.props ?? {}, 'default'), - ); + const seen = new Set(); + let current = info.baseName ? classByName.get(info.baseName) : undefined; + while (current && !seen.has(current.name)) { + seen.add(current.name); + const ctor = current.constructors[0]; + if (ctor) { + return (ctor.children ?? []).some( + (child) => + child.type === 'param' && + !Object.hasOwn(child.props ?? {}, 'value') && + !Object.hasOwn(child.props ?? {}, 'default'), + ); + } + // Constructor-less base: the runtime forwards [] to its base — keep walking. + current = current.baseName ? classByName.get(current.baseName) : undefined; + } + return false; } function analyzeConstructorStatements( diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 714fe34e..c3143004 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -623,6 +623,52 @@ describe('semantic-validator — class object model', () => { expect(rules).not.toContain('class-constructor-missing-super'); }); + test('flags an omitted super when an arg-requiring base is reached transitively through a ctor-less base', () => { + // C extends B extends A: B has no constructor, so an implicit super() in C + // forwards [] through B to A — which requires `id`. The validator must walk + // through the constructor-less B to A (matching the runtime), or it would pass + // here while the runtime throws, re-opening the split this reconciliation closes.
+ const rules = rulesFor( + [ + 'class name=A', + ' field name=id type=string', + ' constructor', + ' param name=id type=string', + ' handler lang=kern', + ' assign target="this.id" value="id"', + 'class name=B extends=A', + 'class name=C extends=B', + ' field name=label type=string', + ' constructor', + ' param name=label type=string', + ' handler lang=kern', + ' assign target="this.label" value="label"', + ].join('\n'), + ); + + expect(rules).toContain('class-constructor-implicit-super-needs-args'); + }); + + test('accepts an omitted super when the transitively-reached base needs no args', () => { + // Same ctor-less intermediate, but the effective base A takes no required args, + // so an implicit no-arg super() succeeds end-to-end — no diagnostic. + const rules = rulesFor( + [ + 'class name=A', + ' field name=tag type=string value="base"', + 'class name=B extends=A', + 'class name=C extends=B', + ' field name=label type=string', + ' constructor', + ' param name=label type=string', + ' handler lang=kern', + ' assign target="this.label" value="label"', + ].join('\n'), + ); + + expect(rules).not.toContain('class-constructor-implicit-super-needs-args'); + }); + test('reports this access before an explicit super, but allows super.member in implicit mode', () => { // User writes an explicit super() AFTER touching `this` -> this-before-super. // Admin only reads super.kind() (a super MEMBER call, not a super constructor From 62855ebfa9c38cb65ed0578a1cbe7478a4b844a2 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 16:49:38 +0200 Subject: [PATCH 57/63] fix(class): name the actual arg-requiring base in implicit-super diagnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-review (codex, confidence 1.0) noted the transitive arity check rejected the right constructor but the message still named the immediate base. 
For `C extends B extends A` (B constructor-less, A requires args) it read "base class 'B' has a constructor that requires arguments" — but B has no constructor; A does. argRequiringEffectiveBaseName now returns the name of the class whose constructor is actually reached and requires args (undefined when implicit init succeeds), and the diagnostic names that class. Test asserts the transitive message names 'A', not 'B'. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/semantic-validator.ts | 49 ++++++++++++--------- packages/core/tests/class-semantics.test.ts | 9 +++- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 95a39bf4..91d56a44 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -3824,10 +3824,10 @@ function validateClassSuperUsage( violations: SemanticViolation[], ): void { const hasBase = Boolean(info.baseName); - const baseRequiresArgs = hasBase && baseConstructorRequiresArgs(info, classByName); + const argRequiringBaseName = hasBase ? 
argRequiringEffectiveBaseName(info, classByName) : undefined; for (const ctor of info.constructors) { if (hasBase) { - validateDerivedConstructorSuper(info, ctor, baseRequiresArgs, violations); + validateDerivedConstructorSuper(info, ctor, argRequiringBaseName, violations); } if (!hasBase && nodeBodyUsesSuper(ctor)) { violations.push({ @@ -3917,15 +3917,18 @@ function validateDerivedConstructorDiscipline(info: ClassInfo, ctor: IRNode, vio function validateDerivedConstructorSuper( info: ClassInfo, ctor: IRNode, - baseRequiresArgs: boolean, + argRequiringBaseName: string | undefined, violations: SemanticViolation[], ): void { if (!hasDirectSuperCtorCall(ctor)) { - if (baseRequiresArgs) { + if (argRequiringBaseName) { + // Name the class whose constructor actually requires args — which may be a + // transitive ancestor reached through constructor-less bases, not the + // immediate base — so the diagnostic points the author at the real source. violations.push({ rule: 'class-constructor-implicit-super-needs-args', nodeType: 'constructor', - message: `Class '${info.name}' omits \`super(...)\` but base class '${info.baseName}' has a constructor that requires arguments. Call \`super(...)\` explicitly to pass them.`, + message: `Class '${info.name}' omits \`super(...)\` but base class '${argRequiringBaseName}' has a constructor that requires arguments. Call \`super(...)\` explicitly to pass them.`, line: ctor.loc?.line, col: ctor.loc?.col, }); @@ -3946,37 +3949,43 @@ function validateDerivedConstructorSuper( } /** - * True when the EFFECTIVE base constructor reached by an implicit no-arg - * `super()` declares at least one required (no-default) parameter — i.e. that - * implicit init would fail at runtime. 
The effective base ctor is found by walking - * up the inheritance chain through constructor-less bases, exactly as the runtime - * does: `initializeClassLayer` forwards `[]` through a base that has no - * constructor (`base && !ctor`) to ITS base, so the first ancestor that actually - * declares a constructor is the one invoked with no args. Checking only the - * immediate base would let `C extends B extends A` (B ctor-less, A arg-requiring) - * pass validation yet throw at runtime — re-creating the validator/runtime split - * this reconciliation closes. Mirrors the runtime's required-arg rule (a param is - * required unless it carries a `value`/`default`); a chain with no constructor - * anywhere (or an unresolved base) requires no args. + * The name of the EFFECTIVE base class whose constructor an implicit no-arg + * `super()` would reach and fail to satisfy — i.e. the first ancestor that + * declares a constructor with a required (no-default) parameter — or `undefined` + * when implicit init succeeds. The effective base ctor is found by walking up the + * inheritance chain through constructor-less bases, exactly as the runtime does: + * `initializeClassLayer` forwards `[]` through a base that has no constructor + * (`base && !ctor`) to ITS base, so the first ancestor that actually declares a + * constructor is the one invoked with no args. Checking only the immediate base + * would let `C extends B extends A` (B ctor-less, A arg-requiring) pass validation + * yet throw at runtime — re-creating the validator/runtime split this + * reconciliation closes. Returning the name (not a bool) lets the diagnostic point + * at the real source rather than the immediate base. Mirrors the runtime's + * required-arg rule (a param is required unless it carries a `value`/`default`); a + * chain with no constructor anywhere (or an unresolved base) needs no args. 
*/ -function baseConstructorRequiresArgs(info: ClassInfo, classByName: ReadonlyMap<string, ClassInfo>): boolean { +function argRequiringEffectiveBaseName( + info: ClassInfo, + classByName: ReadonlyMap<string, ClassInfo>, +): string | undefined { const seen = new Set(); let current = info.baseName ? classByName.get(info.baseName) : undefined; while (current && !seen.has(current.name)) { seen.add(current.name); const ctor = current.constructors[0]; if (ctor) { - return (ctor.children ?? []).some( + const requiresArgs = (ctor.children ?? []).some( (child) => child.type === 'param' && !Object.hasOwn(child.props ?? {}, 'value') && !Object.hasOwn(child.props ?? {}, 'default'), ); + return requiresArgs ? current.name : undefined; } // Constructor-less base: the runtime forwards [] to its base — keep walking. current = current.baseName ? classByName.get(current.baseName) : undefined; } - return false; + return undefined; } function analyzeConstructorStatements( diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index c3143004..3e61f943 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -628,7 +628,7 @@ describe('semantic-validator — class object model', () => { // forwards [] through B to A — which requires `id`. The validator must walk // through the constructor-less B to A (matching the runtime), or it would pass // here while the runtime throws, re-opening the split this reconciliation closes.
- const rules = rulesFor( + const violations = violationsFor( [ 'class name=A', ' field name=id type=string', @@ -646,7 +646,12 @@ describe('semantic-validator — class object model', () => { ].join('\n'), ); - expect(rules).toContain('class-constructor-implicit-super-needs-args'); + const needsArgs = violations.find((v) => v.rule === 'class-constructor-implicit-super-needs-args'); + expect(needsArgs).toBeDefined(); + // The message must name the class that actually has the arg-requiring ctor (A), + // not the immediate constructor-less base (B). + expect(needsArgs?.message).toContain("base class 'A'"); + expect(needsArgs?.message).not.toContain("base class 'B'"); }); test('accepts an omitted super when the transitively-reached base needs no args', () => { From 844a7dfa8a489bd315f0bc276427f012c00acaaf Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 17:34:55 +0200 Subject: [PATCH 58/63] feat(class): enforce KERN's abstract-class contract in the validator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KERN now owns its abstract contract at the VALIDATOR layer (codegen/runtime stay the loud backstop), so TS and Python reject the same programs by construction — the validator runs before codegen, making enforcement parity-free. Closes the soundness/asymmetry gap where `new Abstract()` and unimplemented abstract members only failed at runtime (and only tsc, not Python, caught the direct instantiation). Two rules: - class-abstract-instantiation: reject `new <Abstract>(...)` anywhere, including the abstract class's own static factory (matches TS). Module-wide pass over BODY_EXPRESSION_PROPS; resolves the constructor target by descending the postfix chain to its head ident (so `new Shape().area()` is attributed to Shape per JS `new` precedence) and skips qualified (`pkg.Shape`), non-ident, and unresolved callees conservatively. - class-abstract-member-unimplemented: a CONCRETE class must implement every inherited abstract member.
Driven by a dedicated collectAbstractObligations lineage walker keyed by (static, name, kind) — NOT effectiveClassMemberFacts, which collapses members by name+static and would let a getter-only override silently erase the sibling abstract setter (or a same-name different-kind member erase an abstract method). Getter/setter are independent obligations; a same-name different-kind collision stays owned by class-member-conflict. Multi-level abstract chains require the override only at the concrete leaf; abstract subclasses may carry/inherit abstract members. Runtime `new Abstract()` guard deferred (validator is the gate; the codegen raise/throw stub remains the backstop) — panel-unanimous. Design via 6-engine agon brainstorm; the effectiveClassMemberFacts soundness hole was flagged independently by codex/kimi/minimax and verified in code. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/semantic-validator.ts | 199 ++++++++++++++++++++++++ 1 file changed, 199 insertions(+) diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index 91d56a44..eed6ee74 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -2822,11 +2822,13 @@ function validateClassGraphRoots(roots: readonly IRNode[], violations: SemanticV validateClassConstructors(info, violations); validateClassMemberConflicts(info, violations); validateClassSuperUsage(info, classByName, violations); + validateClassAbstractMembers(info, classByName, violations); } validateClassInheritanceCycles(classes, classByName, violations); validateClassOverrides(classes, classByName, violations); validateClassShapeUsage(classes, classByName, violations); + validateAbstractInstantiations(roots, classByName, visibleNamesByRoot, violations); } function collectClassInfos(root: IRNode, rootIndex = 0): ClassInfo[] { @@ -3355,6 +3357,203 @@ function classInfoParticipatesInCycle(info: ClassInfo, classByName: ReadonlyMap< return false; 
} +// ── Abstract-class contract enforcement ────────────────────────────────────── +// KERN owns its abstract contract at the VALIDATOR layer (codegen/runtime stay +// the loud backstop): a concrete class must implement every abstract member it +// inherits, and an abstract class may never be instantiated. The validator runs +// before codegen, so enforcement is parity-free — TS and Python reject the same +// programs by construction. +// +// PR3 convention: an "abstract member" is a handler-less method/getter/setter +// declared under an `abstract=true` class. Fields always carry a value, so they +// are never abstract. + +function isAbstractClassNode(node: IRNode): boolean { + const raw = node.props?.abstract; + return raw === true || raw === 'true'; +} + +function memberHasHandler(node: IRNode): boolean { + return (node.children ?? []).some((child) => child.type === 'handler'); +} + +interface AbstractObligation { + readonly name: string; + readonly kind: ClassMemberKind; + readonly static: boolean; + // The nearest abstract ancestor that left this member unimplemented. + readonly declaredIn: string; +} + +function abstractObligationKey(member: { + readonly static: boolean; + readonly name: string; + readonly kind: ClassMemberKind; +}): string { + return `${member.static ? 'static' : 'instance'}:${member.name}:${member.kind}`; +} + +// Walk the lineage base→derived and return the abstract members still owed by +// `info`. Keyed by (static, name, kind) so a getter override never clears the +// sibling setter obligation, and a same-name different-kind member never erases +// an inherited abstract member (the exact soundness hole that drove this off +// `effectiveClassMemberFacts`, which collapses members by name+static only). 
+function collectAbstractObligations( + info: ClassInfo, + classByName: ReadonlyMap, + seen: ReadonlySet = new Set(), +): AbstractObligation[] { + // Inheritance cycles carry their own primary diagnostic; do not also walk a + // cyclic chain here (it would never terminate cleanly nor add signal). + if (seen.has(info.name) || classInfoParticipatesInCycle(info, classByName)) return []; + const nextSeen = new Set(seen); + nextSeen.add(info.name); + const obligations = new Map(); + const base = info.baseName ? classByName.get(info.baseName) : undefined; + if (base) { + for (const obligation of collectAbstractObligations(base, classByName, nextSeen)) { + obligations.set(abstractObligationKey(obligation), obligation); + } + } + const ownIsAbstract = isAbstractClassNode(info.node); + for (const member of info.members) { + if (member.kind === 'field') continue; // fields are never abstract + const key = abstractObligationKey(member); + if (memberHasHandler(member.node)) { + // A concrete definition for this exact (static,name,kind) satisfies the + // obligation — same-kind only. + obligations.delete(key); + } else if (ownIsAbstract) { + // Handler-less member under an abstract owner declares an obligation. + obligations.set(key, { + name: member.name, + kind: member.kind, + static: member.static, + declaredIn: info.name, + }); + } + // A handler-less member under a CONCRETE owner neither satisfies nor + // declares: any inherited obligation stands and is flagged below. + } + return [...obligations.values()].sort((a, b) => abstractObligationKey(a).localeCompare(abstractObligationKey(b))); +} + +function validateClassAbstractMembers( + info: ClassInfo, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + // Abstract classes are allowed to carry (and inherit) abstract members. 
+ if (isAbstractClassNode(info.node)) return; + for (const obligation of collectAbstractObligations(info, classByName)) { + violations.push({ + rule: 'class-abstract-member-unimplemented', + nodeType: info.node.type, + message: `Concrete class '${info.name}' must implement abstract ${obligation.kind} '${obligation.name}' inherited from '${obligation.declaredIn}'.`, + line: info.node.loc?.line, + col: info.node.loc?.col, + }); + } +} + +// Resolve the class a `new` expression constructs. KERN parses `new` greedily +// (the argument is a full postfix chain), and codegen prefixes `new ` to the +// emitted chain, so KERN follows JS `new` precedence: +// new Shape -> Shape +// new Shape() -> Shape +// new Shape().area() -> Shape (new binds to Shape(); `.area()` is after) +// new pkg.Shape() -> pkg.Shape (qualified) -> not a bare local class, skip +// new makeShape()() -> makeShape (head ident; not a class -> skipped on lookup) +// We descend the spine to the head ident and skip qualified constructors (a head +// reached as a member's object, e.g. `pkg.Shape`). +function newExpressionClassName(argument: ValueIR): string | undefined { + let node: ValueIR = argument; + let edge: 'root' | 'callee' | 'object' = 'root'; + while (true) { + switch (node.kind) { + case 'ident': + // A member-object head (`pkg.Shape`) is a qualified constructor; every + // other head (root, or a called ident) is a bare construction target. + return edge === 'object' ? 
undefined : node.name; + case 'call': + node = node.callee; + edge = 'callee'; + continue; + case 'member': + node = node.object; + edge = 'object'; + continue; + case 'index': + node = node.object; + edge = 'object'; + continue; + case 'nonNull': + node = node.expression; + continue; + default: + return undefined; // dynamic / non-resolvable constructor + } + } +} + +// Module-wide pass: reject `new (...)` anywhere — including inside +// the abstract class's own static factory (KERN matches TS: abstract is not +// self-instantiable). Conservative by design — non-ident callees, names not +// resolving to a visible local class, and (consistent with every other +// class-name resolution in this validator) names rebound by a local binding are +// not pursued; the validator does not track lexical shadowing for any class +// reference, so abstract instantiation follows the same name+visibility rule. +function validateAbstractInstantiations( + roots: readonly IRNode[], + classByName: ReadonlyMap, + visibleNamesByRoot: readonly ReadonlySet[], + violations: SemanticViolation[], +): void { + roots.forEach((root, rootIndex) => { + const visible = visibleNamesByRoot[rootIndex]; + walkSemanticTree(root, (node) => { + for (const prop of BODY_EXPRESSION_PROPS) { + const text = expressionPropText(node.props?.[prop]); + if (!text) continue; + let value: ValueIR; + try { + value = parseExpression(text); + } catch { + continue; + } + collectAbstractInstantiations(value, node, visible, classByName, violations); + } + }); + }); +} + +function collectAbstractInstantiations( + value: ValueIR, + node: IRNode, + visible: ReadonlySet | undefined, + classByName: ReadonlyMap, + violations: SemanticViolation[], +): void { + if (value.kind === 'new') { + const name = newExpressionClassName(value.argument); + if (name && (!visible || visible.has(name))) { + const target = classByName.get(name); + if (target && isAbstractClassNode(target.node)) { + violations.push({ + rule: 
'class-abstract-instantiation', + nodeType: node.type, + message: `Cannot instantiate abstract class '${name}'.`, + line: node.loc?.line, + col: node.loc?.col, + }); + } + } + } + for (const child of valueIRChildren(value)) { + collectAbstractInstantiations(child, node, visible, classByName, violations); + } +} + function collectClassOverrideFacts( classes: readonly ClassInfo[], classByName: ReadonlyMap, From 763aa870d4cd50fe9fd6545301905bc9cdb6e2f9 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 17:34:56 +0200 Subject: [PATCH 59/63] test(class): cover abstract-class contract enforcement Validator unit tests for both abstract rules: - instantiation: direct `new Abstract()` rejected; concrete subclass accepted; rejected inside the abstract class's own static factory; concrete/unresolved `new` not flagged. - concrete-must-override: missing override rejected (message names class + kind + member + declaring ancestor); full override accepted; abstract subclass may leave it unimplemented; multi-level chain requires it only at the concrete leaf (names the original abstract declarer). - getter/setter pair: overriding only the getter still flags the abstract setter (the soundness case); overriding both is accepted. - same-name different-kind (abstract method vs field) does not satisfy the method obligation. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/tests/class-semantics.test.ts | 177 ++++++++++++++++++++ 1 file changed, 177 insertions(+) diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 3e61f943..5c910e10 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -958,3 +958,180 @@ describe('semantic-validator — class object model', () => { expect(rules).toContain('class-inheritance-cycle'); }); }); + +describe('semantic-validator — abstract-class contract', () => { + // ── class-abstract-instantiation: `new ()` is rejected ────── + test('rejects instantiating an abstract class directly', () => { + const violations = violationsFor( + [ + 'class name=Shape abstract=true', + ' method name=area returns=number', + 'fn name=probe returns=number', + ' handler lang=kern', + ' return value="new Shape().area()"', + ].join('\n'), + ); + const violation = violations.find((candidate) => candidate.rule === 'class-abstract-instantiation'); + expect(violation?.message).toContain("Cannot instantiate abstract class 'Shape'"); + }); + + test('accepts instantiating a concrete subclass that overrides the abstract member', () => { + const rules = rulesFor( + [ + 'class name=Shape abstract=true', + ' method name=area returns=number', + 'class name=Square extends=Shape', + ' method name=area returns=number', + ' handler lang=kern', + ' return value=9', + 'fn name=probe returns=number', + ' handler lang=kern', + ' return value="new Square().area()"', + ].join('\n'), + ); + expect(rules).not.toContain('class-abstract-instantiation'); + expect(rules).not.toContain('class-abstract-member-unimplemented'); + }); + + test('rejects abstract instantiation even inside the abstract class own static factory', () => { + // KERN matches TS: an abstract class is not self-instantiable, anywhere. 
+ const rules = rulesFor( + [ + 'class name=Shape abstract=true', + ' method name=area returns=number', + ' method name=make returns=Shape static=true', + ' handler lang=kern', + ' return value="new Shape()"', + ].join('\n'), + ); + expect(rules).toContain('class-abstract-instantiation'); + }); + + test('does not flag new of a concrete class or an unresolved identifier', () => { + const rules = rulesFor( + [ + 'class name=Widget', + ' method name=run returns=number', + ' handler lang=kern', + ' return value=1', + 'fn name=probe returns=number', + ' handler lang=kern', + ' return value="new Widget().run() + new Unknown().x"', + ].join('\n'), + ); + expect(rules).not.toContain('class-abstract-instantiation'); + }); + + // ── class-abstract-member-unimplemented: concrete must override ──────────── + test('rejects a concrete subclass that leaves an inherited abstract member unimplemented', () => { + const violations = violationsFor( + [ + 'class name=Shape abstract=true', + ' method name=area returns=number', + 'class name=Square extends=Shape', + ' field name=side type=number value={{ 3 }}', + ].join('\n'), + ); + const violation = violations.find((candidate) => candidate.rule === 'class-abstract-member-unimplemented'); + expect(violation?.message).toContain("Concrete class 'Square' must implement abstract method 'area'"); + expect(violation?.message).toContain("inherited from 'Shape'"); + }); + + test('accepts a concrete subclass that overrides every abstract member', () => { + const rules = rulesFor( + [ + 'class name=Shape abstract=true', + ' method name=area returns=number', + 'class name=Square extends=Shape', + ' method name=area returns=number', + ' handler lang=kern', + ' return value=9', + ].join('\n'), + ); + expect(rules).not.toContain('class-abstract-member-unimplemented'); + }); + + test('allows an abstract subclass to leave an inherited abstract member unimplemented', () => { + const rules = rulesFor( + [ + 'class name=Shape abstract=true', + ' method 
name=area returns=number', + 'class name=Polygon extends=Shape abstract=true', + ' method name=sides returns=number', + ].join('\n'), + ); + expect(rules).not.toContain('class-abstract-member-unimplemented'); + }); + + test('requires the override only at the concrete leaf of a multi-level abstract chain', () => { + // A(abstract area) -> B(abstract, no override) -> C(concrete, no override). + const violations = violationsFor( + [ + 'class name=A abstract=true', + ' method name=area returns=number', + 'class name=B extends=A abstract=true', + 'class name=C extends=B', + ].join('\n'), + ); + const matches = violations.filter((candidate) => candidate.rule === 'class-abstract-member-unimplemented'); + expect(matches).toHaveLength(1); + expect(matches[0]?.message).toContain("Concrete class 'C'"); + expect(matches[0]?.message).toContain("inherited from 'A'"); + }); + + test('requires overriding BOTH an abstract getter and setter pair (no sibling erasure)', () => { + // The soundness case: overriding only the getter must NOT silently satisfy + // the sibling abstract setter (effectiveClassMemberFacts would collapse by + // name+static and drop it — collectAbstractObligations keys by kind). 
+ const violations = violationsFor( + [ + 'class name=Cell abstract=true', + ' getter name=value returns=number', + ' setter name=value', + ' param name=next type=number', + 'class name=IntCell extends=Cell', + ' getter name=value returns=number', + ' handler lang=kern', + ' return value="this._value"', + ].join('\n'), + ); + const matches = violations.filter((candidate) => candidate.rule === 'class-abstract-member-unimplemented'); + expect(matches).toHaveLength(1); + expect(matches[0]?.message).toContain("abstract setter 'value'"); + }); + + test('accepts overriding both members of an abstract getter/setter pair', () => { + const rules = rulesFor( + [ + 'class name=Cell abstract=true', + ' getter name=value returns=number', + ' setter name=value', + ' param name=next type=number', + 'class name=IntCell extends=Cell', + ' getter name=value returns=number', + ' handler lang=kern', + ' return value="this._value"', + ' setter name=value', + ' param name=next type=number', + ' handler lang=kern', + ' assign target="this._value" value="next"', + ].join('\n'), + ); + expect(rules).not.toContain('class-abstract-member-unimplemented'); + }); + + test('a same-name different-kind member does not satisfy an abstract method obligation', () => { + // Base abstract METHOD `area`; subclass declares a FIELD `area`. The method + // obligation stands (kind-specific); the kind collision is owned separately + // by class-member-conflict. 
+ const violations = violationsFor( + [ + 'class name=Shape abstract=true', + ' method name=area returns=number', + 'class name=Square extends=Shape', + ' field name=area type=number value={{ 9 }}', + ].join('\n'), + ); + expect(violations.map((candidate) => candidate.rule)).toContain('class-abstract-member-unimplemented'); + }); +}); From 43f12190e71c4badf154260268912c5ad0fb9578 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 17:47:25 +0200 Subject: [PATCH 60/63] fix(class): scan field/param default= for abstract instantiation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review (codex 0.98, kimi, agy 0.85 — 3-engine convergence) found the class-abstract-instantiation pass scanned only BODY_EXPRESSION_PROPS, which excludes `default`. So `new Abstract()` in a field `default=` or `param default=` initializer was silently un-checked — a real soundness gap, since field initializers treat `value` and `default` equivalently and both lower to runtime code. Verified empirically: `field s default="new Shape()"` was MISSED while `value={{ new Shape() }}` was caught. Scan `default` too, local to this pass (INSTANTIATION_EXPRESSION_PROPS) so the shared super-detection / shape-usage walks are unaffected. Regression test added. Also documented the multi-root visibility-union behavior (codex 0.86): the pass resolves classes across roots via the same union extends/implements already use; all production callers validate a single root, so it is not a false-positive surface in practice. The cycle-short-circuit (minimax 0.75) only affects programs that already carry a class-inheritance-cycle diagnostic; deferred as the cycle is the primary error. Remaining review items were nits or self-disproven. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/semantic-validator.ts | 14 +++++++++++++- packages/core/tests/class-semantics.test.ts | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/packages/core/src/semantic-validator.ts b/packages/core/src/semantic-validator.ts index eed6ee74..ea40ee5b 100644 --- a/packages/core/src/semantic-validator.ts +++ b/packages/core/src/semantic-validator.ts @@ -3496,6 +3496,14 @@ function newExpressionClassName(argument: ValueIR): string | undefined { } } +// `default` is an executable initializer site (field `default=` and +// `param default=`) that is NOT in BODY_EXPRESSION_PROPS — field initializers +// treat `value` and `default` equivalently and both lower to runtime code, so a +// `new Abstract()` in a default must be checked too. Scanned local to this pass +// so the shared super-detection / shape-usage walks are unaffected. A non-`new` +// default just parses to a harmless expression that matches nothing. +const INSTANTIATION_EXPRESSION_PROPS: readonly string[] = [...BODY_EXPRESSION_PROPS, 'default']; + // Module-wide pass: reject `new (...)` anywhere — including inside // the abstract class's own static factory (KERN matches TS: abstract is not // self-instantiable). Conservative by design — non-ident callees, names not @@ -3503,6 +3511,10 @@ function newExpressionClassName(argument: ValueIR): string | undefined { // class-name resolution in this validator) names rebound by a local binding are // not pursued; the validator does not track lexical shadowing for any class // reference, so abstract instantiation follows the same name+visibility rule. +// Multi-root note: visibleNamesByRoot unions every root's declared class names +// (as extends/implements resolution already does), so this resolves classes +// across roots; all production callers validate a single root, so the +// cross-root union is not a false-positive surface in practice. 
function validateAbstractInstantiations( roots: readonly IRNode[], classByName: ReadonlyMap, @@ -3512,7 +3524,7 @@ function validateAbstractInstantiations( roots.forEach((root, rootIndex) => { const visible = visibleNamesByRoot[rootIndex]; walkSemanticTree(root, (node) => { - for (const prop of BODY_EXPRESSION_PROPS) { + for (const prop of INSTANTIATION_EXPRESSION_PROPS) { const text = expressionPropText(node.props?.[prop]); if (!text) continue; let value: ValueIR; diff --git a/packages/core/tests/class-semantics.test.ts b/packages/core/tests/class-semantics.test.ts index 5c910e10..8fa6fd13 100644 --- a/packages/core/tests/class-semantics.test.ts +++ b/packages/core/tests/class-semantics.test.ts @@ -1007,6 +1007,20 @@ describe('semantic-validator — abstract-class contract', () => { expect(rules).toContain('class-abstract-instantiation'); }); + test('rejects abstract instantiation in a field default= initializer (not just value={{}})', () => { + // Review (codex/kimi/agy): `default=` is an executable initializer site like + // `value`, but is not in BODY_EXPRESSION_PROPS, so it must be scanned too. + const fieldDefault = rulesFor( + [ + 'class name=Shape abstract=true', + ' method name=area returns=number', + 'class name=Holder', + ' field name=s type=Shape default="new Shape()"', + ].join('\n'), + ); + expect(fieldDefault).toContain('class-abstract-instantiation'); + }); + test('does not flag new of a concrete class or an unresolved identifier', () => { const rules = rulesFor( [ From 48ebd2e03f99e4d8bbcb4955826875c5c08ed45b Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 18:43:33 +0200 Subject: [PATCH 61/63] fix(test): align bad-cases super fixture with Option-C semantics The conformance-bad-cases suite asserted a `class-constructor-missing-super` ("constructor does not call super") violation, but Option-C implicit-super (commits f0d9d1c3/62855ebf) intentionally retired that rule: omitting `super(...)` is now legal because KERN injects base init. 
`class-semantics` unit tests already assert the rule is absent, so the conformance fixture was stale and CI's `pnpm test:kern` failed on it. Give `ProtocolBase` a required-arg constructor so `MissingSuper` (which omits super) now trips the *replacement* detector `class-constructor-implicit-super-needs-args`, and reword the assertion to match. Preserves detector coverage; the derived classes that call `super('u1')` explicitly are unaffected. Full test:kern green (233/233, coverage 100%). Co-Authored-By: Claude Opus 4.8 (1M context) --- examples/native-test/conformance-bad-cases.kern | 4 ++++ examples/native-test/conformance-bad-cases.test.kern | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/native-test/conformance-bad-cases.kern b/examples/native-test/conformance-bad-cases.kern index fa25261d..1364a024 100644 --- a/examples/native-test/conformance-bad-cases.kern +++ b/examples/native-test/conformance-bad-cases.kern @@ -88,6 +88,10 @@ class name=PlainSuper class name=ProtocolBase field name=id type=string + constructor + param name=id type=string + handler + assign target="this.id" value="id" method name=load returns=string param name=id type=string handler diff --git a/examples/native-test/conformance-bad-cases.test.kern b/examples/native-test/conformance-bad-cases.test.kern index ab8b8c51..4e1a67d2 100644 --- a/examples/native-test/conformance-bad-cases.test.kern +++ b/examples/native-test/conformance-bad-cases.test.kern @@ -22,7 +22,7 @@ test name="Bad KERN conformance" target="./conformance-bad-cases.kern" coverage= expect has=semanticViolations matches="declares more than one constructor" expect has=semanticViolations matches="conflicting instance member 'value'" expect has=semanticViolations matches="uses .*super.* does not extend a base class" - expect has=semanticViolations matches="constructor does not call .*super" + expect has=semanticViolations matches="omits .*super.* requires arguments" expect has=semanticViolations 
matches="member access before .*super" expect has=semanticViolations matches="calls .*super.* more than once" expect has=semanticViolations matches="must call .*super.* definitely on every path" From 792dcdaa16fa38a90aac07151bab9fca7bf3f43f Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 18:48:44 +0200 Subject: [PATCH 62/63] test(python): add coercion differential conformance oracle (red at base) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frozen oracle for the value→string coercion-parity slice. 14 differential fixtures (compile to TS + Python, run both, assert ts==python==expected): 10 RED-at-base (Python diverges: bool→"True", null→"None", undefined→"None", 1.0→"1.0", arrays→"[1, 2, 3]", str+x→TypeError) force the implementation; 4 GREEN guards (2+3==5, 5/null/undefined ?? 9) must stay green so an over-eager `+` coercion or a non-nullish `undefined` sentinel is caught. Expected values are JS/TS truth → correct by construction. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/coercion-conformance.mjs | 117 +++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 scripts/coercion-conformance.mjs diff --git a/scripts/coercion-conformance.mjs b/scripts/coercion-conformance.mjs new file mode 100644 index 00000000..10dea1f2 --- /dev/null +++ b/scripts/coercion-conformance.mjs @@ -0,0 +1,117 @@ +/** + * Coercion differential conformance — KERN single-source value→string parity. + * + * KERN is one language emitted to BOTH TypeScript and Python; the contract is + * parity by construction. JS coerces values to strings with well-known rules + * (`true`→"true", `null`→"null", `undefined`→"undefined", `1.0`→"1", + * `[1,2,3]`→"1,2,3", `"a"+true`→"atrue") that Python's `str()`/`+` do NOT match + * (`True`/`None`/`1.0`/`[1, 2, 3]`/TypeError). 
Implicit coercion sites — + * template interpolation `${x}` and string `+` concatenation — must therefore + * be lowered to JS semantics on the Python target (TS already IS JS). + * + * Each fixture is a zero-arg `fn probe` whose return value exercises one + * coercion. The module is compiled through BOTH codegen paths (core → TS, + * python → pure Python), each driver calls `probe()` and prints its + * JSON-normalized return, and we assert ts == python == expected. Expected + * values are JS/TS truth, so the oracle is correct by construction. + * + * Discrimination: most fixtures are RED at base (Python diverges) and force the + * implementation. Four are GREEN guards that must STAY green — `2 + 3 == 5` + * catches an additive `+` that over-coerces to string concat, and the `??` + * fixtures (notably `undefined ?? 9 == 9`) catch an `undefined` representation + * that stops being nullish. A half-built fix turns a guard RED. + * + * Run: node scripts/coercion-conformance.mjs (or via `pnpm check:coercion-conformance`) + */ + +import { execFileSync } from 'node:child_process'; +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const REPO = dirname(dirname(fileURLToPath(import.meta.url))); +const { parse, generateCoreNode } = await import(join(REPO, 'packages/core/dist/index.js')); +const { generatePythonCoreNode } = await import(join(REPO, 'packages/python/dist/codegen-python.js')); +const tsCompiler = await import('typescript'); + +// Each fixture: a probe() returning the value under test. `expected` is JS/TS truth. 
+const FIXTURES = [ + // ── Template interpolation: scalar coercion ─────────────────────────────── + { name: 'bool in template', ret: 'string', expr: '`${true} ${false}`', expected: 'true false' }, + { name: 'null in template', ret: 'string', expr: '`${null}`', expected: 'null' }, + { name: 'undefined in template', ret: 'string', expr: '`${undefined}`', expected: 'undefined' }, + { name: 'integer-valued float in template', ret: 'string', expr: '`${1.0} ${2.5}`', expected: '1 2.5' }, + // ── Template interpolation: array / object toString ─────────────────────── + { name: 'flat array in template', ret: 'string', expr: '`${[1, 2, 3]}`', expected: '1,2,3' }, + { name: 'nested array in template (recursive)', ret: 'string', expr: '`${[1, [2, 3]]}`', expected: '1,2,3' }, + { name: 'array with nullish elements → empty', ret: 'string', expr: '`${[null, undefined, 3]}`', expected: ',,3' }, + // ── String `+` concatenation coercion ───────────────────────────────────── + { name: 'concat string + number', ret: 'string', expr: '"n=" + 5', expected: 'n=5' }, + { name: 'concat string + bool', ret: 'string', expr: '"a" + true', expected: 'atrue' }, + // ── Mixed ───────────────────────────────────────────────────────────────── + { name: 'mixed template (arith + bool)', ret: 'string', expr: '`count: ${1 + 2}, ok: ${true}`', expected: 'count: 3, ok: true' }, + // ── GUARD fixtures — currently GREEN, must STAY green (catch over-fixes) ─── + { name: 'GUARD numeric + stays additive', ret: 'number', expr: '2 + 3', expected: 5 }, + { name: 'GUARD nullish keeps present value', ret: 'number', expr: '5 ?? 9', expected: 5 }, + { name: 'GUARD null is nullish', ret: 'number', expr: 'null ?? 9', expected: 9 }, + { name: 'GUARD undefined stays nullish', ret: 'number', expr: 'undefined ?? 
9', expected: 9 }, +]; + +function canon(value) { + return JSON.stringify(value); +} + +const dir = mkdtempSync(join(tmpdir(), 'kern-coercion-conformance-')); +process.on('exit', () => { + try { + rmSync(dir, { recursive: true, force: true }); + } catch { + // best-effort tmp cleanup — never fail the run on it + } +}); + +let pass = 0; +const failures = []; + +for (let i = 0; i < FIXTURES.length; i++) { + const fx = FIXTURES[i]; + try { + const kern = `fn name=probe returns=${fx.ret}\n handler\n return value=${JSON.stringify(fx.expr)}`; + const root = parse(kern); + const topNodes = root.type === 'class' || root.type === 'fn' ? [root] : (root.children ?? []); + + const tsSource = `${topNodes.map((n) => generateCoreNode(n).join('\n')).join('\n\n')}\nconsole.log(JSON.stringify(probe()));`; + const tsFile = join(dir, `mod-${i}.mjs`); + writeFileSync( + tsFile, + tsCompiler.transpileModule(tsSource, { + compilerOptions: { module: tsCompiler.ModuleKind.ESNext, target: tsCompiler.ScriptTarget.ES2022 }, + }).outputText, + ); + + const pySource = `import json\n${topNodes.map((n) => generatePythonCoreNode(n).join('\n')).join('\n\n')}\nprint(json.dumps(probe()))`; + const pyFile = join(dir, `mod-${i}.py`); + writeFileSync(pyFile, pySource); + + const opts = { encoding: 'utf8', timeout: 10_000 }; + const tsOut = JSON.parse(execFileSync('node', [tsFile], opts).trim()); + const pyOut = JSON.parse(execFileSync('python3', [pyFile], opts).trim()); + + if (canon(tsOut) === canon(fx.expected) && canon(pyOut) === canon(fx.expected)) { + pass++; + } else { + failures.push({ name: fx.name, expected: fx.expected, ts: tsOut, py: pyOut }); + } + } catch (err) { + failures.push({ name: fx.name, error: err?.stderr?.toString?.() || err?.message || String(err) }); + } +} + +console.log(`Coercion conformance: ${pass}/${FIXTURES.length} fixtures passed (ts == python == expected)`); +for (const f of failures) { + if (f.error) console.error(` FAIL ${f.name}: ${f.error}`); + else console.error(` 
FAIL ${f.name}: expected ${canon(f.expected)} | ts ${canon(f.ts)} | py ${canon(f.py)}`); +} +if (failures.length > 0) process.exit(1); +console.log('All passed.'); From a569d11803e5c10254940ed1790f2b74d614f855 Mon Sep 17 00:00:00 2001 From: cukas Date: Tue, 9 Jun 2026 20:48:59 +0200 Subject: [PATCH 63/63] =?UTF-8?q?feat(python):=20JS=20value=E2=86=92string?= =?UTF-8?q?=20coercion=20parity=20on=20native=20KERN=20bodies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KERN is one language emitted to BOTH TS and Python; coercion must be parity by construction. JS coerces values to strings (true→"true", null→"null", undefined→"undefined", 1.0→"1", [1,2,3]→"1,2,3", "a"+true→"atrue", Infinity→"Infinity") in ways Python's str()/+ do not. This lowers those semantics on the Python target (TS already IS JS). Scope: native KERN handler bodies only, gated by `coerceJsValues` (default true; helpers inject function-locally there). The helper-less Ground/React declarative layer opts out (GROUND_EMIT) and keeps the pre-slice forms, so its output is byte-identical — no regression, no NameError from undefined helper references. - helpers.ts: _kern_fmt extended (str/bool/null/int-float/array/dict/ Infinity/NaN); _KERN_UNDEFINED sentinel with __bool__→False so JS undefined stays falsy (!undefined, ternary, ||, if); __kern_add for the JS `+` overload (string concat if string-ish incl tuple, else ToNumber add null→0/undefined→NaN/bool→0,1, try/except fallback for exotic hosts). - codegen-body-python.ts: undefLit→sentinel; tmplLit interpolation wrapped in _kern_fmt; binary `+` → __kern_add / _kern_fmt; `??` excludes the sentinel; dynamic typeof reports "undefined" for a stored sentinel. All gated on coerceJsValues. - ground.ts: 6 emitPyExpression calls opt out via GROUND_EMIT. - coercion-conformance.mjs: 20 discriminating fixtures (ts==python== expected), incl Infinity/-Infinity via float overflow (KERN has no `e` notation). 
Verified: oracle 20/20, class 13/13, expr 273/273, full monorepo test green (# fail 0), lint clean. Follow-ups (documented, out of slice scope): extend the fmt IR-semantics contract to the coercion superset; sentinel JSON-serialization at the route boundary; cross-function sentinel identity (function-local helper injection). Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python/src/codegen-body-python.ts | 92 ++++++++++++++++++- packages/python/src/core/expr/helpers.ts | 57 +++++++++++- packages/python/src/generators/ground.ts | 22 +++-- .../tests/native-handlers-cell-python.test.ts | 9 +- .../tests/native-handlers-python.test.ts | 57 ++++++------ ...ndlers-slice-alpha2-ternary-python.test.ts | 4 +- .../native-handlers-slice2-python.test.ts | 38 +++++--- .../native-handlers-slice3-python.test.ts | 7 +- .../native-handlers-slice4c-nullish.test.ts | 20 ++-- .../native-handlers-stdlib-python.test.ts | 2 +- scripts/coercion-conformance.mjs | 13 +++ 11 files changed, 255 insertions(+), 66 deletions(-) diff --git a/packages/python/src/codegen-body-python.ts b/packages/python/src/codegen-body-python.ts index d84d2795..dbfc3489 100644 --- a/packages/python/src/codegen-body-python.ts +++ b/packages/python/src/codegen-body-python.ts @@ -104,6 +104,16 @@ export interface BodyEmitOptions { * packages/core/src/ir/semantics/python-leg.ts for the runtime contract. */ traceHooks?: { eachIterNext?: boolean; forIterNext?: boolean; letAssign?: boolean }; + /** Coercion-slice opt-out for the helper-less Ground/React declarative + * layer. Defaults to `true` (native KERN bodies + expression unit tests + * get full JS value→string coercion, injecting helpers function-locally). + * The Ground generators (`coalesce`/`firstDefined`/`firstTruthy`/`objectMerge` + * /…) emit module-level statements via `emitPyExpression` and have no + * channel to define `_kern_fmt`/`__kern_add`/`_KERN_UNDEFINED`, so they pass + * `false` to keep the pre-slice output (zero regression). 
Extending coercion + * to the Ground layer needs module-level (single-definition) helper + * injection — a separate follow-up. */ + coerceJsValues?: boolean; /** Outer-scope names the body INHERITS — typically function parameters and * module-level globals the wrapper has bound. Pre-populated as the * outermost `localScopes` map so an inner-block `let` that shadows ANY of @@ -173,6 +183,16 @@ interface BodyEmitContext { * override pending control flow, so it gets a finally-specific error. */ finallyDepth: number; standaloneExpression: boolean; + /** When true, helper-dependent JS value→string coercion is emitted + * (`__kern_add`, `_kern_fmt`-wrapped templates, the `_KERN_UNDEFINED` + * sentinel + sentinel-aware `??`/`typeof`). Native KERN bodies inject the + * required helpers function-locally, so the default is true. The Ground/ + * React declarative layer (`coalesce`/`firstDefined`/etc.) emits module- + * level statements through `emitPyExpression` with NO per-statement helper + * channel, so it opts out and keeps the pre-coercion-slice forms (raw `+`, + * raw f-string interpolation, `None` for undefined, None-only `??`). + * See BodyEmitOptions.coerceJsValues. */ + coerceJsValues: boolean; } const INDENT_STEP = ' '; @@ -194,6 +214,7 @@ function freshCtx(options?: BodyEmitOptions): BodyEmitContext { tryDepth: 0, finallyDepth: 0, standaloneExpression: false, + coerceJsValues: options?.coerceJsValues ?? true, traceHooks: options?.traceHooks, }; } @@ -1692,7 +1713,12 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { case 'nullLit': return 'None'; case 'undefLit': - return 'None'; + // Ground/React layer (no helper channel) keeps the pre-slice collapse to + // None; native bodies materialize the sentinel so `${undefined}` renders + // "undefined" (vs null's "null") and `?? `/`typeof` can distinguish it. 
+ if (!ctx.coerceJsValues) return 'None'; + ctx.helpers.add(KERN_FMT_HELPER_PY); + return '_KERN_UNDEFINED'; case 'regexLit': ctx.imports.add('re'); return `__k_re.compile(${pyRegexPattern(node)}, ${pyRegexFlags(node.flags, { allowGlobal: true })})`; @@ -1741,7 +1767,13 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { case 'nonNull': return emitPyExprCtx(node.expression, ctx); case 'tmplLit': { - // Lower TS template literals to Python f-strings. + // Lower TS template literals to Python f-strings. In native bodies, wrap + // each interpolation in _kern_fmt so JS value→string coercion semantics + // (true→"true", null→"null", undefined→"undefined", 1.0→"1", arrays→ + // comma-joined, objects→"[object Object]") are preserved. The helper-less + // Ground/React layer keeps the pre-slice raw f-string interpolation. + const coerce = ctx.coerceJsValues; + if (coerce) ctx.helpers.add(KERN_FMT_HELPER_PY); let out = 'f"'; for (let i = 0; i < node.quasis.length; i++) { out += node.quasis[i] @@ -1750,7 +1782,10 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { .replace(/\n/g, '\\n') .replace(/\{/g, '{{') .replace(/\}/g, '}}'); - if (i < node.expressions.length) out += `{${emitPyExprCtx(node.expressions[i], ctx)}}`; + if (i < node.expressions.length) { + const inner = emitPyExprCtx(node.expressions[i], ctx); + out += coerce ? `{_kern_fmt(${inner})}` : `{${inner}}`; + } } out += '"'; return out; @@ -1791,6 +1826,25 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { return `isinstance(${left}, ${right})`; } + if (node.op === '+' && ctx.coerceJsValues) { + // JS `+` is overloaded: string concat if either operand is string-ish, + // numeric addition otherwise. Python has no implicit coercion, so we + // lower based on syntactic hints: + // - If either operand is syntactically string-producing (strLit/tmplLit), + // emit _kern_fmt(left) + _kern_fmt(right) for JS string concat. 
+ // - Otherwise (idents/calls/members/numbers — type unknown at emit time), + // emit __kern_add(left, right) so numeric + stays additive and dynamic + // string concat is coerced at runtime. + // The helper-less Ground/React layer skips this and falls through to the + // generic raw `+` path below (pre-slice behavior, zero regression). + ctx.helpers.add(KERN_FMT_HELPER_PY); + const isStr = (n: ValueIR) => n.kind === 'strLit' || n.kind === 'tmplLit'; + if (isStr(node.left) || isStr(node.right)) { + return `_kern_fmt(${left}) + _kern_fmt(${right})`; + } + return `__kern_add(${left}, ${right})`; + } + if (node.op === '??') { // Slice 4c — nullish coalesce lowering. Two shapes: // @@ -1813,11 +1867,22 @@ function emitPyExprCtx(node: ValueIR, ctx: BodyEmitContext): string { // Slice 4c (post-buddy-review) was the easy-win expansion after the // 22.7% empirical-gate scan; this lifts the slice-2 `??` throw and // adds an estimated +7% to native eligibility on Agon-AI bodies. + // Ground/React layer keeps the pre-slice None-only nullish test (no + // sentinel, no helper). Native bodies also exclude the undefined + // sentinel so `undefined ?? x` coalesces. 
+ if (!ctx.coerceJsValues) { + if (isReceiverChainPure(node.left)) { + return `(${left} if ${left} is not None else ${right})`; + } + const tmp = `__k_nc${++ctx.gensymCounter}`; + return `(${tmp} if (${tmp} := ${left}) is not None else ${right})`; + } + ctx.helpers.add(KERN_FMT_HELPER_PY); if (isReceiverChainPure(node.left)) { - return `(${left} if ${left} is not None else ${right})`; + return `(${left} if (${left} is not None and ${left} is not _KERN_UNDEFINED) else ${right})`; } const tmp = `__k_nc${++ctx.gensymCounter}`; - return `(${tmp} if (${tmp} := ${left}) is not None else ${right})`; + return `(${tmp} if ((${tmp} := ${left}) is not None and ${tmp} is not _KERN_UNDEFINED) else ${right})`; } const forceLeft = needsComparisonChainParens(node.left, node.op); @@ -1922,6 +1987,23 @@ function emitPyTypeof(argument: ValueIR, ctx: BodyEmitContext): string { const value = emitPyExprCtx(argument, ctx); const wrapped = needsArgParens(argument) ? `(${value})` : value; const tmp = `__k_typeof${++ctx.gensymCounter}`; + // Native bodies: a runtime value holding the undefined sentinel reports + // "undefined" (JS `typeof undefined`), not "object". The walrus binds in the + // first test so the sentinel branch is checked before the None branch. The + // helper-less Ground layer never materializes the sentinel, so it keeps the + // pre-slice None-first form. 
+ if (ctx.coerceJsValues) { + ctx.helpers.add(KERN_FMT_HELPER_PY); + return ( + `("undefined" if (${tmp} := ${wrapped}) is _KERN_UNDEFINED ` + + `else "object" if ${tmp} is None ` + + `else "boolean" if isinstance(${tmp}, bool) ` + + `else "number" if isinstance(${tmp}, (int, float)) ` + + `else "string" if isinstance(${tmp}, str) ` + + `else "function" if callable(${tmp}) ` + + `else "object")` + ); + } return ( `("object" if (${tmp} := ${wrapped}) is None ` + `else "boolean" if isinstance(${tmp}, bool) ` + diff --git a/packages/python/src/core/expr/helpers.ts b/packages/python/src/core/expr/helpers.ts index 4f5b4534..6e3008a2 100644 --- a/packages/python/src/core/expr/helpers.ts +++ b/packages/python/src/core/expr/helpers.ts @@ -12,14 +12,67 @@ export const KERN_PAIR_HELPERS_PY = [ ].join('\n'); export const KERN_FMT_HELPER_PY = [ + 'class _KernUndefined:', + // JS `undefined` is falsy: `!undefined`, `undefined ? a : b`, `if (undefined)`, + // and `undefined || x` must behave as falsy. A bare object is truthy in Python, + // so override __bool__ — without this the sentinel diverges from JS in every + // truthiness position. Identity (`is`) is unaffected, so the `??` checks hold. + ' def __bool__(self): return False', + " def __repr__(self): return 'undefined'", + " def __str__(self): return 'undefined'", + '_KERN_UNDEFINED = _KernUndefined()', + '', 'def _kern_fmt(__k_v):', - ' if isinstance(__k_v, bool):', - " return 'true' if __k_v else 'false'", + ' if __k_v is _KERN_UNDEFINED:', + " return 'undefined'", ' if __k_v is None:', " return 'null'", + ' if isinstance(__k_v, bool):', + " return 'true' if __k_v else 'false'", + ' if isinstance(__k_v, str):', + ' return __k_v', + ' if isinstance(__k_v, float) and __k_v != __k_v:', + " return 'NaN'", + // JS String(Infinity) is "Infinity"/"-Infinity"; Python str(inf) is "inf". + // Check before is_integer() — inf.is_integer() is False and int(inf) raises. 
+ " if isinstance(__k_v, float) and __k_v == float('inf'):", + " return 'Infinity'", + " if isinstance(__k_v, float) and __k_v == float('-inf'):", + " return '-Infinity'", ' if isinstance(__k_v, float) and __k_v.is_integer():', ' return str(int(__k_v))', + ' if isinstance(__k_v, (int, float)):', + ' return str(__k_v)', + ' if isinstance(__k_v, (list, tuple)):', + " return ','.join(", + " '' if x is None or x is _KERN_UNDEFINED else _kern_fmt(x)", + ' for x in __k_v', + ' )', + ' if isinstance(__k_v, dict):', + " return '[object Object]'", ' return str(__k_v)', + '', + 'def __kern_add(left, right):', + ' # JS `+`: string concat when either operand is string-ish (ToPrimitive →', + ' # string for str/array/object/tuple); otherwise numeric addition with ToNumber', + ' # coercion (null→0, undefined→NaN, bool→0/1) so `5 + null` is 5, not "5null".', + ' if isinstance(left, (str, list, tuple, dict)) or isinstance(right, (str, list, tuple, dict)):', + ' return _kern_fmt(left) + _kern_fmt(right)', + ' def _num(v):', + ' if v is _KERN_UNDEFINED:', + " return float('nan')", + ' if v is None:', + ' return 0', + ' if isinstance(v, bool):', + ' return 1 if v else 0', + ' return v', + ' # ToNumber path for the KERN value domain (numbers/bool/null/undefined).', + ' # Any exotic host type (set, custom object) that escapes the string-ish', + ' # check falls back to JS object→string concat rather than raising.', + ' try:', + ' return _num(left) + _num(right)', + ' except TypeError:', + ' return _kern_fmt(left) + _kern_fmt(right)', ].join('\n'); export const KERN_I32_HELPER_PY = [ diff --git a/packages/python/src/generators/ground.ts b/packages/python/src/generators/ground.ts index 0a2cbb1c..6b752f02 100644 --- a/packages/python/src/generators/ground.ts +++ b/packages/python/src/generators/ground.ts @@ -16,6 +16,16 @@ import { } from '../codegen-helpers.js'; import { mapTsTypeToPython, toPythonBindingName, toSnakeCase } from '../type-map.js'; +/** Ground/React Layer generators emit 
module-level statements and have NO + * per-statement channel to define the runtime helpers (`_kern_fmt`, + * `__kern_add`, the `_KERN_UNDEFINED` sentinel) that JS value→string coercion + * needs. So every `emitPyExpression` here opts out of coercion and keeps the + * pre-slice forms (raw `+`, raw f-string interpolation, `None` for undefined, + * None-only `??`). Coercion remains scoped to native KERN bodies, where + * helpers inject function-locally. Extending it to this layer is a follow-up + * that needs module-level (single-definition) helper injection. */ +const GROUND_EMIT = { coerceJsValues: false } as const; + /** * Common preamble extracted from all ground layer generators. * Returns { annotations, todo, props, name } ready for use. @@ -145,7 +155,7 @@ export function generateFirstTruthy(node: IRNode): string[] { } function emitFirstTruthyOperandPy(valueIR: ValueIR): string { - const emitted = emitPyExpression(valueIR); + const emitted = emitPyExpression(valueIR, GROUND_EMIT); return valueIR.kind === 'conditional' ? `(${emitted})` : emitted; } @@ -172,7 +182,7 @@ export function generateCoalesce(node: IRNode): string[] { const constType = props.type as string | undefined; const typeAnnotation = constType ? `: ${mapTsTypeToPython(constType)}` : ''; - const chain = emitPyExpression(buildNullishCoalesceIR(valueIRs)); + const chain = emitPyExpression(buildNullishCoalesceIR(valueIRs), GROUND_EMIT); return [...todo, ...annotations, `${name}${typeAnnotation} = ${chain}`]; } @@ -193,7 +203,7 @@ export function generateFirstDefined(node: IRNode): string[] { const constType = props.type as string | undefined; const typeAnnotation = constType ? 
`: ${mapTsTypeToPython(constType)}` : ''; - const chain = emitPyExpression(buildNullishCoalesceIR(valueIRs)); + const chain = emitPyExpression(buildNullishCoalesceIR(valueIRs), GROUND_EMIT); return [...todo, ...annotations, `${name}${typeAnnotation} = ${chain}`]; } @@ -213,7 +223,7 @@ export function generateObjectMerge(node: IRNode): string[] { if (sourceIR.kind === 'propagate') { throw new Error("Propagation '?' is not allowed in `objectMerge sources=` — bind the value first."); } - emitted.push(`**(${emitPyExpression(sourceIR)})`); + emitted.push(`**(${emitPyExpression(sourceIR, GROUND_EMIT)})`); } const constType = props.type as string | undefined; @@ -235,7 +245,7 @@ export function generateObjectPick(node: IRNode): string[] { if (inIR.kind === 'propagate') { throw new Error("Propagation '?' is not allowed in objectPick in="); } - const inExpr = emitPyExpression(inIR); + const inExpr = emitPyExpression(inIR, GROUND_EMIT); const keysList = parseKeys(rawKeys, node, 'objectPick keys='); const formattedKeys = emitStringKeyArray(keysList); @@ -264,7 +274,7 @@ export function generateObjectOmit(node: IRNode): string[] { if (inIR.kind === 'propagate') { throw new Error("Propagation '?' 
is not allowed in objectOmit in="); } - const inExpr = emitPyExpression(inIR); + const inExpr = emitPyExpression(inIR, GROUND_EMIT); const keysList = parseKeys(rawKeys, node, 'objectOmit keys='); const formattedKeys = emitStringKeyArray(keysList); diff --git a/packages/python/tests/native-handlers-cell-python.test.ts b/packages/python/tests/native-handlers-cell-python.test.ts index 42fda4c3..7c964155 100644 --- a/packages/python/tests/native-handlers-cell-python.test.ts +++ b/packages/python/tests/native-handlers-cell-python.test.ts @@ -9,6 +9,7 @@ import type { IRNode } from '@kernlang/core'; import { emitNativeKernBodyPython } from '../src/codegen-body-python.js'; +import { KERN_FMT_HELPER_PY } from '../src/core/expr/helpers.js'; function makeHandler(children: Array<{ type: string; props?: Record }>): IRNode { return { @@ -18,6 +19,10 @@ function makeHandler(children: Array<{ type: string; props?: Record { test('lowers to plain assignment', () => { const handler = makeHandler([{ type: 'cell', props: { name: 'count', initial: '0' } }]); @@ -39,7 +44,9 @@ describe('cell body-statement — Python codegen', () => { { type: 'cell', props: { name: 'count', initial: '0' } }, { type: 'set', props: { name: 'count', to: 'count + 1' } }, ]); - expect(emitNativeKernBodyPython(handler)).toBe(['count = 0', 'count = count + 1'].join('\n')); + expect(emitNativeKernBodyPython(handler)).toBe( + PY_PRELUDE + ['count = 0', 'count = __kern_add(count, 1)'].join('\n'), + ); }); test('throws on missing name', () => { diff --git a/packages/python/tests/native-handlers-python.test.ts b/packages/python/tests/native-handlers-python.test.ts index 21f42353..563ae1d4 100644 --- a/packages/python/tests/native-handlers-python.test.ts +++ b/packages/python/tests/native-handlers-python.test.ts @@ -12,6 +12,7 @@ import { emitNativeKernBodyPythonWithImports, emitPyExpression, } from '../src/codegen-body-python.js'; +import { KERN_FMT_HELPER_PY } from '../src/core/expr/helpers.js'; import { 
generateFunction } from '../src/generators/core.js'; function makeHandler(stmts: Array<{ type: string; props: Record; children?: IRNode[] }>): IRNode { @@ -22,6 +23,11 @@ function makeHandler(stmts: Array<{ type: string; props: Record }; } +// JS value→string coercion runtime prelude. Body-emit prepends this whole block +// (the _KERN_UNDEFINED sentinel + _kern_fmt + __kern_add helpers) whenever a body +// is lowered, ending with a blank-line separator before the body statements. +const PY_PRELUDE = `${KERN_FMT_HELPER_PY}\n\n`; + describe('emitPyExpression — slice 1 lowering rules', () => { test('booleans lower to Python True/False', () => { expect(emitPyExpression(parseExpression('true'))).toBe('True'); @@ -33,8 +39,8 @@ describe('emitPyExpression — slice 1 lowering rules', () => { expect(emitPyExpression(parseExpression('none'))).toBe('None'); }); - test('undefined lowers to None (slice 1 simplification)', () => { - expect(emitPyExpression(parseExpression('undefined'))).toBe('None'); + test('undefined lowers to the _KERN_UNDEFINED sentinel', () => { + expect(emitPyExpression(parseExpression('undefined'))).toBe('_KERN_UNDEFINED'); }); test('await lowers to Python `await ${expr}`', () => { @@ -113,7 +119,7 @@ describe('emitPyExpression — slice 1 lowering rules', () => { left: { kind: 'numLit', value: 1, raw: '1' }, right: { kind: 'numLit', value: 2, raw: '2' }, }), - ).toBe('1 + 2'); + ).toBe('__kern_add(1, 2)'); }); }); @@ -124,19 +130,7 @@ describe('emitNativeKernBodyPython — expression-v1 and nested fn statements', { type: 'return', props: { value: 'label' } }, ]); expect(emitNativeKernBodyPython(handler)).toBe( - [ - 'def _kern_fmt(__k_v):', - ' if isinstance(__k_v, bool):', - " return 'true' if __k_v else 'false'", - ' if __k_v is None:', - " return 'null'", - ' if isinstance(__k_v, float) and __k_v.is_integer():', - ' return str(int(__k_v))', - ' return str(__k_v)', - '', - 'label = _kern_fmt(value)', - 'return label', - ].join('\n'), + PY_PRELUDE + ['label = 
_kern_fmt(value)', 'return label'].join('\n'), ); }); @@ -152,7 +146,8 @@ describe('emitNativeKernBodyPython — expression-v1 and nested fn statements', { type: 'return', props: { value: 'add(2, 3)' } }, ]); expect(emitNativeKernBodyPython(handler)).toBe( - ['def add(a: float, b: float) -> float:', ' return a + b', 'return add(2, 3)'].join('\n'), + PY_PRELUDE + + ['def add(a: float, b: float) -> float:', ' return __kern_add(a, b)', 'return add(2, 3)'].join('\n'), ); }); @@ -190,9 +185,12 @@ describe('emitNativeKernBodyPython — expression-v1 and nested fn statements', }, ]); expect(emitNativeKernBodyPython(handler)).toBe( - ['async def loadTotal(amount: float) -> float:', ' loaded = await load(amount)', ' return loaded + 5'].join( - '\n', - ), + PY_PRELUDE + + [ + 'async def loadTotal(amount: float) -> float:', + ' loaded = await load(amount)', + ' return __kern_add(loaded, 5)', + ].join('\n'), ); }); @@ -212,7 +210,9 @@ describe('emitNativeKernBodyPython — expression-v1 and nested fn statements', { type: 'expression-v1', props: { name: 'total', expr: { __expr: true, code: 'amount + 1' } } }, { type: 'return', props: { value: 'total' } }, ]); - expect(emitNativeKernBodyPython(handler)).toBe(['total = amount + 1', 'return total'].join('\n')); + expect(emitNativeKernBodyPython(handler)).toBe( + PY_PRELUDE + ['total = __kern_add(amount, 1)', 'return total'].join('\n'), + ); }); test('nested fn rejects mixed legacy and structured params', () => { @@ -591,10 +591,11 @@ describe('emitNativeKernBodyPython — coalesce / firstDefined body statement', { type: 'return', props: { value: 'winner' } }, ]); expect(emitNativeKernBodyPython(handler)).toBe( - [ - 'winner = (count if count is not None else (flag if flag is not None else (label if label is not None else "fallback")))', - 'return winner', - ].join('\n'), + PY_PRELUDE + + [ + 'winner = (count if (count is not None and count is not _KERN_UNDEFINED) else (flag if (flag is not None and flag is not _KERN_UNDEFINED) else (label 
if (label is not None and label is not _KERN_UNDEFINED) else "fallback")))', + 'return winner', + ].join('\n'), ); }); @@ -604,7 +605,11 @@ describe('emitNativeKernBodyPython — coalesce / firstDefined body statement', { type: 'return', props: { value: 'winner' } }, ]); expect(emitNativeKernBodyPython(handler)).toBe( - ['winner = (primary if primary is not None else secondary)', 'return winner'].join('\n'), + PY_PRELUDE + + [ + 'winner = (primary if (primary is not None and primary is not _KERN_UNDEFINED) else secondary)', + 'return winner', + ].join('\n'), ); }); diff --git a/packages/python/tests/native-handlers-slice-alpha2-ternary-python.test.ts b/packages/python/tests/native-handlers-slice-alpha2-ternary-python.test.ts index a58a0b27..0daa71bb 100644 --- a/packages/python/tests/native-handlers-slice-alpha2-ternary-python.test.ts +++ b/packages/python/tests/native-handlers-slice-alpha2-ternary-python.test.ts @@ -12,8 +12,8 @@ describe('emitPyExpression — ternary lowering', () => { }); test('binary test gets parens around the test in Python form', () => { - // Python: `b if (a + 1) else c` - expect(emitPyExpression(parseExpression('a + 1 ? b : c'))).toBe('b if (a + 1) else c'); + // Python: `b if (__kern_add(a, 1)) else c` — the `+` test lowers to __kern_add. + expect(emitPyExpression(parseExpression('a + 1 ? 
b : c'))).toBe('b if (__kern_add(a, 1)) else c'); }); test('nested ternary in alternate gets parens', () => { diff --git a/packages/python/tests/native-handlers-slice2-python.test.ts b/packages/python/tests/native-handlers-slice2-python.test.ts index e69cb204..0708dfe6 100644 --- a/packages/python/tests/native-handlers-slice2-python.test.ts +++ b/packages/python/tests/native-handlers-slice2-python.test.ts @@ -13,14 +13,21 @@ import type { IRNode } from '@kernlang/core'; import { parseDocument, parseExpression } from '@kernlang/core'; import { emitNativeKernBodyPython, emitPyExpression } from '../src/codegen-body-python.js'; +import { KERN_FMT_HELPER_PY } from '../src/core/expr/helpers.js'; import { generateFunction } from '../src/generators/core.js'; function makeHandler(children: IRNode[]): IRNode { return { type: 'handler', props: { lang: 'kern' }, children }; } +// A dynamic `typeof` references the `_KERN_UNDEFINED` sentinel, so any BODY that +// lowers one carries the coercion helper prelude (derived from the source const +// so it can never drift). Expression-only `emitPyExpression` returns just the +// expression — no prelude — since it discards the collected helper set. 
+const PY_PRELUDE = `${KERN_FMT_HELPER_PY}\n\n`; + const TYPEOF_VALUE_PY = - '("object" if (__k_typeof1 := value) is None else "boolean" if isinstance(__k_typeof1, bool) else "number" if isinstance(__k_typeof1, (int, float)) else "string" if isinstance(__k_typeof1, str) else "function" if callable(__k_typeof1) else "object")'; + '("undefined" if (__k_typeof1 := value) is _KERN_UNDEFINED else "object" if __k_typeof1 is None else "boolean" if isinstance(__k_typeof1, bool) else "number" if isinstance(__k_typeof1, (int, float)) else "string" if isinstance(__k_typeof1, str) else "function" if callable(__k_typeof1) else "object")'; // ── 2b: stdlib expansion (Python) ──────────────────────────────────────── @@ -72,12 +79,12 @@ describe('KERN-stdlib expansion — Python target', () => { // ── 2c: arithmetic + comparison (Python) ───────────────────────────────── describe('emitPyExpression — arithmetic + comparison + unary', () => { - test('addition emits verbatim', () => { - expect(emitPyExpression(parseExpression('a + b'))).toBe('a + b'); + test('addition lowers to __kern_add (JS + string-coercion guard)', () => { + expect(emitPyExpression(parseExpression('a + b'))).toBe('__kern_add(a, b)'); }); test('multiplication binds tighter (precedence)', () => { - expect(emitPyExpression(parseExpression('a + b * c'))).toBe('a + b * c'); + expect(emitPyExpression(parseExpression('a + b * c'))).toBe('__kern_add(a, b * c)'); }); test('strict equality === lowers to Python ==', () => { @@ -134,7 +141,7 @@ describe('emitPyExpression — arithmetic + comparison + unary', () => { test('typeof in return body codegen does not throw on Python target', () => { const handler = makeHandler([{ type: 'return', props: { value: 'typeof value === "string"' }, children: [] }]); - expect(emitNativeKernBodyPython(handler)).toBe(`return ${TYPEOF_VALUE_PY} == "string"`); + expect(emitNativeKernBodyPython(handler)).toBe(`${PY_PRELUDE}return ${TYPEOF_VALUE_PY} == "string"`); }); test('typeof composes in Python 
if conditions', () => { @@ -145,15 +152,15 @@ describe('emitPyExpression — arithmetic + comparison + unary', () => { children: [{ type: 'return', props: { value: 'value' }, children: [] }], }, ]); - expect(emitNativeKernBodyPython(handler)).toBe(`if ${TYPEOF_VALUE_PY} == "string":\n return value`); + expect(emitNativeKernBodyPython(handler)).toBe(`${PY_PRELUDE}if ${TYPEOF_VALUE_PY} == "string":\n return value`); }); test('nested typeof and await keep stable temp numbering', () => { expect(emitPyExpression(parseExpression('typeof typeof value'))).toBe( - '("object" if (__k_typeof2 := (("object" if (__k_typeof1 := value) is None else "boolean" if isinstance(__k_typeof1, bool) else "number" if isinstance(__k_typeof1, (int, float)) else "string" if isinstance(__k_typeof1, str) else "function" if callable(__k_typeof1) else "object"))) is None else "boolean" if isinstance(__k_typeof2, bool) else "number" if isinstance(__k_typeof2, (int, float)) else "string" if isinstance(__k_typeof2, str) else "function" if callable(__k_typeof2) else "object")', + '("undefined" if (__k_typeof2 := (("undefined" if (__k_typeof1 := value) is _KERN_UNDEFINED else "object" if __k_typeof1 is None else "boolean" if isinstance(__k_typeof1, bool) else "number" if isinstance(__k_typeof1, (int, float)) else "string" if isinstance(__k_typeof1, str) else "function" if callable(__k_typeof1) else "object"))) is _KERN_UNDEFINED else "object" if __k_typeof2 is None else "boolean" if isinstance(__k_typeof2, bool) else "number" if isinstance(__k_typeof2, (int, float)) else "string" if isinstance(__k_typeof2, str) else "function" if callable(__k_typeof2) else "object")', ); expect(emitPyExpression(parseExpression('typeof await readValue()'))).toBe( - '("object" if (__k_typeof1 := (await readValue())) is None else "boolean" if isinstance(__k_typeof1, bool) else "number" if isinstance(__k_typeof1, (int, float)) else "string" if isinstance(__k_typeof1, str) else "function" if callable(__k_typeof1) else 
"object")', + '("undefined" if (__k_typeof1 := (await readValue())) is _KERN_UNDEFINED else "object" if __k_typeof1 is None else "boolean" if isinstance(__k_typeof1, bool) else "number" if isinstance(__k_typeof1, (int, float)) else "string" if isinstance(__k_typeof1, str) else "function" if callable(__k_typeof1) else "object")', ); }); @@ -442,16 +449,20 @@ describe('Cross-target parity — slice 2 stdlib hard cases', () => { describe('Review fixes — Python', () => { test('`??` nullish coalesce lowers to Python ternary with None check', () => { - expect(emitPyExpression(parseExpression('user ?? guest'))).toBe('(user if user is not None else guest)'); + expect(emitPyExpression(parseExpression('user ?? guest'))).toBe( + '(user if (user is not None and user is not _KERN_UNDEFINED) else guest)', + ); }); test('`??` on member chain also works', () => { - expect(emitPyExpression(parseExpression('user.id ?? 0'))).toBe('(user.id if user.id is not None else 0)'); + expect(emitPyExpression(parseExpression('user.id ?? 0'))).toBe( + '(user.id if (user.id is not None and user.id is not _KERN_UNDEFINED) else 0)', + ); }); test('`??` with side-effecting left side uses walrus for single-eval (slice 4c)', () => { expect(emitPyExpression(parseExpression('call() ?? b'))).toBe( - '(__k_nc1 if (__k_nc1 := call()) is not None else b)', + '(__k_nc1 if ((__k_nc1 := call()) is not None and __k_nc1 is not _KERN_UNDEFINED) else b)', ); }); @@ -466,8 +477,9 @@ describe('Review fixes — Python', () => { test('non-comparison binary ops do NOT trigger force-paren', () => { // `a + b - c` should NOT get extra parens (force-paren only applies to - // comparison-comparison nesting). - expect(emitPyExpression(parseExpression('a + b - c'))).toBe('a + b - c'); + // comparison-comparison nesting). The `+` lowers to __kern_add; the `-` + // is a non-`+` op and stays verbatim. 
+ expect(emitPyExpression(parseExpression('a + b - c'))).toBe('__kern_add(a, b) - c'); }); test('stdlib arity mismatch — Python target also throws', () => { diff --git a/packages/python/tests/native-handlers-slice3-python.test.ts b/packages/python/tests/native-handlers-slice3-python.test.ts index c237f74c..0c806940 100644 --- a/packages/python/tests/native-handlers-slice3-python.test.ts +++ b/packages/python/tests/native-handlers-slice3-python.test.ts @@ -32,6 +32,7 @@ import { emitNativeKernBodyPythonWithImports, emitPyExpression, } from '../src/codegen-body-python.js'; +import { KERN_FMT_HELPER_PY } from '../src/core/expr/helpers.js'; import { generateFunction } from '../src/generators/core.js'; function makeHandler(children: IRNode[]): IRNode { @@ -46,6 +47,10 @@ function makeFn(props: Record, handlerChildren: IRNode[], param }; } +// JS value→string coercion runtime prelude (sentinel + _kern_fmt + __kern_add), +// prepended whenever a body lowers a `+` to __kern_add (string-coercion guard). 
+const PY_PRELUDE = `${KERN_FMT_HELPER_PY}\n\n`; + // ── 3a: symbol map (snake_case rename) ──────────────────────────────────── describe('slice 3a — Python symbol-map for snake_case params', () => { @@ -67,7 +72,7 @@ describe('slice 3a — Python symbol-map for snake_case params', () => { test('identifiers absent from symbolMap pass through unchanged', () => { const handler = makeHandler([{ type: 'return', props: { value: 'localVar + helperFn(x)' } }]); const out = emitNativeKernBodyPython(handler, { symbolMap: { onlyThisOne: 'only_this_one' } }); - expect(out).toBe('return localVar + helperFn(x)'); + expect(out).toBe(`${PY_PRELUDE}return __kern_add(localVar, helperFn(x))`); }); test('without symbolMap (legacy slice 1/2 callers) bodies emit unchanged', () => { diff --git a/packages/python/tests/native-handlers-slice4c-nullish.test.ts b/packages/python/tests/native-handlers-slice4c-nullish.test.ts index ac497dca..eb3a958b 100644 --- a/packages/python/tests/native-handlers-slice4c-nullish.test.ts +++ b/packages/python/tests/native-handlers-slice4c-nullish.test.ts @@ -21,30 +21,32 @@ import { emitPyExpression } from '../src/codegen-body-python.js'; describe('slice 4c — ?? nullish coalesce on Python target', () => { test('ident left lowers to readable double-name form', () => { - expect(emitPyExpression(parseExpression('user ?? guest'))).toBe('(user if user is not None else guest)'); + expect(emitPyExpression(parseExpression('user ?? guest'))).toBe( + '(user if (user is not None and user is not _KERN_UNDEFINED) else guest)', + ); }); test('member chain left also uses double-name form (pure receiver)', () => { expect(emitPyExpression(parseExpression('user.name ?? "anon"'))).toBe( - '(user.name if user.name is not None else "anon")', + '(user.name if (user.name is not None and user.name is not _KERN_UNDEFINED) else "anon")', ); }); test('deep member chain stays in pure form', () => { expect(emitPyExpression(parseExpression('user.profile.email ?? 
"no-email"'))).toBe( - '(user.profile.email if user.profile.email is not None else "no-email")', + '(user.profile.email if (user.profile.email is not None and user.profile.email is not _KERN_UNDEFINED) else "no-email")', ); }); test('call() left switches to walrus for single-eval', () => { expect(emitPyExpression(parseExpression('fetchName() ?? "default"'))).toBe( - '(__k_nc1 if (__k_nc1 := fetchName()) is not None else "default")', + '(__k_nc1 if ((__k_nc1 := fetchName()) is not None and __k_nc1 is not _KERN_UNDEFINED) else "default")', ); }); test('await left switches to walrus', () => { expect(emitPyExpression(parseExpression('(await loadName()) ?? "default"'))).toBe( - '(__k_nc1 if (__k_nc1 := await loadName()) is not None else "default")', + '(__k_nc1 if ((__k_nc1 := await loadName()) is not None and __k_nc1 is not _KERN_UNDEFINED) else "default")', ); }); @@ -53,7 +55,7 @@ describe('slice 4c — ?? nullish coalesce on Python target', () => { // double-name form (re-evaluating a + b is technically fine for pure // arithmetic, but the purity heuristic conservatively walrus-binds). expect(emitPyExpression(parseExpression('(a + b) ?? 0'))).toBe( - '(__k_nc1 if (__k_nc1 := a + b) is not None else 0)', + '(__k_nc1 if ((__k_nc1 := __kern_add(a, b)) is not None and __k_nc1 is not _KERN_UNDEFINED) else 0)', ); }); @@ -61,7 +63,7 @@ describe('slice 4c — ?? nullish coalesce on Python target', () => { // a ?? (call() ?? b) — outer pure (a is ident), inner non-pure (call). // Inner gets walrus __k_nc1; outer stays in double-name form. expect(emitPyExpression(parseExpression('a ?? (call() ?? b)'))).toBe( - '(a if a is not None else (__k_nc1 if (__k_nc1 := call()) is not None else b))', + '(a if (a is not None and a is not _KERN_UNDEFINED) else (__k_nc1 if ((__k_nc1 := call()) is not None and __k_nc1 is not _KERN_UNDEFINED) else b))', ); }); @@ -71,7 +73,7 @@ describe('slice 4c — ?? 
nullish coalesce on Python target', () => { // side which doesn't itself trigger walrus (since walrus only fires on // the LEFT of a ??). expect(emitPyExpression(parseExpression('call1() ?? call2()'))).toBe( - '(__k_nc1 if (__k_nc1 := call1()) is not None else call2())', + '(__k_nc1 if ((__k_nc1 := call1()) is not None and __k_nc1 is not _KERN_UNDEFINED) else call2())', ); }); @@ -79,7 +81,7 @@ describe('slice 4c — ?? nullish coalesce on Python target', () => { // Number.floor(x) lowers to __k_math.floor(x) — a call expression, // hence non-pure for the purity check, hence walrus. expect(emitPyExpression(parseExpression('Number.floor(x) ?? 0'))).toBe( - '(__k_nc1 if (__k_nc1 := __k_math.floor(x)) is not None else 0)', + '(__k_nc1 if ((__k_nc1 := __k_math.floor(x)) is not None and __k_nc1 is not _KERN_UNDEFINED) else 0)', ); }); }); diff --git a/packages/python/tests/native-handlers-stdlib-python.test.ts b/packages/python/tests/native-handlers-stdlib-python.test.ts index b0a34504..7d1731e0 100644 --- a/packages/python/tests/native-handlers-stdlib-python.test.ts +++ b/packages/python/tests/native-handlers-stdlib-python.test.ts @@ -45,7 +45,7 @@ describe('emitPyExpression — KERN-stdlib dispatch (Text module)', () => { test('lambda callbacks lower to Python lambda expressions', () => { expect(emitPyExpression(parseExpression('visit(() => value)'))).toBe('visit(lambda: value)'); - expect(emitPyExpression(parseExpression('visit((a, b) => a + b)'))).toBe('visit(lambda a, b: a + b)'); + expect(emitPyExpression(parseExpression('visit((a, b) => a + b)'))).toBe('visit(lambda a, b: __kern_add(a, b))'); expect(emitPyExpression(parseExpression('visit(user => user.name)'))).toBe('visit(lambda user: user.name)'); expect(emitPyExpression(parseExpression('visit((user: User) => user.name)'))).toBe('visit(lambda user: user.name)'); }); diff --git a/scripts/coercion-conformance.mjs b/scripts/coercion-conformance.mjs index 10dea1f2..efd50f94 100644 --- a/scripts/coercion-conformance.mjs 
+++ b/scripts/coercion-conformance.mjs @@ -35,6 +35,11 @@ const { parse, generateCoreNode } = await import(join(REPO, 'packages/core/dist/ const { generatePythonCoreNode } = await import(join(REPO, 'packages/python/dist/codegen-python.js')); const tsCompiler = await import('typescript'); +// A big decimal float literal (KERN has no `e` exponent syntax) whose square +// overflows IEEE-754 double → inf on Python / Infinity on JS. Used to exercise +// the non-finite-float coercion branch at runtime. +const HUGE = `1${'0'.repeat(200)}.0`; + // Each fixture: a probe() returning the value under test. `expected` is JS/TS truth. const FIXTURES = [ // ── Template interpolation: scalar coercion ─────────────────────────────── @@ -51,6 +56,14 @@ const FIXTURES = [ { name: 'concat string + bool', ret: 'string', expr: '"a" + true', expected: 'atrue' }, // ── Mixed ───────────────────────────────────────────────────────────────── { name: 'mixed template (arith + bool)', ret: 'string', expr: '`count: ${1 + 2}, ok: ${true}`', expected: 'count: 3, ok: true' }, + // ── `+` with nullish: JS numeric ToNumber coercion (null→0, undef→NaN) ───── + { name: 'number + null → numeric (null→0)', ret: 'number', expr: '5 + null', expected: 5 }, + { name: 'null + number → numeric (null→0)', ret: 'number', expr: 'null + 5', expected: 5 }, + { name: 'number + bool → numeric (true→1)', ret: 'number', expr: '5 + true', expected: 6 }, + { name: 'number + undefined → NaN renders "NaN"', ret: 'string', expr: '`${5 + undefined}`', expected: 'NaN' }, + // ── Non-finite floats: JS String() → "Infinity"/"-Infinity" (Python str → "inf") ── + { name: 'Infinity renders "Infinity"', ret: 'string', expr: `\`\${${HUGE} * ${HUGE}}\``, expected: 'Infinity' }, + { name: 'negative Infinity renders "-Infinity"', ret: 'string', expr: `\`\${-${HUGE} * ${HUGE}}\``, expected: '-Infinity' }, // ── GUARD fixtures — currently GREEN, must STAY green (catch over-fixes) ─── { name: 'GUARD numeric + stays additive', ret: 
'number', expr: '2 + 3', expected: 5 }, { name: 'GUARD nullish keeps present value', ret: 'number', expr: '5 ?? 9', expected: 5 },