diff --git a/spec-generators/src/defaults.ts b/spec-generators/src/defaults.ts index a9cd2bb..7995558 100644 --- a/spec-generators/src/defaults.ts +++ b/spec-generators/src/defaults.ts @@ -51,35 +51,6 @@ export const CATEGORY_DIRECTORIES: ReadonlyMap = new Map([ ['value', 'value_nodes'], ]); -/** - * Per-inline-union configuration. - * - * Most emittable unions are the category-main union (the standalone - * twin of a `registered…`); those have an obvious variant-naming - * rule (strip the category suffix). Inline / synthetic unions — - * unions used in a single attribute, with no registered twin — need - * an explicit allowlist so the generator knows to emit them, plus - * their own variant-naming convention. - * - * Each entry's key is the spec union name; the value carries: - * - * - `stripSuffix`: the PascalCase suffix to strip from each leaf - * node's kind when deriving its Rust variant name. For example, - * `constantPdaSeedValue`'s flattened members include - * `numberValueNode`, `programIdValueNode`, …; stripping the - * `ValueNode` suffix yields variant names `Number`, `ProgramId`, - * etc. — matching the convention used by today's hand-written - * inline unions in the same category family. 
- */ -export interface InlineUnionConfig { - readonly stripSuffix?: string; -} - -export const INLINE_UNIONS: ReadonlyMap<string, InlineUnionConfig> = new Map([ - ['constantPdaSeedValue', { stripSuffix: 'ValueNode' }], - ['enumValuePayload', { stripSuffix: 'ValueNode' }], -]); - /** * Spec union names the generator must NOT emit because their Rust * counterpart is bespoke hand-written code that can't be reproduced diff --git a/spec-generators/src/fragments/unionPage.ts b/spec-generators/src/fragments/unionPage.ts index d6d90c9..33b302d 100644 --- a/spec-generators/src/fragments/unionPage.ts +++ b/spec-generators/src/fragments/unionPage.ts @@ -2,8 +2,7 @@ import { pascalCase } from '@codama/fragments'; import { type Fragment, fragment, mergeFragments } from '@codama/fragments/rust'; import type { NodeSpec, Spec, UnionSpec } from '@codama/spec'; -import { INLINE_UNIONS } from '../defaults'; -import { flattenNodeUnion } from '../unions'; +import { flattenNodeUnion, getInlineUnionStripSuffix, getReferencedUnionNames, isInlineUnion } from '../unions'; import { getUnionHasNameImplFragment } from './hasNameImpl'; import { use } from './helpers'; @@ -37,7 +36,7 @@ interface UnionVariant { } function buildVariants(union: UnionSpec, spec: Spec): readonly UnionVariant[] { - const suffix = variantStripSuffix(union); + const suffix = variantStripSuffix(union, spec); return [...flattenNodeUnion(union, spec)] .map(node => ({ name: variantNameForNode(node.kind, suffix), node })) .toSorted((a, b) => a.name.localeCompare(b.name)); @@ -45,15 +44,17 @@ function buildVariants(union: UnionSpec, spec: Spec): readonly UnionVariant[] { /** * The PascalCase suffix to strip from each leaf node's kind when - * deriving variant names. For category-main unions it defaults to - * `pascalCase(union.name)` (e.g. `LinkNode`, `CountNode`). For inline - * unions, the suffix is taken from {@link INLINE_UNIONS}; inline - * unions whose members don't share a common suffix can omit - * `stripSuffix` (no stripping happens then). 
+ * deriving variant names. For category-main unions (those with a + * `registered` twin), strip the union's own `pascalCase` name + * (e.g. `LinkNode`, `CountNode`). For inline unions, compute the + * longest common PascalCase suffix of the members + * ({@link getInlineUnionStripSuffix}). */ -function variantStripSuffix(union: UnionSpec): string { - const inline = INLINE_UNIONS.get(union.name); - if (inline !== undefined) return inline.stripSuffix ?? ''; +function variantStripSuffix(union: UnionSpec, spec: Spec): string { + const allUnionNames = new Set(spec.categories.flatMap(c => c.unions).map(u => u.name)); + if (isInlineUnion(union, allUnionNames, getReferencedUnionNames(spec))) { + return getInlineUnionStripSuffix(union, spec); + } return pascalCase(union.name); } diff --git a/spec-generators/src/index.ts b/spec-generators/src/index.ts index 87aa5c4..1ad4fa0 100644 --- a/spec-generators/src/index.ts +++ b/spec-generators/src/index.ts @@ -29,8 +29,6 @@ export { CATEGORY_ROUTING, FIELD_TYPE_OVERRIDES, HAND_WRITTEN_UNIONS, - type InlineUnionConfig, - INLINE_UNIONS, } from './defaults'; export { buildRenderScope, @@ -103,7 +101,7 @@ function getSpecPagesRenderMap(spec: Spec, scope: RenderScope): RenderMap` sibling in the same category — * i.e. it's the category's main union (the standalone twin of a * registered/dispatch union); OR - * - It appears in the {@link INLINE_UNIONS} allowlist — an - * opt-in registry of inline / synthetic unions the generator - * should emit despite not having a registered twin. + * - It's an inline union per {@link isInlineUnion}: no `registered` + * twin AND referenced by at least one node attribute somewhere + * in the spec. This rule is derived from the spec; no hand-list. * * Unions in {@link HAND_WRITTEN_UNIONS} are skipped — their Rust * counterpart is bespoke (e.g. `valueNode` → `RegisteredValueNode` @@ -28,15 +28,91 @@ const REGISTERED_UNION_PREFIX = 'registered'; * * Sorted alphabetically by name for stable output. 
*/ -export function getEmittableUnions(category: Spec['categories'][number]): readonly UnionSpec[] { - const unionNames = new Set(category.unions.map(u => u.name)); +export function getEmittableUnions(category: Spec['categories'][number], spec: Spec): readonly UnionSpec[] { + const referenced = getReferencedUnionNames(spec); + const allUnionNames = new Set(spec.categories.flatMap(c => c.unions).map(u => u.name)); return category.unions .filter(u => !u.name.startsWith(REGISTERED_UNION_PREFIX)) .filter(u => !HAND_WRITTEN_UNIONS.has(u.name)) - .filter(u => unionNames.has(`${REGISTERED_UNION_PREFIX}${pascalCase(u.name)}`) || INLINE_UNIONS.has(u.name)) + .filter(u => hasRegisteredTwin(u.name, allUnionNames) || isInlineUnion(u, allUnionNames, referenced)) .toSorted((a, b) => a.name.localeCompare(b.name)); } +/** + * `true` when `union` is an inline / synthetic union: it has no + * `registered<PascalName>` sibling anywhere in the spec AND it is + * actually referenced by some node attribute (we only emit unions + * that are used). Derived purely from the spec structure — no + * hand-maintained allowlist. + */ +export function isInlineUnion( + union: UnionSpec, + allUnionNames: ReadonlySet<string>, + referenced: ReadonlySet<string>, +): boolean { + if (hasRegisteredTwin(union.name, allUnionNames)) return false; + return referenced.has(union.name); +} + +function hasRegisteredTwin(unionName: string, allUnionNames: ReadonlySet<string>): boolean { + return allUnionNames.has(`${REGISTERED_UNION_PREFIX}${pascalCase(unionName)}`); +} + +/** + * Every spec union name reachable from at least one node attribute + * (recursively through `array(of)` / `tuple(items)` / etc.). Used by + * {@link isInlineUnion} so we only emit unions that something + * actually references. 
+ */ +export function getReferencedUnionNames(spec: Spec): ReadonlySet<string> { + const referenced = new Set<string>(); + const walk = (t: unknown): void => { + if (!t || typeof t !== 'object') return; + const node = t as { kind?: string; name?: string }; + if (node.kind === 'union' && typeof node.name === 'string') referenced.add(node.name); + for (const v of Object.values(t)) walk(v); + }; + for (const cat of spec.categories) { + for (const n of cat.nodes) { + for (const a of n.attributes) walk(a.type); + } + } + return referenced; +} + +/** + * The PascalCase suffix to strip from each leaf node's kind when + * deriving variant names for an inline union. Computed as the + * longest common PascalCase suffix shared by every leaf's + * `pascalCase(kind)`, trimmed back to start at an uppercase letter + * so we never strip mid-word. + * + * - `constantPdaSeedValue` (15 leaves) → `'ValueNode'` + * - `enumValuePayload` (2 leaves) → `'ValueNode'` + * - `pdaValuePda` (2 leaves) → `'Node'` + * + * For category-main unions (those with a `registered<PascalName>` twin) the + * stripped suffix is the union's own pascalCase name — handled in + * {@link variantStripSuffix} of `unionPage.ts`. + */ +export function getInlineUnionStripSuffix(union: UnionSpec, spec: Spec): string { + const leaves = [...flattenNodeUnion(union, spec)].map(n => pascalCase(n.kind)); + if (leaves.length === 0) return ''; + let suffix = ''; + const minLen = Math.min(...leaves.map(s => s.length)); + for (let i = 1; i <= minLen; i++) { + const ch = leaves[0][leaves[0].length - i]; + if (!leaves.every(s => s[s.length - i] === ch)) break; + suffix = ch + suffix; + } + // Trim back so we always start at an uppercase letter (word boundary). + for (let i = 0; i < suffix.length; i++) { + const ch = suffix[i]; + if (ch >= 'A' && ch <= 'Z') return suffix.slice(i); + } + return ''; +} + /** * Walk a union's members, recursively expanding nested `union(...)` * references down to their leaf nodes. 
Returns the flat list of diff --git a/spec-generators/test/fragments/unionPage.test.ts b/spec-generators/test/fragments/unionPage.test.ts index 8b219b2..fab017d 100644 --- a/spec-generators/test/fragments/unionPage.test.ts +++ b/spec-generators/test/fragments/unionPage.test.ts @@ -50,10 +50,11 @@ describe('getUnionPageFragment', () => { expect(imports).toContain('crate::CamelCaseString'); }); - it('honours the INLINE_UNIONS stripSuffix when naming variants of an inline union', () => { - // `constantPdaSeedValue` is in INLINE_UNIONS with stripSuffix: 'ValueNode'. - // Its flattened members include `programIdValueNode` + 14 value-node - // leaves; the suffix strip should yield `ProgramId`, `Number`, etc. + it('strips the longest common PascalCase suffix when naming variants of an inline union', () => { + // `constantPdaSeedValue`'s flattened members include + // `programIdValueNode` + 14 value-node leaves; the derived + // common suffix is `ValueNode`, so variants are stripped to + // `ProgramId`, `Number`, … const result = getUnionPageFragment(constantPdaSeedValueUnion, spec); expect(result.content).toContain('pub enum ConstantPdaSeedValue {'); expect(result.content).toContain('ProgramId(ProgramIdValueNode),'); diff --git a/spec-generators/test/unions.test.ts b/spec-generators/test/unions.test.ts index 0d0eff7..75a8444 100644 --- a/spec-generators/test/unions.test.ts +++ b/spec-generators/test/unions.test.ts @@ -1,38 +1,57 @@ import { getSpec } from '@codama/spec'; import { describe, expect, it } from 'vitest'; -import { flattenNodeUnion, getEmittableUnions } from '../src/unions'; +import { flattenNodeUnion, getEmittableUnions, getInlineUnionStripSuffix } from '../src/unions'; const spec = getSpec(); const linkCategory = spec.categories.find(c => c.name === 'link')!; const pdaSeedCategory = spec.categories.find(c => c.name === 'pdaSeed')!; +const valueCategory = spec.categories.find(c => c.name === 'value')!; describe('getEmittableUnions', () => { - it('returns the 
category-main union (the standalone twin of a `registered…`), sorted alphabetically', () => { - // `pdaSeed` also has `constantPdaSeedValue` in INLINE_UNIONS, - // so both are emittable; the sort puts `constantPdaSeedValue` - // before `pdaSeedNode`. - expect(getEmittableUnions(linkCategory).map(u => u.name)).toEqual(['linkNode']); - expect(getEmittableUnions(pdaSeedCategory).map(u => u.name)).toEqual(['constantPdaSeedValue', 'pdaSeedNode']); + it('returns the category-main union (the standalone twin of a `registered…`) plus any referenced inline unions, sorted alphabetically', () => { + // `pdaSeed` also has `constantPdaSeedValue` (inline, + // referenced by `constantPdaSeedNode.value`), so both are + // emittable; the sort puts `constantPdaSeedValue` first. + expect(getEmittableUnions(linkCategory, spec).map(u => u.name)).toEqual(['linkNode']); + expect(getEmittableUnions(pdaSeedCategory, spec).map(u => u.name)).toEqual([ + 'constantPdaSeedValue', + 'pdaSeedNode', + ]); }); it('skips category-registry unions (`registered*`)', () => { - expect(getEmittableUnions(linkCategory).map(u => u.name)).not.toContain('registeredLinkNode'); + expect(getEmittableUnions(linkCategory, spec).map(u => u.name)).not.toContain('registeredLinkNode'); }); - it('skips inline / synthetic unions that are NOT in the INLINE_UNIONS allowlist', () => { - // `linkNode`'s category has no inline-union members, so we can - // just confirm no spurious emission. A category with inline - // unions out of the allowlist would also be filtered out (no - // such case in pdaSeed today — constantPdaSeedValue IS in the - // allowlist, so it appears). - const names = getEmittableUnions(linkCategory).map(u => u.name); + it('skips inline unions that are not referenced anywhere in the spec', () => { + // The derived rule only emits an inline union if at least one + // node attribute references it. linkCategory has no inline + // members at all, so the rule yields just `linkNode`. 
+ const names = getEmittableUnions(linkCategory, spec).map(u => u.name); for (const u of linkCategory.unions) { - if (u.name.startsWith('registered')) continue; - if (u.name === 'linkNode') continue; + if (u.name.startsWith('registered') || u.name === 'linkNode') continue; expect(names).not.toContain(u.name); } }); + + it('skips HAND_WRITTEN_UNIONS even when they have a registered twin (e.g. value/valueNode)', () => { + expect(getEmittableUnions(valueCategory, spec).map(u => u.name)).not.toContain('valueNode'); + }); +}); + +describe('getInlineUnionStripSuffix', () => { + it('returns the longest common PascalCase suffix shared by every flattened leaf', () => { + const constantPdaSeedValue = pdaSeedCategory.unions.find(u => u.name === 'constantPdaSeedValue')!; + // Leaves include `programIdValueNode` + every `valueNode` leaf + // (all suffixed `ValueNode`). + expect(getInlineUnionStripSuffix(constantPdaSeedValue, spec)).toBe('ValueNode'); + }); + + it('handles a small inline union (enumValuePayload: structValueNode | tupleValueNode)', () => { + const enumValuePayload = valueCategory.unions.find(u => u.name === 'enumValuePayload')!; + expect(getInlineUnionStripSuffix(enumValuePayload, spec)).toBe('ValueNode'); + }); }); describe('flattenNodeUnion', () => {