diff --git a/packages/core/package.json b/packages/core/package.json index adc6084e..42c46455 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -44,6 +44,7 @@ "vitest": "4.0.4" }, "dependencies": { + "linkedom": "^0.18.12", "zod": "4.1.12" } } diff --git a/packages/core/src/bible-html-transformer.node.test.ts b/packages/core/src/bible-html-transformer.node.test.ts new file mode 100644 index 00000000..7ea23ac0 --- /dev/null +++ b/packages/core/src/bible-html-transformer.node.test.ts @@ -0,0 +1,122 @@ +/** + * @vitest-environment node + */ +import { describe, it, expect } from 'vitest'; +import { transformBibleHtmlForNode } from './bible-html-transformer'; + +describe('transformBibleHtmlForNode', () => { + it('should transform HTML using linkedom', () => { + const html = ` +
+
+ 1Verse textNote. +
+
+ `; + + const result = transformBibleHtmlForNode(html); + + expect(result.html).toBeDefined(); + expect(result.html).toContain('data-verse-footnote="1"'); + }); + + it('should handle empty HTML', () => { + const result = transformBibleHtmlForNode(''); + + expect(result.html).toBeDefined(); + }); + + it('should embed footnote content in data-verse-footnote-content', () => { + const html = ` +
+
+ 1TextFirst note. + 2More textSecond note. +
+
+ `; + + const result = transformBibleHtmlForNode(html); + + expect(result.html).toContain('data-verse-footnote="1"'); + expect(result.html).toContain('data-verse-footnote="2"'); + expect(result.html).toContain('First note'); + expect(result.html).toContain('Second note'); + }); + + it('should wrap verse content in .yv-v[v] elements', () => { + const html = ` +
+
+ 1Verse one text. +
+
+ 2Verse two text. +
+
+ `; + + const result = transformBibleHtmlForNode(html); + + // linkedom may serialize attributes in different order than browsers + expect(result.html).toContain('class="yv-v"'); + expect(result.html).toContain('v="1"'); + expect(result.html).toContain('v="2"'); + expect(result.html).toContain('Verse one text.'); + expect(result.html).toContain('Verse two text.'); + }); + + it('should add non-breaking space after verse labels', () => { + const html = ` +
+
+ 1Verse text. +
+
+ `; + + const result = transformBibleHtmlForNode(html); + + // linkedom encodes non-breaking space as &nbsp; instead of the raw character + expect(result.html).toMatch(/1(\u00A0|&nbsp;)/); + }); + + it('should handle intro chapter footnotes', () => { + const html = ` +
+
Some intro textFirst note and more textSecond note.
+
+ `; + + const result = transformBibleHtmlForNode(html); + + expect(result.html).toContain('data-verse-footnote="intro-0"'); + expect(result.html).toContain('data-verse-footnote="intro-1"'); + expect(result.html).toContain('First note'); + expect(result.html).toContain('Second note'); + }); + + it('should include data-verse-footnote-content attribute', () => { + const html = ` +
+
+ 1Verse textSee Rashi. +
+
+ `; + + const result = transformBibleHtmlForNode(html); + + expect(result.html).toContain('data-verse-footnote-content='); + expect(result.html).toContain('See Rashi'); + }); + + it('should return html property only', () => { + const html = '
Test
'; + const result = transformBibleHtmlForNode(html); + + expect(result).toHaveProperty('html'); + expect(result).not.toHaveProperty('notes'); + expect(typeof result.html).toBe('string'); + }); +}); diff --git a/packages/core/src/bible-html-transformer.test.ts b/packages/core/src/bible-html-transformer.test.ts new file mode 100644 index 00000000..3010fc13 --- /dev/null +++ b/packages/core/src/bible-html-transformer.test.ts @@ -0,0 +1,288 @@ +/** + * @vitest-environment jsdom + */ +import { describe, it, expect } from 'vitest'; +import { transformBibleHtml, transformBibleHtmlForBrowser } from './bible-html-transformer'; + +function createAdapters() { + return { + parseHtml: (html: string) => new DOMParser().parseFromString(html, 'text/html'), + serializeHtml: (doc: Document) => doc.body.innerHTML, + }; +} + +describe('transformBibleHtml - intro chapter footnotes', () => { + it('should create data-verse-footnote anchors with intro keys for orphaned footnotes', () => { + const html = ` +
+
Some intro textFirst note and more textSecond note.
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + expect(result.html).toContain('data-verse-footnote="intro-0"'); + expect(result.html).toContain('data-verse-footnote="intro-1"'); + expect(result.html).not.toContain('yv-n f'); + }); + + it('should preserve footnote content in data-verse-footnote-content attribute', () => { + const html = ` +
+
TextSee Rashi more.
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + expect(result.html).toContain('data-verse-footnote-content='); + expect(result.html).toContain('See Rashi'); + }); + + it('should not interfere with regular verse footnotes when mixed', () => { + const html = ` +
+
Intro textIntro note.
+
+ 1Verse textVerse note. +
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + expect(result.html).toContain('data-verse-footnote="intro-0"'); + expect(result.html).toContain('data-verse-footnote="1"'); + expect(result.html).toContain('Intro note'); + expect(result.html).toContain('Verse note'); + }); + + it('should insert space when orphaned footnote is between two words', () => { + const html = ` +
+
overcomeNoteit.
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + expect(result.html).toContain('overcome '); + expect(result.html).not.toMatch(/overcome { + const html = ` +
+
overcomeNote.
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + expect(result.html).not.toContain('overcome .'); + }); +}); + +describe('transformBibleHtml - verse wrapping', () => { + it('should wrap verse content in .yv-v[v] elements', () => { + const html = ` +
+
+ 1Verse one text. +
+
+ 2Verse two text. +
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + expect(result.html).toMatch(//); + expect(result.html).toMatch(//); + expect(result.html).not.toContain(''); + }); + + it('should not wrap heading elements inside verse content', () => { + const html = ` +
+
+ 1Text before heading +
+
A Heading
+
+ 2Text after heading +
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + const doc = new DOMParser().parseFromString(result.html, 'text/html'); + const heading = doc.querySelector('.s1'); + expect(heading).not.toBeNull(); + expect(heading!.closest('.yv-v')).toBeNull(); + }); +}); + +describe('transformBibleHtml - addNbspToVerseLabels', () => { + it('should add non-breaking space after verse labels', () => { + const html = ` +
+
+ 1Verse text. +
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + const doc = new DOMParser().parseFromString(result.html, 'text/html'); + const label = doc.querySelector('.yv-vlbl'); + expect(label).not.toBeNull(); + expect(label!.textContent).toContain('\u00A0'); + }); + + it('should not duplicate non-breaking space if already present', () => { + const html = ` +
+
+ 1\u00A0Verse text. +
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + const doc = new DOMParser().parseFromString(result.html, 'text/html'); + const label = doc.querySelector('.yv-vlbl'); + const text = label!.textContent ?? ''; + const count = (text.match(/\u00A0/g) || []).length; + expect(count).toBeLessThanOrEqual(1); + }); +}); + +describe('transformBibleHtml - fixIrregularTables', () => { + it('should set colspan on single-cell rows in multi-column tables', () => { + const html = ` +
+ + + +
Header Col 1Header Col 2
Single cell spanning full width
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + const doc = new DOMParser().parseFromString(result.html, 'text/html'); + const singleCell = doc.querySelector('tr:nth-child(2) td'); + expect(singleCell).not.toBeNull(); + expect(singleCell!.getAttribute('colspan')).toBe('2'); + }); +}); + +describe('transformBibleHtml - data attributes', () => { + it('should include data-verse-footnote attribute with verse key', () => { + const html = ` +
+
+ 1Verse textNote. +
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + expect(result.html).toContain('data-verse-footnote="1"'); + }); + + it('should include data-verse-footnote-content attribute with footnote HTML', () => { + const html = ` +
+
+ 1Verse textSee Rashi. +
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + expect(result.html).toContain('data-verse-footnote-content='); + expect(result.html).toContain('See Rashi'); + }); + + it('should preserve footnote HTML structure in data-verse-footnote-content', () => { + const html = ` +
+
+ 1TextEmphasized note.
+
+ `; + + const result = transformBibleHtml(html, createAdapters()); + + const doc = new DOMParser().parseFromString(result.html, 'text/html'); + const anchor = doc.querySelector('[data-verse-footnote="1"]'); + expect(anchor).not.toBeNull(); + const content = anchor!.getAttribute('data-verse-footnote-content'); + expect(content).toContain(''); + expect(content).toContain('Emphasized'); + }); +}); + +describe('transformBibleHtmlForBrowser', () => { + it('should transform HTML using native DOMParser', () => { + const html = ` +
+
+ 1Verse textNote. +
+
+ `; + + const result = transformBibleHtmlForBrowser(html); + + expect(result.html).toBeDefined(); + expect(result.html).toContain('data-verse-footnote="1"'); + }); + + it('should return same result as transformBibleHtml with browser adapters', () => { + const html = ` +
+
+ 1Verse text. +
+
+ `; + + const result1 = transformBibleHtmlForBrowser(html); + const result2 = transformBibleHtml(html, createAdapters()); + + expect(result1.html).toBe(result2.html); + }); + + it('should handle empty HTML', () => { + const result = transformBibleHtmlForBrowser(''); + + expect(result.html).toBeDefined(); + }); +}); + +describe('transformBibleHtml - return type', () => { + it('should return html property', () => { + const html = '
Test
'; + const result = transformBibleHtml(html, createAdapters()); + + expect(result).toHaveProperty('html'); + expect(typeof result.html).toBe('string'); + }); + + it('should not have notes or rawHtml properties', () => { + const html = '
Test
'; + const result = transformBibleHtml(html, createAdapters()); + + expect(result).not.toHaveProperty('notes'); + expect(result).not.toHaveProperty('rawHtml'); + }); +}); diff --git a/packages/core/src/bible-html-transformer.ts b/packages/core/src/bible-html-transformer.ts new file mode 100644 index 00000000..727ea01b --- /dev/null +++ b/packages/core/src/bible-html-transformer.ts @@ -0,0 +1,317 @@ +const NON_BREAKING_SPACE = '\u00A0'; + +const FOOTNOTE_KEY_ATTR = 'data-footnote-key'; + +const NEEDS_SPACE_BEFORE = /^[^\s.,;:!?)}\]'"»›]/; + +/** + * Options for transforming Bible HTML. Requires DOM adapter functions + * to parse and serialize HTML, making the transformer runtime-agnostic. + */ +export type TransformBibleHtmlOptions = { + /** Parses an HTML string into a DOM Document */ + parseHtml: (html: string) => Document; + /** Serializes a Document back to an HTML string */ + serializeHtml: (doc: Document) => string; +}; + +/** + * The result of transforming Bible HTML. + * + * The returned HTML is self-contained — footnote data is embedded as attributes: + * - `data-verse-footnote="KEY"` marks the footnote position + * - `data-verse-footnote-content="HTML"` contains the footnote's inner HTML + * + * Consumers can access verse context by walking up from a footnote anchor + * to `.closest('.yv-v[v]')`. 
+ */ +export type TransformedBibleHtml = { + /** The transformed HTML with footnotes replaced by marker elements */ + html: string; +}; + +function wrapVerseContent(doc: Document): void { + function wrapParagraphContent(doc: Document, paragraph: Element, verseNum: string): void { + const children = Array.from(paragraph.childNodes); + if (children.length === 0) return; + + const wrapper = doc.createElement('span'); + wrapper.className = 'yv-v'; + wrapper.setAttribute('v', verseNum); + + const firstChild = children[0]; + if (firstChild) { + paragraph.insertBefore(wrapper, firstChild); + } + children.forEach((child) => { + wrapper.appendChild(child); + }); + } + + function wrapParagraphsUntilBoundary( + doc: Document, + verseNum: string, + startParagraph: Element | null, + endParagraph?: Element | null, + ): void { + if (!startParagraph) return; + + let currentP: Element | null = startParagraph.nextElementSibling; + + while (currentP && currentP !== endParagraph) { + const isHeading = + currentP.classList.contains('yv-h') || + currentP.matches('.s1, .s2, .s3, .s4, .ms, .ms1, .ms2, .ms3, .ms4, .mr, .sp, .sr, .qa, .r'); + if (isHeading) { + currentP = currentP.nextElementSibling; + continue; + } + + if (currentP.querySelector('.yv-v[v]')) break; + + if (currentP.classList.contains('p') || currentP.tagName === 'P') { + wrapParagraphContent(doc, currentP, verseNum); + } + + currentP = currentP.nextElementSibling; + } + } + + function handleParagraphWrapping( + doc: Document, + currentParagraph: Element | null, + nextParagraph: Element | null, + verseNum: string, + ): void { + if (!currentParagraph) return; + + if (!nextParagraph) { + wrapParagraphsUntilBoundary(doc, verseNum, currentParagraph); + return; + } + + if (currentParagraph !== nextParagraph) { + wrapParagraphsUntilBoundary(doc, verseNum, currentParagraph, nextParagraph); + } + } + + function processVerseMarker(marker: Element, index: number, markers: Element[]): void { + const verseNum = 
marker.getAttribute('v'); + if (!verseNum) return; + + const nextMarker = markers[index + 1]; + + const nodesToWrap = collectNodesBetweenMarkers(marker, nextMarker); + if (nodesToWrap.length === 0) return; + + const currentParagraph = marker.closest('.p, p, div.p'); + const nextParagraph = nextMarker?.closest('.p, p, div.p') || null; + const doc = marker.ownerDocument; + + wrapNodesInVerse(marker, verseNum, nodesToWrap); + handleParagraphWrapping(doc, currentParagraph, nextParagraph, verseNum); + } + + function wrapNodesInVerse(marker: Element, verseNum: string, nodes: Node[]): void { + const wrapper = marker.ownerDocument.createElement('span'); + wrapper.className = 'yv-v'; + wrapper.setAttribute('v', verseNum); + + const firstNode = nodes[0]; + if (firstNode) { + marker.parentNode?.insertBefore(wrapper, firstNode); + } + + nodes.forEach((node) => { + wrapper.appendChild(node); + }); + marker.remove(); + } + + function shouldStopCollecting(node: Node, endMarker: Element | undefined): boolean { + if (node === endMarker) return true; + if (endMarker && node.nodeType === 1 && (node as Element).contains(endMarker)) return true; + return false; + } + + function shouldSkipNode(node: Node): boolean { + return node.nodeType === 1 && (node as Element).classList.contains('yv-h'); + } + + function collectNodesBetweenMarkers( + startMarker: Element, + endMarker: Element | undefined, + ): Node[] { + const nodes: Node[] = []; + let current: Node | null = startMarker.nextSibling; + + while (current && !shouldStopCollecting(current, endMarker)) { + if (shouldSkipNode(current)) { + current = current.nextSibling; + continue; + } + nodes.push(current); + current = current.nextSibling; + } + + return nodes; + } + + const verseMarkers = Array.from(doc.querySelectorAll('.yv-v[v]')); + verseMarkers.forEach(processVerseMarker); +} + +function assignFootnoteKeys(doc: Document): void { + let introIdx = 0; + doc.querySelectorAll('.yv-n.f').forEach((fn) => { + const verseNum = 
fn.closest('.yv-v[v]')?.getAttribute('v'); + fn.setAttribute(FOOTNOTE_KEY_ATTR, verseNum ?? `intro-${introIdx++}`); + }); +} + +function replaceFootnotesWithAnchors(doc: Document, footnotes: Element[]): void { + for (const fn of footnotes) { + const key = fn.getAttribute(FOOTNOTE_KEY_ATTR); + if (!key) continue; + + const prev = fn.previousSibling; + const next = fn.nextSibling; + + const prevText = prev?.textContent ?? ''; + const nextText = next?.textContent ?? ''; + + const prevNeedsSpace = prevText.length > 0 && !/\s$/.test(prevText); + const nextNeedsSpace = nextText.length > 0 && NEEDS_SPACE_BEFORE.test(nextText); + + if (prevNeedsSpace && nextNeedsSpace && fn.parentNode) { + fn.parentNode.insertBefore(doc.createTextNode(' '), fn); + } + + const anchor = doc.createElement('span'); + anchor.setAttribute('data-verse-footnote', key); + anchor.setAttribute('data-verse-footnote-content', fn.innerHTML); + fn.replaceWith(anchor); + } +} + +function addNbspToVerseLabels(doc: Document): void { + doc.querySelectorAll('.yv-vlbl').forEach((label) => { + const text = label.textContent || ''; + if (!text.endsWith(NON_BREAKING_SPACE)) { + label.textContent = text + NON_BREAKING_SPACE; + } + }); +} + +function fixIrregularTables(doc: Document): void { + doc.querySelectorAll('table').forEach((table) => { + const rows = table.querySelectorAll('tr'); + if (rows.length === 0) return; + + let maxColumns = 0; + rows.forEach((row) => { + let count = 0; + row.querySelectorAll('td, th').forEach((cell) => { + count += parseInt(cell.getAttribute('colspan') || '1', 10); + }); + maxColumns = Math.max(maxColumns, count); + }); + + if (maxColumns > 1) { + rows.forEach((row) => { + const cells = row.querySelectorAll('td, th'); + if (cells.length === 1) { + const existing = parseInt(cells[0]!.getAttribute('colspan') || '1', 10); + if (existing < maxColumns) { + cells[0]!.setAttribute('colspan', maxColumns.toString()); + } + } + }); + } + }); +} + +/** + * Transforms Bible HTML by cleaning up 
verse structure, extracting footnotes, + * and replacing them with self-contained anchor elements. + * + * Footnote data is embedded directly in the HTML via attributes: + * - `data-verse-footnote="KEY"` — the footnote key (verse number or `intro-N`) + * - `data-verse-footnote-content="HTML"` — the footnote's inner HTML content + * + * Verse context is available by walking up from a footnote anchor: + * `anchor.closest('.yv-v[v]')` returns the verse wrapper (null for intro footnotes). + * + * @param html - The raw Bible HTML from the YouVersion API + * @param options - DOM adapter options for parsing and serializing HTML + * @returns The transformed HTML + * + * @example + * ```ts + * import { transformBibleHtml } from '@youversion/platform-core'; + * + * const result = transformBibleHtml(rawHtml, { + * parseHtml: (html) => new DOMParser().parseFromString(html, 'text/html'), + * serializeHtml: (doc) => doc.body.innerHTML, + * }); + * + * console.log(result.html); // Clean HTML with self-contained footnote anchors + * ``` + */ +export function transformBibleHtml( + html: string, + options: TransformBibleHtmlOptions, +): TransformedBibleHtml { + const doc = options.parseHtml(html); + + wrapVerseContent(doc); + assignFootnoteKeys(doc); + + const footnotes = Array.from(doc.querySelectorAll('.yv-n.f')); + replaceFootnotesWithAnchors(doc, footnotes); + + addNbspToVerseLabels(doc); + fixIrregularTables(doc); + + const transformedHtml = options.serializeHtml(doc); + return { html: transformedHtml }; +} + +/** + * Transforms Bible HTML for browser environments using the native DOMParser API. 
+ * + * @param html - The raw Bible HTML from the YouVersion API + * @returns The transformed HTML + */ +export function transformBibleHtmlForBrowser(html: string): TransformedBibleHtml { + if (typeof globalThis.DOMParser === 'undefined') { + return { html }; + } + + return transformBibleHtml(html, { + parseHtml: (h) => new DOMParser().parseFromString(h, 'text/html'), + serializeHtml: (doc) => doc.body.innerHTML, + }); +} + +/** + * Transforms Bible HTML for Node.js environments using linkedom. + * + * linkedom requires HTML to be wrapped in body tags for `doc.body.innerHTML` + * to work correctly, so this function handles that wrapping automatically. + * + * @param html - The raw Bible HTML from the YouVersion API + * @returns The transformed HTML + */ +export function transformBibleHtmlForNode(html: string): TransformedBibleHtml { + // eslint-disable-next-line @typescript-eslint/no-require-imports + const { DOMParser } = require('linkedom') as { + DOMParser: new () => { parseFromString(html: string, type: string): Document }; + }; + + return transformBibleHtml(html, { + parseHtml: (h: string) => + new DOMParser().parseFromString(`<html><body>${h}</body></html>`, 'text/html'), + serializeHtml: (doc: Document) => doc.body.innerHTML, + }); +} diff --git a/packages/core/src/bible.ts b/packages/core/src/bible.ts index 62a04b9f..918fdf3a 100644 --- a/packages/core/src/bible.ts +++ b/packages/core/src/bible.ts @@ -235,12 +235,18 @@ export class BibleClient { /** * Fetches a passage (range of verses) from the Bible using the passages endpoint. * This is the new API format that returns HTML-formatted content. + * + * Note: The HTML returned from the API contains inline footnote content that should + * be transformed before rendering. Use `transformBibleHtml()` or + * `transformBibleHtmlForBrowser()` to clean up the HTML and extract footnotes. + * + * @param versionId The version ID. * @param usfm The USFM reference (e.g., "JHN.3.1-2", "GEN.1", "JHN.3.16"). 
* @param format The format to return ("html" or "text", default: "html"). * @param include_headings Whether to include headings in the content. * @param include_notes Whether to include notes in the content. * @returns The requested BiblePassage object with HTML content. + * * @example * ```ts * // Get a single verse @@ -251,6 +257,10 @@ export class BibleClient { * * // Get an entire chapter * const chapter = await bibleClient.getPassage(3034, "GEN.1"); + * + * // Transform HTML before rendering + * const passage = await bibleClient.getPassage(3034, "JHN.3.16", "html", true, true); + * const transformed = transformBibleHtmlForBrowser(passage.content); * ``` */ async getPassage( diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 15fb7d91..a5c01f95 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -15,3 +15,10 @@ export * from './YouVersionPlatformConfiguration'; export * from './types'; export * from './utils/constants'; export { getAdjacentChapter } from './getAdjacentChapter'; +export { + transformBibleHtml, + transformBibleHtmlForBrowser, + transformBibleHtmlForNode, + type TransformBibleHtmlOptions, + type TransformedBibleHtml, +} from './bible-html-transformer'; diff --git a/packages/ui/package.json b/packages/ui/package.json index 3f2e1eda..b3ac4eb6 100644 --- a/packages/ui/package.json +++ b/packages/ui/package.json @@ -53,7 +53,6 @@ "@youversion/platform-react-hooks": "workspace:*", "class-variance-authority": "0.7.1", "clsx": "2.1.1", - "isomorphic-dompurify": "2.23.0", "tailwind-merge": "3.3.1", "tw-animate-css": "1.4.0" }, diff --git a/packages/ui/src/components/bible-reader.stories.tsx b/packages/ui/src/components/bible-reader.stories.tsx index 015e7e48..a1bff3ca 100644 --- a/packages/ui/src/components/bible-reader.stories.tsx +++ b/packages/ui/src/components/bible-reader.stories.tsx @@ -677,9 +677,8 @@ export const VersionButtonLoadingStates: Story = { ), play: async () => { - // The version button 
should exist in the toolbar (label varies by loading state) - const versionButton = screen.getByRole('button', { name: /bible version/i }); - await expect(versionButton).toBeInTheDocument(); + // Wait for the toolbar to mount, then capture the button in loading state + const versionButton = await screen.findByRole('button', { name: /bible version/i }); // The delayed MSW handler guarantees the loading state is visible const spinner = versionButton.querySelector('[role="status"]'); diff --git a/packages/ui/src/components/verse.test.tsx b/packages/ui/src/components/verse.test.tsx index 3a7c127d..935d18e1 100644 --- a/packages/ui/src/components/verse.test.tsx +++ b/packages/ui/src/components/verse.test.tsx @@ -19,52 +19,6 @@ vi.mock('@youversion/platform-react-hooks', async () => { describe('Verse.Html - XSS Protection', () => { describe('DOMPurify sanitization', () => { - it('should remove script tags from HTML', async () => { - const maliciousHtml = '

Safe text

'; - - const { container } = render(); - - await waitFor(() => { - const scriptTags = container.querySelectorAll('script'); - expect(scriptTags).toHaveLength(0); - }); - }); - - it('should remove inline event handlers (onerror)', async () => { - const maliciousHtml = ''; - - const { container } = render(); - - await waitFor(() => { - const img = container.querySelector('img'); - expect(img).not.toHaveAttribute('onerror'); - }); - }); - - it('should remove inline event handlers (onclick)', async () => { - const maliciousHtml = '

Click me

'; - - const { container } = render(); - - await waitFor(() => { - const paragraph = container.querySelector('p'); - expect(paragraph).not.toBeNull(); - expect(paragraph?.getAttribute('onclick')).toBeNull(); - expect(paragraph?.textContent).toBe('Click me'); - }); - }); - - it('should remove javascript: URLs', async () => { - const maliciousHtml = 'Link'; - - const { container } = render(); - - await waitFor(() => { - const link = container.querySelector('a'); - expect(link).not.toHaveAttribute('href'); - }); - }); - it('should preserve safe HTML paragraph tags', async () => { const safeHtml = '

Safe Bible content

'; diff --git a/packages/ui/src/components/verse.tsx b/packages/ui/src/components/verse.tsx index 40ff1afc..21fbb4d0 100644 --- a/packages/ui/src/components/verse.tsx +++ b/packages/ui/src/components/verse.tsx @@ -16,21 +16,29 @@ import { Footnote } from '@/components/icons/footnote'; import { LoaderIcon } from '@/components/icons/loader'; import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'; import { cn } from '@/lib/utils'; -import { - type FontFamily, - getFootnoteMarker, - transformBibleHtml, - type VerseNotes, -} from '@/lib/verse-html-utils'; +import { type FontFamily } from '@/lib/verse-html-utils'; +import { transformBibleHtmlForBrowser } from '@youversion/platform-core'; -type TransformedBibleHtml = { - html: string; - notes: Record; -}; +const LETTERS = 'abcdefghijklmnopqrstuvwxyz'; + +function getFootnoteMarker(index: number): string { + const base = LETTERS.length; + if (base === 0) return String(index + 1); + let value = index; + let marker = ''; + do { + marker = LETTERS[value % base] + marker; + value = Math.floor(value / base) - 1; + } while (value >= 0); + return marker; +} -type VerseFootnotePlaceholder = { +type VerseFootnoteData = { verseNum: string; el: Element; + notes: string[]; + verseHtml: string; + hasVerseContext: boolean; }; type PassageResult = ReturnType; @@ -41,20 +49,50 @@ export type BibleTextViewPassageState = { error: PassageResult['error']; }; +/** + * Builds verse HTML for the footnote popover by cloning verse wrappers from the live DOM. + * Strips headings and verse labels, replaces footnote anchors with superscript markers. 
+ */ +function getVerseHtmlFromDom(container: HTMLElement, verseNum: string): string { + const wrappers = container.querySelectorAll(`.yv-v[v="${verseNum}"]`); + if (!wrappers.length) return ''; + + const parts: string[] = []; + let noteIdx = 0; + + wrappers.forEach((wrapper, i) => { + if (i > 0) parts.push(' '); + const clone = wrapper.cloneNode(true) as Element; + clone.querySelectorAll('.yv-h, .yv-vlbl').forEach((el) => el.remove()); + clone.querySelectorAll('[data-verse-footnote]').forEach((anchor) => { + const sup = document.createElement('sup'); + sup.className = 'yv:text-muted-foreground'; + sup.textContent = getFootnoteMarker(noteIdx++); + anchor.replaceWith(sup); + }); + parts.push(clone.innerHTML); + }); + + return parts.join(''); +} + const VerseFootnoteButton = memo(function VerseFootnoteButton({ verseNum, - verseNotes, + notes, + verseHtml, + hasVerseContext, reference, fontSize, theme, }: { verseNum: string; - verseNotes: VerseNotes; + notes: string[]; + verseHtml: string; + hasVerseContext: boolean; reference?: string; fontSize?: number; theme: 'light' | 'dark'; }) { - const { hasVerseContext } = verseNotes; const verseReference = reference ? `${reference}:${verseNum}` : `Verse ${verseNum}`; return ( @@ -78,13 +116,13 @@ const VerseFootnoteButton = memo(function VerseFootnoteButton({
)}
    - {verseNotes.notes.map((note, index) => { + {notes.map((note, index) => { const marker = getFootnoteMarker(index); return (
  • {marker}. - {/** biome-ignore lint/security/noDangerouslySetInnerHtml: HTML has been run through DOMPurify and is safe */} + {/** biome-ignore lint/security/noDangerouslySetInnerHtml: Bible footnote HTML comes from our YouVersion APIs and is safe */}
  • ); @@ -127,7 +165,6 @@ function VerseUnavailableMessage(): React.ReactElement { function BibleTextHtml({ html, - notes, reference, fontSize, theme, @@ -136,7 +173,6 @@ function BibleTextHtml({ highlightedVerses = {}, }: { html: string; - notes: Record; reference?: string; fontSize?: number; theme?: 'light' | 'dark'; @@ -145,23 +181,40 @@ function BibleTextHtml({ highlightedVerses?: Record; }) { const contentRef = useRef(null); - const [placeholders, setPlaceholders] = useState([]); + const [footnoteData, setFootnoteData] = useState([]); const providerTheme = useTheme(); const currentTheme = theme || providerTheme; - // Set innerHTML manually so the DOM nodes persist across renders - // (portals need stable element references). + // Set innerHTML and extract footnote data from the DOM. + // Portals need stable element references, so we set innerHTML manually. useLayoutEffect(() => { if (!contentRef.current) return; contentRef.current.innerHTML = html; const anchors = contentRef.current.querySelectorAll('[data-verse-footnote]'); - const result: VerseFootnotePlaceholder[] = []; + + // First pass: collect all notes per verse key + const notesByKey = new Map(); + anchors.forEach((el) => { + const verseNum = el.getAttribute('data-verse-footnote'); + if (!verseNum) return; + const content = el.getAttribute('data-verse-footnote-content') || ''; + const existing = notesByKey.get(verseNum); + if (existing) existing.push(content); + else notesByKey.set(verseNum, [content]); + }); + + // Second pass: create one entry per anchor (each anchor gets its own portal) + const result: VerseFootnoteData[] = []; anchors.forEach((el) => { const verseNum = el.getAttribute('data-verse-footnote'); - if (verseNum) result.push({ verseNum, el }); + if (!verseNum) return; + const allNotes = notesByKey.get(verseNum) || []; + const hasVerseContext = el.closest('.yv-v[v]') !== null; + const verseHtml = hasVerseContext ? 
getVerseHtmlFromDom(contentRef.current!, verseNum) : ''; + result.push({ verseNum, el, notes: allNotes, verseHtml, hasVerseContext }); }); - setPlaceholders(result); + setFootnoteData(result); }, [html]); // Toggle selected/highlighted classes on verse wrappers. @@ -192,21 +245,21 @@ function BibleTextHtml({ return ( <>
    - {placeholders.map(({ verseNum, el }, index) => { - const verseNotes = notes[verseNum]; - if (!verseNotes) return null; - return createPortal( + {footnoteData.map(({ verseNum, el, notes, verseHtml, hasVerseContext }, index) => + createPortal( , el, `${verseNum}-${index}`, - ); - })} + ), + )} ); } @@ -295,7 +348,7 @@ export const Verse = { }: VerseHtmlProps, ref, ): ReactNode => { - const transformedData = useMemo(() => transformBibleHtml(html), [html]); + const transformedHtml = useMemo(() => transformBibleHtmlForBrowser(html).html, [html]); const providerTheme = useTheme(); const currentTheme = theme || providerTheme; @@ -315,8 +368,7 @@ export const Verse = { data-selectable={onVerseSelect ? 'true' : 'false'} > { - it('should return notes keyed by "intro-0", "intro-1" for orphaned footnotes', () => { +describe('transformBibleHtmlForBrowser - intro chapter footnotes', () => { + it('should create data-verse-footnote anchors with intro keys for orphaned footnotes', () => { const html = `
    Some intro textFirst note and more textSecond note.
    `; - const result = transformBibleHtml(html); + const result = transformBibleHtmlForBrowser(html); - expect(result.notes['intro-0']).toBeDefined(); - expect(result.notes['intro-1']).toBeDefined(); - expect(Object.keys(result.notes)).toHaveLength(2); + expect(result.html).toContain('data-verse-footnote="intro-0"'); + expect(result.html).toContain('data-verse-footnote="intro-1"'); + expect(result.html).not.toContain('yv-n f'); }); - it('should set verseHtml to empty string for intro footnotes', () => { + it('should preserve footnote content in data-verse-footnote-content', () => { const html = `
    Text with aA footnote note.
    `; - const result = transformBibleHtml(html); + const result = transformBibleHtmlForBrowser(html); - expect(result.notes['intro-0']!.verseHtml).toBe(''); - expect(result.notes['intro-0']!.hasVerseContext).toBe(false); + expect(result.html).toContain('data-verse-footnote-content='); + expect(result.html).toContain('A footnote'); }); it('should extract correct note content for intro footnotes', () => { @@ -39,24 +39,9 @@ describe('transformBibleHtml - intro chapter footnotes', () => {
    `; - const result = transformBibleHtml(html); - - expect(result.notes['intro-0']!.notes).toHaveLength(1); - expect(result.notes['intro-0']!.notes[0]).toContain('See Rashi'); - }); - - it('should create data-verse-footnote anchors with intro keys in the output HTML', () => { - const html = ` -
    -
    BeforeNote A afterNote B.
    -
    - `; - - const result = transformBibleHtml(html); + const result = transformBibleHtmlForBrowser(html); - expect(result.html).toContain('data-verse-footnote="intro-0"'); - expect(result.html).toContain('data-verse-footnote="intro-1"'); - expect(result.html).not.toContain('yv-n f'); + expect(result.html).toContain('See Rashi'); }); it('should not interfere with regular verse footnotes when mixed', () => { @@ -69,17 +54,12 @@ describe('transformBibleHtml - intro chapter footnotes', () => {
`; - const result = transformBibleHtml(html); + const result = transformBibleHtmlForBrowser(html); - expect(result.notes['intro-0']).toBeDefined(); - expect(result.notes['intro-0']!.verseHtml).toBe(''); - expect(result.notes['intro-0']!.hasVerseContext).toBe(false); - expect(result.notes['intro-0']!.notes[0]).toContain('Intro note'); - - expect(result.notes['1']).toBeDefined(); - expect(result.notes['1']!.verseHtml).not.toBe(''); - expect(result.notes['1']!.hasVerseContext).toBe(true); - expect(result.notes['1']!.notes[0]).toContain('Verse note'); + expect(result.html).toContain('data-verse-footnote="intro-0"'); + expect(result.html).toContain('data-verse-footnote="1"'); + expect(result.html).toContain('Intro note'); + expect(result.html).toContain('Verse note'); }); it('should insert space when orphaned footnote is between two words', () => { @@ -89,7 +69,7 @@ describe('transformBibleHtml - intro chapter footnotes', () => { `; - const result = transformBibleHtml(html); + const result = transformBibleHtmlForBrowser(html); expect(result.html).toContain('overcome '); expect(result.html).not.toMatch(/overcome { `; - const result = transformBibleHtml(html); + const result = transformBibleHtmlForBrowser(html); expect(result.html).not.toContain('overcome .'); }); diff --git a/packages/ui/src/lib/verse-html-utils.ts b/packages/ui/src/lib/verse-html-utils.ts index 08a4a300..51839218 100644 --- a/packages/ui/src/lib/verse-html-utils.ts +++ b/packages/ui/src/lib/verse-html-utils.ts @@ -1,413 +1,3 @@ -import DOMPurify from 'isomorphic-dompurify'; - -const NON_BREAKING_SPACE = '\u00A0'; - -const LETTERS = 'abcdefghijklmnopqrstuvwxyz'; - -/** - * Converts a 0-based footnote index into an alphabetic marker. - * - * Examples with LETTERS = "abcdefghijklmnopqrstuvwxyz": - * 0 -> "a", 25 -> "z", 26 -> "aa", 27 -> "ab" - * - * This uses spreadsheet-style indexing and derives its base from - * LETTERS.length so there are no hardcoded numeric assumptions. 
- */ -export function getFootnoteMarker(index: number): string { - const base = LETTERS.length; - if (base === 0) return String(index + 1); - - let value = index; - let marker = ''; - - do { - marker = LETTERS[value % base] + marker; - value = Math.floor(value / base) - 1; - } while (value >= 0); - - return marker; -} - -export type VerseNotes = { - verseHtml: string; - notes: string[]; - hasVerseContext: boolean; -}; - export const INTER_FONT = '"Inter", sans-serif' as const; export const SOURCE_SERIF_FONT = '"Source Serif 4", serif' as const; export type FontFamily = typeof INTER_FONT | typeof SOURCE_SERIF_FONT | (string & {}); - -/** - * Wraps verse content in `yv-v` elements for easier CSS targeting. - * - * Transforms empty verse markers into wrapping containers. When a verse spans - * multiple paragraphs, creates duplicate wrappers in each paragraph (Bible.com pattern). - * - * Before: 1Text... - * After: 1Text... - * - * This enables simple CSS selectors like `.yv-v[v="1"] { background: yellow; }` - */ -function wrapVerseContent(doc: Document): void { - /** - * Wraps all content in a paragraph with a verse span. - */ - function wrapParagraphContent(doc: Document, paragraph: Element, verseNum: string): void { - const children = Array.from(paragraph.childNodes); - if (children.length === 0) return; - - const wrapper = doc.createElement('span'); - wrapper.className = 'yv-v'; - wrapper.setAttribute('v', verseNum); - - const firstChild = children[0]; - if (firstChild) { - paragraph.insertBefore(wrapper, firstChild); - } - children.forEach((child) => { - wrapper.appendChild(child); - }); - } - - /** - * Wraps paragraphs between startParagraph and an optional endParagraph boundary. - * If no endParagraph is provided, wraps until a verse marker is found or siblings are exhausted. 
- */ - function wrapParagraphsUntilBoundary( - doc: Document, - verseNum: string, - startParagraph: Element | null, - endParagraph?: Element | null, - ): void { - if (!startParagraph) return; - - let currentP: Element | null = startParagraph.nextElementSibling; - - while (currentP && currentP !== endParagraph) { - // Skip heading elements - these are structural, not verse content - // See iOS implementation: https://github.com/youversion/platform-sdk-swift/blob/main/Sources/YouVersionPlatformUI/Views/Rendering/BibleVersionRendering.swift - const isHeading = - currentP.classList.contains('yv-h') || - currentP.matches('.s1, .s2, .s3, .s4, .ms, .ms1, .ms2, .ms3, .ms4, .mr, .sp, .sr, .qa, .r'); - if (isHeading) { - currentP = currentP.nextElementSibling; - continue; - } - - if (currentP.querySelector('.yv-v[v]')) break; - - if ( - currentP.classList.contains('p') || - currentP.tagName === 'P' - ) { - wrapParagraphContent(doc, currentP, verseNum); - } - - currentP = currentP.nextElementSibling; - } - } - - function handleParagraphWrapping( - doc: Document, - currentParagraph: Element | null, - nextParagraph: Element | null, - verseNum: string, - ): void { - if (!currentParagraph) return; - - if (!nextParagraph) { - wrapParagraphsUntilBoundary(doc, verseNum, currentParagraph); - return; - } - - if (currentParagraph !== nextParagraph) { - wrapParagraphsUntilBoundary(doc, verseNum, currentParagraph, nextParagraph); - } - } - - function processVerseMarker(marker: Element, index: number, markers: Element[]): void { - const verseNum = marker.getAttribute('v'); - if (!verseNum) return; - - const nextMarker = markers[index + 1]; - - const nodesToWrap = collectNodesBetweenMarkers(marker, nextMarker); - if (nodesToWrap.length === 0) return; - - const currentParagraph = marker.closest('.p, p, div.p'); - const nextParagraph = nextMarker?.closest('.p, p, div.p') || null; - const doc = marker.ownerDocument; - - wrapNodesInVerse(marker, verseNum, nodesToWrap); - 
handleParagraphWrapping(doc, currentParagraph, nextParagraph, verseNum); - } - - function wrapNodesInVerse(marker: Element, verseNum: string, nodes: Node[]): void { - const wrapper = marker.ownerDocument.createElement('span'); - wrapper.className = 'yv-v'; - wrapper.setAttribute('v', verseNum); - - const firstNode = nodes[0]; - if (firstNode) { - marker.parentNode?.insertBefore(wrapper, firstNode); - } - - nodes.forEach((node) => { - wrapper.appendChild(node); - }); - marker.remove(); - } - - function shouldStopCollecting(node: Node, endMarker: Element | undefined): boolean { - if (node === endMarker) return true; - if (endMarker && node instanceof Element && node.contains(endMarker)) return true; - return false; - } - - function shouldSkipNode(node: Node): boolean { - return node instanceof Element && node.classList.contains('yv-h'); - } - - function collectNodesBetweenMarkers(startMarker: Element, endMarker: Element | undefined): Node[] { - const nodes: Node[] = []; - let current: Node | null = startMarker.nextSibling; - - while (current && !shouldStopCollecting(current, endMarker)) { - if (shouldSkipNode(current)) { - current = current.nextSibling; - continue; - } - nodes.push(current); - current = current.nextSibling; - } - - return nodes; - } - - const verseMarkers = Array.from(doc.querySelectorAll('.yv-v[v]')); - verseMarkers.forEach(processVerseMarker); -} - -/** - * Matches text that needs a space inserted before it (not whitespace or punctuation). - * Used when replacing footnotes to prevent word concatenation. - */ -const NEEDS_SPACE_BEFORE = /^[^\s.,;:!?)}\]'"»›]/; - -/** - * Builds the verse text shown inside the footnote popover. - * - * Works on clones of the verse wrappers so it never mutates the real DOM. - * Strips headings and labels, replaces each footnote with a superscript - * marker (a, b, … z, aa, ab, …). 
- */ -function buildVerseHtml(wrappers: Element[]): string { - const parts: string[] = []; - let noteIdx = 0; - - for (let i = 0; i < wrappers.length; i++) { - if (i > 0) parts.push(' '); - - const clone = wrappers[i]!.cloneNode(true) as Element; - const ownerDoc = wrappers[i]!.ownerDocument; - - // Remove structural elements that shouldn't appear in the popover. - clone.querySelectorAll('.yv-h, .yv-vlbl').forEach((el) => el.remove()); - - // Replace each footnote with a superscript marker. - clone.querySelectorAll('.yv-n.f').forEach((fn) => { - const marker = ownerDoc.createElement('sup'); - marker.className = 'yv:text-muted-foreground'; - marker.textContent = getFootnoteMarker(noteIdx++); - fn.replaceWith(marker); - }); - - parts.push(clone.innerHTML); - } - - return parts.join(''); -} - -/** - * Assigns a stable key to every footnote element in document order. - * - * Verse-bound footnotes get the verse number; orphaned footnotes (intro - * chapters with no `.yv-v[v]` ancestor) get synthetic keys `"intro-0"`, etc. - * Called once so both extraction and anchor replacement read the same key. - */ -const FOOTNOTE_KEY_ATTR = 'data-footnote-key'; - -function assignFootnoteKeys(doc: Document): void { - let introIdx = 0; - doc.querySelectorAll('.yv-n.f').forEach((fn) => { - const verseNum = fn.closest('.yv-v[v]')?.getAttribute('v'); - fn.setAttribute(FOOTNOTE_KEY_ATTR, verseNum ?? `intro-${introIdx++}`); - }); -} - -/** - * Replaces each footnote element in the real DOM with a clean anchor span - * that React portals can target. - * - * Also inserts a space when the removal of the footnote would cause two - * adjacent words to merge (e.g., "overcome" + "it" → "overcomeit"). - */ -function replaceFootnotesWithAnchors(doc: Document, footnotes: Element[]): void { - for (const fn of footnotes) { - const key = fn.getAttribute(FOOTNOTE_KEY_ATTR)!; - - const prev = fn.previousSibling; - const next = fn.nextSibling; - - const prevText = prev?.textContent ?? 
''; - const nextText = next?.textContent ?? ''; - - const prevNeedsSpace = prevText.length > 0 && !/\s$/.test(prevText); - const nextNeedsSpace = nextText.length > 0 && NEEDS_SPACE_BEFORE.test(nextText); - - if (prevNeedsSpace && nextNeedsSpace && fn.parentNode) { - fn.parentNode.insertBefore(doc.createTextNode(' '), fn); - } - - const anchor = doc.createElement('span'); - anchor.setAttribute('data-verse-footnote', key); - fn.replaceWith(anchor); - } -} - -/** - * Extracts footnotes from wrapped verse HTML and prepares data for footnote popovers. - * - * Assumes verses are already wrapped in `.yv-v[v]` elements (by wrapVerseContent) - * and footnote keys assigned (by assignFootnoteKeys). - * - * Two-phase approach: - * 1. Build popover data (verseHtml + note content) using cloned DOM — no side effects. - * 2. Replace footnotes in the real DOM with clean anchor spans for React portals. - * - * @returns Notes data for popovers, keyed by verse number (or synthetic intro key). - */ -function extractNotesFromWrappedHtml(doc: Document): Record { - const footnotes = Array.from(doc.querySelectorAll('.yv-n.f')); - if (!footnotes.length) return {}; - - // Group footnotes by their assigned key. - const footnotesByKey = new Map(); - for (const fn of footnotes) { - const key = fn.getAttribute(FOOTNOTE_KEY_ATTR)!; - let arr = footnotesByKey.get(key); - if (!arr) { - arr = []; - footnotesByKey.set(key, arr); - } - arr.push(fn); - } - - // Build verse-wrapper lookup. - const wrappersByVerse = new Map(); - doc.querySelectorAll('.yv-v[v]').forEach((el) => { - const verseNum = el.getAttribute('v'); - if (!verseNum) return; - const arr = wrappersByVerse.get(verseNum); - if (arr) arr.push(el); - else wrappersByVerse.set(verseNum, [el]); - }); - - // Phase 1: Extract data (cloned DOM — no mutations). - const notes: Record = {}; - for (const [key, fns] of footnotesByKey) { - const wrappers = wrappersByVerse.get(key); - notes[key] = { - verseHtml: wrappers ? 
buildVerseHtml(wrappers) : '', - notes: fns.map((fn) => fn.innerHTML), - hasVerseContext: !!wrappers, - }; - } - - // Phase 2: Replace footnotes with portal anchors (real DOM mutation). - replaceFootnotesWithAnchors(doc, footnotes); - - return notes; -} - -/** - * Adds non-breaking space after verse labels for better copy/paste - * (e.g., "3For God so loved..." → "3 For God so loved..."). - */ -function addNbspToVerseLabels(doc: Document): void { - doc.querySelectorAll('.yv-vlbl').forEach((label) => { - const text = label.textContent || ''; - if (!text.endsWith(NON_BREAKING_SPACE)) { - label.textContent = text + NON_BREAKING_SPACE; - } - }); -} - -/** - * Fixes irregular tables by adding colspan to single-cell rows in multi-column tables. - * (e.g., https://www.bible.com/bible/111/EZR.2.NIV) - */ -function fixIrregularTables(doc: Document): void { - doc.querySelectorAll('table').forEach((table) => { - const rows = table.querySelectorAll('tr'); - if (rows.length === 0) return; - - let maxColumns = 0; - rows.forEach((row) => { - let count = 0; - row.querySelectorAll('td, th').forEach((cell) => { - count += - cell instanceof HTMLTableCellElement - ? parseInt(cell.getAttribute('colspan') || '1', 10) - : 1; - }); - maxColumns = Math.max(maxColumns, count); - }); - - if (maxColumns > 1) { - rows.forEach((row) => { - const cells = row.querySelectorAll('td, th'); - if (cells.length === 1 && cells[0] instanceof HTMLTableCellElement) { - const existing = parseInt(cells[0].getAttribute('colspan') || '1', 10); - if (existing < maxColumns) { - cells[0].setAttribute('colspan', maxColumns.toString()); - } - } - }); - } - }); -} - -const DOMPURIFY_CONFIG = { - ALLOWED_ATTR: ['class', 'style', 'id', 'v', 'usfm'], - ALLOW_DATA_ATTR: true, -}; - -/** - * Full transformation pipeline for Bible HTML from the API. - * - * 1. Sanitize (DOMPurify) - * 2. Wrap verse content in selectable spans - * 3. Extract footnotes and replace with portal anchors - * 4. 
Add non-breaking spaces to verse labels - * 5. Fix irregular table layouts - */ -export function transformBibleHtml(html: string): { html: string; notes: Record } { - if (typeof window === 'undefined' || !('DOMParser' in window)) { - return { html, notes: {} }; - } - - const doc = new DOMParser().parseFromString( - DOMPurify.sanitize(html, DOMPURIFY_CONFIG), - 'text/html', - ); - - wrapVerseContent(doc); - assignFootnoteKeys(doc); - const notes = extractNotesFromWrappedHtml(doc); - addNbspToVerseLabels(doc); - fixIrregularTables(doc); - - return { html: doc.body.innerHTML, notes }; -} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index aea61f21..d14d74ae 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -136,6 +136,9 @@ importers: packages/core: dependencies: + linkedom: + specifier: ^0.18.12 + version: 0.18.12 zod: specifier: 4.1.12 version: 4.1.12 @@ -249,9 +252,6 @@ importers: clsx: specifier: 2.1.1 version: 2.1.1 - isomorphic-dompurify: - specifier: 2.23.0 - version: 2.23.0 react: specifier: 19.1.2 version: 19.1.2 @@ -2849,9 +2849,6 @@ packages: '@types/statuses@2.0.6': resolution: {integrity: sha512-xMAgYwceFhRA2zY+XbEA7mxYbA093wdiW8Vu6gZPGWy9cmOyU9XesH1tNcEWsKFd5Vzrqx5T3D38PWx1FIIXkA==} - '@types/trusted-types@2.0.7': - resolution: {integrity: sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==} - '@types/validate-npm-package-name@4.0.2': resolution: {integrity: sha512-lrpDziQipxCEeK5kWxvljWYhUvOiB2A9izZd9B2AFarYAkqZshb4lPbRs7zKEic6eGtH8V/2qJW+dPp9OtF6bw==} @@ -3249,6 +3246,9 @@ packages: resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==} engines: {node: '>=18'} + boolbase@1.0.0: + resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} + brace-expansion@1.1.12: resolution: {integrity: sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==} 
@@ -3468,10 +3468,17 @@ packages: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} + css-select@5.2.2: + resolution: {integrity: sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==} + css-tree@3.1.0: resolution: {integrity: sha512-0eW44TGN5SQXU1mWSkKwFstI/22X2bG1nYzZTYMAWjylYURhse752YgbE4Cx46AC+bAvI+/dYTPRk1LqSUnu6w==} engines: {node: ^10 || ^12.20.0 || ^14.13.0 || >=15.0.0} + css-what@6.2.2: + resolution: {integrity: sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==} + engines: {node: '>= 6'} + css.escape@1.5.1: resolution: {integrity: sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg==} @@ -3480,6 +3487,9 @@ packages: engines: {node: '>=4'} hasBin: true + cssom@0.5.0: + resolution: {integrity: sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==} + cssstyle@4.6.0: resolution: {integrity: sha512-2z+rWdzbbSZv6/rhtvzvqeZQHrBaqgogqt85sqFNbabZOuFbCVFb8kPeEtZjiKkbrm395irpNKiYeFeLiQnFPg==} engines: {node: '>=18'} @@ -3625,8 +3635,18 @@ packages: dom-accessibility-api@0.6.3: resolution: {integrity: sha512-7ZgogeTnjuHbo+ct10G9Ffp0mif17idi0IyWNVA/wcwcm7NPOD/WEHVP3n7n3MhXqxoIYm8d6MuZohYWIZ4T3w==} - dompurify@3.3.0: - resolution: {integrity: sha512-r+f6MYR1gGN1eJv0TVQbhA7if/U7P87cdPl3HN5rikqaBSBxLiCb/b9O+2eG0cxz0ghyU+mU1QkbsOwERMYlWQ==} + dom-serializer@2.0.0: + resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==} + + domelementtype@2.3.0: + resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==} + + domhandler@5.0.3: + resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==} + engines: {node: '>= 4'} + + 
domutils@3.2.2: + resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==} dotenv-cli@7.4.2: resolution: {integrity: sha512-SbUj8l61zIbzyhIbg0FwPJq6+wjbzdn9oEtozQpZ6kW2ihCcapKVZj49oCT3oPM+mgQm+itgvUQcG5szxVrZTA==} @@ -3697,10 +3717,18 @@ packages: resolution: {integrity: sha512-rRqJg/6gd538VHvR3PSrdRBb/1Vy2YfzHqzvbhGIQpDRKIa4FgV/54b5Q1xYSxOOwKvjXweS26E0Q+nAMwp2pQ==} engines: {node: '>=8.6'} + entities@4.5.0: + resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} + engines: {node: '>=0.12'} + entities@6.0.1: resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==} engines: {node: '>=0.12'} + entities@7.0.1: + resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==} + engines: {node: '>=0.12'} + env-paths@2.2.1: resolution: {integrity: sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==} engines: {node: '>=6'} @@ -4211,6 +4239,12 @@ packages: html-escaper@2.0.2: resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + html-escaper@3.0.3: + resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==} + + htmlparser2@10.1.0: + resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==} + http-errors@2.0.1: resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==} engines: {node: '>= 0.8'} @@ -4483,10 +4517,6 @@ packages: resolution: {integrity: sha512-6B3tLtFqtQS4ekarvLVMZ+X+VlvQekbe4taUkf/rhVO3d/h0M2rfARm/pXLcPEsjjMsFgrFgSrhQIxcSVrBz8w==} engines: {node: '>=18'} - isomorphic-dompurify@2.23.0: - resolution: {integrity: 
sha512-f9w5fPJwlu+VK1uowFy4eWYgd7uxl0nQJbtorGp1OAs6JeY1qPkBQKNee1RXrnr68GqZ86PwQ6LF/5rW1TrOZQ==} - engines: {node: '>=18'} - istanbul-lib-coverage@3.2.2: resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==} engines: {node: '>=8'} @@ -4559,15 +4589,6 @@ packages: canvas: optional: true - jsdom@26.1.0: - resolution: {integrity: sha512-Cvc9WUhxSMEo4McES3P7oK3QaXldCfNWp7pl2NNeiIFlCoLr3kfq9kb1fxftiwk1FLV7CvpvDfonxtzUDeSOPg==} - engines: {node: '>=18'} - peerDependencies: - canvas: ^3.0.0 - peerDependenciesMeta: - canvas: - optional: true - jsdom@27.0.1: resolution: {integrity: sha512-SNSQteBL1IlV2zqhwwolaG9CwhIhTvVHWg3kTss/cLE7H/X4644mtPQqYvCfsSrGQWt9hSZcgOXX8bOZaMN+kA==} engines: {node: '>=20'} @@ -4785,6 +4806,15 @@ packages: lines-and-columns@1.2.4: resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} + linkedom@0.18.12: + resolution: {integrity: sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q==} + engines: {node: '>=16'} + peerDependencies: + canvas: '>= 2' + peerDependenciesMeta: + canvas: + optional: true + lint-staged@16.2.5: resolution: {integrity: sha512-o36wH3OX0jRWqDw5dOa8a8x6GXTKaLM+LvhRaucZxez0IxA+KNDUCiyjBfNgsMNmchwSX6urLSL7wShcUqAang==} engines: {node: '>=20.17'} @@ -5052,6 +5082,9 @@ packages: resolution: {integrity: sha512-9qny7Z9DsQU8Ou39ERsPU4OZQlSTP47ShQzuKZ6PRXpYLtIFgl/DEBYEXKlvcEa+9tHVcK8CF81Y2V72qaZhWA==} engines: {node: '>=18'} + nth-check@2.1.1: + resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} + nwsapi@2.2.22: resolution: {integrity: sha512-ujSMe1OWVn55euT1ihwCI1ZcAaAU3nxUiDwfDQldc51ZXaB9m2AyOn6/jh1BLe2t/G8xd6uKG1UBF2aZJeg2SQ==} @@ -5906,16 +5939,9 @@ packages: resolution: {integrity: sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==} engines: 
{node: '>=14.0.0'} - tldts-core@6.1.86: - resolution: {integrity: sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA==} - tldts-core@7.0.17: resolution: {integrity: sha512-DieYoGrP78PWKsrXr8MZwtQ7GLCUeLxihtjC1jZsW1DnvSMdKPitJSe8OSYDM2u5H6g3kWJZpePqkp43TfLh0g==} - tldts@6.1.86: - resolution: {integrity: sha512-WMi/OQ2axVTf/ykqCQgXiIct+mSQDFdH2fkwhPwgEwvJ1kSzZRiinb0zF2Xb8u4+OqPChmyI6MEu4EezNJz+FQ==} - hasBin: true - tldts@7.0.17: resolution: {integrity: sha512-Y1KQBgDd/NUc+LfOtKS6mNsC9CCaH+m2P1RoIZy7RAPo3C3/t8X45+zgut31cRZtZ3xKPjfn3TkGTrctC2TQIQ==} hasBin: true @@ -5936,10 +5962,6 @@ packages: resolution: {integrity: sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==} engines: {node: '>=6'} - tough-cookie@5.1.2: - resolution: {integrity: sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==} - engines: {node: '>=16'} - tough-cookie@6.0.0: resolution: {integrity: sha512-kXuRi1mtaKMrsLUxz3sQYvVl37B0Ns6MzfrtV5DvJceE9bPyspOqk9xxv7XbZWcfLWbFmm997vl83qUWVJA64w==} engines: {node: '>=16'} @@ -6108,6 +6130,9 @@ packages: ufo@1.6.1: resolution: {integrity: sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA==} + uhyphen@0.2.0: + resolution: {integrity: sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==} + unbox-primitive@1.1.0: resolution: {integrity: sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==} engines: {node: '>= 0.4'} @@ -9082,9 +9107,6 @@ snapshots: '@types/statuses@2.0.6': {} - '@types/trusted-types@2.0.7': - optional: true - '@types/validate-npm-package-name@4.0.2': {} '@typescript-eslint/eslint-plugin@8.46.2(@typescript-eslint/parser@8.46.2(eslint@8.57.1)(typescript@5.9.2))(eslint@8.57.1)(typescript@5.9.2)': @@ -9710,6 +9732,8 @@ snapshots: transitivePeerDependencies: - supports-color + boolbase@1.0.0: {} 
+ brace-expansion@1.1.12: dependencies: balanced-match: 1.0.2 @@ -9907,15 +9931,27 @@ snapshots: shebang-command: 2.0.0 which: 2.0.2 + css-select@5.2.2: + dependencies: + boolbase: 1.0.0 + css-what: 6.2.2 + domhandler: 5.0.3 + domutils: 3.2.2 + nth-check: 2.1.1 + css-tree@3.1.0: dependencies: mdn-data: 2.12.2 source-map-js: 1.2.1 + css-what@6.2.2: {} + css.escape@1.5.1: {} cssesc@3.0.0: {} + cssom@0.5.0: {} + cssstyle@4.6.0: dependencies: '@asamuzakjp/css-color': 3.2.0 @@ -10032,9 +10068,23 @@ snapshots: dom-accessibility-api@0.6.3: {} - dompurify@3.3.0: - optionalDependencies: - '@types/trusted-types': 2.0.7 + dom-serializer@2.0.0: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + entities: 4.5.0 + + domelementtype@2.3.0: {} + + domhandler@5.0.3: + dependencies: + domelementtype: 2.3.0 + + domutils@3.2.2: + dependencies: + dom-serializer: 2.0.0 + domelementtype: 2.3.0 + domhandler: 5.0.3 dotenv-cli@7.4.2: dependencies: @@ -10097,8 +10147,12 @@ snapshots: ansi-colors: 4.1.3 strip-ansi: 6.0.1 + entities@4.5.0: {} + entities@6.0.1: {} + entities@7.0.1: {} + env-paths@2.2.1: {} environment@1.1.0: {} @@ -10894,6 +10948,15 @@ snapshots: html-escaper@2.0.2: {} + html-escaper@3.0.3: {} + + htmlparser2@10.1.0: + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + domutils: 3.2.2 + entities: 7.0.1 + http-errors@2.0.1: dependencies: depd: 2.0.0 @@ -11127,16 +11190,6 @@ snapshots: isexe@3.1.5: {} - isomorphic-dompurify@2.23.0: - dependencies: - dompurify: 3.3.0 - jsdom: 26.1.0 - transitivePeerDependencies: - - bufferutil - - canvas - - supports-color - - utf-8-validate - istanbul-lib-coverage@3.2.2: {} istanbul-lib-instrument@4.0.3: @@ -11245,33 +11298,6 @@ snapshots: - supports-color - utf-8-validate - jsdom@26.1.0: - dependencies: - cssstyle: 4.6.0 - data-urls: 5.0.0 - decimal.js: 10.6.0 - html-encoding-sniffer: 4.0.0 - http-proxy-agent: 7.0.2 - https-proxy-agent: 7.0.6 - is-potential-custom-element-name: 1.0.1 - nwsapi: 2.2.22 - parse5: 7.3.0 - 
rrweb-cssom: 0.8.0 - saxes: 6.0.0 - symbol-tree: 3.2.4 - tough-cookie: 5.1.2 - w3c-xmlserializer: 5.0.0 - webidl-conversions: 7.0.0 - whatwg-encoding: 3.1.1 - whatwg-mimetype: 4.0.0 - whatwg-url: 14.2.0 - ws: 8.18.3 - xml-name-validator: 5.0.0 - transitivePeerDependencies: - - bufferutil - - supports-color - - utf-8-validate - jsdom@27.0.1(postcss@8.5.6): dependencies: '@asamuzakjp/dom-selector': 6.7.3 @@ -11465,6 +11491,14 @@ snapshots: lines-and-columns@1.2.4: {} + linkedom@0.18.12: + dependencies: + css-select: 5.2.2 + cssom: 0.5.0 + html-escaper: 3.0.3 + htmlparser2: 10.1.0 + uhyphen: 0.2.0 + lint-staged@16.2.5: dependencies: commander: 14.0.1 @@ -11710,6 +11744,10 @@ snapshots: path-key: 4.0.0 unicorn-magic: 0.3.0 + nth-check@2.1.1: + dependencies: + boolbase: 1.0.0 + nwsapi@2.2.22: {} object-assign@4.1.1: {} @@ -12725,14 +12763,8 @@ snapshots: tinyspy@4.0.4: {} - tldts-core@6.1.86: {} - tldts-core@7.0.17: {} - tldts@6.1.86: - dependencies: - tldts-core: 6.1.86 - tldts@7.0.17: dependencies: tldts-core: 7.0.17 @@ -12752,10 +12784,6 @@ snapshots: universalify: 0.2.0 url-parse: 1.5.10 - tough-cookie@5.1.2: - dependencies: - tldts: 6.1.86 - tough-cookie@6.0.0: dependencies: tldts: 7.0.17 @@ -12969,6 +12997,8 @@ snapshots: ufo@1.6.1: {} + uhyphen@0.2.0: {} + unbox-primitive@1.1.0: dependencies: call-bound: 1.0.4