diff --git a/package.json b/package.json index 5f6a018..399bd5b 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "@tailwindcss/vite": "^4.1.18", "astro": "^5.17.1", "astro-custom-toc": "^3.0.2", + "rehype-parse": "^9.0.1", "rehype-sanitize": "^6.0.0", "rehype-slug": "^6.0.0", "rehype-stringify": "^10.0.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 34de533..96a0ea7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -32,6 +32,9 @@ importers: astro-custom-toc: specifier: ^3.0.2 version: 3.0.2 + rehype-parse: + specifier: ^9.0.1 + version: 9.0.1 rehype-sanitize: specifier: ^6.0.0 version: 6.0.0 diff --git a/src/lib/content/types.ts b/src/lib/content/types.ts index 2a5d2f7..270a712 100644 --- a/src/lib/content/types.ts +++ b/src/lib/content/types.ts @@ -28,7 +28,7 @@ export interface BlogPost { tags: string[]; categories: string[]; category?: { name: string; slug: string } | undefined; - content: string; + content: string; // HTML (CMS) or Markdown (local) rendered?: { html: string; headings: BlogPostHeading[] } | undefined; seo?: BlogPostSeo | undefined; sitemapEligible: boolean; diff --git a/src/lib/directus/markdown.ts b/src/lib/directus/markdown.ts index a4a7600..6aabf76 100644 --- a/src/lib/directus/markdown.ts +++ b/src/lib/directus/markdown.ts @@ -2,6 +2,7 @@ import { unified } from 'unified'; import remarkParse from 'remark-parse'; import remarkGfm from 'remark-gfm'; import remarkRehype from 'remark-rehype'; +import rehypeParse from 'rehype-parse'; import rehypeSanitize from 'rehype-sanitize'; import rehypeSlug from 'rehype-slug'; import rehypeStringify from 'rehype-stringify'; @@ -64,6 +65,21 @@ const sanitizationSchema: Parameters[0] & object = { }, }; +const blogHtmlSanitizationSchema: Parameters[0] & object = { + strip: ['script', 'style'], + tagNames: [ + ...sanitizationSchema.tagNames!, + 'iframe', 'mark', 'aside', + ], + attributes: { + ...sanitizationSchema.attributes, + '*': ['id', 'className', 'style'], + iframe: ['src', 'title', 'width', 'height', 'frameBorder', 'allowFullScreen', 'loading', 'allow'], + img: ['src', 'alt', 'title', 'width', 'height', 'loading'], + }, + protocols: sanitizationSchema.protocols, +}; + function extractHeadings(tree: Root): Heading[] { const headings: Heading[] = []; @@ -123,3 +139,23 @@ export async function renderMarkdown( headings, }; } + +const htmlProcessor = unified() + .use(rehypeParse, { fragment: true }) + .use(rehypeSanitize, blogHtmlSanitizationSchema) + .use(rehypeSlug) + .use(rehypeStringify); + +export async function renderHtml( + content: string +): Promise<{ html: string; headings: Heading[] }> { + const hast = htmlProcessor.parse(content); + const processed = await htmlProcessor.run(hast); + const headings = extractHeadings(processed as Root); + const html = htmlProcessor.stringify(processed as Parameters[0]); + + return { + html: String(html), + headings, + }; +} diff --git a/src/lib/directus/normalize.ts b/src/lib/directus/normalize.ts index 37d9061..4e2b942 100644 --- a/src/lib/directus/normalize.ts +++ b/src/lib/directus/normalize.ts @@ -1,7 +1,7 @@ import type { DirectusBlogArticle, DirectusBlogCategory } from './types.js'; import type { BlogPost, BlogCategory, ImageMeta } from '../content/types.js'; import { resolveAssetUrl, assertNoTokenLeakage } from './assets.js'; -import { renderMarkdown } from './markdown.js'; +import { renderHtml } from './markdown.js'; import { logger } from './logger.js'; function resolveImage( @@ -45,7 +45,7 @@ export async function normalizeArticle(raw: DirectusBlogArticle): Promise ({ })); vi.mock('../../src/lib/directus/markdown', () => ({ - renderMarkdown: vi.fn(), + renderHtml: vi.fn(), })); vi.mock('../../src/lib/directus/logger', () => ({ @@ -15,7 +15,7 @@ vi.mock('../../src/lib/directus/logger', () => ({ })); import { resolveAssetUrl, assertNoTokenLeakage } from '../../src/lib/directus/assets'; -import { renderMarkdown } from '../../src/lib/directus/markdown'; +import { renderHtml } from '../../src/lib/directus/markdown'; import { logger } from '../../src/lib/directus/logger'; import { normalizeArticle, @@ -24,7 +24,7 @@ import { } from '../../src/lib/directus/normalize'; const mockResolveAssetUrl = resolveAssetUrl as ReturnType; -const mockRenderMarkdown = renderMarkdown as ReturnType; +const mockRenderHtml = renderHtml as ReturnType; const mockAssertNoTokenLeakage = assertNoTokenLeakage as ReturnType; const mockLoggerWarn = logger.warn as ReturnType; @@ -32,7 +32,7 @@ describe('CMS data normalization', () => { beforeEach(() => { vi.clearAllMocks(); mockResolveAssetUrl.mockReturnValue(null); - mockRenderMarkdown.mockResolvedValue({ html: '

rendered

', headings: [] }); + mockRenderHtml.mockResolvedValue({ html: '

rendered

', headings: [] }); }); describe('normalizeArticle', () => { @@ -101,13 +101,13 @@ describe('CMS data normalization', () => { it('renders content via markdown pipeline', async () => { console.log('[TEST:normalize] markdown rendering'); - mockRenderMarkdown.mockResolvedValue({ + mockRenderHtml.mockResolvedValue({ html: '

Hello

', headings: [{ depth: 2, slug: 'hello', text: 'Hello' }], }); const raw = createArticle({ content: '## Hello' }); const post = await normalizeArticle(raw); - expect(mockRenderMarkdown).toHaveBeenCalledWith('## Hello'); + expect(mockRenderHtml).toHaveBeenCalledWith('## Hello'); expect(post.rendered).toEqual({ html: '

Hello

', headings: [{ depth: 2, slug: 'hello', text: 'Hello' }], @@ -127,7 +127,7 @@ describe('CMS data normalization', () => { const post = await normalizeArticle(raw); expect(post.content).toBe(''); expect(post.rendered).toBeUndefined(); - expect(mockRenderMarkdown).not.toHaveBeenCalled(); + expect(mockRenderHtml).not.toHaveBeenCalled(); }); it('maps featured_image_file via resolveAssetUrl', async () => {