Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions src/offload/storage.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { describe, expect, it } from "vitest";

import { sanitizeText } from "./storage.js";

/**
 * Behavioural tests for `sanitizeText`.
 *
 * Every invisible or non-printable character used as input is written as an
 * explicit `\u` escape. Raw control characters, zero-width characters and BOMs
 * embedded directly in a string literal cannot be verified by reading the
 * source and are easily dropped or re-encoded by editors, formatters and
 * copy/paste, which would silently turn a test into a no-op.
 */
describe("sanitizeText", () => {
  it("preserves plain ASCII", () => {
    expect(sanitizeText("hello world")).toBe("hello world");
  });

  it("preserves emoji and other non-BMP code points", () => {
    // U+1F389 (party popper), U+20BB7 (CJK Extension B) and U+1D400
    // (mathematical bold A) are each encoded as a surrogate pair in UTF-16.
    // Without the `u` flag, the [\uD800-\uDFFF] range in UNSAFE_CHAR_RE would
    // match each half independently and silently destroy these characters.
    expect(sanitizeText("emoji \u{1F389} here")).toBe("emoji \u{1F389} here");
    expect(sanitizeText("CJK ext-B \u{20BB7} here")).toBe(
      "CJK ext-B \u{20BB7} here",
    );
    expect(sanitizeText("math bold \u{1D400} here")).toBe(
      "math bold \u{1D400} here",
    );
  });

  it("strips lone (malformed) surrogates", () => {
    // Only the surrogate itself is deleted; the space on each side of it
    // remains, so the expected value contains two consecutive spaces.
    expect(sanitizeText("lone \uD800 surrogate")).toBe("lone  surrogate");
    expect(sanitizeText("lone \uDC00 surrogate")).toBe("lone  surrogate");
  });

  it("strips C0 and C1 control characters", () => {
    // U+0001 (START OF HEADING) is a C0 control; U+0085 (NEXT LINE) is a C1
    // control.
    expect(sanitizeText("ctrl\u0001here")).toBe("ctrlhere");
    expect(sanitizeText("c1\u0085here")).toBe("c1here");
  });

  it("strips zero-width characters and BOM", () => {
    // U+200B is ZERO WIDTH SPACE; U+FEFF is the byte-order mark.
    expect(sanitizeText("a\u200Bb")).toBe("ab");
    expect(sanitizeText("a\uFEFFb")).toBe("ab");
  });

  it("returns non-string input unchanged", () => {
    // Matches the existing typeof guard in sanitizeText. The double cast is
    // deliberate: it smuggles a non-string past the compiler to exercise the
    // runtime guard.
    expect(sanitizeText(42 as unknown as string)).toBe(42);
  });
});
2 changes: 1 addition & 1 deletion src/offload/storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ export async function listRegisteredSessions(
// ─── JSONL Defense Layer ─────────────────────────────────────────────────────

/**
 * Matches every character that is stripped from text before it is written out.
 *
 * Character classes covered:
 * - U+FFFD            replacement character
 * - U+0000–U+001F     C0 controls, except TAB (U+0009), LF (U+000A) and
 *                     CR (U+000D), which are deliberately left intact
 * - U+0080–U+009F     C1 controls
 * - U+D800–U+DFFF     surrogate code points
 * - U+200B–U+200F     zero-width space/joiners and directional marks
 * - U+2028, U+2029    line and paragraph separators
 * - U+FEFF            byte-order mark / zero-width no-break space
 *
 * The `u` flag is required for correctness: it makes the engine match by code
 * point rather than by UTF-16 code unit. Without it, the surrogate range
 * matches each half of a well-formed surrogate pair and destroys every
 * non-BMP character (emoji, CJK Extension B, mathematical alphanumerics).
 * With it, only lone (malformed) surrogates are matched.
 *
 * The `g` flag makes `String.prototype.replace` remove every occurrence.
 */
const UNSAFE_CHAR_RE =
  /[\uFFFD\u0000-\u0008\u000B\u000C\u000E-\u001F\u0080-\u009F\uD800-\uDFFF\u200B-\u200F\u2028\u2029\uFEFF]/gu;

/** Layer 0 — Source text sanitize. Strips unsafe characters from arbitrary text. */
export function sanitizeText(text: string): string {
Expand Down