Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
130 commits
Select commit Hold shift + click to select a range
400a55d
feat: add anonymize core crate
jan-kubica Jun 24, 2026
5c744fd
fix: satisfy stable clippy
jan-kubica Jun 24, 2026
c8d7e47
feat: add core search layer
jan-kubica Jun 24, 2026
235f8ab
chore: allow internal core crate license checks
jan-kubica Jun 24, 2026
1d202ab
fix: harden core redaction contracts
jan-kubica Jun 24, 2026
7801b4f
docs: clarify core contracts
jan-kubica Jun 24, 2026
c2ff1a3
fix: normalize identifier cues
jan-kubica Jun 24, 2026
0abb52f
feat: add core entity resolution
jan-kubica Jun 24, 2026
1508c84
feat: add core boundary resolution
jan-kubica Jun 24, 2026
dcca9be
chore: split core resolution modules
jan-kubica Jun 24, 2026
8067ca6
feat: add core match processors
jan-kubica Jun 24, 2026
6c3b7e2
feat: add core search normalization
jan-kubica Jun 24, 2026
9c534ab
feat: support literal pattern options
jan-kubica Jun 24, 2026
7af815b
feat: add prepared core search
jan-kubica Jun 24, 2026
c792979
feat: add static core redaction
jan-kubica Jun 24, 2026
93cf0e6
feat: add core language bindings
jan-kubica Jun 24, 2026
0e2cd08
feat: share native adapter contract
jan-kubica Jun 24, 2026
a41fe2b
fix: tighten native redaction contracts
jan-kubica Jun 24, 2026
3dd2a7f
test: add migration fixture gate
jan-kubica Jun 24, 2026
cafd2d8
feat: port custom deny-list slice
jan-kubica Jun 24, 2026
11fb186
fix: tighten cli dictionary scope type
jan-kubica Jun 24, 2026
6947fab
test: report migration fixture runtime coverage
jan-kubica Jun 24, 2026
7adb235
feat: wire native static redaction
jan-kubica Jun 24, 2026
539164e
perf: skip ts search for native prep
jan-kubica Jun 24, 2026
f219599
fix: clean migration benchmark helper
jan-kubica Jun 24, 2026
f7f2036
feat: wire native static redaction package
jan-kubica Jun 25, 2026
3782e53
feat: add core prepared packages
jan-kubica Jun 25, 2026
061bbea
chore: update prepared search core pin
jan-kubica Jun 25, 2026
ddf93b5
fix: satisfy fixture perf lint
jan-kubica Jun 25, 2026
aba881d
chore: update dependencies
jan-kubica Jun 25, 2026
0778c82
fix: keep migration fixtures behavior-stable
jan-kubica Jun 25, 2026
1b92e22
fix: address native adapter review
jan-kubica Jun 25, 2026
d2e66cf
fix: address prepared search review
jan-kubica Jun 25, 2026
942b095
fix: trim date span fillers
jan-kubica Jun 25, 2026
a178545
chore: use stdnum core validators
jan-kubica Jun 25, 2026
401ad51
chore: update stdnum validator pin
jan-kubica Jun 25, 2026
d5bf5c2
chore: update stdnum validator pin
jan-kubica Jun 25, 2026
d2f923c
fix: tighten native static parity
jan-kubica Jun 25, 2026
5004645
fix: tighten native redaction config
jan-kubica Jun 25, 2026
cfd7edc
fix: tighten native fixture spans
jan-kubica Jun 25, 2026
072dfd1
fix: sync address boundary mirror
jan-kubica Jun 25, 2026
386011e
feat: add native anonymizer facade
jan-kubica Jun 25, 2026
8dee9dc
feat: expose native anonymize entrypoint
jan-kubica Jun 25, 2026
b30a8b0
test: cover native fixture adapter parity
jan-kubica Jun 25, 2026
aa7bc1f
test: broaden native adapter fixture parity
jan-kubica Jun 25, 2026
4b14fa5
feat: expose native binding version
jan-kubica Jun 25, 2026
3a1f50e
feat: add native node loader
jan-kubica Jun 25, 2026
ffff4a3
fix: tighten native parity edges
jan-kubica Jun 25, 2026
9dce652
fix: address native review edges
jan-kubica Jun 25, 2026
8b57400
fix: mirror false-positive shape data
jan-kubica Jun 25, 2026
0eb7bb8
feat: expose native pipeline package path
jan-kubica Jun 25, 2026
5506957
fix: cap prepared package payloads
jan-kubica Jun 25, 2026
c1e28de
feat: support native confidence boost
jan-kubica Jun 25, 2026
7ee8fc8
feat: support native hotword rules
jan-kubica Jun 25, 2026
feb9fae
fix: tighten native pipeline parity
jan-kubica Jun 25, 2026
5d5f014
fix: cover native cache and address seeds
jan-kubica Jun 25, 2026
1e126bd
fix: port address context to native core
jan-kubica Jun 25, 2026
7e7be13
fix: mirror address context data
jan-kubica Jun 25, 2026
7e16361
chore: bump search core revisions
jan-kubica Jun 26, 2026
0a32a17
fix: cache native pipeline packages
jan-kubica Jun 26, 2026
7580d03
fix: preserve native detection edges
jan-kubica Jun 26, 2026
d89ea15
test: track native fixture deltas
jan-kubica Jun 26, 2026
5b19800
fix: align native prepared edge cases
jan-kubica Jun 26, 2026
f7e29f2
fix: build native node artifact
jan-kubica Jun 26, 2026
dc02e42
fix: align native text-offset edges
jan-kubica Jun 26, 2026
430e495
fix: mirror address unit data
jan-kubica Jun 26, 2026
7f254b2
feat: port native coreference pass
jan-kubica Jun 26, 2026
03e4855
feat: port native zone adjustments
jan-kubica Jun 26, 2026
8e59b0b
fix: align native coreference context
jan-kubica Jun 26, 2026
fcdebca
fix: mirror coreference data
jan-kubica Jun 26, 2026
84f6139
feat: port supplemental name corpus
jan-kubica Jun 26, 2026
b55644b
feat: expand native validator coverage
jan-kubica Jun 26, 2026
273c060
perf: prewarm prepared package cache
jan-kubica Jun 26, 2026
e5816fe
fix: align native pipeline review edges
jan-kubica Jun 26, 2026
da57db8
feat: add native package file workflow
jan-kubica Jun 26, 2026
e603420
fix: align native static review edges
jan-kubica Jun 26, 2026
1dc2747
fix: tighten native adapter edges
jan-kubica Jun 26, 2026
31527f3
fix: load city dictionaries in node builds
jan-kubica Jun 26, 2026
4186159
feat: build default native pipeline package
jan-kubica Jun 26, 2026
162a16b
chore: filter prepare stages from fixture diagnostics
jan-kubica Jun 26, 2026
d8677b0
feat: cache default native pipeline
jan-kubica Jun 26, 2026
9787f80
fix: align native trigger edge cases
jan-kubica Jun 26, 2026
4de1e8d
fix: simplify native binding loading
jan-kubica Jun 26, 2026
5d404fc
chore: pin rust toolchain
jan-kubica Jun 26, 2026
ba7ded5
fix: tighten native review edge cases
jan-kubica Jun 26, 2026
aa4e1fc
fix: restore native build outputs
jan-kubica Jun 26, 2026
15c3bc5
fix: guard overlapping deny-list names
jan-kubica Jun 26, 2026
c6a9c00
test: add rust primitive properties
jan-kubica Jun 26, 2026
9afba93
test: strengthen rust primitive properties
jan-kubica Jun 26, 2026
4b81304
test: reject stale prepared search artifacts
jan-kubica Jun 26, 2026
ab23a72
chore: pin text-search artifact identity fix
jan-kubica Jun 26, 2026
d72ab1a
test: strengthen search artifact properties
jan-kubica Jun 26, 2026
84a8ecc
chore: pin text-search overlap artifact fix
jan-kubica Jun 26, 2026
9e2b476
test: gate native fixture parity
jan-kubica Jun 26, 2026
edc60b0
test: strengthen primitive invariants
jan-kubica Jun 26, 2026
e3ce0cc
test: add native package timing harness
jan-kubica Jun 26, 2026
ae4a66c
feat: add typed python sdk facade
jan-kubica Jun 26, 2026
f8f52e8
fix: simplify native timing mode
jan-kubica Jun 26, 2026
933c9b1
chore: pin python type checker
jan-kubica Jun 26, 2026
5b7b19d
feat: align native sdk facades
jan-kubica Jun 26, 2026
f36a0d3
feat: enforce native sdk parity
jan-kubica Jun 26, 2026
ca283da
fix: align native redaction parity
jan-kubica Jun 26, 2026
2b7eb71
test: enforce native sdk parity
jan-kubica Jun 26, 2026
b417d94
feat: align native sdk surface
jan-kubica Jun 26, 2026
5ffdd07
chore: enforce brand casing
jan-kubica Jun 26, 2026
f085e7a
chore: report native sdk perf
jan-kubica Jun 26, 2026
2585631
chore: verify python wheel packaging
jan-kubica Jun 26, 2026
7a89466
test: enforce native adapter perf parity
jan-kubica Jun 26, 2026
cce3ff0
test: smoke python wheel install
jan-kubica Jun 26, 2026
4ace954
perf: narrow native match work
jan-kubica Jun 26, 2026
92da7bb
feat: scope native package builds
jan-kubica Jun 26, 2026
599a62a
chore: consume merged text search artifacts
jan-kubica Jun 26, 2026
d7150ec
docs: adopt Rust agent conventions
jan-kubica Jun 26, 2026
f080fed
chore: wire Rust Dylint checks
jan-kubica Jun 26, 2026
734ed7f
chore: add Rust config builders
jan-kubica Jun 26, 2026
5e6a100
chore: satisfy Rust lint gate
jan-kubica Jun 26, 2026
960a8eb
docs: refresh Rust agent conventions
jan-kubica Jun 26, 2026
fecf43c
chore: consume merged Rust lint tooling
jan-kubica Jun 27, 2026
cbb1e3b
test: name native fixture improvements
jan-kubica Jun 27, 2026
2d94722
perf: cache Python config facade
jan-kubica Jun 27, 2026
24de826
chore: wire local prepared regex stack
jan-kubica Jun 27, 2026
d51b119
perf: preload default native pipeline
jan-kubica Jun 27, 2026
f67806e
perf: select scoped native packages
jan-kubica Jun 27, 2026
6129f2d
perf: add static regex prefilters
jan-kubica Jun 27, 2026
f3611fd
perf: warm native regex preloads
jan-kubica Jun 27, 2026
97f0735
perf: report native package warmup scenarios
jan-kubica Jun 27, 2026
97367d2
chore: pin text search core
jan-kubica Jun 27, 2026
0aca8a7
chore: avoid native package shadowing
jan-kubica Jun 27, 2026
29b7453
fix: address native review gaps
jan-kubica Jun 27, 2026
913d09e
test: allow slow native parity
jan-kubica Jun 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ai/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"stella-public-repo",
"engineering",
"typescript",
"rust",
"testing",
"linting"
],
Expand Down
2 changes: 1 addition & 1 deletion .ai/shared
9 changes: 9 additions & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Use sparse index for faster resolution.
[registries.crates-io]
protocol = "sparse"

# Command aliases, each invoked as `cargo <alias>`.
[alias]
# Check formatting across all packages without rewriting any files.
ci-fmt = "fmt --all -- --check"
# Lint every workspace target with all features; `-D warnings` turns warnings into errors.
ci-clippy = "clippy --workspace --all-targets --all-features --locked -- -D warnings"
# Run Dylint across the workspace.
ci-dylint = "dylint --workspace --all"
# Run the workspace tests with all features, refusing to update Cargo.lock.
ci-test = "test --workspace --all-features --locked"
75 changes: 75 additions & 0 deletions .github/tools/check-brand-case.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { execFileSync } from "node:child_process";
import { readFileSync } from "node:fs";
import process from "node:process";

// The disallowed spelling is assembled from fragments, so the capitalized
// literal never appears in this file. NOTE(review): presumably this keeps the
// script from flagging itself, since its path is not exempted below — confirm.
const DISALLOWED = ["S", "tella"].join("");
// The all-lowercase form; it is quoted in the error message as the spelling to use.
const EXPECTED = DISALLOWED.toLowerCase();
// Exact repository-relative paths that are exempt from the check.
const IGNORED_PATHS = new Set(["AGENTS.md", "CLAUDE.md", "GEMINI.md"]);
// Path prefixes that are exempt from the check. Entries containing `*` are
// treated as wildcard patterns by isIgnored; the rest are plain prefixes.
const IGNORED_PREFIXES = [
".ai/",
".agents/",
".claude/",
".github/assets/",
"packages/data/dictionaries/",
"packages/*/dist/",
"packages/anonymize/wasm/dist/",
"target/",
];

// Ask git for every tracked path. `-z` separates entries with NUL bytes, so
// the output is split on "\0"; empty entries and exempted paths are dropped.
const lsFilesOutput = execFileSync("git", ["ls-files", "-z"], {
  encoding: "utf8",
});
const trackedFiles = lsFilesOutput
  .split("\0")
  .filter((file) => file !== "" && !isIgnored(file));

let hasFailure = false;

for (const file of trackedFiles) {
  const content = readFileSync(file);
  // Files containing a NUL byte are treated as binary and are not scanned.
  const isBinary = content.includes(0);
  if (!isBinary) {
    const text = content.toString("utf8");
    // Report every occurrence, resuming the search just past each hit.
    for (
      let index = text.indexOf(DISALLOWED);
      index !== -1;
      index = text.indexOf(DISALLOWED, index + DISALLOWED.length)
    ) {
      const { line, column } = lineColumnFor(text, index);
      console.error(
        `${file}:${line}:${column} uses disallowed brand casing; use "${EXPECTED}"`,
      );
      hasFailure = true;
    }
  }
}

// Exit non-zero when at least one occurrence was reported.
if (hasFailure) {
  process.exit(1);
}

/**
 * Decides whether a tracked file is exempt from the brand-casing check.
 *
 * A file is exempt when its path is listed verbatim in IGNORED_PATHS, or when
 * it starts with one of the IGNORED_PREFIXES entries. In a prefix entry, `*`
 * stands for exactly one non-empty path segment (it never crosses a `/`), so
 * "packages/*\/dist/" covers "packages/cli/dist/..." but not
 * "packages/cli/src/dist/...".
 *
 * @param {string} file - Repository-relative path using `/` separators.
 * @returns {boolean} True when the file must be skipped.
 */
function isIgnored(file) {
  if (IGNORED_PATHS.has(file)) {
    return true;
  }
  return IGNORED_PREFIXES.some((pattern) => {
    if (!pattern.includes("*")) {
      return file.startsWith(pattern);
    }
    // Translate the wildcard prefix into an anchored regular expression.
    // Literal parts are escaped; each `*` becomes "one path segment". The
    // previous `startsWith(prefix) && includes(suffix)` test let the suffix
    // match at any depth, which silently exempted unrelated nested paths.
    const source = pattern
      .split("*")
      .map((part) => part.replaceAll(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join("[^/]+");
    return new RegExp(`^${source}`).test(file);
  });
}

/**
 * Converts a character offset into a 1-based line and column.
 *
 * Lines are delimited by "\n". The column counts UTF-16 code units from the
 * start of the line, so an offset that points at a newline is reported as the
 * last column of the line it terminates.
 *
 * @param {string} text - Full text the offset refers to.
 * @param {number} index - Zero-based offset into `text`.
 * @returns {{ line: number, column: number }} 1-based position of the offset.
 */
function lineColumnFor(text, index) {
  let line = 1;
  let lineStart = 0;
  // Hop from newline to newline; only newlines strictly before `index` count.
  let newlineAt = text.indexOf("\n");
  while (newlineAt !== -1 && newlineAt < index) {
    line += 1;
    lineStart = newlineAt + 1;
    newlineAt = text.indexOf("\n", lineStart);
  }
  return { line, column: Math.max(index - lineStart, 0) + 1 };
}
7 changes: 7 additions & 0 deletions .github/tools/check-packlist.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ const PACKAGES = [
expected: [
"dist/index.d.mts",
"dist/index.mjs",
"dist/native.d.mts",
"dist/native.mjs",
"dist/native-node.d.mts",
"dist/native-node.mjs",
"index.cjs",
"stella_anonymize_napi.node",
"native-pipeline.stlanonpkg",
// Dynamically imported corpus chunk; missing means the
// bundler stopped resolving the non-Western name imports.
"dist/names-nw-in.mjs",
Expand Down
132 changes: 132 additions & 0 deletions .github/tools/check-python-wheel.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import { execFileSync } from "node:child_process";
import { mkdtempSync, readdirSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import process from "node:process";

// Scratch directory that receives the built wheel; removed in `finally`.
const outDir = mkdtempSync(join(tmpdir(), "stella-anonymize-wheel-"));
// Cargo profile passed to maturin; overridable through the environment.
const profile = process.env.ANONYMIZE_PYTHON_WHEEL_PROFILE ?? "ci";

try {
  // Build the wheel with maturin, run through uvx, into the scratch directory.
  const maturinArgs = [
    "--from",
    "maturin>=1.14,<2",
    "maturin",
    "build",
    "--manifest-path",
    "crates/anonymize-py/Cargo.toml",
    "--locked",
    "--profile",
    profile,
    "--out",
    outDir,
  ];
  execFileSync("uvx", maturinArgs, { stdio: "inherit" });

  const wheel = readdirSync(outDir).find((name) => name.endsWith(".whl"));
  if (wheel === undefined) {
    throw new Error("maturin did not emit a wheel");
  }

  // Inspect the archive listing: the Python sources, the type stubs and the
  // compiled extension must all be present.
  const wheelPath = join(outDir, wheel);
  const files = new Set(JSON.parse(readWheelFiles(wheelPath)));
  const missing = [
    "stella_anonymize/__init__.py",
    "stella_anonymize/__init__.pyi",
    "stella_anonymize/_native.pyi",
    "stella_anonymize/py.typed",
  ].filter((file) => !files.has(file));
  if (missing.length > 0) {
    throw new Error(`wheel is missing files: ${missing.join(", ")}`);
  }
  const hasNativeExtension = Array.from(files).some((entry) =>
    isNativeExtension(entry),
  );
  if (!hasNativeExtension) {
    throw new Error("wheel is missing the native _native extension");
  }

  // Finally install the wheel in isolation and exercise it end to end.
  smokeInstalledWheel(wheelPath);

  const summary = {
    event: "python-wheel-check",
    wheel,
    profile,
  };
  console.log(JSON.stringify(summary));
} finally {
  rmSync(outDir, { force: true, recursive: true });
}

/**
 * Lists the entries of a wheel archive by running a short Python script.
 *
 * @param {string} wheelPath - Path to the `.whl` file.
 * @returns {string} The script's stdout: a JSON array of archive entry names.
 */
function readWheelFiles(wheelPath) {
  // The script receives the wheel path as sys.argv[1] and prints the zip
  // archive's name list as JSON.
  const listScript = [
    "import json, sys, zipfile",
    "with zipfile.ZipFile(sys.argv[1]) as wheel:",
    " print(json.dumps(wheel.namelist()))",
  ].join("\n");
  const pythonArgs = ["-c", listScript, wheelPath];
  return execFileSync("python3", pythonArgs, { encoding: "utf8" });
}

/**
 * Installs the wheel into an isolated `uv run` environment and runs a Python
 * smoke script against it.
 *
 * The script checks that the expected names are exported, builds a prepared
 * package from a one-pattern regex config, and verifies that redacting
 * "Reference AB1234" yields exactly one entity and changes the text. Output is
 * inherited; a failing script makes execFileSync throw.
 *
 * @param {string} wheelPath - Path to the built `.whl` file.
 * @returns {void}
 */
function smokeInstalledWheel(wheelPath) {
  const smokeScript = [
    "import json",
    "import stella_anonymize as anonymize",
    "required = [",
    " 'PreparedAnonymizer',",
    " 'PreparedSearch',",
    " 'load_prepared_package',",
    " 'prepare_search_package',",
    " 'redact_text',",
    "]",
    "missing = [name for name in required if not hasattr(anonymize, name)]",
    "if missing:",
    " raise SystemExit(f'missing exports: {missing}')",
    "config_json = json.dumps({",
    " 'regex_patterns': [{'kind': 'regex', 'pattern': r'\\b[A-Z]{2}\\d{4}\\b'}],",
    " 'slices': {'regex': {'start': 0, 'end': 1}},",
    " 'regex_meta': [{'label': 'registration number', 'score': 1.0}],",
    "})",
    "package_bytes = anonymize.prepare_search_package(config_json)",
    "prepared = anonymize.load_prepared_package(package_bytes)",
    "result = prepared.redact_text('Reference AB1234')",
    "if result.redaction.entity_count != 1:",
    " raise SystemExit(f'unexpected entity count: {result.redaction.entity_count}')",
    "if result.redaction.redacted_text == 'Reference AB1234':",
    " raise SystemExit('redaction did not change text')",
    "print(json.dumps({",
    " 'event': 'python-wheel-import-smoke',",
    " 'version': anonymize.native_package_version(),",
    " 'entity_count': result.redaction.entity_count,",
    "}))",
  ].join("\n");

  // `--isolated --no-project` keeps the run independent of the surrounding
  // project; `--with` adds the freshly built wheel to the environment.
  const uvArgs = [
    "run",
    "--isolated",
    "--no-project",
    "--python",
    "3.11",
    "--with",
    wheelPath,
    "python",
    "-c",
    smokeScript,
  ];
  execFileSync("uv", uvArgs, { stdio: "inherit" });
}

/**
 * Tells whether a wheel entry is the compiled `_native` extension module.
 *
 * @param {string} file - Archive entry name.
 * @returns {boolean} True when the entry starts with
 *   "stella_anonymize/_native." and ends with a shared-library suffix.
 */
function isNativeExtension(file) {
  if (!file.startsWith("stella_anonymize/_native.")) {
    return false;
  }
  for (const suffix of [".so", ".pyd", ".dll", ".dylib"]) {
    if (file.endsWith(suffix)) {
      return true;
    }
  }
  return false;
}
99 changes: 99 additions & 0 deletions .github/tools/sync-runtime-version.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,16 @@ const PACKAGE_FILES = [
"packages/cli/package.json",
];

// Cargo manifest whose [workspace.package] version entry is synced.
const CARGO_WORKSPACE_MANIFEST = "Cargo.toml";
// Names of the Cargo.lock package entries whose versions are synced.
const CARGO_LOCKED_PACKAGES = [
"stella-anonymize-adapter-contract",
"stella-anonymize-core",
"stella-anonymize-napi",
"stella-anonymize-py",
];
// pyproject.toml files that must carry a matching explicit version or
// declare the version as dynamic.
const PYPROJECT_FILES = ["crates/anonymize-py/pyproject.toml"];
const LOCK_FILE = "bun.lock";
const CARGO_LOCK_FILE = "Cargo.lock";
// With `--check`, mismatches are reported and nothing is written back.
const checkOnly = process.argv.includes("--check");
// The trimmed content of the VERSION file is the version everything else is
// compared against.
const version = readFileSync("VERSION", "utf8").trim();

Expand All @@ -30,6 +39,29 @@ const SYNCED_DEPENDENCY_RANGE_RE = /("@stll\/anonymize": "\^)([^"]+)(")/g;

/**
 * Backslash-escapes every regular-expression metacharacter in `value`, so the
 * result can be embedded in a RegExp source and match the text literally.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The escaped text.
 */
const escapeRegExp = (value) => {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, "\\$&");
};

/**
 * Syncs one version entry inside a text file with the expected `version`.
 *
 * `re` must capture three groups: the text before the version, the version
 * itself, and the text after it. A missing entry is reported and flagged as a
 * mismatch. An entry with a different version is reported and flagged in
 * check-only mode, and rewritten in place otherwise.
 *
 * @param {{ file: string, label: string, re: RegExp }} options - File to
 *   inspect, label used in messages, and the three-group pattern.
 * @returns {void}
 */
const syncTextVersion = ({ file, label, re }) => {
  const original = readFileSync(file, "utf8");
  const found = original.match(re);
  if (found === null) {
    console.error(`${file} has no ${label} version entry`);
    hasMismatch = true;
    return;
  }

  const [, , current] = found;
  if (current !== version) {
    if (checkOnly) {
      console.error(
        `${file} has ${label} version ${current}; expected ${version}`,
      );
      hasMismatch = true;
    } else {
      // Keep the surrounding captures and swap only the version between them.
      const updated = original.replace(re, `$1${version}$3`);
      writeFileSync(file, updated);
      console.log(`Updated ${file} ${label} version to ${version}`);
    }
  }
};

for (const file of PACKAGE_FILES) {
const pkg = JSON.parse(readFileSync(file, "utf8"));
const wantedRange = `^${version}`;
Expand Down Expand Up @@ -61,6 +93,34 @@ for (const file of PACKAGE_FILES) {
console.log(`Updated ${file} to ${version}`);
}

// Sync the version declared under [workspace.package] in the Cargo manifest.
syncTextVersion({
  file: CARGO_WORKSPACE_MANIFEST,
  label: "Cargo workspace",
  re: /(\[workspace\.package\][\s\S]*?\nversion\s*=\s*")([^"]+)(")/,
});

// Each pyproject file must either carry an explicit version (which is then
// synced) or list "version" in its `dynamic` array; anything else is flagged.
for (const file of PYPROJECT_FILES) {
  const text = readFileSync(file, "utf8");
  const hasExplicitVersion = /^version\s*=\s*"([^"]+)"/m.test(text);
  if (hasExplicitVersion) {
    syncTextVersion({
      file,
      label: "Python project",
      re: /(^version\s*=\s*")([^"]+)(")/m,
    });
  } else if (!/\bdynamic\s*=\s*\[[^\]]*"version"[^\]]*\]/m.test(text)) {
    console.error(
      `${file} must either derive version dynamically from Cargo or match VERSION`,
    );
    hasMismatch = true;
  }
}

const lockText = readFileSync(LOCK_FILE, "utf8");
let lockChanged = false;
let syncedLockText = lockText.replaceAll(
Expand Down Expand Up @@ -116,6 +176,45 @@ if (lockChanged) {
);
}

// Sync the versions of the local packages recorded in Cargo.lock. Edits are
// accumulated in memory and written once at the end.
const cargoLockText = readFileSync(CARGO_LOCK_FILE, "utf8");
let cargoLockChanged = false;
let syncedCargoLockText = cargoLockText;

for (const packageName of CARGO_LOCKED_PACKAGES) {
  // Matches the `[[package]]` entry for this exact name and captures the text
  // before the version, the version, and the closing quote.
  const packageVersionRe = new RegExp(
    `(\\[\\[package\\]\\]\\nname = "${escapeRegExp(packageName)}"\\nversion = ")([^"]+)(")`,
  );
  const found = syncedCargoLockText.match(packageVersionRe);
  if (found === null) {
    console.error(`${CARGO_LOCK_FILE} has no package entry for ${packageName}`);
    hasMismatch = true;
  } else {
    const [, , lockedVersion] = found;
    if (lockedVersion !== version) {
      if (checkOnly) {
        console.error(
          `${CARGO_LOCK_FILE} package ${packageName} has version ${lockedVersion}; expected ${version}`,
        );
        hasMismatch = true;
      } else {
        syncedCargoLockText = syncedCargoLockText.replace(
          packageVersionRe,
          `$1${version}$3`,
        );
        cargoLockChanged = true;
      }
    }
  }
}

if (cargoLockChanged) {
  writeFileSync(CARGO_LOCK_FILE, syncedCargoLockText);
  console.log(
    `Updated ${CARGO_LOCK_FILE} local package versions to ${version}`,
  );
}

// Exit non-zero when any mismatch was recorded.
if (hasMismatch) {
  process.exit(1);
}
Loading
Loading