faraa2m · faraa2m · May 20, 2026 · May 20, 2026 · chatgpt-codex-connector · May 20, 2026
diff --git a/.changeset/open-source-polish.md b/.changeset/open-source-polish.md
@@ -0,0 +1,6 @@
+---
+"@routerlab/core": patch
+"@routerlab/cli": patch
+---
+
+Polish the repository's open-source project documentation and contributor guidance.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,14 @@
+## Summary
+
+- 
+
+## Validation
+
+- [ ] `bun run --filter '*' build`
+- [ ] `bun test`
+- [ ] `bun run eval:smoke` for routing or eval changes
+
+## Release
+
+- [ ] Changeset added for user-visible package changes
+- [ ] No release needed
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
@@ -0,0 +1,8 @@
+# Code of Conduct
+
+This project follows the [Contributor Covenant Code of Conduct, version 2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/).
+
+Contributors should keep discussion respectful, technical, and constructive.
+Harassment, personal attacks, and repeated disruption are not acceptable.
+
+Report conduct concerns to the maintainer listed in `package.json`.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -0,0 +1,27 @@
+# Contributing
+
+Thanks for improving routerlab. This project is about transparent cost-quality
+routing, so contributions should keep model selection explainable and
+reproducible.
+
+## Development
+
+```bash
+bun install --frozen-lockfile
+bun run --filter '*' build
+bun test
+```
+
+Run `bun run eval:smoke` for changes that affect routing or evaluation code.
+
+## Pull requests
+
+- Keep changes focused on one routing, gateway, eval, CLI, or docs concern.
+- Add tests for model selection, frontier, policy, or CLI behavior changes.
+- Document user-facing behavior in the relevant README or `docs/`.
+- Add a Changeset for user-visible package changes.
+
+## Evaluation changes
+
+Changes to frontiers, task classes, or judge methodology should include enough
+metadata for another contributor to reproduce the result.
diff --git a/README.md b/README.md
@@ -10,7 +10,10 @@ Where existing routers tend to hand-wave cost or hide their methodology, routerl
 
 ## Status
 
-Early / pre-release. Engine, eval harness, and per-task frontiers are under active development. Expect breaking changes until v0.1.0.
+The `@routerlab/core` and `@routerlab/cli` packages are published and usable
+for early production experiments. Engine, gateway, and eval APIs are still
+evolving, but documented behavior is covered by tests, and releases are
+managed through Changesets.
 
 ## Install
 
@@ -69,6 +72,13 @@ Cached judge outputs and provider responses keep this affordable (default judge
 }
 ```
 
+## Project Health
+
+- [Contributing guide](./CONTRIBUTING.md)
+- [Security policy](./SECURITY.md)
+- [Code of Conduct](./CODE_OF_CONDUCT.md)
+- [Changelog](./CHANGELOG.md)
+
 ## License
 
 [Apache-2.0](./LICENSE)
diff --git a/SECURITY.md b/SECURITY.md
@@ -0,0 +1,19 @@
+# Security Policy
+
+## Supported versions
+
+Security fixes are applied to the latest published `@routerlab/*` packages.
+
+## Reporting a vulnerability
+
+Use GitHub private vulnerability reporting for this repository, or contact the
+maintainer listed in `package.json` if private reporting is unavailable.
+
+Do not open a public issue for vulnerabilities involving API keys, prompt data,
+gateway request handling, or CI/package-publishing credentials.
+
+## Security model
+
+routerlab can sit in front of LLM API calls and may process prompts supplied by
+applications. Keep credential handling explicit, avoid logging secrets, and
+prefer opt-in persistence for prompts or provider responses.
diff --git a/eval/cli.ts b/eval/cli.ts
@@ -17,8 +17,8 @@
 // provider whose credentials are missing — running with a partial keyring
 // is supported (you get fewer rows in `summary.json`, not an abort).
 
-import { existsSync, readFileSync, writeFileSync } from "node:fs";
-import { join } from "node:path";
+import { copyFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname, join } from "node:path";
 
 import type { TaskClass } from "../packages/core/src/types.ts";
 
@@ -36,6 +36,7 @@ import {
 } from "./frontier/runner.ts";
 import type { RunResponse, Runner } from "./runners/_types.ts";
 import type { ProviderId } from "./runners/_factory.ts";
+import { datasetCachePath } from "./tasks/_types.ts";
 
 // ---------------------------------------------------------------------------
 // Argv parsing
@@ -153,6 +154,25 @@ function smokeRunnerFactory(provider: ProviderId): Runner {
   return fixtureRunner(provider, `smoke-${provider}-fixture`);
 }
 
+/**
+ * Copies each bundled `<task>.jsonl` smoke fixture to `datasetCachePath(name)`,
+ * overwriting any file already there; throws if a bundled fixture is missing.
+ */
+function seedSmokeFixtures(): void {
+  for (const task of ["qa", "classification", "summarization", "reasoning"]) {
+    const filename = `${task}.jsonl`;
+    // Pass the URL itself: `.pathname` stays percent-encoded and is not a
+    // valid Windows path, whereas node:fs accepts file: URLs directly.
+    const source = new URL(`./tasks/fixtures/${filename}`, import.meta.url);
+    const target = datasetCachePath(filename);
+    if (!existsSync(source)) {
+      throw new Error(`missing smoke fixture for ${task}: ${source.pathname}`);
+    }
+    mkdirSync(dirname(target), { recursive: true });
+    copyFileSync(source, target);
+  }
+}
+
 // ---------------------------------------------------------------------------
 // Command dispatch
 // ---------------------------------------------------------------------------
@@ -244,6 +264,7 @@ async function cmdSmoke(ctx: CommandContext): Promise<number> {
   // subprocesses that, while sandboxed, take time and have a real OS
   // dependency we don't want in CI smoke).
   const tasks: TaskClass[] = ["qa", "classification", "summarization", "reasoning"];
+  seedSmokeFixtures();
   const { summary } = await runFrontier({
     tasks,
     examplesPerTask: SMOKE_N,

diff --git a/eval/tasks/fixtures/classification.jsonl b/eval/tasks/fixtures/classification.jsonl
@@ -0,0 +1,3 @@
+{"id":"tweet-fixture-0","input":{"text":"The new release is stable and noticeably faster."},"reference":"positive","metadata":{"label":"positive"}}
+{"id":"tweet-fixture-1","input":{"text":"The command failed twice and the logs were unclear."},"reference":"negative","metadata":{"label":"negative"}}
+{"id":"tweet-fixture-2","input":{"text":"The package installs and prints its help output."},"reference":"neutral","metadata":{"label":"neutral"}}
diff --git a/eval/tasks/fixtures/qa.jsonl b/eval/tasks/fixtures/qa.jsonl
@@ -0,0 +1,3 @@
+{"id":"squad-fixture-0","input":{"context":"RouterLab chooses among language models by comparing expected quality, latency, and token cost before making a call.","question":"What does RouterLab compare before making a call?"},"reference":{"goldAnswers":["expected quality, latency, and token cost"],"isImpossible":false}}
+{"id":"squad-fixture-1","input":{"context":"The fixture dataset is intentionally tiny so smoke tests can run without downloading public benchmark slices.","question":"Why is the fixture dataset tiny?"},"reference":{"goldAnswers":["so smoke tests can run without downloading public benchmark slices"],"isImpossible":false}}
+{"id":"squad-fixture-2","input":{"context":"RouterLab keeps cached evaluation examples under .cache/eval-datasets and does not commit generated cache files.","question":"Where are generated evaluation caches stored?"},"reference":{"goldAnswers":[".cache/eval-datasets"],"isImpossible":false}}