diff --git a/.changeset/open-source-polish.md b/.changeset/open-source-polish.md new file mode 100644 index 0000000..944fe15 --- /dev/null +++ b/.changeset/open-source-polish.md @@ -0,0 +1,6 @@ +--- +"@routerlab/core": patch +"@routerlab/cli": patch +--- + +Polish the repository's open-source project documentation and contributor guidance. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..5c185da --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,14 @@ +## Summary + +- + +## Validation + +- [ ] `bun run --filter '*' build` +- [ ] `bun test` +- [ ] `bun run eval:smoke` for routing or eval changes + +## Release + +- [ ] Changeset added for user-visible package changes +- [ ] No release needed diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..2acec4a --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,8 @@ +# Code of Conduct + +This project follows the Contributor Covenant Code of Conduct, version 2.1. + +Contributors should keep discussion respectful, technical, and constructive. +Harassment, personal attacks, and repeated disruption are not acceptable. + +Report conduct concerns to the maintainer listed in `package.json`. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..9f6dff0 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,27 @@ +# Contributing + +Thanks for improving routerlab. This project is about transparent cost-quality +routing, so contributions should keep model selection explainable and +reproducible. + +## Development + +```bash +bun install --frozen-lockfile +bun run --filter '*' build +bun test +``` + +Run `bun run eval:smoke` for changes that affect routing or evaluation code. + +## Pull requests + +- Keep changes focused on one routing, gateway, eval, CLI, or docs concern. +- Add tests for model selection, frontier, policy, or CLI behavior changes. 
+- Document user-facing behavior in the relevant README or `docs/`. +- Add a Changeset for package changes. + +## Evaluation changes + +Changes to frontiers, task classes, or judge methodology should include enough +metadata for another contributor to reproduce the result. diff --git a/README.md b/README.md index 7eefb04..905253b 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,10 @@ Where existing routers tend to hand-wave cost or hide their methodology, routerl ## Status -Early / pre-release. Engine, eval harness, and per-task frontiers are under active development. Expect breaking changes until v0.1.0. +The `@routerlab/core` and `@routerlab/cli` packages are published and usable +for early production experiments. Engine, gateway, and eval APIs are still +evolving, but documented behavior is tested and release-managed through +Changesets. ## Install @@ -69,6 +72,13 @@ Cached judge outputs and provider responses keep this affordable (default judge } ``` +## Project Health + +- [Contributing guide](./CONTRIBUTING.md) +- [Security policy](./SECURITY.md) +- [Code of Conduct](./CODE_OF_CONDUCT.md) +- [Changelog](./CHANGELOG.md) + ## License [Apache-2.0](./LICENSE) diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..892833b --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,19 @@ +# Security Policy + +## Supported versions + +Security fixes are applied to the latest published `@routerlab/*` packages. + +## Reporting a vulnerability + +Use GitHub private vulnerability reporting for this repository, or contact the +maintainer listed in `package.json` if private reporting is unavailable. + +Do not open a public issue for vulnerabilities involving API keys, prompt data, +gateway request handling, or CI/package-publishing credentials. + +## Security model + +routerlab can sit in front of LLM API calls and may process prompts supplied by +applications. 
Keep credential handling explicit, avoid logging secrets, and +prefer opt-in persistence for prompts or provider responses. diff --git a/eval/cli.ts b/eval/cli.ts index 0d50d23..c64ab4c 100644 --- a/eval/cli.ts +++ b/eval/cli.ts @@ -17,8 +17,8 @@ // provider whose credentials are missing — running with a partial keyring // is supported (you get fewer rows in `summary.json`, not an abort). -import { existsSync, readFileSync, writeFileSync } from "node:fs"; -import { join } from "node:path"; +import { copyFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; import type { TaskClass } from "../packages/core/src/types.ts"; @@ -36,6 +36,7 @@ import { } from "./frontier/runner.ts"; import type { RunResponse, Runner } from "./runners/_types.ts"; import type { ProviderId } from "./runners/_factory.ts"; +import { datasetCachePath } from "./tasks/_types.ts"; // --------------------------------------------------------------------------- // Argv parsing @@ -153,6 +154,32 @@ function smokeRunnerFactory(provider: ProviderId): Runner { return fixtureRunner(provider, `smoke-${provider}-fixture`); } +/** + * Seed the dataset cache with the bundled smoke fixtures: each file under + * ./tasks/fixtures is copied to datasetCachePath(filename), overwriting any existing copy. + * The source stays a URL object because node:fs accepts file: URLs directly, whereas + * URL.pathname is percent-encoded and breaks on paths with spaces or on Windows. + * @throws Error if a bundled fixture file is missing. + */ +function seedSmokeFixtures(): void { + const fixtures: Record<string, string> = { + qa: "qa.jsonl", + classification: "classification.jsonl", + summarization: "summarization.jsonl", + reasoning: "reasoning.jsonl", + }; + + for (const [task, filename] of Object.entries(fixtures)) { + const source = new URL(`./tasks/fixtures/${filename}`, import.meta.url); + const target = datasetCachePath(filename); + if (!existsSync(source)) { + throw new Error(`missing smoke fixture for ${task}: ${source.pathname}`); + } + mkdirSync(dirname(target), { recursive: true }); + copyFileSync(source, target); + } +} + // --------------------------------------------------------------------------- // Command dispatch // --------------------------------------------------------------------------- @@ -244,6 +271,7 @@ async function cmdSmoke(ctx: 
CommandContext): Promise { // subprocesses that, while sandboxed, take time and have a real OS // dependency we don't want in CI smoke). const tasks: TaskClass[] = ["qa", "classification", "summarization", "reasoning"]; + seedSmokeFixtures(); const { summary } = await runFrontier({ tasks, examplesPerTask: SMOKE_N, diff --git a/eval/tasks/fixtures/classification.jsonl b/eval/tasks/fixtures/classification.jsonl new file mode 100644 index 0000000..89ee5c7 --- /dev/null +++ b/eval/tasks/fixtures/classification.jsonl @@ -0,0 +1,3 @@ +{"id":"tweet-fixture-0","input":{"text":"The new release is stable and noticeably faster."},"reference":"positive","metadata":{"label":"positive"}} +{"id":"tweet-fixture-1","input":{"text":"The command failed twice and the logs were unclear."},"reference":"negative","metadata":{"label":"negative"}} +{"id":"tweet-fixture-2","input":{"text":"The package installs and prints its help output."},"reference":"neutral","metadata":{"label":"neutral"}} diff --git a/eval/tasks/fixtures/qa.jsonl b/eval/tasks/fixtures/qa.jsonl new file mode 100644 index 0000000..d7de5cf --- /dev/null +++ b/eval/tasks/fixtures/qa.jsonl @@ -0,0 +1,3 @@ +{"id":"squad-fixture-0","input":{"context":"RouterLab chooses among language models by comparing expected quality, latency, and token cost before making a call.","question":"What does RouterLab compare before making a call?"},"reference":{"goldAnswers":["expected quality, latency, and token cost"],"isImpossible":false}} +{"id":"squad-fixture-1","input":{"context":"The fixture dataset is intentionally tiny so smoke tests can run without downloading public benchmark slices.","question":"Why is the fixture dataset tiny?"},"reference":{"goldAnswers":["so smoke tests can run without downloading public benchmark slices"],"isImpossible":false}} +{"id":"squad-fixture-2","input":{"context":"RouterLab keeps cached evaluation examples under .cache/eval-datasets and does not commit generated cache files.","question":"Where are 
generated evaluation caches stored?"},"reference":{"goldAnswers":[".cache/eval-datasets"],"isImpossible":false}}