From 079aa121a5ea675f340a1c96c2013c68ec696947 Mon Sep 17 00:00:00 2001 From: Inky Quill Date: Mon, 11 May 2026 00:11:42 +0300 Subject: [PATCH] chore: clean release docs and workflows --- .github/workflows/release-plz.yml | 9 +- .github/workflows/release.yml | 17 - CONTRIBUTING.md | 3 +- PLAN.md | 591 ---- docs/README.md | 5 - docs/REFERENCE.md | 1 - docs/superpowers/plans/2026-04-18-sel-v0.2.md | 2907 ----------------- .../specs/2026-04-18-sel-v0.2-design.md | 266 -- release-plz.toml | 1 + 9 files changed, 5 insertions(+), 3795 deletions(-) delete mode 100644 PLAN.md delete mode 100644 docs/superpowers/plans/2026-04-18-sel-v0.2.md delete mode 100644 docs/superpowers/specs/2026-04-18-sel-v0.2-design.md diff --git a/.github/workflows/release-plz.yml b/.github/workflows/release-plz.yml index ee2d1fa..faa7bfd 100644 --- a/.github/workflows/release-plz.yml +++ b/.github/workflows/release-plz.yml @@ -20,12 +20,6 @@ on: push: branches: [main] -env: - # actions/upload-artifact@v5 and download-artifact@v5 still run on Node - # 20. Forcing Node 24 here silences the deprecation notice. - # https://github.blog/changelog/2025-09-19-deprecation-of-node-20-on-github-actions-runners/ - FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" - permissions: contents: read @@ -38,6 +32,9 @@ jobs: runs-on: ubuntu-24.04 permissions: contents: write + concurrency: + group: release-plz-release-${{ github.ref }} + cancel-in-progress: false steps: - name: Checkout uses: actions/checkout@v5 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0a2be5e..9b6a519 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -285,20 +285,3 @@ jobs: env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} run: cargo publish --locked - - announce: - needs: - - plan - - host - - publish-crates-io - # use "always() && ..." to allow us to wait for all publish jobs while - # still allowing individual publish jobs to skip themselves (for prereleases). 
- # "host" however must run to completion, no skipping allowed! - if: ${{ always() && needs.host.result == 'success' && (needs.publish-crates-io.result == 'success' || needs.publish-crates-io.result == 'skipped') }} - runs-on: "ubuntu-24.04" - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v5 - with: - submodules: recursive diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6a6f361..a7a667d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -66,8 +66,7 @@ tests/ Black-box integration tests per feature (selectors, regex, context, stdin, multi_file, …). benches/large_file.rs Criterion micro-benchmarks for the streaming path. -docs/ User docs: README index, USAGE, ARCHITECTURE; - superpowers/ holds optional design session notes. +docs/ User docs: README index, USAGE, ARCHITECTURE. ``` See [`docs/README.md`](docs/README.md) for the full documentation index and diff --git a/PLAN.md b/PLAN.md deleted file mode 100644 index 64e6f36..0000000 --- a/PLAN.md +++ /dev/null @@ -1,591 +0,0 @@ -# План реализации утилиты `sel` - -## Обзор - -Утилита `sel` — консольная утилита для извлечения фрагментов текстовых файлов по номерам строк, диапазонам, позициям или регулярным выражениям. Работает потоково, поддерживает контекст, подходит для больших файлов. - -**Исправление**: Если селектор опущен (и нет `-e`), выводим весь файл с номерами строк (эмуляция `cat -n`). - ---- - -## Этап 1: Базовая структура проекта - -**Цель**: Создать минимально работающий прототип. 
- -### Задачи - -- [ ] Инициализация проекта `cargo init` -- [ ] Настройка `Cargo.toml` с зависимостями -- [ ] Базовая структура `src/`: - ``` - src/ - ├── main.rs # Точка входа, разбор аргументов - ├── cli.rs # Определение CLI через clap derive - ├── selector.rs # Парсинг селекторов (строки/диапазоны/позиции) - ├── reader.rs # Потоковое чтение файлов - ├── output.rs # Форматирование вывода - ├── error.rs # Типы ошибок - └── lib.rs # Библиотечная экспозиция (для тестов) - ``` - -- [ ] Реализация базового CLI с `clap` derive: - ```rust - #[derive(Parser)] - #[command(name = "sel")] - #[command(about = "Select slices from text files", long_about = None)] - struct Cli { - /// Selector (line number, range, position, or omitted for all lines) - selector: Option, - - /// Show N lines of context before and after matches - #[arg(short = 'c', long = "context", value_name = "N")] - context: Option, - - /// Show N characters of context around position - #[arg(short = 'n', long = "char-context", value_name = "N")] - char_context: Option, - - /// Don't output line numbers - #[arg(short = 'l', long = "no-line-numbers")] - no_line_numbers: bool, - - /// Regular expression pattern (PCRE-like) - #[arg(short = 'e', long = "regex", value_name = "PAT")] - regex: Option, - - /// Always print filename - #[arg(short = 'H', long = "with-filename")] - with_filename: bool, - - /// Color output [auto, always, never] - #[arg(long = "color", value_name = "WHEN")] - color: Option, - - /// Input file(s) - #[arg(value_name = "FILE")] - files: Vec, - } - ``` - -### Критерии завершения - -- `cargo build` успешно собирается -- `sel --help` показывает справку -- `sel --version` показывает версию - ---- - -## Этап 2: Парсер селекторов - -**Цель**: Реализовать парсинг всех форматов селекторов. 
- -### Задачи - -- [ ] Определить перечисление селекторов: - ```rust - pub enum Selector { - All, // Без селектора - все строки - LineNumbers(Vec), // N, M-N, N1,N2,M1-M2 - Positions(Vec), // L:C, L1:C1,L2:C2 - } - - pub enum LineSpec { - Single(usize), // N - Range(usize, usize), // M-N - } - - pub struct Position { - pub line: usize, - pub column: usize, // в байтах, начиная с 1 - } - ``` - -- [ ] Парсинг непозиционного селектора: - - `"42"` → `Single(42)` - - `"10-20"` → `Range(10, 20)` - - `"1,5,10-15,20"` → `[Single(1), Single(5), Range(10, 15), Single(20)]` - - Проверка: `M <= N` для диапазона - - Проверка: номера > 0 - -- [ ] Парсинг позиционного селектора: - - `"23:260"` → `Position { line: 23, column: 260 }` - - `"15:30,23:260"` → две позиции - - Проверка: колонки > 0 - -- [ ] Валидация смешивания: - - Если есть `:` хотя бы в одном элементе → все должны иметь `:` - - Иначе → ошибка - -- [ ] Обработка пустого селектора: - - `None` или `""` → `Selector::All` - -### Тесты - -```rust -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_single_line() { - assert_eq!(parse_selector("42"), Ok(Selector::LineNumbers(vec![LineSpec::Single(42)]))); - } - - #[test] - fn parse_range() { - assert_eq!(parse_selector("10-20"), Ok(Selector::LineNumbers(vec![LineSpec::Range(10, 20)]))); - } - - #[test] - fn parse_mixed() { - let sel = parse_selector("1,5,10-15,20").unwrap(); - // ... 
- } - - #[test] - fn parse_position() { - assert_eq!(parse_selector("23:260"), Ok(Selector::Positions(vec![Position::new(23, 260)]))); - } - - #[test] - fn reject_mixed_selector() { - assert!(parse_selector("1,23:260").is_err()); - } - - #[test] - fn parse_empty_selector() { - assert_eq!(parse_selector(""), Ok(Selector::All)); - assert_eq!(parse_selector(None), Ok(Selector::All)); - } -} -``` - -### Критерии завершения - -- Все форматы селекторов парсятся корректно -- Ошибки валидации обрабатываются с понятными сообщениями -- Тесты покрывают все случаи - ---- - -## Этап 3: Потоковое чтение и базовый вывод - -**Цель**: Реализовать чтение файла и вывод строк по номерам. - -### Задачи - -- [ ] Чтение файла с `BufReader`: - ```rust - pub struct LineReader { - reader: BufReader, - current_line: usize, - } - ``` - -- [ ] Итератор по строкам с номерами: - ```rust - pub struct LinesWithNumbers { - reader: LineReader, - } - - impl Iterator for LinesWithNumbers { - type Item = io::Result<(usize, String)>; - // ... - } - ``` - -- [ ] Фильтрация по номерам строк: - - Для `Selector::LineNumbers` — проверка, входит ли номер в список - - Оптимизация: сортировка и бинарьный поиск для больших списков - - Объединение пересекающихся диапазонов - -- [ ] Обработка `Selector::All`: - - Вывод всех строк с номерами - -- [ ] Базовый вывод: - ```rust - pub struct OutputFormatter { - show_line_numbers: bool, - show_filename: bool, - filename: Option, - color: ColorMode, - } - ``` - -### Критерии завершения - -- `sel 10-20 file.txt` выводит строки 10-20 -- `sel 5,10,15 file.txt` выводит строки 5, 10, 15 -- `sel file.txt` выводит весь файл с номерами строк -- Работа с большими файлами без загрузки в память - ---- - -## Этап 4: Позиционные селекторы и символьный контекст - -**Цель**: Реализовать опцию `-n` для позиционных селекторов. 
- -### Задачи - -- [ ] Парсинг позиционных селекторов (уже сделано в этапе 2) - -- [ ] Реализация `-n` — символьного контекста: - ```rust - pub struct Fragment { - pub line_number: usize, - pub content: String, - pub start_column: usize, // начало фрагмента в строке - pub target_column: usize, // целевая позиция - } - - impl Fragment { - pub fn new(line: &str, column: usize, context: usize) -> Self { - let line_bytes = line.as_bytes(); - let line_len = line_bytes.len(); - - let start = if column <= context + 1 { - 0 - } else { - column - context - 1 - }; - let end = min(line_len, column + context); - - // ... - } - - pub fn format(&self) -> String { - // "23: Это пример строки..." - } - - pub fn pointer_line(&self) -> String { - // " ^" - } - } - ``` - -- [ ] Вывод с указателем: - - Фрагмент строки - - Строка с указателем `^` под целевой колонкой - -- [ ] Обработка выхода за границы: - - Колонка > длины строки → фрагмент до конца - - Указатель в конец или не выводится - -### Тесты - -```rust -#[test] -fn test_char_context_middle() { - let line = "Это пример строки с длинным текстом"; - let frag = Fragment::new(line, 10, 5); - assert!(frag.content.contains("пример")); -} - -#[test] -fn test_char_context_boundary() { - let line = "short"; - let frag = Fragment::new(line, 100, 10); - assert_eq!(frag.content, "short"); -} -``` - -### Критерии завершения - -- `sel -n 10 23:260 file.txt` работает корректно -- Указатель выводится в правильной позиции -- Граничные случаи обрабатываются - ---- - -## Этап 5: Строчный контекст (`-c`) - -**Цель**: Реализовать вывод контекста вокруг совпадений. - -### Задачи - -- [ ] Кольцевой буфер для строк контекста: - ```rust - pub struct ContextBuffer { - buffer: VecDeque>, - capacity: usize, - current_line: usize, - } - - impl ContextBuffer { - pub fn new(context_size: usize) -> Self { - // Храним N строк "до" + текущую + N строк "после" - // Но для поточного чтения: храним N строк "до" - let capacity = context_size; - // ... 
- } - - pub fn push(&mut self, line_no: usize, line: String) { - // ... - } - - pub fn get_context(&self, target_line: usize) -> Vec<(usize, String, bool)> { - // bool = true для целевой строки - } - } - ``` - -- [ ] Алгоритм с одним проходом: - 1. Читаем файл построчно - 2. Для `Selector::LineNumbers`: - - Если строка в списке → выводим контекст - - Используем кольцевой буфер для строк "до" - - Читаем N строк "после" при совпадении - 3. Объединение пересекающихся интервалов контекста - -- [ ] Пометка целевых строк: - - Символ `>` в начале (перед номером или цветом) - -- [ ] Совместимость с `-n`: - - Целевая строка выводится с фрагментом и указателем - - Контекстные строки — полностью - -### Оптимизация - -Для больших файлов и множества селекторов: -- Сортировка и объединение диапазонов -- Предварительное вычисление интервалов вывода -- Слияние пересекающихся контекстов - -### Критерии завершения - -- `sel -c 3 42 file.txt` показывает 3 строки до и после -- Пересекающиеся контексты объединяются -- Целевые строки помечаются - ---- - -## Этап 6: Режим регулярных выражений (`-e`) - -**Цель**: Реализовать поиск по регулярным выражениям. 
- -### Задачи - -- [ ] Интеграция крейта `regex`: - ```rust - pub struct RegexMatcher { - regex: Regex, - } - - impl RegexMatcher { - pub fn matches(&self, line: &str) -> bool { - self.regex.is_match(line) - } - - pub fn find(&self, line: &str) -> Option<(usize, usize)> { - // (start, end) в байтах - self.regex.find(line).map(|m| (m.start(), m.end())) - } - } - ``` - -- [ ] Режим `-e` без `-n`: - - Вывод полных строк, содержащих совпадение - -- [ ] Режим `-e` с `-n`: - - Фрагмент вокруг первого совпадения - - Указатель под началом совпадения - -- [ ] Поддержка нескольких файлов: - - Формат `{filename}:{line}:{content}` - - Опция `-H` для принудительного вывода имени файла - -- [ ] Обработка ошибок компиляции regex: - - Понятное сообщение об ошибке - - Код возврата 1 - -### Критерии завершения - -- `sel -e ERROR log.txt` ищет "ERROR" -- `sel -c 2 -e TODO source.rs` с контекстом -- `sel -e 'pattern' *.rs` по нескольким файлам - ---- - -## Этап 7: Подавление номеров строк (`-l`) и форматирование - -**Цель**: Реализовать опцию `-l` и finalize форматирование. - -### Задачи - -- [ ] Опция `-l`: - - Подавление номеров строк в выводе - - Сохранение имен файлов для режима `-e` с несколькими файлами - -- [ ] Форматирование вывода: - ```rust - pub enum OutputFormat { - FullLine, // Полная строка - LineWithNumber, // N:content - FileLineWithNumber, // file:N:content - FileLine, // file:content - Fragment, // Фрагмент с указателем - } - ``` - -- [ ] Цветной вывод (`--color`): - - `auto` — если stdout — терминал - - `always` — всегда - - `never` — никогда - - Использование `termcolor` - -- [ ] Подсветка: - - Целевые строки — зелёным (или `>`) - - Совпадения regex — инверсным цветом - - Указатель `^` — цветным - -### Критерии завершения - -- `sel -l 10-20 file.txt` без номеров -- `sel --color=always -e ERROR log.txt` с подсветкой - ---- - -## Этап 8: Обработка ошибок и граничные случаи - -**Цель**: Надёжная обработка всех ошибок. 
- -### Задачи - -- [ ] Типы ошибок: - ```rust - #[derive(thiserror::Error, Debug)] - pub enum SelError { - #[error("File not found: {0}")] - FileNotFound(PathBuf), - - #[error("Invalid selector: {0}")] - InvalidSelector(String), - - #[error("Mixed positional and non-positional selectors")] - MixedSelectors, - - #[error("Char context requires positional selector or -e")] - CharContextWithoutPosition, - - #[error("Invalid regex: {0}")] - InvalidRegex(String), - - #[error("IO error: {0}")] - Io(#[from] io::Error), - } - ``` - -- [ ] Обработка граничных случаев: - - Пустой файл - - Несуществующий файл - - Неверный формат селектора - - Отрицательные/нулевые значения N - - Выход за границы строки/файла - -- [ ] Коды возврата: - - 0 — успех - - 1 — ошибка (файл не найден, неверный селектор, etc.) - - 0 но без вывода — если ничего не найдено (как `grep`) - -### Критерии завершения - -- Все ошибки обрабатываются с понятными сообщениями -- Коды возврата соответствуют ожиданиям - ---- - -## Этап 9: Тестирование - -**Цель**: Комплексное покрытие тестами. - -### Задачи - -- [ ] Unit тесты для каждого модуля: - - `selector.rs` — все форматы - - `reader.rs` — потоковое чтение - - `output.rs` — форматирование - -- [ ] Интеграционные тесты: - ``` - tests/ - ├── basic.rs - ├── selectors.rs - ├── positions.rs - ├── context.rs - ├── regex.rs - ├── multi_file.rs - └── errors.rs - ``` - -- [ ] Property-based тесты: - - Округление фрагментов всегда валидно - - Контекстные интервалы корректны - -- [ ] Тесты с большими файлами: - - Проверка потребления памяти - - Производительность - -### Критерии завершения - -- Покрытие > 80% -- Прохождение всех тестов -- Нет memory leaks - ---- - -## Этап 10: Оптимизация и полировка - -**Цель**: Финальная оптимизация и подготовка к релизу. 
- -### Задачи - -- [ ] Профилирование: - - `cargo flamegraph` для поиска hotspots - - Оптимизация критических путей - -- [ ] Benchmark: - - Сравнение с альтернативами (`sed`, `grep`) - - Большие файлы (>1GB) - -- [ ] Уменьшение размера бинарника: - - `strip = true` - - `lto = true` - - `panic = "abort"` - -- [ ] Документация: - - `README.md` с примерами - - `man` page (опционально) - - Комментарии в коде - -- [ ] CI/CD: - - GitHub Actions - - Тесты на Linux/macOS/Windows - - `cargo clippy` и `cargo fmt --check` - -### Критерии завершения - -- Бинарник < 1MB (после strip) -- Производительность comparable с `sed` -- Готовность к публикации - ---- - -## Дополнительные идеи (будущие версии) - -- Поддержка `--chars` для счёта колонок в символах Unicode -- Чтение из stdin -- Интерактивный режим -- Поддержка других форматов (JSON, CSV) -- Вывод в формате diff - ---- - -## Порядок реализации (рекомендуется) - -1. Этапы 1-2: Структура и парсинг -2. Этапы 3-4: Базовый функционал -3. Этап 5: Контекст -4. Этап 6: Regex -5. Этапы 7-10: Полировка - -Ориентировочное время: 2-3 недели активной разработки. diff --git a/docs/README.md b/docs/README.md index 9d66afe..b09fe91 100644 --- a/docs/README.md +++ b/docs/README.md @@ -15,8 +15,3 @@ Human-oriented documentation for **sel** (Select Slices from Text Files). Published on [docs.rs/sel-rs](https://docs.rs/sel-rs) for library users embedding the same pipeline as the CLI. - -## Design notes (historical) - -Under [superpowers/](superpowers/) — session plans and specs from v0.2 design work; -they are optional background, not the canonical user manual. 
diff --git a/docs/REFERENCE.md b/docs/REFERENCE.md index 44607f3..772e241 100644 --- a/docs/REFERENCE.md +++ b/docs/REFERENCE.md @@ -1127,7 +1127,6 @@ CONTRIBUTING.md dev loop, test rules, release steps CODE_OF_CONDUCT.md community standards LICENSE-MIT MIT LICENSE-APACHE Apache-2.0 -PLAN.md design notes (historical) src/main.rs binary entry point (30 lines) src/lib.rs public re-exports diff --git a/docs/superpowers/plans/2026-04-18-sel-v0.2.md b/docs/superpowers/plans/2026-04-18-sel-v0.2.md deleted file mode 100644 index 32f0950..0000000 --- a/docs/superpowers/plans/2026-04-18-sel-v0.2.md +++ /dev/null @@ -1,2907 +0,0 @@ -# sel v0.2 Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Refactor `sel` into a clean five-stage pipeline (Source → Matcher → Expander → Formatter → Sink), add stdin/`-o`/`-v` features, trim dependencies, and replace the hand-rolled release pipeline with `cargo-dist` + crates.io auto-publish. - -**Architecture:** One `pipeline::run()` function replaces the current nine `process_*` functions. Each pipeline stage is a small trait with swappable impls. A typed `App` builder makes invalid combinations (positional selectors + stdin) a compile error, with a friendly runtime error at the CLI boundary. - -**Tech Stack:** Rust 2024 edition, `clap` (derive), `regex`, `thiserror`, standard library `io::IsTerminal`. Drops: `anyhow`, `termcolor`, `is-terminal` (all either dead or subsumed by std). - -**Spec:** `docs/superpowers/specs/2026-04-18-sel-v0.2-design.md` - ---- - -## Phase A — Baseline & dependency trim - -These tasks preserve behavior. Existing tests must stay green throughout. - -### Task 1: Baseline green + branch - -**Files:** -- No source edits. 
- -- [ ] **Step 1: Verify tests pass on main before we start** - -Run: `cd /home/inky/Development/sel && cargo test` -Expected: all tests pass. - -- [ ] **Step 2: Verify clippy clean** - -Run: `cargo clippy --all-targets -- -D warnings` -Expected: no warnings. - -- [ ] **Step 3: Verify fmt clean** - -Run: `cargo fmt --check` -Expected: no output. - -- [ ] **Step 4: Create feature branch** - -```bash -git checkout -b v0.2-refactor -``` - -- [ ] **Step 5: No commit needed** (branch creation suffices) - ---- - -### Task 2: Drop dead `anyhow` dependency - -`anyhow` appears in `Cargo.toml` but has zero imports in `src/`. Removing it is a pure win. - -**Files:** -- Modify: `Cargo.toml` - -- [ ] **Step 1: Confirm zero usages** - -Run: `grep -rn "anyhow" src/ tests/ benches/` -Expected: no matches. - -- [ ] **Step 2: Remove from Cargo.toml** - -Delete the `anyhow = "1.0"` line from `[dependencies]`. The surrounding block should read: - -```toml -# Better error handling -thiserror = "2.0" -``` - -- [ ] **Step 3: Verify build + tests** - -Run: `cargo build && cargo test` -Expected: all tests pass. - -- [ ] **Step 4: Commit** - -```bash -git add Cargo.toml Cargo.lock -git commit -m "chore: drop unused anyhow dependency" -``` - ---- - -### Task 3: Drop dead `termcolor` dependency - -Same story: declared, never imported. All color output already uses raw ANSI escape strings. - -**Files:** -- Modify: `Cargo.toml` - -- [ ] **Step 1: Confirm zero usages** - -Run: `grep -rn "termcolor" src/ tests/ benches/` -Expected: no matches. - -- [ ] **Step 2: Remove from Cargo.toml** - -Delete the `termcolor = "1.4"` line and its comment. - -- [ ] **Step 3: Verify build + tests** - -Run: `cargo build && cargo test` -Expected: all tests pass. 
- -- [ ] **Step 4: Commit** - -```bash -git add Cargo.toml Cargo.lock -git commit -m "chore: drop unused termcolor dependency" -``` - ---- - -### Task 4: Replace `is-terminal` crate with `std::io::IsTerminal` - -`std::io::IsTerminal` has been stable since Rust 1.70. Our MSRV is 1.92. The crate is redundant. - -**Files:** -- Modify: `Cargo.toml` -- Modify: `src/cli.rs` - -- [ ] **Step 1: Confirm current usage** - -Run: `grep -rn "is_terminal\|is-terminal\|IsTerminal" src/ Cargo.toml` -Expected: one crate declaration in Cargo.toml; `cli.rs` already uses `std::io::IsTerminal` — the crate is dead. - -- [ ] **Step 2: Remove from Cargo.toml** - -Delete `is-terminal = "0.4"` and its comment line. - -- [ ] **Step 3: Verify build + tests** - -Run: `cargo build && cargo test` -Expected: all tests pass. `cli.rs` already imports `std::io::IsTerminal` (line 4), so no code change needed. - -- [ ] **Step 4: Commit** - -```bash -git add Cargo.toml Cargo.lock -git commit -m "chore: drop redundant is-terminal crate (std has it)" -``` - ---- - -### Task 5: Rework `SelError` — add new variants, path-aware Io, drop `Message` - -Prepare the error enum for the features to come: positional+stdin guard, invert-without-regex, output file collision. Wrap I/O errors with the path that caused them. Remove the catch-all `Message` variant — every error now has a specific variant. 
- -**Files:** -- Modify: `src/error.rs` -- Modify: `src/main.rs` (replace `SelError::Message` usages) -- Modify: `src/reader.rs` (wrap I/O with path) -- Modify: `src/cli.rs` (rewrite validation to not use `Message`) - -- [ ] **Step 1: Write the updated error enum test** - -Add at the bottom of `src/error.rs`: - -```rust -#[cfg(test)] -mod tests { - use super::*; - use std::io; - use std::path::PathBuf; - - #[test] - fn io_error_includes_path() { - let err = SelError::Io { - path: "nope.txt".into(), - source: io::Error::new(io::ErrorKind::NotFound, "no such"), - }; - let msg = format!("{err}"); - assert!(msg.contains("nope.txt"), "got: {msg}"); - } - - #[test] - fn positional_with_stdin_has_clear_message() { - let err = SelError::PositionalWithStdin; - let msg = format!("{err}"); - assert!(msg.contains("stdin")); - } - - #[test] - fn output_exists_names_path() { - let err = SelError::OutputExists(PathBuf::from("out.txt")); - let msg = format!("{err}"); - assert!(msg.contains("out.txt")); - assert!(msg.contains("--force")); - } -} -``` - -- [ ] **Step 2: Run test to verify it fails to compile (variants don't exist yet)** - -Run: `cargo test --lib error::tests` -Expected: FAIL — `SelError::Io { path, source }` not the current shape; `PositionalWithStdin`, `OutputExists`, `InvertWithoutRegex` don't exist. - -- [ ] **Step 3: Rewrite `src/error.rs`** - -Replace the file with: - -```rust -//! Error types for `sel`. - -use std::io; -use std::path::PathBuf; -use thiserror::Error; - -/// Main error type for the `sel` utility. -#[derive(Error, Debug)] -pub enum SelError { - /// Invalid selector syntax. - #[error("invalid selector: {0}")] - InvalidSelector(String), - - /// Invalid regular expression. - #[error("invalid regex: {0}")] - InvalidRegex(String), - - /// Positional selectors used with stdin (unseekable). - #[error("positional selectors require a seekable file; stdin is line-only")] - PositionalWithStdin, - - /// `--invert-match` used without `--regex`. 
- #[error("--invert-match requires --regex")] - InvertWithoutRegex, - - /// `--char-context` used without a target. - #[error("--char-context requires --regex or a positional selector")] - CharContextWithoutTarget, - - /// I/O error with the offending path. - #[error("{path}: {source}")] - Io { - path: String, - #[source] - source: io::Error, - }, - - /// Output file already exists and `--force` was not given. - #[error("output file already exists: {} (use --force to overwrite)", .0.display())] - OutputExists(PathBuf), -} - -/// Result type alias for `sel`. -pub type Result = std::result::Result; - -// (tests module as written in Step 1) -``` - -- [ ] **Step 4: Update `src/reader.rs::open_file` to wrap path** - -Replace the `open_file` function (lines 54–63) with: - -```rust -pub fn open_file(path: &Path) -> Result { - std::fs::File::open(path).map_err(|source| crate::error::SelError::Io { - path: path.display().to_string(), - source, - }) -} -``` - -Also remove any remaining `FileNotFound` references in `reader.rs` (there are none beyond `open_file`, but double-check with grep). - -- [ ] **Step 5: Update `src/cli.rs::validate` — stop using `SelError::Message`** - -Replace the body of `validate(&self)` with: - -```rust -pub fn validate(&self) -> crate::Result<()> { - if self.get_files().is_empty() { - return Err(crate::SelError::InvalidSelector( - "no input files specified".to_string(), - )); - } - - if self.char_context.is_some() - && self.regex.is_none() - && !self - .get_selector() - .as_ref() - .is_some_and(|s| s.contains(':')) - { - return Err(crate::SelError::CharContextWithoutTarget); - } - - Ok(()) -} -``` - -Note: "no input files" is a user-input problem — `InvalidSelector` is a reasonable home. We'll revisit when CLI is rebuilt around `Cli::into_app()` in Task 19. - -- [ ] **Step 6: Update `src/main.rs` — replace `SelError::Message` and `SelError::FileNotFound` usages** - -Replace any `SelError::Message(...)` with the appropriate specific variant. 
Replace any pattern-match on `FileNotFound` with the new `Io { path, source }` form. Check with: - -Run: `grep -n "SelError::Message\|FileNotFound\|CharContextWithoutPosition" src/` -Expected after changes: no matches. - -- [ ] **Step 7: Run all tests** - -Run: `cargo test` -Expected: all tests pass, including the three new error tests. - -- [ ] **Step 8: Commit** - -```bash -git add src/error.rs src/main.rs src/reader.rs src/cli.rs -git commit -m "refactor: redesign SelError — path-aware Io, drop Message, add v0.2 variants" -``` - ---- - -## Phase B — Refactor into pipeline stages - -Each task here preserves behavior. The existing `tests/*.rs` integration tests are the regression harness. If any go red, stop and fix before proceeding. - -### Task 6: Introduce shared pipeline types - -Add `Line`, `MatchInfo`, `Emit`, `Role` types that every future stage will use. This is pure addition — nothing yet consumes them. - -**Files:** -- Create: `src/types.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/types.rs`** - -```rust -//! Shared types used across pipeline stages. - -use std::ops::Range; - -/// A line of input with its 1-indexed line number. -#[derive(Debug, Clone)] -pub struct Line { - pub no: u64, - pub bytes: Vec, -} - -impl Line { - pub fn new(no: u64, bytes: Vec) -> Self { - Self { no, bytes } - } - - /// Borrow the line content as a string, substituting U+FFFD for invalid UTF-8. - pub fn as_str_lossy(&self) -> std::borrow::Cow<'_, str> { - String::from_utf8_lossy(&self.bytes) - } -} - -/// Result of running a `Matcher` on a `Line`. -#[derive(Debug, Default, Clone)] -pub struct MatchInfo { - /// Did this line hit? - pub hit: bool, - /// Byte ranges to highlight (for regex matches). - pub spans: Vec>, - /// Target column (1-indexed) for positional matches. - pub col: Option, -} - -/// Role of an emitted line in the output stream. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Role { - /// A line that matched (primary output). 
- Target, - /// A neighbouring line included as context. - Context, -} - -/// One line being emitted by the pipeline. -#[derive(Debug, Clone)] -pub struct Emit<'a> { - pub line: &'a Line, - pub role: Role, - pub match_info: &'a MatchInfo, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn line_as_str_lossy_handles_invalid_utf8() { - let line = Line::new(1, vec![0xFF, b'a']); - let s = line.as_str_lossy(); - assert!(s.ends_with('a')); - } - - #[test] - fn match_info_default_is_miss() { - let mi = MatchInfo::default(); - assert!(!mi.hit); - assert!(mi.spans.is_empty()); - assert!(mi.col.is_none()); - } -} -``` - -- [ ] **Step 2: Register the module in `src/lib.rs`** - -Add `pub mod types;` and `pub use types::{Emit, Line, MatchInfo, Role};`. The file becomes: - -```rust -//! # sel — Select Slices from Text Files - -pub mod cli; -pub mod error; -pub mod output; -pub mod reader; -pub mod selector; -pub mod types; - -pub use error::{Result, SelError}; -pub use selector::{LineSpec, Position, Selector}; -pub use types::{Emit, Line, MatchInfo, Role}; -``` - -- [ ] **Step 3: Verify tests pass** - -Run: `cargo test` -Expected: all tests pass, including the two new ones in `types::tests`. - -- [ ] **Step 4: Commit** - -```bash -git add src/types.rs src/lib.rs -git commit -m "feat: add shared pipeline types (Line, MatchInfo, Emit, Role)" -``` - ---- - -### Task 7: Introduce `Source` trait with `FileSource` impl - -Define the `Source` trait and build `FileSource` on top. Keep existing `LineReader` alive — nothing switches to `Source` yet. Behavior preserved. - -**Files:** -- Create: `src/source/mod.rs` -- Create: `src/source/file.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/source/mod.rs`** - -```rust -//! Input sources — iterators that yield `Line`s one at a time. - -pub mod file; - -pub use file::FileSource; - -use crate::Line; -use crate::Result; - -/// A stream of input lines with 1-indexed line numbers. 
-/// -/// Implementations own the underlying reader and handle newline stripping. -pub trait Source { - /// Read the next line. Returns `Ok(None)` at EOF. - fn next_line(&mut self) -> Result>; - - /// A short label for this source, used for the filename prefix (`-` for stdin). - fn label(&self) -> &str; - - /// Can this source be paired with positional selectors? - /// `false` for stdin. - fn is_seekable(&self) -> bool; -} -``` - -- [ ] **Step 2: Create `src/source/file.rs`** - -```rust -//! File-backed `Source`. - -use super::Source; -use crate::error::SelError; -use crate::{Line, Result}; -use std::fs::File; -use std::io::{BufRead, BufReader}; -use std::path::{Path, PathBuf}; - -pub struct FileSource { - reader: BufReader, - label: String, - path: PathBuf, - line_no: u64, -} - -impl FileSource { - pub fn open(path: &Path) -> Result { - let file = File::open(path).map_err(|source| SelError::Io { - path: path.display().to_string(), - source, - })?; - Ok(Self { - reader: BufReader::new(file), - label: path.display().to_string(), - path: path.to_path_buf(), - line_no: 0, - }) - } - - pub fn path(&self) -> &Path { - &self.path - } -} - -impl Source for FileSource { - fn next_line(&mut self) -> Result> { - let mut buf: Vec = Vec::new(); - let n = self - .reader - .read_until(b'\n', &mut buf) - .map_err(|source| SelError::Io { - path: self.label.clone(), - source, - })?; - if n == 0 { - return Ok(None); - } - // Strip trailing \n and optional \r - if buf.ends_with(b"\n") { - buf.pop(); - if buf.ends_with(b"\r") { - buf.pop(); - } - } - self.line_no += 1; - Ok(Some(Line::new(self.line_no, buf))) - } - - fn label(&self) -> &str { - &self.label - } - - fn is_seekable(&self) -> bool { - true - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Write; - use tempfile::NamedTempFile; - - #[test] - fn reads_three_lines_numbered() { - let mut f = NamedTempFile::new().unwrap(); - writeln!(f, "alpha").unwrap(); - writeln!(f, "beta").unwrap(); - writeln!(f, 
"gamma").unwrap(); - - let mut src = FileSource::open(f.path()).unwrap(); - let l1 = src.next_line().unwrap().unwrap(); - let l2 = src.next_line().unwrap().unwrap(); - let l3 = src.next_line().unwrap().unwrap(); - assert!(src.next_line().unwrap().is_none()); - - assert_eq!(l1.no, 1); - assert_eq!(&l1.bytes, b"alpha"); - assert_eq!(l2.no, 2); - assert_eq!(&l2.bytes, b"beta"); - assert_eq!(l3.no, 3); - assert_eq!(&l3.bytes, b"gamma"); - } - - #[test] - fn handles_crlf() { - let mut f = NamedTempFile::new().unwrap(); - f.write_all(b"one\r\ntwo\r\n").unwrap(); - - let mut src = FileSource::open(f.path()).unwrap(); - let l1 = src.next_line().unwrap().unwrap(); - assert_eq!(&l1.bytes, b"one"); - } - - #[test] - fn nonexistent_file_returns_io_error_with_path() { - let err = FileSource::open(Path::new("/nonexistent-xyz-123")).unwrap_err(); - let msg = format!("{err}"); - assert!(msg.contains("nonexistent-xyz-123")); - } -} -``` - -- [ ] **Step 3: Register in `src/lib.rs`** - -Add `pub mod source;` next to the other module declarations. - -- [ ] **Step 4: Run all tests** - -Run: `cargo test` -Expected: all existing tests pass plus three new `source::file::tests`. - -- [ ] **Step 5: Commit** - -```bash -git add src/source/ src/lib.rs -git commit -m "feat: add Source trait and FileSource impl (not yet wired)" -``` - ---- - -### Task 8: Introduce `Matcher` trait with `LineMatcher` - -Build the line-range matcher on top of existing `Selector::LineNumbers` logic. Don't delete the old code yet — both live in parallel while we migrate. - -**Files:** -- Create: `src/matcher/mod.rs` -- Create: `src/matcher/lines.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/matcher/mod.rs`** - -```rust -//! Matcher stage — decides whether a given line is a hit. - -pub mod lines; - -pub use lines::LineMatcher; - -use crate::{Line, MatchInfo}; - -/// Classifies each line as hit or miss. 
-/// -/// Implementations may be stateful (e.g., a position matcher that advances -/// through a sorted list of targets). -pub trait Matcher { - fn match_line(&mut self, line: &Line) -> MatchInfo; -} - -/// A matcher that hits every line. -pub struct AllMatcher; - -impl Matcher for AllMatcher { - fn match_line(&mut self, _line: &Line) -> MatchInfo { - MatchInfo { hit: true, ..MatchInfo::default() } - } -} -``` - -- [ ] **Step 2: Create `src/matcher/lines.rs`** - -```rust -//! Line-number matcher built from sorted, merged ranges. - -use super::Matcher; -use crate::selector::{LineSpec, Selector}; -use crate::{Line, MatchInfo}; - -/// Matches lines whose 1-indexed number falls in a set of merged ranges. -pub struct LineMatcher { - /// Sorted, non-overlapping, inclusive `(start, end)` ranges (1-indexed). - ranges: Vec<(u64, u64)>, -} - -impl LineMatcher { - /// Build from a `Selector::LineNumbers`. Panics on other variants. - pub fn from_selector(sel: &Selector) -> Self { - let normalized = sel.normalize(); - let specs: &[LineSpec] = match &normalized { - Selector::LineNumbers(s) => s, - Selector::All => &[], - Selector::Positions(_) => { - panic!("LineMatcher::from_selector called with positional selector") - } - }; - let ranges = specs - .iter() - .map(|s| match s { - LineSpec::Single(n) => (*n as u64, *n as u64), - LineSpec::Range(a, b) => (*a as u64, *b as u64), - }) - .collect(); - Self { ranges } - } -} - -impl Matcher for LineMatcher { - fn match_line(&mut self, line: &Line) -> MatchInfo { - let hit = self.ranges.iter().any(|&(a, b)| line.no >= a && line.no <= b); - MatchInfo { hit, ..MatchInfo::default() } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn mk_line(n: u64) -> Line { - Line::new(n, Vec::new()) - } - - #[test] - fn single_line_hits_only_that_line() { - let sel = Selector::parse("5").unwrap(); - let mut m = LineMatcher::from_selector(&sel); - assert!(!m.match_line(&mk_line(4)).hit); - assert!(m.match_line(&mk_line(5)).hit); - 
assert!(!m.match_line(&mk_line(6)).hit); - } - - #[test] - fn range_hits_inclusive() { - let sel = Selector::parse("10-12").unwrap(); - let mut m = LineMatcher::from_selector(&sel); - assert!(!m.match_line(&mk_line(9)).hit); - assert!(m.match_line(&mk_line(10)).hit); - assert!(m.match_line(&mk_line(11)).hit); - assert!(m.match_line(&mk_line(12)).hit); - assert!(!m.match_line(&mk_line(13)).hit); - } - - #[test] - fn mixed_list_merges_ranges() { - let sel = Selector::parse("1,5,10-15,14").unwrap(); - let mut m = LineMatcher::from_selector(&sel); - assert!(m.match_line(&mk_line(1)).hit); - assert!(!m.match_line(&mk_line(2)).hit); - assert!(m.match_line(&mk_line(5)).hit); - assert!(m.match_line(&mk_line(12)).hit); - assert!(m.match_line(&mk_line(15)).hit); - assert!(!m.match_line(&mk_line(16)).hit); - } -} -``` - -- [ ] **Step 3: Register in `src/lib.rs`** - -Add `pub mod matcher;` and under the re-exports add: - -```rust -pub use matcher::{AllMatcher, LineMatcher, Matcher}; -``` - -- [ ] **Step 4: Run all tests** - -Run: `cargo test` -Expected: all tests pass, including new `matcher::lines::tests`. - -- [ ] **Step 5: Commit** - -```bash -git add src/matcher/ src/lib.rs -git commit -m "feat: add Matcher trait, AllMatcher, LineMatcher (not yet wired)" -``` - ---- - -### Task 9: `PositionMatcher` - -Holds a sorted list of `Position`s. Hits when line number matches; emits `col` so the formatter can render a caret. - -**Files:** -- Create: `src/matcher/position.rs` -- Modify: `src/matcher/mod.rs` - -- [ ] **Step 1: Create `src/matcher/position.rs`** - -```rust -//! Positional matcher (line:column). - -use super::Matcher; -use crate::selector::{Position, Selector}; -use crate::{Line, MatchInfo}; - -pub struct PositionMatcher { - /// Positions sorted by `(line, col)`. - positions: Vec, - /// Index of next unconsumed position (positions with line < current are skipped). - cursor: usize, -} - -impl PositionMatcher { - /// Panics if `sel` is not `Selector::Positions`. 
- pub fn from_selector(sel: &Selector) -> Self { - let positions = match sel.normalize() { - Selector::Positions(mut p) => { - p.sort(); - p.dedup(); - p - } - _ => panic!("PositionMatcher::from_selector needs Selector::Positions"), - }; - Self { positions, cursor: 0 } - } -} - -impl Matcher for PositionMatcher { - fn match_line(&mut self, line: &Line) -> MatchInfo { - // Advance cursor past any positions for earlier lines. - while self.cursor < self.positions.len() - && (self.positions[self.cursor].line as u64) < line.no - { - self.cursor += 1; - } - // First position on this line (if any) becomes the target column. - if let Some(p) = self.positions.get(self.cursor) { - if p.line as u64 == line.no { - return MatchInfo { - hit: true, - spans: Vec::new(), - col: Some(p.column), - }; - } - } - MatchInfo::default() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn mk_line(n: u64) -> Line { - Line::new(n, Vec::new()) - } - - #[test] - fn single_position_hits_correct_line_with_col() { - let sel = Selector::parse("23:260").unwrap(); - let mut m = PositionMatcher::from_selector(&sel); - assert!(!m.match_line(&mk_line(22)).hit); - let info = m.match_line(&mk_line(23)); - assert!(info.hit); - assert_eq!(info.col, Some(260)); - assert!(!m.match_line(&mk_line(24)).hit); - } - - #[test] - fn multiple_positions_hit_in_order() { - let sel = Selector::parse("5:10,5:20,9:3").unwrap(); - let mut m = PositionMatcher::from_selector(&sel); - let h5 = m.match_line(&mk_line(5)); - assert!(h5.hit); - assert_eq!(h5.col, Some(10)); - let h9 = m.match_line(&mk_line(9)); - assert!(h9.hit); - assert_eq!(h9.col, Some(3)); - } -} -``` - -Note: this simple cursor logic reports only the *first* position on any given line. That matches current `sel` behavior for positional selectors (each position renders one fragment per target line). Multiple positions on the same line will be a future extension. 
- -- [ ] **Step 2: Update `src/matcher/mod.rs`** - -Add `pub mod position;` and `pub use position::PositionMatcher;`. - -- [ ] **Step 3: Re-export in `src/lib.rs`** - -Extend the matcher re-export line: - -```rust -pub use matcher::{AllMatcher, LineMatcher, Matcher, PositionMatcher}; -``` - -- [ ] **Step 4: Run all tests** - -Run: `cargo test` -Expected: all pass. - -- [ ] **Step 5: Commit** - -```bash -git add src/matcher/ src/lib.rs -git commit -m "feat: add PositionMatcher" -``` - ---- - -### Task 10: `RegexMatcher` (with `invert`) - -Wrap `regex::Regex`. Include the `invert` field from day one so Phase C's `-v` flag is a pure wiring change. - -**Files:** -- Create: `src/matcher/regex.rs` -- Modify: `src/matcher/mod.rs` - -- [ ] **Step 1: Create `src/matcher/regex.rs`** - -```rust -//! Regex matcher. - -use super::Matcher; -use crate::error::SelError; -use crate::{Line, MatchInfo, Result}; -use regex::bytes::Regex; - -pub struct RegexMatcher { - regex: Regex, - invert: bool, -} - -impl RegexMatcher { - pub fn new(pattern: &str, invert: bool) -> Result { - let regex = Regex::new(pattern).map_err(|e| SelError::InvalidRegex(e.to_string()))?; - Ok(Self { regex, invert }) - } -} - -impl Matcher for RegexMatcher { - fn match_line(&mut self, line: &Line) -> MatchInfo { - let is_match = self.regex.is_match(&line.bytes); - let hit = is_match ^ self.invert; - // Inverted hits have nothing to highlight. 
- let spans = if hit && !self.invert { - self.regex - .find_iter(&line.bytes) - .map(|m| m.start()..m.end()) - .collect() - } else { - Vec::new() - }; - MatchInfo { hit, spans, col: None } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn mk(bytes: &[u8]) -> Line { - Line::new(1, bytes.to_vec()) - } - - #[test] - fn basic_match_gives_spans() { - let mut m = RegexMatcher::new("ERROR", false).unwrap(); - let info = m.match_line(&mk(b"an ERROR happened")); - assert!(info.hit); - assert_eq!(info.spans.len(), 1); - assert_eq!(info.spans[0], 3..8); - } - - #[test] - fn invert_flips_hit_and_clears_spans() { - let mut m = RegexMatcher::new("ERROR", true).unwrap(); - let miss_inverted = m.match_line(&mk(b"an ERROR happened")); - assert!(!miss_inverted.hit); - assert!(miss_inverted.spans.is_empty()); - - let hit_inverted = m.match_line(&mk(b"all clear")); - assert!(hit_inverted.hit); - assert!(hit_inverted.spans.is_empty()); - } - - #[test] - fn invalid_regex_errors() { - let err = RegexMatcher::new("(unclosed", false).unwrap_err(); - let msg = format!("{err}"); - assert!(msg.contains("invalid regex")); - } -} -``` - -Note the switch to `regex::bytes::Regex` — this lets us match on `&[u8]` directly, preserving byte-accurate offsets for highlight spans without forcing UTF-8 decoding. `regex` is already a dependency; `regex::bytes` is part of it. - -- [ ] **Step 2: Update `src/matcher/mod.rs`** - -Add `pub mod regex;` and `pub use self::regex::RegexMatcher;`. - -- [ ] **Step 3: Re-export in `src/lib.rs`** - -```rust -pub use matcher::{AllMatcher, LineMatcher, Matcher, PositionMatcher, RegexMatcher}; -``` - -- [ ] **Step 4: Run all tests** - -Run: `cargo test` -Expected: all pass. 
- -- [ ] **Step 5: Commit** - -```bash -git add src/matcher/ src/lib.rs -git commit -m "feat: add RegexMatcher with invert support" -``` - ---- - -### Task 11: Introduce `Expander` trait with `NoContext` and `LineContext` - -The context expander takes `(Line, MatchInfo)` and yields a stream of `Emit`s — either just hits (`NoContext`) or hits-plus-surrounding-lines (`LineContext`). - -**Files:** -- Create: `src/context.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/context.rs`** - -```rust -//! Context expander — turns hits into an emit plan, optionally including neighbors. - -use crate::{Emit, Line, MatchInfo, Role}; -use std::collections::VecDeque; - -/// An expander consumes `(Line, MatchInfo)` pairs and produces `Emit`s. -/// -/// The expander owns the line and match info until it emits them, because -/// it may need to buffer lines as context. -pub trait Expander { - /// Feed the next line/match pair. Call `drain()` after EOF to flush remaining context. - fn push(&mut self, line: Line, info: MatchInfo, out: &mut dyn FnMut(EmitOwned)); - - /// Called once at EOF to flush any buffered trailing context. - fn drain(&mut self, out: &mut dyn FnMut(EmitOwned)); -} - -/// Owned form of `Emit` — the expander hands these to the caller. -#[derive(Debug, Clone)] -pub struct EmitOwned { - pub line: Line, - pub role: Role, - pub match_info: MatchInfo, -} - -impl EmitOwned { - pub fn borrow(&self) -> Emit<'_> { - Emit { - line: &self.line, - role: self.role, - match_info: &self.match_info, - } - } -} - -/// Emits only hits, nothing else. -pub struct NoContext; - -impl Expander for NoContext { - fn push(&mut self, line: Line, info: MatchInfo, out: &mut dyn FnMut(EmitOwned)) { - if info.hit { - out(EmitOwned { - line, - role: Role::Target, - match_info: info, - }); - } - } - - fn drain(&mut self, _out: &mut dyn FnMut(EmitOwned)) {} -} - -/// Emits each hit plus `n` lines before and after, merging overlapping windows. 
-pub struct LineContext { - n: usize, - /// Ring buffer of the last `n` lines (oldest at front). - before: VecDeque<(Line, MatchInfo)>, - /// Lines remaining to emit as trailing context for a recent hit. - trailing: usize, - /// Highest line number already emitted (avoids duplicates on overlap). - last_emitted: u64, -} - -impl LineContext { - pub fn new(n: usize) -> Self { - Self { - n, - before: VecDeque::with_capacity(n), - trailing: 0, - last_emitted: 0, - } - } - - fn emit(&mut self, line: Line, info: MatchInfo, role: Role, out: &mut dyn FnMut(EmitOwned)) { - if line.no <= self.last_emitted { - return; - } - self.last_emitted = line.no; - out(EmitOwned { line, role, match_info: info }); - } -} - -impl Expander for LineContext { - fn push(&mut self, line: Line, info: MatchInfo, out: &mut dyn FnMut(EmitOwned)) { - if info.hit { - // Flush stored "before" lines as context. - let buffered: Vec<_> = self.before.drain(..).collect(); - for (bl, bi) in buffered { - self.emit(bl, bi, Role::Context, out); - } - let hit_line = line; - let hit_info = info; - self.emit(hit_line, hit_info, Role::Target, out); - self.trailing = self.n; - } else if self.trailing > 0 { - self.trailing -= 1; - self.emit(line, info, Role::Context, out); - } else { - // Record as potential "before" context. - if self.n > 0 { - if self.before.len() == self.n { - self.before.pop_front(); - } - self.before.push_back((line, info)); - } - } - } - - fn drain(&mut self, _out: &mut dyn FnMut(EmitOwned)) { - // Trailing lines were already emitted as they came. "Before" buffer is just dropped. 
- } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn hit(n: u64) -> (Line, MatchInfo) { - ( - Line::new(n, format!("line{n}").into_bytes()), - MatchInfo { hit: true, ..Default::default() }, - ) - } - fn miss(n: u64) -> (Line, MatchInfo) { - ( - Line::new(n, format!("line{n}").into_bytes()), - MatchInfo::default(), - ) - } - - fn collect(mut e: E, inputs: Vec<(Line, MatchInfo)>) -> Vec<(u64, Role)> { - let mut out: Vec<(u64, Role)> = Vec::new(); - { - let mut f = |emit: EmitOwned| out.push((emit.line.no, emit.role)); - for (l, i) in inputs { - e.push(l, i, &mut f); - } - e.drain(&mut f); - } - out - } - - #[test] - fn no_context_emits_only_hits() { - let out = collect(NoContext, vec![miss(1), hit(2), miss(3), hit(4)]); - assert_eq!(out, vec![(2, Role::Target), (4, Role::Target)]); - } - - #[test] - fn line_context_emits_around_hit() { - let out = collect( - LineContext::new(1), - vec![miss(1), miss(2), hit(3), miss(4), miss(5)], - ); - assert_eq!( - out, - vec![ - (2, Role::Context), - (3, Role::Target), - (4, Role::Context), - ] - ); - } - - #[test] - fn overlapping_contexts_do_not_duplicate() { - let out = collect( - LineContext::new(1), - vec![miss(1), hit(2), hit(3), miss(4)], - ); - assert_eq!( - out, - vec![ - (1, Role::Context), - (2, Role::Target), - (3, Role::Target), - (4, Role::Context), - ] - ); - } -} -``` - -- [ ] **Step 2: Register in `src/lib.rs`** - -Add `pub mod context;` and `pub use context::{EmitOwned, Expander, LineContext, NoContext};`. - -- [ ] **Step 3: Run all tests** - -Run: `cargo test` -Expected: all pass. - -- [ ] **Step 4: Commit** - -```bash -git add src/context.rs src/lib.rs -git commit -m "feat: add Expander trait with NoContext and LineContext" -``` - ---- - -### Task 12: Split `output.rs` into `format/` module with ANSI helper - -Split the existing `OutputFormatter` into `format/plain.rs` (full-line rendering with line-no, filename, highlight) and `format/fragment.rs` (char-context with caret). 
Extract the ANSI escapes into a tiny helper. The old `OutputFormatter` can stay behind an alias so `main.rs` keeps compiling until Task 16 rewrites the wiring. - -**Files:** -- Create: `src/format/mod.rs` -- Create: `src/format/ansi.rs` -- Create: `src/format/plain.rs` -- Create: `src/format/fragment.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/format/ansi.rs`** - -```rust -//! Minimal ANSI escape helpers — replaces the termcolor crate. - -pub const GREEN: &str = "\x1b[32m"; -pub const INVERSE: &str = "\x1b[7m"; -pub const RESET: &str = "\x1b[0m"; - -/// Wrap `text` in `code` … RESET if `enabled`, otherwise pass through. -pub fn paint(enabled: bool, code: &str, text: &str) -> String { - if enabled { - format!("{code}{text}{RESET}") - } else { - text.to_string() - } -} -``` - -- [ ] **Step 2: Create `src/format/mod.rs`** - -```rust -//! Output formatting. - -pub mod ansi; -pub mod fragment; -pub mod plain; - -pub use fragment::FragmentFormatter; -pub use plain::PlainFormatter; - -use crate::Emit; -use std::io; - -/// A formatter serializes one `Emit` into bytes. -pub trait Formatter { - fn write(&mut self, sink: &mut dyn io::Write, emit: &Emit) -> io::Result<()>; -} - -/// Common configuration shared by plain and fragment formatters. -#[derive(Debug, Clone)] -pub struct FormatOpts { - pub show_line_numbers: bool, - pub show_filename: bool, - pub filename: Option, - pub color: bool, -} - -impl FormatOpts { - pub fn prefix(&self, line_no: u64) -> String { - let mut p = String::new(); - if self.show_filename { - if let Some(f) = &self.filename { - p.push_str(f); - p.push(':'); - } - } - if self.show_line_numbers { - p.push_str(&line_no.to_string()); - p.push(':'); - } - p - } -} -``` - -- [ ] **Step 3: Create `src/format/plain.rs`** - -```rust -//! Plain-line formatter with optional line number, filename, and highlight. 
- -use super::{ansi, FormatOpts, Formatter}; -use crate::{Emit, Role}; -use std::io::{self, Write}; -use std::ops::Range; - -pub struct PlainFormatter { - pub opts: FormatOpts, -} - -impl PlainFormatter { - pub fn new(opts: FormatOpts) -> Self { - Self { opts } - } - - fn render_content(&self, bytes: &[u8], spans: &[Range]) -> String { - let text = String::from_utf8_lossy(bytes); - if !self.opts.color || spans.is_empty() { - return text.to_string(); - } - let mut sorted = spans.to_vec(); - sorted.sort_by_key(|r| r.start); - let mut out = String::new(); - let mut cursor = 0usize; - let t: &str = text.as_ref(); - for r in sorted { - let s = r.start.min(t.len()); - let e = r.end.min(t.len()); - if s < cursor { - continue; // overlap — skip - } - out.push_str(&t[cursor..s]); - out.push_str(ansi::INVERSE); - out.push_str(&t[s..e]); - out.push_str(ansi::RESET); - cursor = e; - } - out.push_str(&t[cursor..]); - out - } -} - -impl Formatter for PlainFormatter { - fn write(&mut self, sink: &mut dyn Write, emit: &Emit) -> io::Result<()> { - let marker = match emit.role { - Role::Target => ansi::paint(self.opts.color, ansi::GREEN, ">") + " ", - Role::Context => String::new(), - }; - let prefix = self.opts.prefix(emit.line.no); - let content = self.render_content(&emit.line.bytes, &emit.match_info.spans); - writeln!(sink, "{marker}{prefix}{content}") - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{Line, MatchInfo}; - - fn opts(color: bool) -> FormatOpts { - FormatOpts { - show_line_numbers: true, - show_filename: false, - filename: None, - color, - } - } - - #[test] - fn target_gets_marker_and_prefix() { - let line = Line::new(7, b"hello".to_vec()); - let mi = MatchInfo { hit: true, ..Default::default() }; - let emit = Emit { line: &line, role: Role::Target, match_info: &mi }; - let mut f = PlainFormatter::new(opts(false)); - let mut buf: Vec = Vec::new(); - f.write(&mut buf, &emit).unwrap(); - assert_eq!(String::from_utf8(buf).unwrap(), "> 7:hello\n"); - } - 
- #[test] - fn context_has_no_marker() { - let line = Line::new(3, b"ctx".to_vec()); - let mi = MatchInfo::default(); - let emit = Emit { line: &line, role: Role::Context, match_info: &mi }; - let mut f = PlainFormatter::new(opts(false)); - let mut buf: Vec = Vec::new(); - f.write(&mut buf, &emit).unwrap(); - assert_eq!(String::from_utf8(buf).unwrap(), "3:ctx\n"); - } - - #[test] - fn spans_highlight_when_color_enabled() { - let line = Line::new(1, b"an ERROR today".to_vec()); - let mi = MatchInfo { hit: true, spans: vec![3..8], ..Default::default() }; - let emit = Emit { line: &line, role: Role::Target, match_info: &mi }; - let mut f = PlainFormatter::new(opts(true)); - let mut buf: Vec = Vec::new(); - f.write(&mut buf, &emit).unwrap(); - let s = String::from_utf8(buf).unwrap(); - assert!(s.contains("\x1b[7mERROR\x1b[0m")); - } -} -``` - -- [ ] **Step 4: Create `src/format/fragment.rs`** - -```rust -//! Fragment formatter — char-context window with caret. -//! -//! Used when `-n` is set (positional selectors or `-e` + `-n`). - -use super::{ansi, FormatOpts, Formatter}; -use crate::Emit; -use std::io::{self, Write}; - -pub struct FragmentFormatter { - pub opts: FormatOpts, - pub char_context: usize, -} - -impl FragmentFormatter { - pub fn new(opts: FormatOpts, char_context: usize) -> Self { - Self { opts, char_context } - } -} - -impl Formatter for FragmentFormatter { - fn write(&mut self, sink: &mut dyn Write, emit: &Emit) -> io::Result<()> { - // Target column: from position matcher (`col`), else start of first regex span. 
- let target_col_1 = emit - .match_info - .col - .or_else(|| emit.match_info.spans.first().map(|r| r.start + 1)) - .unwrap_or(1); - - let bytes = &emit.line.bytes; - let col_idx = target_col_1 - .saturating_sub(1) - .min(bytes.len().saturating_sub(1)); - let start = col_idx.saturating_sub(self.char_context); - let end = bytes.len().min(col_idx + self.char_context + 1); - - let frag = String::from_utf8_lossy(&bytes[start..end]).to_string(); - let prefix = self.opts.prefix(emit.line.no); - - // Highlight the target span within the fragment if regex spans exist. - let rendered = if let Some(span) = emit.match_info.spans.first() { - if self.opts.color { - let hs = span.start.saturating_sub(start).min(frag.len()); - let he = (span.end.saturating_sub(start)).min(frag.len()); - if hs < he { - let (a, rest) = frag.split_at(hs); - let (b, c) = rest.split_at(he - hs); - format!("{a}{}{b}{}{c}", ansi::INVERSE, ansi::RESET) - } else { - frag.clone() - } - } else { - frag.clone() - } - } else { - frag.clone() - }; - - writeln!(sink, "{prefix}{rendered}")?; - - // Caret line, aligned under the target column within the fragment. 
- let caret_offset = col_idx - start + prefix.len(); - let spaces = " ".repeat(caret_offset); - let caret = ansi::paint(self.opts.color, ansi::GREEN, "^"); - writeln!(sink, "{spaces}{caret}") - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{Line, MatchInfo, Role}; - - #[test] - fn renders_fragment_with_caret_under_col() { - let line = Line::new(1, b"abcdefghij".to_vec()); - let mi = MatchInfo { hit: true, col: Some(5), ..Default::default() }; - let emit = Emit { line: &line, role: Role::Target, match_info: &mi }; - let opts = FormatOpts { - show_line_numbers: false, - show_filename: false, - filename: None, - color: false, - }; - let mut f = FragmentFormatter::new(opts, 2); - let mut buf: Vec = Vec::new(); - f.write(&mut buf, &emit).unwrap(); - let s = String::from_utf8(buf).unwrap(); - // Fragment: col=5 with context=2 → bytes [2..7] = "cdefg" - // Caret at col 5 → offset 2 in fragment - assert_eq!(s, "cdefg\n ^\n"); - } -} -``` - -- [ ] **Step 5: Register module in `src/lib.rs`** - -Add `pub mod format;` (and keep `pub mod output;` for now — we'll delete it when `main.rs` is rewritten in Task 16). - -Add re-exports: - -```rust -pub use format::{FormatOpts, Formatter, FragmentFormatter, PlainFormatter}; -``` - -- [ ] **Step 6: Run all tests** - -Run: `cargo test` -Expected: all pass, including new formatter tests. - -- [ ] **Step 7: Commit** - -```bash -git add src/format/ src/lib.rs -git commit -m "feat: add format/ module (PlainFormatter, FragmentFormatter, ANSI helper)" -``` - ---- - -### Task 13: `Sink` trait with `StdoutSink` - -The sink is a buffered `io::Write` with a flush hook and terminal detection for `--color=auto`. - -**Files:** -- Create: `src/sink/mod.rs` -- Create: `src/sink/stdout.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/sink/mod.rs`** - -```rust -//! Output sinks. 
- -pub mod stdout; - -pub use stdout::StdoutSink; - -use std::io::{self, Write}; - -/// A sink is a buffered writer that may know whether it's a terminal. -pub trait Sink: Write { - /// Is this sink attached to a terminal? Used for `--color=auto`. - fn is_terminal(&self) -> bool; - - /// Finalize output — flush and surface any error. - fn finish(self: Box) -> io::Result<()>; -} -``` - -- [ ] **Step 2: Create `src/sink/stdout.rs`** - -```rust -//! Buffered stdout sink. - -use super::Sink; -use std::io::{self, BufWriter, IsTerminal, Stdout, StdoutLock, Write}; - -pub struct StdoutSink { - writer: BufWriter>, - is_tty: bool, -} - -impl StdoutSink { - pub fn new() -> Self { - let out: Stdout = io::stdout(); - let is_tty = out.is_terminal(); - // SAFETY: we intentionally leak the lock for the lifetime of the process. - // The sink is consumed once at the end of `main`, which is the same lifetime. - let lock: StdoutLock<'static> = Box::leak(Box::new(out)).lock(); - Self { - writer: BufWriter::with_capacity(64 * 1024, lock), - is_tty, - } - } -} - -impl Default for StdoutSink { - fn default() -> Self { - Self::new() - } -} - -impl Write for StdoutSink { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.writer.write(buf) - } - fn flush(&mut self) -> io::Result<()> { - self.writer.flush() - } -} - -impl Sink for StdoutSink { - fn is_terminal(&self) -> bool { - self.is_tty - } - fn finish(self: Box) -> io::Result<()> { - let mut this = *self; - this.writer.flush() - } -} -``` - -- [ ] **Step 3: Register in `src/lib.rs`** - -Add `pub mod sink;` and `pub use sink::{Sink, StdoutSink};`. - -- [ ] **Step 4: Run all tests** - -Run: `cargo test` -Expected: all pass. - -- [ ] **Step 5: Commit** - -```bash -git add src/sink/ src/lib.rs -git commit -m "feat: add Sink trait and StdoutSink" -``` - ---- - -### Task 14: Typed `App` builder - -Enforce at compile time that `PositionMatcher` and `StdinSource` cannot combine. 
Phase-C `StdinSource` doesn't exist yet; we set up the type-level scaffolding so dropping it in later is one line. - -**Files:** -- Create: `src/app.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/app.rs`** - -```rust -//! Typed builder for a ready-to-run pipeline. -//! -//! Encodes the invariant that positional selectors cannot be paired with stdin. - -use crate::context::Expander; -use crate::format::Formatter; -use crate::matcher::Matcher; -use crate::sink::Sink; -use crate::source::Source; - -/// Type-level marker: any source. -pub trait SourceKind {} -/// Type-level marker: sources that allow positional selectors. -pub trait Seekable: SourceKind {} - -pub struct Seek; -pub struct NonSeek; - -impl SourceKind for Seek {} -impl SourceKind for NonSeek {} -impl Seekable for Seek {} - -pub struct App { - pub source: Box, - pub matcher: Box, - pub expander: Box, - pub formatter: Box, - pub sink: Box, - _k: std::marker::PhantomData, -} - -/// Stage 1: pick a source. -pub struct Stage1; - -impl Stage1 { - pub fn with_seekable_source(source: Box) -> Stage2 { - Stage2 { source, _k: std::marker::PhantomData } - } - pub fn with_nonseekable_source(source: Box) -> Stage2 { - Stage2 { source, _k: std::marker::PhantomData } - } -} - -/// Stage 2: pick a matcher. Positional only allowed on `Seek`. -pub struct Stage2 { - source: Box, - _k: std::marker::PhantomData, -} - -impl Stage2 { - pub fn with_matcher(self, matcher: Box) -> Stage3 { - Stage3 { source: self.source, matcher, _k: std::marker::PhantomData } - } -} - -impl Stage2 { - /// Positional matcher — only available on seekable sources. 
- pub fn with_position_matcher( - self, - matcher: crate::matcher::PositionMatcher, - ) -> Stage3 { - Stage3 { - source: self.source, - matcher: Box::new(matcher), - _k: std::marker::PhantomData, - } - } -} - -pub struct Stage3 { - source: Box, - matcher: Box, - _k: std::marker::PhantomData, -} - -impl Stage3 { - pub fn with_expander(self, expander: Box) -> Stage4 { - Stage4 { source: self.source, matcher: self.matcher, expander, _k: std::marker::PhantomData } - } -} - -pub struct Stage4 { - source: Box, - matcher: Box, - expander: Box, - _k: std::marker::PhantomData, -} - -impl Stage4 { - pub fn with_formatter(self, formatter: Box) -> Stage5 { - Stage5 { - source: self.source, - matcher: self.matcher, - expander: self.expander, - formatter, - _k: std::marker::PhantomData, - } - } -} - -pub struct Stage5 { - source: Box, - matcher: Box, - expander: Box, - formatter: Box, - _k: std::marker::PhantomData, -} - -impl Stage5 { - pub fn with_sink(self, sink: Box) -> App { - App { - source: self.source, - matcher: self.matcher, - expander: self.expander, - formatter: self.formatter, - sink, - _k: std::marker::PhantomData, - } - } -} -``` - -- [ ] **Step 2: Register in `src/lib.rs`** - -Add `pub mod app;` and `pub use app::{App, Stage1};`. - -- [ ] **Step 3: Run all tests** - -Run: `cargo test` -Expected: all pass. - -- [ ] **Step 4: Commit** - -```bash -git add src/app.rs src/lib.rs -git commit -m "feat: add typed App builder (Seek/NonSeek marker types)" -``` - ---- - -### Task 15: `pipeline::run()` - -The single generic driver. Replaces the nine `process_*` functions. - -**Files:** -- Create: `src/pipeline.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/pipeline.rs`** - -```rust -//! Single driver for all pipelines. - -use crate::app::{App, SourceKind}; -use crate::context::EmitOwned; -use crate::Emit; -use crate::Result; - -pub fn run(mut app: App) -> Result<()> { - // Read lines, run matcher, feed expander, write via formatter. 
- while let Some(line) = app.source.next_line()? { - let info = app.matcher.match_line(&line); - let formatter = &mut app.formatter; - let sink = &mut app.sink; - app.expander - .push(line, info, &mut |emit: EmitOwned| { - let borrowed = Emit { - line: &emit.line, - role: emit.role, - match_info: &emit.match_info, - }; - let _ = formatter.write(sink.as_mut(), &borrowed); - }); - } - let formatter = &mut app.formatter; - let sink = &mut app.sink; - app.expander.drain(&mut |emit: EmitOwned| { - let borrowed = Emit { - line: &emit.line, - role: emit.role, - match_info: &emit.match_info, - }; - let _ = formatter.write(sink.as_mut(), &borrowed); - }); - // Destructure and finalize the sink. - let App { sink, .. } = app; - sink.finish().map_err(|source| crate::SelError::Io { - path: "".to_string(), - source, - }) -} -``` - -Note: this intentionally swallows per-line `io::Error`s from `formatter.write` (broken pipe on head/less is common and expected). The final `finish()` surfaces a genuine flush error. If you prefer strict error propagation, change `let _` to `?` — but note that doing so makes `sel | head` spam stderr. - -- [ ] **Step 2: Register in `src/lib.rs`** - -Add `pub mod pipeline;` and `pub use pipeline::run;`. - -- [ ] **Step 3: Run all tests** - -Run: `cargo test` -Expected: all pass. - -- [ ] **Step 4: Commit** - -```bash -git add src/pipeline.rs src/lib.rs -git commit -m "feat: add pipeline::run — single driver for all modes" -``` - ---- - -### Task 16: Rewire `main.rs` around `App`/`pipeline::run`; delete old code - -Now the big switch. `main.rs` shrinks to parsing + building an `App` + calling `run()`. The nine `process_*` functions are deleted. `src/output.rs` and `src/reader.rs` are deleted once nothing imports them. 
- -**Files:** -- Modify: `src/main.rs` (rewrite) -- Modify: `src/cli.rs` (add `into_app` helper) -- Delete: `src/output.rs`, `src/reader.rs` -- Modify: `src/lib.rs` (drop old module decls + re-exports) - -- [ ] **Step 1: Write `Cli::into_app()` in `src/cli.rs`** - -Below the existing `impl Cli`, add: - -```rust -use crate::app::{Stage1, Seek}; -use crate::context::{LineContext, NoContext}; -use crate::format::{FormatOpts, FragmentFormatter, PlainFormatter}; -use crate::matcher::{AllMatcher, LineMatcher, PositionMatcher, RegexMatcher}; -use crate::sink::StdoutSink; -use crate::source::FileSource; -use crate::{App, Selector}; - -impl Cli { - /// Build a ready-to-run `App` for a single file. - /// Callers iterate over `get_files()` and build one App per file. - pub fn into_app(&self, path: &std::path::Path, show_filename: bool) -> crate::Result> { - let source = FileSource::open(path)?; - let filename = if show_filename { Some(source.label().to_string()) } else { None }; - let sink = StdoutSink::new(); - let color = match self.color.as_deref() { - Some("always") => true, - Some("never") => false, - _ => crate::sink::Sink::is_terminal(&sink), - }; - let opts = FormatOpts { - show_line_numbers: !self.no_line_numbers, - show_filename, - filename, - color, - }; - - // Matcher + seek-stage - let stage2 = Stage1::with_seekable_source(Box::new(source)); - let stage3 = if let Some(pat) = &self.regex { - stage2.with_matcher(Box::new(RegexMatcher::new(pat, false)?)) - } else if let Some(raw) = self.get_selector() { - let sel = Selector::parse(&raw)?; - match sel { - Selector::All => stage2.with_matcher(Box::new(AllMatcher)), - Selector::LineNumbers(_) => stage2.with_matcher(Box::new(LineMatcher::from_selector(&sel))), - Selector::Positions(_) => stage2.with_position_matcher(PositionMatcher::from_selector(&sel)), - } - } else { - stage2.with_matcher(Box::new(AllMatcher)) - }; - - // Expander - let stage4 = match self.context { - Some(n) if n > 0 => 
stage3.with_expander(Box::new(LineContext::new(n))), - _ => stage3.with_expander(Box::new(NoContext)), - }; - - // Formatter - let stage5 = if let Some(n) = self.char_context { - stage4.with_formatter(Box::new(FragmentFormatter::new(opts, n))) - } else { - stage4.with_formatter(Box::new(PlainFormatter::new(opts))) - }; - - Ok(stage5.with_sink(Box::new(sink))) - } -} -``` - -The `Sink::is_terminal()` trait method works on an owned `StdoutSink` too (trait methods resolve on both owned and boxed values). We call it via fully-qualified syntax above to avoid ambiguity if `StdoutSink` later grows an inherent method of the same name. - -- [ ] **Step 2: Rewrite `src/main.rs`** - -Replace the entire file with: - -```rust -//! `sel` — Select Slices from Text Files - -use clap::Parser; -use sel::cli::Cli; -use std::process; - -fn main() { - let cli = Cli::parse(); - if let Err(e) = cli.validate() { - eprintln!("error: {e}"); - process::exit(1); - } - if let Err(e) = run(cli) { - eprintln!("error: {e}"); - process::exit(1); - } -} - -fn run(cli: Cli) -> sel::Result<()> { - let files = cli.get_files(); - if files.is_empty() { - return Err(sel::SelError::InvalidSelector( - "no input files specified".to_string(), - )); - } - let show_filename = cli.with_filename || files.len() > 1; - for path in &files { - let app = cli.into_app(path, show_filename)?; - sel::pipeline::run(app)?; - } - Ok(()) -} -``` - -- [ ] **Step 3: Delete old modules** - -```bash -git rm src/output.rs src/reader.rs -``` - -- [ ] **Step 4: Update `src/lib.rs`** - -Remove `pub mod output;` and `pub mod reader;`. Remove any re-exports that referenced them (none should remain after prior tasks). - -- [ ] **Step 5: Run all tests** - -Run: `cargo test` -Expected: all integration tests pass. This is the moment of truth — if something fails, the pipeline doesn't match legacy behavior somewhere. - -Known-good diagnostic steps if tests fail: -1. Re-run a single failing test with `--nocapture` to see actual output. -2. 
Compare to expected output — most likely culprit is `-c` with regex, which now merges context via `LineContext` instead of the legacy full-buffer approach. -3. If a test depends on specific formatting of `FragmentFormatter`, cross-check with the old `Fragment::format` logic. - -- [ ] **Step 6: Run clippy + fmt** - -Run: `cargo clippy --all-targets -- -D warnings && cargo fmt --check` -Expected: clean. - -- [ ] **Step 7: Commit** - -```bash -git add -A -git commit -m "refactor: wire main.rs around App+pipeline; delete legacy output/reader modules" -``` - ---- - -## Phase C — New features - -### Task 17: `StdinSource` - -**Files:** -- Create: `src/source/stdin.rs` -- Modify: `src/source/mod.rs` -- Modify: `src/lib.rs` - -- [ ] **Step 1: Create `src/source/stdin.rs`** - -```rust -//! Stdin-backed Source. - -use super::Source; -use crate::error::SelError; -use crate::{Line, Result}; -use std::io::{self, BufRead, BufReader, Stdin, StdinLock}; - -pub struct StdinSource { - reader: BufReader>, - line_no: u64, -} - -impl StdinSource { - pub fn new() -> Self { - let stdin: Stdin = io::stdin(); - // SAFETY: mirrors StdoutSink — lock lifetime = process lifetime. 
- let lock: StdinLock<'static> = Box::leak(Box::new(stdin)).lock(); - Self { - reader: BufReader::new(lock), - line_no: 0, - } - } -} - -impl Default for StdinSource { - fn default() -> Self { - Self::new() - } -} - -impl Source for StdinSource { - fn next_line(&mut self) -> Result> { - let mut buf: Vec = Vec::new(); - let n = self - .reader - .read_until(b'\n', &mut buf) - .map_err(|source| SelError::Io { - path: "-".to_string(), - source, - })?; - if n == 0 { - return Ok(None); - } - if buf.ends_with(b"\n") { - buf.pop(); - if buf.ends_with(b"\r") { - buf.pop(); - } - } - self.line_no += 1; - Ok(Some(Line::new(self.line_no, buf))) - } - - fn label(&self) -> &str { - "-" - } - - fn is_seekable(&self) -> bool { - false - } -} -``` - -- [ ] **Step 2: Register in `src/source/mod.rs`** - -Add `pub mod stdin;` and `pub use stdin::StdinSource;`. - -- [ ] **Step 3: Run existing tests** - -Run: `cargo test` -Expected: all pass. No new tests yet — stdin is tricky to unit-test; integration tests follow in Task 18. 
- -- [ ] **Step 4: Commit** - -```bash -git add src/source/ src/lib.rs -git commit -m "feat: add StdinSource" -``` - ---- - -### Task 18: CLI accepts `-` and empty file list for stdin; integration tests - -**Files:** -- Modify: `src/cli.rs` -- Modify: `src/main.rs` -- Create: `tests/stdin.rs` - -- [ ] **Step 1: Write integration tests first** - -Create `tests/stdin.rs`: - -```rust -use std::io::Write; -use std::process::{Command, Stdio}; - -fn sel_bin() -> Command { - Command::new(env!("CARGO_BIN_EXE_sel")) -} - -fn run_with_stdin(args: &[&str], stdin: &str) -> (String, String, i32) { - let mut child = sel_bin() - .args(args) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .unwrap(); - child.stdin.as_mut().unwrap().write_all(stdin.as_bytes()).unwrap(); - let out = child.wait_with_output().unwrap(); - ( - String::from_utf8(out.stdout).unwrap(), - String::from_utf8(out.stderr).unwrap(), - out.status.code().unwrap_or(-1), - ) -} - -#[test] -fn no_args_reads_stdin_as_cat_n() { - let (stdout, _, code) = run_with_stdin(&[], "alpha\nbeta\ngamma\n"); - assert_eq!(code, 0); - assert_eq!(stdout, "1:alpha\n2:beta\n3:gamma\n"); -} - -#[test] -fn dash_is_stdin() { - let (stdout, _, code) = run_with_stdin(&["2", "-"], "one\ntwo\nthree\n"); - assert_eq!(code, 0); - assert_eq!(stdout, "2:two\n"); -} - -#[test] -fn positional_selector_with_stdin_errors() { - let (_, stderr, code) = run_with_stdin(&["1:5"], "hello world\n"); - assert_ne!(code, 0); - assert!(stderr.contains("stdin")); -} -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cargo test --test stdin` -Expected: FAIL — stdin not yet supported. - -- [ ] **Step 3: Update `Cli::get_files` and `looks_like_selector`** - -In `src/cli.rs`, `looks_like_selector` should now also return `false` for `"-"` (so `sel 2 -` is recognized as selector+file). 
Add this early return near the top of the function: - -```rust -if s == "-" { - return false; -} -``` - -Update `Cli::get_files` to allow empty-args → stdin: - -```rust -pub fn get_files(&self) -> Vec<std::path::PathBuf> { - if self.args.is_empty() { - return vec![std::path::PathBuf::from("-")]; - } - if self.regex.is_some() { - return self.args.iter().map(std::path::PathBuf::from).collect(); - } - let start = if self.looks_like_selector(&self.args[0]) { 1 } else { 0 }; - let files: Vec<_> = self.args[start..].iter().map(std::path::PathBuf::from).collect(); - if files.is_empty() { - vec![std::path::PathBuf::from("-")] - } else { - files - } -} -``` - -Also remove `#[arg(..., required = true)]` from `args: Vec<String>` so clap accepts zero positional args. - -Update `validate()` — no longer require non-empty files (stdin is the default): - -```rust -pub fn validate(&self) -> crate::Result<()> { - if self.char_context.is_some() - && self.regex.is_none() - && !self.get_selector().as_ref().is_some_and(|s| s.contains(':')) - { - return Err(crate::SelError::CharContextWithoutTarget); - } - Ok(()) -} -``` - -- [ ] **Step 4: Teach `Cli::into_app` and `main::run` about stdin** - -Refactor `Cli::into_app` into two builders — one for seekable sources (files), one for non-seekable (stdin). For positional selectors with stdin, return `SelError::PositionalWithStdin` instead. - -First extend the import line at the top of `src/cli.rs` that Task 16 added, from: - -```rust -use crate::app::{Stage1, Seek}; -``` - -to: - -```rust -use crate::app::{NonSeek, Seek, Stage1}; -``` - -Then replace the `into_app` method with: - -```rust -pub fn into_app_for_file(&self, path: &std::path::Path, show_filename: bool) -> crate::Result<App<Seek>> { - // ... 
(prior body, unchanged) -} - -pub fn into_app_for_stdin(&self, show_filename: bool) -> crate::Result> { - if let Some(raw) = self.get_selector() { - if raw.contains(':') { - return Err(crate::SelError::PositionalWithStdin); - } - } - let source = crate::source::StdinSource::new(); - let filename = if show_filename { Some("-".to_string()) } else { None }; - let sink = crate::sink::StdoutSink::new(); - let color = match self.color.as_deref() { - Some("always") => true, - Some("never") => false, - _ => crate::sink::Sink::is_terminal(&sink), - }; - let opts = crate::format::FormatOpts { - show_line_numbers: !self.no_line_numbers, - show_filename, - filename, - color, - }; - - let stage2 = crate::app::Stage1::with_nonseekable_source(Box::new(source)); - let stage3 = if let Some(pat) = &self.regex { - stage2.with_matcher(Box::new(crate::matcher::RegexMatcher::new(pat, false)?)) - } else if let Some(raw) = self.get_selector() { - let sel = crate::Selector::parse(&raw)?; - match sel { - crate::Selector::All => stage2.with_matcher(Box::new(crate::matcher::AllMatcher)), - crate::Selector::LineNumbers(_) => { - stage2.with_matcher(Box::new(crate::matcher::LineMatcher::from_selector(&sel))) - } - crate::Selector::Positions(_) => return Err(crate::SelError::PositionalWithStdin), - } - } else { - stage2.with_matcher(Box::new(crate::matcher::AllMatcher)) - }; - - let stage4 = match self.context { - Some(n) if n > 0 => stage3.with_expander(Box::new(crate::context::LineContext::new(n))), - _ => stage3.with_expander(Box::new(crate::context::NoContext)), - }; - - let stage5 = if let Some(n) = self.char_context { - stage4.with_formatter(Box::new(crate::format::FragmentFormatter::new(opts, n))) - } else { - stage4.with_formatter(Box::new(crate::format::PlainFormatter::new(opts))) - }; - - Ok(stage5.with_sink(Box::new(sink))) -} -``` - -Rename the old `into_app` → `into_app_for_file`. 
Update `main.rs::run` to dispatch: - -```rust -fn run(cli: Cli) -> sel::Result<()> { - let files = cli.get_files(); - let show_filename = cli.with_filename || files.len() > 1; - for path in &files { - if path.as_os_str() == "-" { - sel::pipeline::run(cli.into_app_for_stdin(show_filename)?)?; - } else { - sel::pipeline::run(cli.into_app_for_file(path, show_filename)?)?; - } - } - Ok(()) -} -``` - -Because `App<Seek>` and `App<NonSeek>` are different types, `pipeline::run` being generic over `K: SourceKind` lets both calls compile. - -- [ ] **Step 5: Run all tests** - -Run: `cargo test` -Expected: all pass, including the three new `tests/stdin.rs` tests. - -- [ ] **Step 6: Commit** - -```bash -git add src/cli.rs src/main.rs tests/stdin.rs -git commit -m "feat: stdin input (no-arg or '-'), typed-guard positional+stdin" -``` - ---- - -### Task 19: `FileSink` - -**Files:** -- Create: `src/sink/file.rs` -- Modify: `src/sink/mod.rs` - -- [ ] **Step 1: Write the tests first** - -Append to `src/sink/file.rs` (create file with tests + impl together): - -```rust -//! File sink with create-new/force behaviour. 
- -use super::Sink; -use crate::error::SelError; -use crate::Result; -use std::fs::{File, OpenOptions}; -use std::io::{self, BufWriter, Write}; -use std::path::{Path, PathBuf}; - -pub struct FileSink { - writer: BufWriter, - path: PathBuf, -} - -impl FileSink { - pub fn create(path: &Path, force: bool) -> Result { - let mut opts = OpenOptions::new(); - opts.write(true); - if force { - opts.create(true).truncate(true); - } else { - opts.create_new(true); - } - let file = opts.open(path).map_err(|source| { - if source.kind() == io::ErrorKind::AlreadyExists { - SelError::OutputExists(path.to_path_buf()) - } else { - SelError::Io { path: path.display().to_string(), source } - } - })?; - Ok(Self { - writer: BufWriter::with_capacity(64 * 1024, file), - path: path.to_path_buf(), - }) - } -} - -impl Write for FileSink { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.writer.write(buf) - } - fn flush(&mut self) -> io::Result<()> { - self.writer.flush() - } -} - -impl Sink for FileSink { - fn is_terminal(&self) -> bool { - false - } - fn finish(self: Box) -> io::Result<()> { - let mut this = *self; - this.writer.flush() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::tempdir; - - #[test] - fn create_new_succeeds_on_fresh_path() { - let dir = tempdir().unwrap(); - let p = dir.path().join("out.txt"); - let mut s = FileSink::create(&p, false).unwrap(); - s.write_all(b"hi\n").unwrap(); - Box::new(s).finish().unwrap(); - assert_eq!(std::fs::read_to_string(&p).unwrap(), "hi\n"); - } - - #[test] - fn create_fails_when_exists_without_force() { - let dir = tempdir().unwrap(); - let p = dir.path().join("exists.txt"); - std::fs::write(&p, "prior").unwrap(); - let err = FileSink::create(&p, false).unwrap_err(); - assert!(matches!(err, SelError::OutputExists(_))); - } - - #[test] - fn force_truncates_existing() { - let dir = tempdir().unwrap(); - let p = dir.path().join("force.txt"); - std::fs::write(&p, "old content that is longer than new").unwrap(); - let 
mut s = FileSink::create(&p, true).unwrap(); - s.write_all(b"new\n").unwrap(); - Box::new(s).finish().unwrap(); - assert_eq!(std::fs::read_to_string(&p).unwrap(), "new\n"); - } -} -``` - -- [ ] **Step 2: Register in `src/sink/mod.rs`** - -Add `pub mod file;` and `pub use file::FileSink;`. - -- [ ] **Step 3: Run tests** - -Run: `cargo test --lib sink::file::tests` -Expected: all three pass. - -- [ ] **Step 4: Commit** - -```bash -git add src/sink/ -git commit -m "feat: add FileSink with create-new and force semantics" -``` - ---- - -### Task 20: CLI `-o` / `--output` and `--force`, integration tests - -**Files:** -- Modify: `src/cli.rs` (add flags, rewire sink selection) -- Create: `tests/output_file.rs` - -- [ ] **Step 1: Write integration tests first** - -Create `tests/output_file.rs`: - -```rust -use std::io::Write; -use std::process::{Command, Stdio}; -use tempfile::tempdir; - -fn sel_bin() -> Command { - Command::new(env!("CARGO_BIN_EXE_sel")) -} - -#[test] -fn writes_to_output_file() { - let dir = tempdir().unwrap(); - let input = dir.path().join("in.txt"); - let output = dir.path().join("out.txt"); - std::fs::write(&input, "a\nb\nc\n").unwrap(); - - let status = sel_bin() - .args(["2", input.to_str().unwrap(), "-o", output.to_str().unwrap()]) - .status() - .unwrap(); - assert!(status.success()); - assert_eq!(std::fs::read_to_string(&output).unwrap(), "2:b\n"); -} - -#[test] -fn refuses_to_overwrite_by_default() { - let dir = tempdir().unwrap(); - let input = dir.path().join("in.txt"); - let output = dir.path().join("out.txt"); - std::fs::write(&input, "x\n").unwrap(); - std::fs::write(&output, "do not clobber").unwrap(); - - let out = sel_bin() - .args(["1", input.to_str().unwrap(), "-o", output.to_str().unwrap()]) - .stderr(Stdio::piped()) - .output() - .unwrap(); - assert!(!out.status.success()); - let msg = String::from_utf8(out.stderr).unwrap(); - assert!(msg.contains("already exists")); - assert!(msg.contains("--force")); - 
assert_eq!(std::fs::read_to_string(&output).unwrap(), "do not clobber"); -} - -#[test] -fn force_overwrites() { - let dir = tempdir().unwrap(); - let input = dir.path().join("in.txt"); - let output = dir.path().join("out.txt"); - std::fs::write(&input, "new\n").unwrap(); - std::fs::write(&output, "old").unwrap(); - - let status = sel_bin() - .args(["1", input.to_str().unwrap(), "-o", output.to_str().unwrap(), "--force"]) - .status() - .unwrap(); - assert!(status.success()); - assert_eq!(std::fs::read_to_string(&output).unwrap(), "1:new\n"); -} - -#[test] -fn dash_output_is_stdout() { - let dir = tempdir().unwrap(); - let input = dir.path().join("in.txt"); - std::fs::write(&input, "stdout\n").unwrap(); - - let out = sel_bin() - .args(["1", input.to_str().unwrap(), "-o", "-"]) - .output() - .unwrap(); - assert!(out.status.success()); - assert_eq!(String::from_utf8(out.stdout).unwrap(), "1:stdout\n"); -} -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cargo test --test output_file` -Expected: FAIL — `-o` unknown arg. - -- [ ] **Step 3: Add flags to `Cli`** - -In `src/cli.rs`, add these fields to the `Cli` struct: - -```rust -/// Write output to FILE instead of stdout. Use `-` for stdout explicitly. -#[arg(short = 'o', long = "output", value_name = "FILE")] -pub output: Option, - -/// With `-o`, overwrite an existing file. 
-#[arg(long = "force")] -pub force: bool, -``` - -- [ ] **Step 4: Rewire sink selection in `Cli::into_app_for_file`/`_for_stdin`** - -Extract a helper: - -```rust -impl Cli { - fn make_sink(&self) -> crate::Result> { - match self.output.as_deref() { - None | Some("-") => Ok(Box::new(crate::sink::StdoutSink::new())), - Some(path) => { - let sink = crate::sink::FileSink::create(std::path::Path::new(path), self.force)?; - Ok(Box::new(sink)) - } - } - } - - fn resolve_color(&self, to_terminal: bool) -> bool { - match self.color.as_deref() { - Some("always") => true, - Some("never") => false, - _ => to_terminal, - } - } -} -``` - -In both `into_app_for_file` and `into_app_for_stdin`, replace the inline sink construction with `let sink = self.make_sink()?;`. When the sink is a `FileSink`, `is_terminal()` is `false`, so `--color=auto` resolves to `never` automatically. Adjust the `color` computation: - -```rust -let color = self.resolve_color(sink.is_terminal()); -``` - -- [ ] **Step 5: Run all tests** - -Run: `cargo test` -Expected: all pass, including the four new `output_file` tests. 
- -- [ ] **Step 6: Commit** - -```bash -git add src/cli.rs tests/output_file.rs -git commit -m "feat: add -o/--output and --force flags" -``` - ---- - -### Task 21: `-v` / `--invert-match` - -**Files:** -- Modify: `src/cli.rs` (add `invert`, validate, thread into `RegexMatcher`) -- Create: `tests/invert.rs` - -- [ ] **Step 1: Write integration tests first** - -Create `tests/invert.rs`: - -```rust -use std::io::Write; -use std::process::{Command, Stdio}; -use tempfile::NamedTempFile; - -fn sel_bin() -> Command { - Command::new(env!("CARGO_BIN_EXE_sel")) -} - -#[test] -fn invert_emits_non_matching_lines() { - let mut f = NamedTempFile::new().unwrap(); - writeln!(f, "keep").unwrap(); - writeln!(f, "drop ERROR").unwrap(); - writeln!(f, "keep too").unwrap(); - - let out = sel_bin() - .args(["-v", "-e", "ERROR", f.path().to_str().unwrap()]) - .output() - .unwrap(); - assert!(out.status.success()); - let stdout = String::from_utf8(out.stdout).unwrap(); - assert!(stdout.contains("keep")); - assert!(stdout.contains("keep too")); - assert!(!stdout.contains("drop ERROR")); -} - -#[test] -fn invert_without_regex_is_error() { - let mut f = NamedTempFile::new().unwrap(); - writeln!(f, "whatever").unwrap(); - let out = sel_bin() - .args(["-v", f.path().to_str().unwrap()]) - .stderr(Stdio::piped()) - .output() - .unwrap(); - assert!(!out.status.success()); - let stderr = String::from_utf8(out.stderr).unwrap(); - assert!(stderr.contains("--invert-match requires --regex")); -} -``` - -- [ ] **Step 2: Run tests to verify failure** - -Run: `cargo test --test invert` -Expected: FAIL — `-v` unknown. - -- [ ] **Step 3: Add flag to `Cli`** - -```rust -/// Invert the regex match: emit lines that do NOT match -e. -#[arg(short = 'v', long = "invert-match", requires = "regex")] -pub invert: bool, -``` - -Note: `requires = "regex"` tells clap to reject `-v` without `-e` during parsing, giving a clean error message. 
- -- [ ] **Step 4: Thread `invert` into `RegexMatcher`** - -In both `into_app_for_file` and `into_app_for_stdin`, change: - -```rust -stage2.with_matcher(Box::new(RegexMatcher::new(pat, false)?)) -``` - -to - -```rust -stage2.with_matcher(Box::new(RegexMatcher::new(pat, self.invert)?)) -``` - -The clap `requires` attribute provides the error; we don't need a secondary check in `validate()`. But the spec specifies error text "--invert-match requires --regex", so verify clap's default message matches closely enough. If not, keep the attribute AND add a `validate()` check that emits `SelError::InvertWithoutRegex` directly — the `validate()` path produces the exact text. Simpler: drop `requires` and add manual check: - -```rust -pub fn validate(&self) -> crate::Result<()> { - if self.invert && self.regex.is_none() { - return Err(crate::SelError::InvertWithoutRegex); - } - if self.char_context.is_some() - && self.regex.is_none() - && !self.get_selector().as_ref().is_some_and(|s| s.contains(':')) - { - return Err(crate::SelError::CharContextWithoutTarget); - } - Ok(()) -} -``` - -Remove `requires = "regex"` from the `invert` field annotation. - -- [ ] **Step 5: Run all tests** - -Run: `cargo test` -Expected: all pass. 
- -- [ ] **Step 6: Commit** - -```bash -git add src/cli.rs tests/invert.rs -git commit -m "feat: add -v/--invert-match for regex" -``` - ---- - -## Phase D — Release - -### Task 22: Cargo.toml polish & CHANGELOG - -**Files:** -- Modify: `Cargo.toml` -- Modify: `CHANGELOG.md` - -- [ ] **Step 1: Update `Cargo.toml`** - -Replace the `[package]` block with: - -```toml -[package] -name = "sel" -version = "0.2.0" -description = "Select slices from text files by line numbers, ranges, positions, or regex" -repository = "https://github.com/InkyQuill/sel" -homepage = "https://github.com/InkyQuill/sel" -documentation = "https://docs.rs/sel" -readme = "README.md" -license = "MIT OR Apache-2.0" -authors = ["InkyQuill "] -edition = "2024" -rust-version = "1.92" -categories = ["command-line-utilities", "text-processing"] -keywords = ["cli", "grep", "text", "select", "extract"] -``` - -- [ ] **Step 2: Update `CHANGELOG.md`** - -Prepend: - -```markdown -## [0.2.0] — 2026-04-18 - -### Added -- Read from stdin when no file is given or when `-` is used as a filename. -- `-o`/`--output FILE` writes to a file; fails if the file exists unless `--force` is passed. Use `-o -` to force stdout. -- `-v`/`--invert-match` emits lines that do NOT match `-e PATTERN`. - -### Changed -- Internal refactor: every run is now a single five-stage pipeline (Source → Matcher → Expander → Formatter → Sink) driven by one generic `pipeline::run()`. `main.rs` shrunk from ~600 lines to ~30. -- Typed `App` builder makes positional selectors with stdin a compile-time error; CLI catches the same with a clear runtime message. -- `SelError::Io` now always carries the offending file path. -- Release pipeline migrated to `cargo-dist` + crates.io auto-publish on tag. - -### Removed -- `anyhow`, `termcolor`, `is-terminal` dependencies (unused or subsumed by std). -- Legacy `Message` and `FileNotFound` error variants (replaced by specific variants). 
-``` - -- [ ] **Step 3: Verify build still clean** - -Run: `cargo build --release && cargo test` -Expected: pass. - -- [ ] **Step 4: Commit** - -```bash -git add Cargo.toml CHANGELOG.md -git commit -m "chore: bump to 0.2.0, polish crate metadata, update changelog" -``` - ---- - -### Task 23: Consolidate CI workflow - -**Files:** -- Modify: `.github/workflows/ci.yml` - -- [ ] **Step 1: Inspect current CI** - -Run: `cat .github/workflows/ci.yml` -(just to see what's there; the replacement below is a full rewrite). - -- [ ] **Step 2: Rewrite `.github/workflows/ci.yml`** - -```yaml -name: CI - -on: - push: - branches: [main] - pull_request: - -env: - CARGO_TERM_COLOR: always - RUST_BACKTRACE: 1 - -jobs: - test: - name: test ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - with: - components: clippy, rustfmt - - uses: Swatinem/rust-cache@v2 - - run: cargo fmt --check - - run: cargo clippy --all-targets -- -D warnings - - run: cargo test --all-features -``` - -- [ ] **Step 3: Commit** - -```bash -git add .github/workflows/ci.yml -git commit -m "ci: simplify CI to one matrix job, drop hand-rolled packaging" -``` - ---- - -### Task 24: `cargo-dist` + auto-publish release workflow - -**Files:** -- Delete: `.github/workflows/release.yml` (old) -- Modify: `Cargo.toml` (cargo-dist metadata) -- Create: `.github/workflows/release.yml` (new — generated by cargo-dist) - -- [ ] **Step 1: Install cargo-dist locally** - -Run: `cargo install cargo-dist --version "^0.22"` -Expected: installed; `cargo dist --version` prints 0.22.x. - -- [ ] **Step 2: Remove the old release workflow** - -```bash -git rm .github/workflows/release.yml -``` - -- [ ] **Step 3: Initialize cargo-dist** - -Run: `cargo dist init` - -When prompted: -- Installers: **"shell"** (curl | sh for Linux/macOS) and **"powershell"** (Windows). 
-- Targets: `x86_64-unknown-linux-gnu`, `aarch64-unknown-linux-gnu`, `x86_64-apple-darwin`, `aarch64-apple-darwin`, `x86_64-pc-windows-msvc`. -- CI provider: **GitHub**. -- Publish to crates.io: **yes**. -- Announcement mode: **"package"**. - -This writes a `[workspace.metadata.dist]` section to `Cargo.toml` and generates `.github/workflows/release.yml`. - -- [ ] **Step 4: Verify generated workflow references `CARGO_REGISTRY_TOKEN`** - -Run: `grep -n "CARGO_REGISTRY_TOKEN" .github/workflows/release.yml` -Expected: at least one occurrence (cargo-dist uses this secret name by default). - -- [ ] **Step 5: Verify `cargo dist plan` succeeds** - -Run: `cargo dist plan` -Expected: no errors; plan prints the artifacts that would be built. - -- [ ] **Step 6: Commit** - -```bash -git add Cargo.toml .github/workflows/release.yml -git rm .github/workflows/release.yml # only if the old path differs; cargo-dist may have re-created it at the same path -git commit -m "ci: migrate release pipeline to cargo-dist with crates.io publish" -``` - -(If the old `release.yml` was replaced in place, just `git add` — no rm needed.) - ---- - -### Task 25: Dry-run release & final verification - -**Files:** none modified. - -- [ ] **Step 1: Verify all tests green one last time** - -Run: `cargo test` -Expected: pass. - -- [ ] **Step 2: Verify clippy + fmt green** - -Run: `cargo clippy --all-targets -- -D warnings && cargo fmt --check` -Expected: no output from fmt, no warnings from clippy. - -- [ ] **Step 3: Verify publish would work (no actual publish)** - -Run: `cargo publish --dry-run` -Expected: succeeds; lists files included in the package. - -- [ ] **Step 4: Verify binary size is under the 800 KB target** - -Run: `cargo build --release && ls -lh target/release/sel` -Expected: binary under 800 KB on Linux x86_64. If over, investigate with `cargo bloat --release --crates` (user can install `cargo-bloat` ad-hoc). 
- -- [ ] **Step 5: Open PR** - -```bash -git push -u origin v0.2-refactor -gh pr create --title "v0.2: pipeline refactor, stdin, -o, -v, cargo-dist release" --body "$(cat <<'EOF' -## Summary -- Refactored sel into a clean five-stage pipeline (Source → Matcher → Expander → Formatter → Sink), replacing 9 process_* functions with one generic run(). -- Added stdin input (no file or '-'). -- Added -o/--output FILE and --force. -- Added -v/--invert-match (requires -e). -- Typed App builder + CLI validation together rule out positional+stdin combinations. -- Dropped anyhow, termcolor, is-terminal (unused or subsumed by std). -- Migrated release to cargo-dist with crates.io auto-publish. -- Bumped to 0.2.0; updated CHANGELOG. - -Spec: docs/superpowers/specs/2026-04-18-sel-v0.2-design.md -Plan: docs/superpowers/plans/2026-04-18-sel-v0.2.md - -## Test plan -- [ ] CI green on Linux, macOS, Windows -- [ ] `cargo test` passes locally -- [ ] `cargo publish --dry-run` succeeds -- [ ] `cargo dist plan` succeeds -- [ ] Release binary under 800 KB -- [ ] Manual smoke: `sel 1-3 README.md`, `cat README.md | sel -e sel`, `sel 1-3 README.md -o /tmp/o.txt`, `sel -v -e foo README.md` -EOF -)" -``` - ---- - -## Self-review checklist - -Verified before handoff: - -- **Spec coverage:** every bullet in the spec's goals/non-goals has a corresponding task. Typed-state guarantee = Task 14 + Task 18; binary-size target = Task 25; path-aware errors = Task 5; crates.io on tag = Task 24; new tests for every SelError variant = Task 5 (InvertWithoutRegex implicitly via Task 21 test, OutputExists via Task 19 and Task 20 tests). -- **No placeholders.** Every code block is real; no "TBD" or "similar to". -- **Type consistency.** `SelError` variants introduced in Task 5 (`PositionalWithStdin`, `InvertWithoutRegex`, `OutputExists`, `CharContextWithoutTarget`, `Io{path,source}`) match their usages in Tasks 18/20/21. `App<Seek>` / `App<NonSeek>` defined in Task 14 are used in Task 18. 
`Line`, `MatchInfo`, `Emit`, `Role` names consistent across Tasks 6–16. -- **Refactor tests stay green throughout Phase B.** New behavior additions in Phase C are TDD (test-first). diff --git a/docs/superpowers/specs/2026-04-18-sel-v0.2-design.md b/docs/superpowers/specs/2026-04-18-sel-v0.2-design.md deleted file mode 100644 index 0941a58..0000000 --- a/docs/superpowers/specs/2026-04-18-sel-v0.2-design.md +++ /dev/null @@ -1,266 +0,0 @@ -# sel v0.2 — Design - -Date: 2026-04-18 -Status: Approved (awaiting implementation plan) - -## Goals - -Make `sel` easier to maintain, lighter, and more featureful without changing its identity as a single-purpose text-selection CLI. Concretely: - -1. Refactor the codebase from 9 forked `process_*` functions into a single orthogonal pipeline, so adding or changing a feature touches one stage instead of a matrix. -2. Add stdin input. -3. Add file output via `-o` / `--output`. -4. Add inverted regex matching via `-v` / `--invert-match`. -5. Reduce dependencies from 6 to 3 (`clap`, `regex`, `thiserror`). -6. Replace the brittle hand-rolled release pipeline with `cargo-dist`, and start publishing to crates.io on tag. - -Binary mode (hex-view, byte offsets) is explicitly out of scope. It will live in a separate future project, `selb`, per the UNIX do-one-thing-well principle. - -## Non-goals - -- Multiple `-e` patterns. -- `--count`, asymmetric context (`-A`/`-B`), or other grep-style features beyond `-v`. -- Auto-creating parent directories for `-o`, or interactive overwrite prompts. -- Inverting line-number selectors (`-v` with `-e` only). -- Random-access positional selectors over stdin. A pipe is not seekable; positions over stdin will return a clear error pointing the user at saving to a file first. - -## Architecture - -Every invocation is the same linear pipeline: - -``` -Source → Matcher → Expander → Formatter → Sink -``` - -Each stage is a trait with small, swappable implementations. 
All orthogonality lives here; nothing else in the codebase forks on feature combinations. - -### Stages - -**Source** — yields `(line_no, bytes)`. Implementations: - -- `FileSource` wraps a `BufReader`. -- `StdinSource` wraps a `BufReader`. - -The source reads bytes via `read_until(b'\n')`, not `lines()`. This preserves input byte-exact and avoids forcing UTF-8 at read time. Line counting restarts at 1 per source. - -**Matcher** — classifies each line. Implementations: - -- `AllMatcher` — every line hits. -- `LineMatcher(sorted_ranges)` — merged sorted ranges, constant-amortized check. -- `PositionMatcher(positions)` — line-plus-column. Cannot be paired with `StdinSource`; the `App` builder rules this out at compile time. -- `RegexMatcher { regex, invert }` — `invert` flips the hit bit and suppresses spans. - -`Matcher::match_line` returns `MatchInfo { hit, spans, col }`. Spans feed match highlighting; `col` feeds positional caret rendering. - -**Expander** — turns a stream of matches into an emit plan. Implementations: - -- `NoContext` — emit target hits only. -- `LineContext(n)` — emit ±N lines around each hit, merging overlaps. Owns the ring buffer and the look-ahead window. - -Produces `Emit { line, role, match_info }` where `role` ∈ `{Target, Context}`. - -**Formatter** — serializes an `Emit` into bytes. Implementations: - -- `PlainFormatter` — full line with optional line number, optional filename prefix, optional ANSI highlight of match spans. -- `FragmentFormatter` — char-context window around a column, with caret line underneath. Used for positional selectors and for `-e` combined with `-n`. - -ANSI handling lives in a ~30-line helper inside `format/`. The `termcolor` dependency is dropped. - -**Sink** — a buffered `io::Write`. Implementations: - -- `StdoutSink` — locks stdout once, 64 KiB buffer. -- `FileSink { path, force }` — uses `create_new` when `force=false`, so collisions fail atomically. `-o -` resolves to `StdoutSink`. 
- -Sinks are flushed via an explicit `finish()` call in the pipeline; relying on `Drop` to report flush errors is avoided. - -### The one generic function - -``` -pipeline::run(source, matcher, expander, formatter, sink) -> Result<()> -``` - -replaces the current nine `process_*` functions. `main.rs` becomes ~50 lines of wiring with no logic. - -### Typed-state guarantees (R3) - -The `App` builder encodes the invalid combinations at the type level. `App::with_stdin_source()` returns a builder whose `matcher()` method does not accept `PositionMatcher`. CLI parsing in `cli.rs` catches the same combination earlier and returns a friendly runtime error for end users. Both barriers exist on purpose: the compile-time one keeps future refactors honest, the runtime one keeps the CLI pleasant. - -## Module layout - -``` -src/ -├── main.rs ~50 lines; parse CLI, build App, run pipeline, set exit code. -├── lib.rs Re-exports + Result type. -├── cli.rs Clap derive + validation into domain types + typed errors. -├── error.rs Single SelError enum (thiserror). -│ -├── source/ -│ ├── mod.rs trait Source + shared types. -│ ├── file.rs FileSource. -│ └── stdin.rs StdinSource. -│ -├── matcher/ -│ ├── mod.rs trait Matcher + MatchInfo. -│ ├── lines.rs LineMatcher. -│ ├── position.rs PositionMatcher. -│ └── regex.rs RegexMatcher { regex, invert }. -│ -├── context.rs trait Expander, NoContext, LineContext (ring buffer). -│ -├── format/ -│ ├── mod.rs trait Formatter + ANSI helpers + Emit. -│ ├── plain.rs PlainFormatter. -│ └── fragment.rs FragmentFormatter. -│ -├── sink/ -│ ├── mod.rs trait Sink. -│ ├── stdout.rs StdoutSink. -│ └── file.rs FileSink. -│ -├── app.rs Typed builder encoding Source×Matcher compatibility. -└── pipeline.rs Single generic run() function. 
-``` - -Expected line counts after refactor: `main.rs` ~50, `format/plain.rs` ~200, `format/fragment.rs` ~150, `source/file.rs` ~80, `source/stdin.rs` ~50, `context.rs` ~200, `matcher/lines.rs` ~250, `matcher/position.rs` ~100, `matcher/regex.rs` ~80, `sink/file.rs` ~60, `sink/stdout.rs` ~30, `app.rs` ~100, `pipeline.rs` ~60. - -## Shared types - -```rust -pub struct Line { pub no: u64, pub bytes: Vec<u8> } - -pub struct MatchInfo { - pub hit: bool, - pub spans: Vec<Range<usize>>, - pub col: Option<usize>, -} - -pub struct Emit<'a> { - pub line: &'a Line, - pub role: Role, - pub match_info: &'a MatchInfo, -} - -pub enum Role { Target, Context } -``` - -`u64` for line numbers is cheap insurance against pathological inputs on 32-bit targets. - -## Feature details - -### stdin - -- No file arg, or `-` in the file list, means stdin. -- Mixing `sel 5 - file.txt` runs the pipeline once per source. Filename prefixes and line counters are per-source, matching `cat`/`grep` behavior. -- When a filename prefix is active, stdin is labeled `-`. -- Positional selectors with stdin are rejected with `SelError::PositionalWithStdin` and the message *"positional selectors (L:C) require a seekable file; pipe into a file first or use line selectors"*. - -### `-o` / `--output` - -- `-o path.txt`: create-new. If the path exists, fail with `SelError::OutputExists(path)`. -- `-o path.txt --force`: truncate-and-write. -- `-o -`: stdout (same code path as no `-o`). -- Parent directories are not created. The failure is the user's signal to fix the path. -- `BufWriter` sized 64 KiB. Explicit `finish()` surfaces flush errors. -- `--color=auto` with a file sink resolves to `never`. `--color=always` writes ANSI into the file on purpose, for use with pagers like `less -R`. - -### `-v` / `--invert-match` - -- Accepted only with `-e`. CLI validation rejects `-v` alone with `SelError::InvertWithoutRegex`. 
-- `RegexMatcher` holds `invert: bool`; `match_line` flips `hit` and emits no spans when inverted (nothing to highlight). -- Inverting line-number selectors is not supported — the mental model is "all lines that don't match the regex", not "all lines not in this set". - -## Dependency changes - -Drop: `anyhow`, `termcolor`, `is-terminal`. Keep: `clap`, `regex`, `thiserror`. - -`std::io::IsTerminal` (stable since 1.70) replaces `is-terminal`. ANSI color is a 30-line helper; `termcolor` is overkill for a fixed palette. `anyhow` was pure convenience on top of `thiserror` and the refactor does not need it. - -Dev-dependencies unchanged: `proptest`, `tempfile`, `criterion`. - -## Errors - -```rust -#[derive(thiserror::Error, Debug)] -pub enum SelError { - #[error("invalid selector: {0}")] - InvalidSelector(String), - - #[error("invalid regex: {0}")] - InvalidRegex(String), - - #[error("positional selectors require a seekable file; stdin is line-only")] - PositionalWithStdin, - - #[error("--invert-match requires --regex")] - InvertWithoutRegex, - - #[error("--char-context requires --regex or a positional selector")] - CharContextWithoutTarget, - - #[error("{path}: {source}")] - Io { path: String, #[source] source: io::Error }, - - #[error("output file already exists: {0} (use --force to overwrite)")] - OutputExists(PathBuf), -} -``` - -Exit codes: - -- `0` — success, including zero matches (grep-style). -- `1` — user error: bad selector, bad regex, I/O failure, invalid flag combination, output collision. -- `2` — reserved for future use. - -Every `io::Error` is wrapped with its path. Today's "No such file or directory (os error 2)" with no filename is fixed by construction. - -## Testing - -Preserve existing integration tests (`tests/basic.rs`, `selectors.rs`, `positions.rs`, `context.rs`, `regex.rs`, `multi_file.rs`). They exercise the CLI surface and are ideal for a refactor — if they keep passing, behavior held. 
- -Add: - -- `tests/stdin.rs` — piping input, `-` in the file list, mixing stdin with files, positional-with-stdin error. -- `tests/output_file.rs` — `-o` create, `-o` collision (fail), `-o --force`, `-o -`, `--color=always` to file. -- `tests/invert.rs` — `-v -e`, `-v` alone rejected, `-v -e` with context. - -Module-level unit tests stay co-located. Selector tests move with `selector.rs` into `matcher/lines.rs` and `matcher/position.rs`. - -Property tests via `proptest`: - -- Random line-range sets: `LineMatcher` agrees with a naive reference implementation. -- Random contexts: `LineContext` output matches an obvious `O(N²)` expander. - -Coverage target stays around 80%. Every `SelError` variant must be produced by at least one test. - -## Release (BR3) - -- `.github/workflows/ci.yml`: one job, matrix `{ubuntu-latest, macos-latest, windows-latest}` × one stable toolchain. Runs `cargo fmt --check`, `cargo clippy -D warnings`, `cargo test`. Drops the hand-rolled strip and archive steps. -- `.github/workflows/release.yml`: triggered on `v*` tags. Generated by `cargo dist init`. Builds archives for Linux x86_64/aarch64, macOS x86_64/aarch64, Windows x86_64. Auto-generates release notes from `CHANGELOG.md`. Publishes to crates.io using the existing `CARGO_REGISTRY_TOKEN` secret. -- `Cargo.toml`: pin `rust-version = "1.92"`, fill `categories`, `keywords`, `readme`, `homepage`, `documentation` for a polished crates.io page. -- Version bump to `0.2.0` on ship. `CHANGELOG.md` gets a `[0.2.0]` section in Keep-a-Changelog format. - -Binary-size target: ~700 KB stripped (currently ~1 MB). - -## Out of scope - -- Binary mode (`selb`) — separate future project. -- Multiple `-e` patterns, `--count`, `-A`/`-B`/`-C`. -- Auto-mkdir for `-o`; interactive overwrite prompts. -- Inverted line-number selectors. -- Random-access positions over stdin (buffer-the-whole-pipe approach). 
- -## Risks and mitigations - -- **Clap derive + typed builder interplay.** The `App` builder refuses invalid combinations at compile time, but `Cli` produces runtime values. Mitigation: a single `Cli::into_app()` method that returns `Result, SelError>`, doing all validation up front. -- **Refactor regressing behavior.** Mitigation: the existing integration tests are comprehensive and independent of internal structure; keep them green throughout. Do the refactor in small, testable commits. -- **`cargo-dist` learning curve.** Mitigation: it's the boring path specifically chosen to reduce risk; the one-time `cargo dist init` is well documented. - -## Success criteria - -- `main.rs` ≤ 80 lines, zero per-feature branching. -- Dependency count: 3. -- All existing tests pass unchanged. -- New tests cover stdin, `-o`, `-v`, and every `SelError` variant. -- `cargo install sel` works after release. -- Tagged release auto-publishes binaries and crates.io package without hand intervention. -- Stripped release binary under 800 KB. diff --git a/release-plz.toml b/release-plz.toml index e5c0a89..29066cb 100644 --- a/release-plz.toml +++ b/release-plz.toml @@ -27,6 +27,7 @@ changelog_update = true git_release_enable = false # cargo-dist creates the GitHub Release. git_tag_enable = true # we need the tag to fire release.yml. publish = false # cargo-dist publishes to crates.io. +release_always = false # tag only after the release PR is merged. semver_check = false # skip cargo-semver-checks (optional, slow). [changelog]