From aeacf065519fed17fd053da6ccda1f57f189fab0 Mon Sep 17 00:00:00 2001
From: Timo Runge <me@timorunge.com>
Date: Wed, 3 Jun 2026 23:36:14 +0200
Subject: [PATCH 1/2] build(deps): bump the all group across 1 directory with 8
 updates

Mirrors Dependabot PR #4 so the dependency bump and feature work share one CI run.
---
 Cargo.lock     | 96 +++++++++++++++++++++-----------------------------
 Cargo.toml     |  2 +-
 cli/Cargo.toml |  4 +--
 3 files changed, 43 insertions(+), 59 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b390c04..d9faf96 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1180,9 +1180,9 @@ dependencies = [
 
 [[package]]
 name = "calamine"
-version = "0.34.0"
+version = "0.35.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20ae05a4e39297eecf9a994210d27501318c37a9318201f8e11050add82bb6f0"
+checksum = "8822fe6253ca47aa5ad9a3be09f6fe7cd20c6a74e41b0aa42e8f4e3d523508df"
 dependencies = [
  "atoi_simd",
  "byteorder",
@@ -1231,9 +1231,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.2.61"
+version = "1.2.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d"
+checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f"
 dependencies = [
  "find-msvc-tools",
  "jobserver",
@@ -2549,9 +2549,9 @@ dependencies = [
 
 [[package]]
 name = "html-to-markdown-rs"
-version = "3.3.3"
+version = "3.5.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9451a842dd9316c9762a1c2c1d496df9db4db29e702e22f7a3fca210636ed4f9"
+checksum = "6cbbfb183e8634cb956309c6bbd781d9ddae068d376fb9eb1451ac49cf4cbba7"
 dependencies = [
  "ahash",
  "astral-tl",
@@ -2559,7 +2559,7 @@ dependencies = [
  "html-escape",
  "html5ever",
  "image",
- "lru 0.17.0",
+ "lru 0.18.0",
  "memchr",
  "once_cell",
  "regex",
@@ -3263,9 +3263,9 @@ dependencies = [
 
 [[package]]
 name = "kreuzberg"
-version = "4.9.7"
+version = "4.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9b6e7118097d06f882f62427e6d8722eef31bc20bd0b2f434a8b8785d05c30c"
+checksum = "ef08d77d5b72b5c424acb86e297ea13fd9f3db9372d570dd4468011dfc8c3d53"
 dependencies = [
  "ahash",
  "async-trait",
@@ -3313,7 +3313,7 @@ dependencies = [
  "pastey 0.2.2",
  "pkg-config",
  "pulldown-cmark",
- "quick-xml 0.39.4",
+ "quick-xml 0.40.1",
  "rayon",
  "regex",
  "rmp-serde",
@@ -3365,9 +3365,9 @@ dependencies = [
 
 [[package]]
 name = "kreuzberg-tesseract"
-version = "4.9.7"
+version = "4.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "550d1aedb28114013aa51767a1566318a7c6409b56605851c55c2ecd433ccf1b"
+checksum = "5f896204110548ad886f059e42bcc8b839dba64b95db2572ed2252acd72f22dc"
 dependencies = [
  "cc",
  "cmake",
@@ -3550,7 +3550,7 @@ dependencies = [
  "mime_guess",
  "notify",
  "proptest",
- "quick-xml 0.40.0",
+ "quick-xml 0.40.1",
  "regex",
  "reqwest",
  "rmcp",
@@ -3618,9 +3618,9 @@ dependencies = [
 
 [[package]]
 name = "lru"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e0b564323a0fb6d54b864f625ae139de9612e27edb944dda37c109f05aac531"
+checksum = "8a860605968fce16869fd239cf4237a82f3ac470723415db603b0e8b6c8d4fb9"
 dependencies = [
  "hashbrown 0.17.0",
 ]
@@ -4480,16 +4480,16 @@ checksum = "cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e"
 dependencies = [
  "encoding_rs",
  "memchr",
- "serde",
 ]
 
 [[package]]
 name = "quick-xml"
-version = "0.40.0"
+version = "0.40.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b7315c86b26aaef0321fba33c9dcc160da659c6a9d278f0f6a5656d6561c03b"
+checksum = "2474bd2e5029e7ccb6abb2ba48cf2383a333851dedf495901544281590c7da7f"
 dependencies = [
  "memchr",
+ "serde",
 ]
 
 [[package]]
@@ -4798,9 +4798,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
 
 [[package]]
 name = "reqwest"
-version = "0.13.3"
+version = "0.13.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0"
+checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3"
 dependencies = [
  "base64",
  "bytes",
@@ -4865,9 +4865,9 @@ dependencies = [
 
 [[package]]
 name = "rmcp"
-version = "1.6.0"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e12ca9067b5ebfbd5b3fcdc4acfceb81aa7d5ab2a879dff7cb75d22434276aad"
+checksum = "0810a9f717d9828f475fe1f629f4c305c8464b7f496c3a854b58d29e65f4058e"
 dependencies = [
  "async-trait",
  "base64",
@@ -4898,9 +4898,9 @@ dependencies = [
 
 [[package]]
 name = "rmcp-macros"
-version = "1.6.0"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7caa6743cc0888e433105fe1bc551a7f607940b126a37bc97b478e86064627eb"
+checksum = "6aefac48c364756e97f04c0401ba3231e8607882c7c1d92da0437dc16307904d"
 dependencies = [
  "darling",
  "proc-macro2",
@@ -5091,15 +5091,6 @@ dependencies = [
  "winapi-util",
 ]
 
-[[package]]
-name = "scc"
-version = "2.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc"
-dependencies = [
- "sdd",
-]
-
 [[package]]
 name = "schannel"
 version = "0.1.29"
@@ -5141,12 +5132,6 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
-[[package]]
-name = "sdd"
-version = "3.0.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
-
 [[package]]
 name = "security-framework"
 version = "3.7.0"
@@ -5219,9 +5204,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.149"
+version = "1.0.150"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
 dependencies = [
  "indexmap",
  "itoa",
@@ -5294,24 +5279,23 @@ dependencies = [
 
 [[package]]
 name = "serial_test"
-version = "3.4.0"
+version = "3.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f"
+checksum = "699f4197115b8a7e7ff19c9a315a4bd6fffec26cc4626ef45ecaea389e081c6d"
 dependencies = [
  "futures-executor",
  "futures-util",
  "log",
  "once_cell",
  "parking_lot",
- "scc",
  "serial_test_derive",
 ]
 
 [[package]]
 name = "serial_test_derive"
-version = "3.4.0"
+version = "3.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9"
+checksum = "94e153fc76e1c6a068703d6d29c508a0b15c061c4b7e43da59cc097bc342673c"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -5373,9 +5357,9 @@ dependencies = [
 
 [[package]]
 name = "shlex"
-version = "1.3.0"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
 
 [[package]]
 name = "signal-hook-registry"
@@ -5806,9 +5790,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
 
 [[package]]
 name = "tar"
-version = "0.4.45"
+version = "0.4.46"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22692a6476a21fa75fdfc11d452fda482af402c008cdbaf3476414e122040973"
+checksum = "3f6221d9a6003c78398e3b239969f352578258df48c8eb051caadae0015bc840"
 dependencies = [
  "filetime",
  "libc",
@@ -5943,9 +5927,9 @@ dependencies = [
 
 [[package]]
 name = "tikv-jemalloc-sys"
-version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
+version = "0.7.1+5.3.1-0-g81034ce1f1373e37dc865038e1bc8eeecf559ce8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b"
+checksum = "1a2825c78386b4ae0314074867860ba9577875de945f05992c38815cbec327f0"
 dependencies = [
  "cc",
  "libc",
@@ -5953,9 +5937,9 @@ dependencies = [
 
 [[package]]
 name = "tikv-jemallocator"
-version = "0.6.1"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a"
+checksum = "249f09e49ab1609436f34c776e84231bead18d6a955f119f939bdc1d847561bd"
 dependencies = [
  "libc",
  "tikv-jemalloc-sys",
@@ -6283,9 +6267,9 @@ checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782"
 
 [[package]]
 name = "tree-sitter-language-pack"
-version = "1.7.0"
+version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "925d3fdf2a7f2be09d354463119556ac092832d807a9533708ffd3b18bb712f1"
+checksum = "208658b27011901bb099b638e9ba25ad0a2063d5d6b7fcc6c07af0dcc94ffe10"
 dependencies = [
  "ahash",
  "cc",
diff --git a/Cargo.toml b/Cargo.toml
index 782f31a..639269e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -108,7 +108,7 @@ flate2 = { version = "1", optional = true }
 zstd = { version = "0.13", optional = true }
 bzip2 = { version = "0.6", optional = true }
 xz2 = { version = "0.1", optional = true }
-rmcp = { version = "1.4", features = ["client", "transport-child-process", "transport-streamable-http-client-reqwest"], optional = true }
+rmcp = { version = "1.7", features = ["client", "transport-child-process", "transport-streamable-http-client-reqwest"], optional = true }
 mail-parser = { version = "0.11", optional = true }
 mime_guess = { version = "2", optional = true }
 tempfile = "3"
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
index 820c932..a50f567 100644
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@@ -34,7 +34,7 @@ tokio = { version = "1", features = ["full"] }
 indicatif = "0.18"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 notify = "8"
-rmcp = { version = "1.4", features = ["server", "transport-io", "transport-streamable-http-server", "macros"] }
+rmcp = { version = "1.7", features = ["server", "transport-io", "transport-streamable-http-server", "macros"] }
 axum = { version = "0.8", features = ["http1", "tokio"] }
 tokio-util = "0.7"
 serde = { version = "1", features = ["derive"] }
@@ -54,7 +54,7 @@ tar = "0.4"
 zip = { version = "8", default-features = false, features = ["deflate"] }
 
 [target.'cfg(not(target_env = "msvc"))'.dependencies]
-tikv-jemallocator = "0.6"
+tikv-jemallocator = "0.7"
 
 [target.'cfg(unix)'.dependencies]
 libc = "0.2"

From 673cf7f188c603b6b690be97de9ee7f87b6057d2 Mon Sep 17 00:00:00 2001
From: Timo Runge <me@timorunge.com>
Date: Wed, 3 Jun 2026 23:36:25 +0200
Subject: [PATCH 2/2] feat: exec output limits, maintain health check, store
 provenance

- exec: add `max_output_bytes` (default 10 MiB); stream stdout/stderr
  through bounded reads and kill the child process on timeout
- cli: `lore maintain health` reports config validity, compiled features,
  LLM client init, and store reachability
- store: stamp chunk-config fingerprint + version in lore.meta and warn
  at ingest when an existing store was built with different settings
- store: federated multi-store search with BM25 score normalization
- llm: surface missing-credential warnings via the ingest observer
- test: pure chunker boundary cases (empty, short, min-chunk, indices)
- chore(lints): enforce missing_errors_doc/missing_panics_doc and
  document the public API surface
---
 cli/src/cli/args.rs        |   2 +
 cli/src/cli/ingest.rs      |   5 +
 cli/src/cli/maintain.rs    | 146 ++++++++++++++++++++++++
 cli/src/cli/mod.rs         |   3 +-
 cli/src/cli/watch.rs       |  15 ++-
 cli/src/main.rs            |   8 +-
 docs/configuration.md      |   1 +
 skills/lore/SKILL.md       |   1 +
 src/cache/mod.rs           |   8 ++
 src/config/mod.rs          | 107 ++++++++++++++++++
 src/config/processing.rs   |   4 +
 src/config/source.rs       |  19 ++++
 src/fmt/format.rs          |   4 +
 src/ingest/chunker.rs      |  62 +++++++++++
 src/ingest/discover/mod.rs |   1 +
 src/ingest/loaders/exec.rs | 160 ++++++++++++++++++++++++---
 src/ingest/loaders/file.rs |   8 ++
 src/ingest/mod.rs          |  52 +++++++++
 src/ingest/transforms.rs   |   4 +
 src/ingest/watch.rs        |   4 +
 src/lib.rs                 |   3 +
 src/llm.rs                 | 119 ++++++++++++++++++++
 src/output/documents.rs    |   8 ++
 src/output/info.rs         |   4 +
 src/query.rs               |  24 ++++
 src/store/documents.rs     |  10 +-
 src/store/meta_key.rs      |   4 +
 src/store/mod.rs           |   8 ++
 src/store/multi.rs         | 220 ++++++++++++++++++++++++++++++++++++-
 src/store/resolve.rs       |   8 ++
 src/store/search.rs        |  12 ++
 src/store/test_helpers.rs  |   2 +
 src/store/types.rs         |  38 ++++++-
 src/store/writer.rs        |  32 ++++++
 src/util/fs.rs             |   4 +
 src/util/marker.rs         |   4 +
 36 files changed, 1088 insertions(+), 26 deletions(-)

diff --git a/cli/src/cli/args.rs b/cli/src/cli/args.rs
index 9df1d32..e902fb3 100644
--- a/cli/src/cli/args.rs
+++ b/cli/src/cli/args.rs
@@ -482,4 +482,6 @@ pub enum MaintainAction {
         #[arg(default_value = "all", hide_default_value = true)]
         scope: CacheScope,
     },
+    /// Print a single-screen health report (config, features, LLM, store)
+    Health,
 }
diff --git a/cli/src/cli/ingest.rs b/cli/src/cli/ingest.rs
index 9ccb4d9..cf32752 100644
--- a/cli/src/cli/ingest.rs
+++ b/cli/src/cli/ingest.rs
@@ -152,6 +152,11 @@ impl IngestObserver for CliIngestObserver {
         *status = sb;
     }
 
+    fn on_llm_warning(&self, msg: &str) {
+        let prefix = self.kb_prefix();
+        self.println(format!("{prefix}[{} ] {msg}", self.paint.yellow("!")));
+    }
+
     fn on_dry_run_notice(&self) {
         let prefix = self.kb_prefix();
         self.println(format!(
diff --git a/cli/src/cli/maintain.rs b/cli/src/cli/maintain.rs
index 41da9fc..31da602 100644
--- a/cli/src/cli/maintain.rs
+++ b/cli/src/cli/maintain.rs
@@ -419,6 +419,152 @@ fn fix_issues(store: &store::Store, issues: &[Issue]) -> Result<usize> {
     Ok(fixed)
 }
 
+/// Print a single-screen health report for this lore setup.
+pub fn health(
+    config_path: &Path,
+    store_path: &Path,
+    mode: OutputMode,
+    prefix: &LinePrefix,
+) -> Result<()> {
+    let mp = MultiProgress::new();
+    let paint = crate::terminal::stderr_painter();
+    let json = mode == OutputMode::Json;
+
+    if !json {
+        progress::mp_println(
+            &mp,
+            format!("{prefix}[{} ] lore health check", paint.purple(".")),
+        );
+    }
+
+    let mut hard_error = false;
+
+    // 1. Config validity
+    match lore::config::IngestConfig::from_yaml(config_path) {
+        Ok(_) => {
+            if !json {
+                progress::mp_println(
+                    &mp,
+                    format!(
+                        "{prefix}[{}] config: {} -- ok",
+                        paint.green("+"),
+                        config_path.display()
+                    ),
+                );
+            }
+        }
+        Err(e) => {
+            hard_error = true;
+            if !json {
+                progress::mp_println(
+                    &mp,
+                    format!(
+                        "{prefix}[{}] config: {} -- {e:#}",
+                        paint.red("!"),
+                        config_path.display()
+                    ),
+                );
+            }
+        }
+    }
+
+    // 2. Compiled features
+    if !json {
+        let features: Vec<&str> = [
+            #[cfg(feature = "ingest")]
+            "ingest",
+            #[cfg(feature = "llm")]
+            "llm",
+            #[cfg(feature = "s3")]
+            "s3",
+            #[cfg(feature = "mcp")]
+            "mcp",
+            #[cfg(feature = "ocr")]
+            "ocr",
+            #[cfg(feature = "iwork")]
+            "iwork",
+            #[cfg(feature = "tree-sitter")]
+            "tree-sitter",
+        ]
+        .into_iter()
+        .collect();
+        let feat_str = if features.is_empty() {
+            "none".to_owned()
+        } else {
+            features.join(", ")
+        };
+        progress::mp_println(
+            &mp,
+            format!("{prefix}[{}] features: {feat_str}", paint.green("+")),
+        );
+    }
+
+    // 3. LLM credential resolution (feature-gated)
+    #[cfg(feature = "llm")]
+    {
+        if let Ok(cfg) = lore::config::IngestConfig::from_yaml(config_path) {
+            if let Some(ref llm_cfg) = cfg.llm {
+                match lore::llm::LlmClient::new(llm_cfg) {
+                    Ok(_) => {
+                        if !json {
+                            progress::mp_println(
+                                &mp,
+                                format!("{prefix}[{}] llm: client initialized", paint.green("+")),
+                            );
+                        }
+                    }
+                    Err(e) => {
+                        if !json {
+                            progress::mp_println(
+                                &mp,
+                                format!(
+                                    "{prefix}[{}] llm: could not initialize client: {e}",
+                                    paint.yellow("-"),
+                                ),
+                            );
+                        }
+                    }
+                }
+            } else if !json {
+                progress::mp_println(
+                    &mp,
+                    format!("{prefix}[{}] llm: not configured", paint.blue("i")),
+                );
+            }
+        }
+    }
+
+    // 4. Store reachability
+    let store_path_norm = normalize_path(store_path);
+    if store_path_norm.is_dir() {
+        if !json {
+            progress::mp_println(
+                &mp,
+                format!(
+                    "{prefix}[{}] store: {} -- reachable",
+                    paint.green("+"),
+                    store_path_norm.display()
+                ),
+            );
+        }
+    } else if !json {
+        progress::mp_println(
+            &mp,
+            format!(
+                "{prefix}[{}] store: {} -- not found (run `lore ingest` to create)",
+                paint.yellow("-"),
+                store_path_norm.display()
+            ),
+        );
+    }
+
+    if hard_error {
+        anyhow::bail!("health check found configuration errors (see above)");
+    }
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/cli/src/cli/mod.rs b/cli/src/cli/mod.rs
index 7fca33f..1a812bd 100644
--- a/cli/src/cli/mod.rs
+++ b/cli/src/cli/mod.rs
@@ -133,8 +133,7 @@ pub fn resolve_all_configs(cli_configs: Vec<PathBuf>) -> Result<Vec<ResolvedConf
     paths
         .into_iter()
         .map(|config_path| {
-            let cfg = config::IngestConfig::from_yaml(&config_path)
-                .with_context(|| format!("failed to load config {}", config_path.display()))?;
+            let cfg = config::load_config_with_hints(&config_path)?;
             Ok(ResolvedConfig {
                 config: cfg,
                 config_path,
diff --git a/cli/src/cli/watch.rs b/cli/src/cli/watch.rs
index 79b7510..6c4d0a5 100644
--- a/cli/src/cli/watch.rs
+++ b/cli/src/cli/watch.rs
@@ -1,6 +1,6 @@
 use std::path::Path;
 use std::sync::Arc;
-use std::sync::atomic::AtomicBool;
+use std::sync::atomic::{AtomicBool, Ordering};
 
 use anyhow::Result;
 use indicatif::{MultiProgress, ProgressBar, ProgressDrawTarget};
@@ -19,9 +19,12 @@ use crate::terminal;
 /// Cycle-scoped progress bars using shared types.
 struct WatchIngestObserver {
     mp: MultiProgress,
+    paint: Painter,
     shutdown: Arc<AtomicBool>,
     bars: SourceBars,
     idle_bar: Arc<std::sync::Mutex<ProgressBar>>,
+    /// Ensures the LLM credential warning prints at most once per watch session.
+    llm_warned: AtomicBool,
 }
 
 impl WatchIngestObserver {
@@ -35,8 +38,10 @@ impl WatchIngestObserver {
         Self {
             bars: SourceBars::new(mp.clone(), term_width),
             mp,
+            paint: terminal::stderr_painter(),
             shutdown,
             idle_bar: Arc::new(std::sync::Mutex::new(idle)),
+            llm_warned: AtomicBool::new(false),
         }
     }
 
@@ -74,6 +79,14 @@ impl IngestObserver for WatchIngestObserver {
         self.bars.clear();
     }
 
+    fn on_llm_warning(&self, msg: &str) {
+        // Credentials come from the environment and don't change across cycles,
+        // so surface the warning only once per watch session.
+        if !self.llm_warned.swap(true, Ordering::Relaxed) {
+            progress::mp_println(&self.mp, format!("[{} ] {msg}", self.paint.yellow("!")));
+        }
+    }
+
     fn on_status(&self, _msg: &str) {}
 
     fn on_source_waiting(&self, _index: usize, label: &str) {
diff --git a/cli/src/main.rs b/cli/src/main.rs
index 7de55ff..79846fe 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -265,7 +265,7 @@ fn main() {
                     .config
                     .into_iter()
                     .next()
-                    .map(|p| lore::config::IngestConfig::from_yaml(&p))
+                    .map(|p| lore::config::load_config_with_hints(&p))
                     .transpose()?;
                 lore_cli::cli::preview::preview(lore_cli::cli::preview::PreviewOptions {
                     paths: &paths,
@@ -354,6 +354,12 @@ fn main() {
                                         pfx,
                                     ),
                                     MaintainAction::Clean { .. } => unreachable!(),
+                                    MaintainAction::Health => lore_cli::cli::maintain::health(
+                                        &rc.config_path,
+                                        &store_path,
+                                        lore_cli::terminal::output_mode(false),
+                                        pfx,
+                                    ),
                                 }
                             },
                         )
diff --git a/docs/configuration.md b/docs/configuration.md
index 3ea8e07..29b6f22 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -992,6 +992,7 @@ Runs one or more shell commands via `sh -c`. Supports two output modes:
 | `topic` | string | -- | Topic label applied to all chunks from this source. |
 | `tags` | string | -- | Comma-separated tags applied to all documents from this source. |
 | `processing` | string or object | -- | Processing override: a preset name or an inline profile object. |
+| `max_output_bytes` | int | -- | Maximum bytes to read from stdout (default: 10 MiB). Commands that exceed this limit fail with an error; raise this value or narrow the command if needed. |
 <!-- END GENERATED: config-source-exec -->
 
 **JSONL mode** (default): each non-empty stdout line must be a JSON object
diff --git a/skills/lore/SKILL.md b/skills/lore/SKILL.md
index 218ae81..adaa527 100644
--- a/skills/lore/SKILL.md
+++ b/skills/lore/SKILL.md
@@ -152,6 +152,7 @@ For HTTP transport, multi-KB setups, tool reference, and resource URIs:
 | `lore maintain repair` | Fix detected consistency issues |
 | `lore maintain compact` | Merge index segments for lower memory and faster reads |
 | `lore maintain clean` | Clear cached downloads, git repos, or temporary files |
+| `lore maintain health` | Print a single-screen health report (config, features, LLM, store) |
 <!-- END GENERATED: cli-commands -->
 
 ### Key flags
diff --git a/src/cache/mod.rs b/src/cache/mod.rs
index e1e9f3a..b450716 100644
--- a/src/cache/mod.rs
+++ b/src/cache/mod.rs
@@ -149,6 +149,10 @@ pub(crate) fn http_cache_dir() -> Result<PathBuf> {
 }
 
 /// Return the logs directory under the cache root, creating it if needed.
+///
+/// # Errors
+///
+/// Returns an error if the cache root cannot be determined or the directory cannot be created.
 pub fn logs_dir() -> Result<PathBuf> {
     cache_dir(&[CacheScope::LOGS_DIR])
 }
@@ -160,6 +164,10 @@ pub(crate) fn repo_cache_path(repo_url: &str) -> Result<PathBuf> {
 }
 
 /// Clear cached data. Returns `(items_removed, bytes_freed)`.
+///
+/// # Errors
+///
+/// Returns an error if the cache root cannot be determined or deletion fails.
 pub fn clear_cache(scope: CacheScope) -> Result<(usize, u64)> {
     clear_cache_at(&cache_root()?, scope)
 }
diff --git a/src/config/mod.rs b/src/config/mod.rs
index ec8527b..5ee90c4 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -78,6 +78,10 @@ impl IngestConfig {
     ///
     /// Merges global user-level defaults (if present) under the project
     /// config before deserialization. Precedence: global < project < env vars.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be read, the YAML is invalid, or validation fails.
     pub fn from_yaml(path: &Path) -> Result<Self> {
         let content = std::fs::read_to_string(path)
             .with_context(|| format!("failed to read config: {}", path.display()))?;
@@ -243,6 +247,55 @@ impl Validate for IngestConfig {
     }
 }
 
+/// Load a config file with user-friendly error hints on failure.
+///
+/// Wraps `IngestConfig::from_yaml` and appends contextual guidance when
+/// common error patterns are detected.
+///
+/// # Errors
+///
+/// Returns an error if the config file cannot be read, parsed, or fails validation.
+pub fn load_config_with_hints(path: &Path) -> Result<IngestConfig> {
+    IngestConfig::from_yaml(path).map_err(|e| annotate_config_error(e, path))
+}
+
+fn annotate_config_error(err: anyhow::Error, path: &Path) -> anyhow::Error {
+    let msg = format!("{err:#}");
+
+    let hint = if msg.contains("No such file") || msg.contains("not found") {
+        Some(format!(
+            "hint: config file not found at {}; run `lore init` to create one",
+            path.display()
+        ))
+    } else if msg.contains("unknown field") {
+        let field = extract_unknown_field(&msg).unwrap_or("(see above)");
+        Some(format!(
+            "hint: unknown field `{field}` -- check spelling or see docs/configuration.md"
+        ))
+    } else if msg.contains("failed to parse config") {
+        Some("hint: YAML syntax error -- check indentation and quoting".to_owned())
+    } else if msg.contains("requires the '") {
+        Some(
+            "hint: this source type requires a compile-time feature; \
+             reinstall with: cargo install lore --features <feature>"
+                .to_owned(),
+        )
+    } else {
+        None
+    };
+
+    match hint {
+        Some(h) => err.context(h),
+        None => err,
+    }
+}
+
+fn extract_unknown_field(msg: &str) -> Option<&str> {
+    let start = msg.find("unknown field `")? + "unknown field `".len();
+    let end = msg[start..].find('`')? + start;
+    Some(&msg[start..end])
+}
+
 /// Read an environment variable and parse it into `T`; returns `None` if unset, errors if malformed.
 fn parse_env<T: std::str::FromStr>(name: &str) -> Result<Option<T>> {
     let Ok(raw) = std::env::var(name) else {
@@ -670,4 +723,58 @@ processing:
             "LORE_MAX_CHUNK_CHARS should override processing.max_chunk_chars"
         );
     }
+
+    #[test]
+    fn annotate_config_error_file_not_found() {
+        let err = anyhow::anyhow!("failed to read config: No such file or directory");
+        let annotated = annotate_config_error(err, Path::new("/tmp/missing.yaml"));
+        let msg = format!("{annotated:#}");
+        assert!(msg.contains("hint:"), "should contain hint: {msg}");
+        assert!(msg.contains("lore init"), "should suggest lore init: {msg}");
+    }
+
+    #[test]
+    fn annotate_config_error_unknown_field() {
+        let err = anyhow::anyhow!("failed to parse config: unknown field `typo_field`");
+        let annotated = annotate_config_error(err, Path::new("/tmp/lore.yaml"));
+        let msg = format!("{annotated:#}");
+        assert!(
+            msg.contains("check spelling"),
+            "should suggest check spelling: {msg}"
+        );
+        assert!(
+            msg.contains("typo_field"),
+            "should mention the field name: {msg}"
+        );
+    }
+
+    #[test]
+    fn annotate_config_error_parse_error() {
+        let err = anyhow::anyhow!("failed to parse config: expected mapping");
+        let annotated = annotate_config_error(err, Path::new("/tmp/lore.yaml"));
+        let msg = format!("{annotated:#}");
+        assert!(
+            msg.contains("indentation"),
+            "should mention indentation: {msg}"
+        );
+    }
+
+    #[test]
+    fn annotate_config_error_passthrough() {
+        let err = anyhow::anyhow!("something totally unexpected");
+        let annotated = annotate_config_error(err, Path::new("/tmp/lore.yaml"));
+        let msg = format!("{annotated:#}");
+        assert!(!msg.contains("hint:"), "should not add hint: {msg}");
+    }
+
+    #[test]
+    fn extract_unknown_field_parses_serde_format() {
+        let msg = "unknown field `bad_field`, expected one of `name`, `sources`";
+        assert_eq!(extract_unknown_field(msg), Some("bad_field"));
+    }
+
+    #[test]
+    fn extract_unknown_field_returns_none_for_unrecognized() {
+        assert_eq!(extract_unknown_field("no backtick pattern here"), None);
+    }
 }
diff --git a/src/config/processing.rs b/src/config/processing.rs
index 846eaf9..55ce444 100644
--- a/src/config/processing.rs
+++ b/src/config/processing.rs
@@ -105,6 +105,10 @@ impl ProcessingConfig {
     }
 
     /// Resolve a per-source `ProcessingRef` into a concrete profile.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a named preset is referenced but not defined in `self.presets`.
     pub fn resolve(&self, r: Option<&ProcessingRef>) -> Result<ProcessingProfile> {
         match r {
             None => Ok(self.default_profile()),
diff --git a/src/config/source.rs b/src/config/source.rs
index ff89126..e126f0c 100644
--- a/src/config/source.rs
+++ b/src/config/source.rs
@@ -296,6 +296,14 @@ impl ExecOutputMode {
     }
 }
 
+const fn default_max_output_bytes() -> usize {
+    10 * 1024 * 1024
+}
+#[allow(clippy::trivially_copy_pass_by_ref)]
+fn is_default_max_output_bytes(v: &usize) -> bool {
+    *v == default_max_output_bytes()
+}
+
 /// Command(s) to execute. stdout is parsed as JSONL (default) or consumed as raw content.
 #[derive(Debug, Clone, Deserialize, Serialize, schemars::JsonSchema)]
 #[serde(deny_unknown_fields)]
@@ -334,6 +342,13 @@ pub struct ExecSource {
     /// Processing override: a preset name or an inline profile object.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub processing: Option<ProcessingRef>,
+    /// Maximum bytes to read from stdout (default: 10 MiB). Commands that exceed this limit fail
+    /// with an error; raise this value or narrow the command if needed.
+    #[serde(
+        default = "default_max_output_bytes",
+        skip_serializing_if = "is_default_max_output_bytes"
+    )]
+    pub max_output_bytes: usize,
 }
 
 /// MCP transport protocol.
@@ -767,6 +782,10 @@ impl SourceConfig {
     }
 
     /// Check structural invariants and URL schemes for this source.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any required field is empty, invalid, or uses an unsupported URL scheme.
     pub fn validate(&self) -> Result<()> {
         match self {
             Self::Local(s) => {
diff --git a/src/fmt/format.rs b/src/fmt/format.rs
index 26ed552..96c5c4f 100644
--- a/src/fmt/format.rs
+++ b/src/fmt/format.rs
@@ -59,6 +59,10 @@ pub fn format_elapsed(d: Duration) -> String {
 }
 
 /// Serialize a value as pretty-printed JSON.
+///
+/// # Errors
+///
+/// Returns an error if the value cannot be serialized to JSON.
 pub fn to_json_pretty<T: serde::Serialize>(value: &T) -> Result<String> {
     serde_json::to_string_pretty(value).context("JSON serialization failed")
 }
diff --git a/src/ingest/chunker.rs b/src/ingest/chunker.rs
index e73ced2..90d3cf5 100644
--- a/src/ingest/chunker.rs
+++ b/src/ingest/chunker.rs
@@ -587,6 +587,68 @@ mod tests {
         );
     }
 
+    #[test]
+    fn chunker_boundary_empty_document() {
+        let doc = LoaderResult::test_doc("");
+        let chunks = chunk_markdown(&doc, "test", &ProcessingProfile::default());
+        assert!(chunks.is_empty(), "empty doc must produce zero chunks");
+    }
+
+    #[test]
+    fn chunker_boundary_doc_shorter_than_chunk() {
+        let doc = LoaderResult::test_doc("Short content.");
+        let cfg = ProcessingProfile {
+            min_chunk_chars: 0,
+            max_chunk_chars: 1600,
+            ..Default::default()
+        };
+        let chunks = chunk_markdown(&doc, "test", &cfg);
+        assert_eq!(
+            chunks.len(),
+            1,
+            "short doc should produce exactly one chunk"
+        );
+        assert!(chunks[0].body.contains("Short content"));
+        assert_eq!(chunks[0].chunk_index, 0);
+    }
+
+    #[test]
+    fn chunker_boundary_min_chunk_filters_tiny_pieces() {
+        let doc = LoaderResult::test_doc("ab\n\ncd\n\nef");
+        let cfg = ProcessingProfile {
+            min_chunk_chars: 10,
+            max_chunk_chars: 1600,
+            ..Default::default()
+        };
+        let chunks = chunk_markdown(&doc, "test", &cfg);
+        for c in &chunks {
+            assert!(
+                c.body.len() >= cfg.min_chunk_chars || chunks.len() == 1,
+                "chunk body shorter than min_chunk_chars={}: {:?}",
+                cfg.min_chunk_chars,
+                c.body
+            );
+        }
+    }
+
+    #[test]
+    fn chunker_boundary_chunk_indices_are_contiguous() {
+        let content = "# A\n\nSome text here.\n\n# B\n\nMore text here.\n\n# C\n\nEven more text.";
+        let doc = LoaderResult::test_doc(content);
+        let cfg = ProcessingProfile {
+            min_chunk_chars: 0,
+            max_chunk_chars: 200,
+            ..Default::default()
+        };
+        let chunks = chunk_markdown(&doc, "test", &cfg);
+        for (i, chunk) in chunks.iter().enumerate() {
+            assert_eq!(
+                chunk.chunk_index, i as i64,
+                "chunk_index must equal position in output vec"
+            );
+        }
+    }
+
     #[test]
     fn chunking_presplit_no_duplicate_heading() {
         let repeated = "x ".repeat(30);
diff --git a/src/ingest/discover/mod.rs b/src/ingest/discover/mod.rs
index bd5303b..2304145 100644
--- a/src/ingest/discover/mod.rs
+++ b/src/ingest/discover/mod.rs
@@ -668,6 +668,7 @@ async fn discover_exec(
         s.dir.as_deref(),
         &s.env,
         timeout,
+        s.max_output_bytes,
         ctx.topic.as_deref(),
         s.output,
         s.source_key.as_deref(),
diff --git a/src/ingest/loaders/exec.rs b/src/ingest/loaders/exec.rs
index 5324af1..5ed91ff 100644
--- a/src/ingest/loaders/exec.rs
+++ b/src/ingest/loaders/exec.rs
@@ -17,6 +17,7 @@ pub(crate) async fn run_exec(
     dir: Option<&str>,
     env: &HashMap<String, String>,
     timeout_secs: u64,
+    max_output_bytes: usize,
     topic: Option<&str>,
     output: ExecOutputMode,
     source_key: Option<&str>,
@@ -37,7 +38,8 @@ pub(crate) async fn run_exec(
     let mut all_docs = Vec::new();
     let mut failures = Vec::new();
     for cmd in cmds {
-        let stdout = run_single_cmd(cmd, &effective_dir, env, timeout_secs).await?;
+        let stdout =
+            run_single_cmd(cmd, &effective_dir, env, timeout_secs, max_output_bytes).await?;
         match output {
             ExecOutputMode::Jsonl => {
                 for line in stdout.lines() {
@@ -92,40 +94,81 @@ async fn run_single_cmd(
     dir: &Path,
     env: &HashMap<String, String>,
     timeout_secs: u64,
+    max_output_bytes: usize,
 ) -> Result<String> {
-    let mut child = tokio::process::Command::new("sh");
-    child
+    const STDERR_CAP: usize = 16 * 1024;
+
+    let mut command = tokio::process::Command::new("sh");
+    command
         .args(["-c", cmd])
         .current_dir(dir)
+        .kill_on_drop(true)
         .stdin(Stdio::null())
         .stdout(Stdio::piped())
         .stderr(Stdio::piped());
     for (k, v) in env {
-        child.env(k, v);
+        command.env(k, v);
     }
 
-    let output = tokio::time::timeout(Duration::from_secs(timeout_secs), child.output())
+    let mut child = command.spawn().map_err(|e| {
+        if e.kind() == std::io::ErrorKind::NotFound {
+            anyhow::anyhow!("sh is not available; exec sources require a POSIX shell on PATH")
+        } else {
+            anyhow::Error::from(e).context("failed to spawn exec command")
+        }
+    })?;
+
+    let stdout = child.stdout.take().expect("stdout piped");
+    let stderr = child.stderr.take().expect("stderr piped");
+
+    let (stdout_bytes, stderr_bytes, status) =
+        tokio::time::timeout(Duration::from_secs(timeout_secs), async {
+            use tokio::io::AsyncReadExt;
+            let (stdout_res, stderr_res) = tokio::join!(
+                async {
+                    let mut buf = Vec::new();
+                    stdout
+                        .take((max_output_bytes as u64).saturating_add(1))
+                        .read_to_end(&mut buf)
+                        .await
+                        .map(|_| buf)
+                },
+                async {
+                    let mut buf = Vec::new();
+                    stderr
+                        .take((STDERR_CAP as u64).saturating_add(1))
+                        .read_to_end(&mut buf)
+                        .await
+                        .map(|_| buf)
+                },
+            );
+            let s_bytes = stdout_res?;
+            let e_bytes = stderr_res?;
+            let st = child.wait().await?;
+            Ok::<_, std::io::Error>((s_bytes, e_bytes, st))
+        })
         .await
         .context("exec command timed out")?
-        .map_err(|e| {
-            if e.kind() == std::io::ErrorKind::NotFound {
-                anyhow::anyhow!("sh is not available; exec sources require a POSIX shell on PATH")
-            } else {
-                anyhow::Error::from(e).context("failed to spawn exec command")
-            }
-        })?;
+        .map_err(|e| anyhow::Error::from(e).context("failed to read exec command output"))?;
 
-    if !output.status.success() {
-        let stderr = String::from_utf8_lossy(&output.stderr);
+    if stdout_bytes.len() > max_output_bytes {
+        anyhow::bail!(
+            "exec command exceeded max_output_bytes ({max_output_bytes}); \
+             raise exec.max_output_bytes or narrow the command"
+        );
+    }
+
+    if !status.success() {
+        let stderr = String::from_utf8_lossy(&stderr_bytes);
         anyhow::bail!(
             "exec command {:?} exited with {}: {}",
             cmd,
-            output.status,
+            status,
             stderr.trim()
         );
     }
 
-    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
+    Ok(String::from_utf8_lossy(&stdout_bytes).into_owned())
 }
 
 fn parse_jsonl_line(
@@ -296,6 +339,7 @@ mod tests {
         );
     }
 
+    #[cfg(unix)]
     #[tokio::test]
     async fn raw_mode_cmd_is_source() {
         let (docs, failures) = run_exec(
@@ -303,6 +347,7 @@ mod tests {
             None,
             &HashMap::new(),
             10,
+            10 * 1024 * 1024,
             None,
             ExecOutputMode::Raw,
             None,
@@ -318,6 +363,7 @@ mod tests {
         assert_eq!(docs[0].content, "hello");
     }
 
+    #[cfg(unix)]
     #[tokio::test]
     async fn raw_mode_explicit_source_key() {
         let (docs, _) = run_exec(
@@ -325,6 +371,7 @@ mod tests {
             None,
             &HashMap::new(),
             10,
+            10 * 1024 * 1024,
             None,
             ExecOutputMode::Raw,
             Some("my-doc"),
@@ -337,6 +384,7 @@ mod tests {
         assert_eq!(docs[0].source, "my-doc");
     }
 
+    #[cfg(unix)]
     #[tokio::test]
     async fn raw_mode_empty_stdout_yields_failure() {
         let (docs, failures) = run_exec(
@@ -344,6 +392,7 @@ mod tests {
             None,
             &HashMap::new(),
             10,
+            10 * 1024 * 1024,
             None,
             ExecOutputMode::Raw,
             None,
@@ -357,6 +406,7 @@ mod tests {
         assert_eq!(failures.len(), 1);
     }
 
+    #[cfg(unix)]
     #[tokio::test]
     async fn raw_mode_topic_and_format() {
         let (docs, _) = run_exec(
@@ -364,6 +414,7 @@ mod tests {
             None,
             &HashMap::new(),
             10,
+            10 * 1024 * 1024,
             Some("Docs"),
             ExecOutputMode::Raw,
             None,
@@ -376,4 +427,81 @@ mod tests {
         assert_eq!(docs[0].topic.as_deref(), Some("Docs"));
         assert_eq!(docs[0].format.as_deref(), Some("md"));
     }
+
+    #[cfg(unix)]
+    #[tokio::test]
+    async fn exec_output_within_cap_ok() {
+        let (docs, failures) = run_exec(
+            &["echo hello".to_owned()],
+            None,
+            &HashMap::new(),
+            10,
+            10 * 1024 * 1024,
+            None,
+            ExecOutputMode::Raw,
+            None,
+            None,
+            Path::new("/tmp"),
+            &ProgressHandle::noop(),
+        )
+        .await
+        .unwrap();
+        assert_eq!(docs.len(), 1);
+        assert!(failures.is_empty());
+        assert_eq!(docs[0].content, "hello");
+    }
+
+    #[cfg(unix)]
+    #[tokio::test]
+    async fn exec_output_exceeding_cap_errors() {
+        // Emit 10 bytes via a POSIX-portable command (no bash brace expansion,
+        // which dash -- Ubuntu's /bin/sh -- does not support).
+        let result = run_exec(
+            &["printf 1234567890".to_owned()],
+            None,
+            &HashMap::new(),
+            10,
+            5, // 5-byte cap
+            None,
+            ExecOutputMode::Raw,
+            None,
+            None,
+            Path::new("/tmp"),
+            &ProgressHandle::noop(),
+        )
+        .await;
+        let err = result.unwrap_err();
+        assert!(
+            err.to_string().contains("max_output_bytes"),
+            "error should mention max_output_bytes: {err}"
+        );
+    }
+
+    #[cfg(unix)]
+    #[tokio::test]
+    async fn exec_respects_configured_cap() {
+        // 'echo hi' outputs "hi\n" (3 bytes), which exceeds a 2-byte cap.
+        let result = run_exec(
+            &["echo hi".to_owned()],
+            None,
+            &HashMap::new(),
+            10,
+            2, // 2-byte cap
+            None,
+            ExecOutputMode::Raw,
+            None,
+            None,
+            Path::new("/tmp"),
+            &ProgressHandle::noop(),
+        )
+        .await;
+        assert!(
+            result.is_err(),
+            "should fail when output exceeds configured cap"
+        );
+        assert!(
+            result.unwrap_err().to_string().contains("max_output_bytes"),
+            "error should mention max_output_bytes"
+        );
+    }
 }
diff --git a/src/ingest/loaders/file.rs b/src/ingest/loaders/file.rs
index 47080f6..42e69b8 100644
--- a/src/ingest/loaders/file.rs
+++ b/src/ingest/loaders/file.rs
@@ -217,6 +217,10 @@ fn detect_format(path: &Path, mime: Option<&str>) -> Option<String> {
 }
 
 /// Walk `base` for files matching `pattern` (gitignore-aware), capped at `max` entries.
+///
+/// # Errors
+///
+/// Returns an error if the glob pattern is invalid or the blocking walk task panics.
 pub async fn list_files(base: &Path, pattern: &str, max: Option<usize>) -> Result<Vec<PathBuf>> {
     let base = base.to_path_buf();
     let pattern = pattern.to_owned();
@@ -275,6 +279,10 @@ fn list_files_sync(base: &Path, pattern: &str, max: Option<usize>) -> Result<Vec
 }
 
 /// Read a single file, extract its text via kreuzberg or raw UTF-8 fallback, and return a `LoaderResult`.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be read or exceeds the configured size or extraction limits.
 pub async fn read_file(
     path: &Path,
     topic: Option<&str>,
diff --git a/src/ingest/mod.rs b/src/ingest/mod.rs
index e12e950..1dbd41b 100644
--- a/src/ingest/mod.rs
+++ b/src/ingest/mod.rs
@@ -54,6 +54,9 @@ pub trait IngestObserver: Send + Sync {
     /// Ingest run starting. Called once before any sources are processed.
     fn on_start(&self, _kb_name: &str, _n_sources: usize, _mode: IngestMode, _dry_run: bool) {}
 
+    /// LLM configuration warning (missing credentials, likely to fail).
+    fn on_llm_warning(&self, _msg: &str) {}
+
     /// Dry-run notice.
     fn on_dry_run_notice(&self) {}
 
@@ -186,6 +189,10 @@ pub struct IngestResult {
 /// The `observer` controls all user-facing output (progress bars, status
 /// messages, signal handling). Use [`QuietIngestObserver`] for silent
 /// operation.
+///
+/// # Errors
+///
+/// Returns an error if the store cannot be opened or a fatal I/O error occurs during indexing.
 pub async fn ingest(
     config: IngestConfig,
     config_path: PathBuf,
@@ -221,6 +228,31 @@ pub async fn ingest(
     )
     .context("failed to open store")?;
 
+    // Warn if the existing store was built with different chunking parameters
+    // or a different lore version.
+    if store_exists && mode != IngestMode::Recreate {
+        let current_fingerprint = chunk_config_fingerprint(&config);
+        if let Some(stored) = store.get_metadata(meta_key::CHUNK_CONFIG)
+            && stored != current_fingerprint
+        {
+            tracing::warn!(
+                stored_chunk_config = stored,
+                current_chunk_config = current_fingerprint,
+                "store was built with different chunking parameters; \
+                 run `lore ingest --recreate` to rebuild with current settings"
+            );
+        }
+        if let Some(stored_ver) = store.get_metadata(meta_key::LORE_VERSION)
+            && stored_ver != crate::VERSION
+        {
+            tracing::warn!(
+                stored_version = stored_ver,
+                current_version = crate::VERSION,
+                "store was built with a different lore version"
+            );
+        }
+    }
+
     // Acquire an exclusive lock to prevent concurrent ingests against the
     // same store. The lock is acquired after Store::open (which creates the
     // directory) and after any --recreate destroy, so the lock file lives
@@ -350,6 +382,13 @@ pub async fn ingest(
     };
     let store = &*ctx.store;
 
+    #[cfg(feature = "llm")]
+    if let Some(ref client) = ctx.llm_client {
+        for warning in client.validate() {
+            observer.on_llm_warning(&warning);
+        }
+    }
+
     let mut total_docs = 0u64;
     let mut total_chunks = 0u64;
     let mut all_failed_docs: Vec<types::FailedDoc> = Vec::new();
@@ -813,6 +852,18 @@ fn cleanup_stale_documents(
     Ok(())
 }
 
+/// Stable fingerprint of the chunking-relevant processing defaults.
+///
+/// Captures `max_chunk_chars` and `min_chunk_chars` from the global processing
+/// config. When this string changes between ingest runs a warning is emitted so
+/// users know their stored chunks may not match the current settings.
+fn chunk_config_fingerprint(config: &IngestConfig) -> String {
+    format!(
+        "max_chunk_chars={},min_chunk_chars={}",
+        config.processing.max_chunk_chars, config.processing.min_chunk_chars
+    )
+}
+
 /// Write run metadata to the store, commit, and optimize if needed.
 fn write_store_metadata(
     store: &Store,
@@ -841,6 +892,7 @@ fn write_store_metadata(
         &config.store.writer_heap_mb.to_string(),
     );
     store.set_metadata(meta_key::LANGUAGE, config.store.language.as_str());
+    store.set_metadata(meta_key::CHUNK_CONFIG, &chunk_config_fingerprint(config));
 
     if let Some(name) = &config.name {
         store.set_metadata(meta_key::NAME, name);
diff --git a/src/ingest/transforms.rs b/src/ingest/transforms.rs
index 63448f6..dc4badb 100644
--- a/src/ingest/transforms.rs
+++ b/src/ingest/transforms.rs
@@ -38,6 +38,10 @@ pub struct CompiledProfile {
 
 impl CompiledProfile {
     /// Compile a `ProcessingProfile` into an executable form.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any regex pattern in the pipeline is invalid.
     pub fn compile(profile: &ProcessingProfile) -> Result<Self> {
         let mut pipeline = Vec::with_capacity(profile.pipeline.len());
         for (i, step) in profile.pipeline.iter().enumerate() {
diff --git a/src/ingest/watch.rs b/src/ingest/watch.rs
index 9e37780..6e4e53f 100644
--- a/src/ingest/watch.rs
+++ b/src/ingest/watch.rs
@@ -34,6 +34,10 @@ pub trait WatchObserver: Send + Sync {
 
 /// Watch local source paths and re-ingest on changes, with optional periodic
 /// polling.
+///
+/// # Errors
+///
+/// Returns an error if the filesystem watcher cannot be initialised or encounters a fatal I/O failure.
 pub async fn watch(
     config: IngestConfig,
     config_path: PathBuf,
diff --git a/src/lib.rs b/src/lib.rs
index 8d7aaf7..d7f907d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,6 @@
+// Documentation lints are enforced on the public library surface only (the
+// workspace table leaves them allow-by-default so binaries aren't burdened).
+#![warn(clippy::missing_errors_doc, clippy::missing_panics_doc)]
 //! Knowledge base library: ingest, index, search, and serve documents over MCP.
 
 /// Infallible `write!` to a `String` -- panics on `Err` (which `String` never returns).
diff --git a/src/llm.rs b/src/llm.rs
index f2a4408..31731f6 100644
--- a/src/llm.rs
+++ b/src/llm.rs
@@ -73,6 +73,10 @@ pub struct DocumentEnrichment {
 
 impl LlmClient {
     /// Create a new client from the given LLM config, building the shared HTTP client.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the HTTP client cannot be built (e.g. invalid TLS configuration).
     pub fn new(config: &LlmConfig) -> Result<Self> {
         let http = reqwest::Client::builder()
             .timeout(std::time::Duration::from_secs(config.timeout_secs))
@@ -86,6 +90,60 @@ impl LlmClient {
         })
     }
 
+    /// Check that required credentials are available for the configured provider.
+    ///
+    /// Returns warnings for obvious misconfigurations (missing API keys).
+    /// Does not perform network calls.
+    pub fn validate(&self) -> Vec<String> {
+        Self::validate_credentials(
+            self.config.provider,
+            std::env::var("ANTHROPIC_API_KEY").is_ok(),
+            std::env::var("OPENAI_API_KEY").is_ok(),
+        )
+    }
+
+    /// Pure credential-check logic, separated from environment access so it can
+    /// be unit-tested without mutating process-global environment variables.
+    fn validate_credentials(
+        provider: Option<LlmProvider>,
+        has_anthropic: bool,
+        has_openai: bool,
+    ) -> Vec<String> {
+        let mut warnings = Vec::new();
+        match provider {
+            Some(LlmProvider::Anthropic) => {
+                if !has_anthropic {
+                    warnings.push(
+                        "ANTHROPIC_API_KEY is not set -- LLM enrichment will fail; \
+                         set the variable or remove the llm config block"
+                            .to_owned(),
+                    );
+                }
+            }
+            Some(LlmProvider::Openai) => {
+                if !has_openai {
+                    warnings.push(
+                        "OPENAI_API_KEY is not set -- LLM enrichment will fail; \
+                         set the variable or remove the llm config block"
+                            .to_owned(),
+                    );
+                }
+            }
+            Some(LlmProvider::Ollama | LlmProvider::Bedrock) => {}
+            None => {
+                if !has_anthropic && !has_openai {
+                    warnings.push(
+                        "no LLM API key found (checked ANTHROPIC_API_KEY, OPENAI_API_KEY) \
+                         -- enrichment will fall back to Ollama; set a key or configure \
+                         llm.provider explicitly"
+                            .to_owned(),
+                    );
+                }
+            }
+        }
+        warnings
+    }
+
     /// Detect a topic name for a document using LLM.
     pub async fn detect_topic(&self, content: &str) -> Option<String> {
         self.call_simple(
@@ -541,6 +599,10 @@ pub async fn enrich_document(
 }
 
 /// Enrich all chunks in a document batch via concurrent LLM calls, filtering by quality threshold.
+///
+/// # Panics
+///
+/// Panics if the enrichment semaphore is closed before all tasks complete (should never occur during normal operation).
 pub async fn enrich_chunks(
     chunks: &mut Vec<Chunk>,
     llm_client: &LlmClient,
@@ -597,3 +659,60 @@ pub async fn enrich_chunks(
         })
         .collect();
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn validate_anthropic_missing_key() {
+        let warnings = LlmClient::validate_credentials(Some(LlmProvider::Anthropic), false, false);
+        assert_eq!(warnings.len(), 1);
+        assert!(warnings[0].contains("ANTHROPIC_API_KEY"));
+    }
+
+    #[test]
+    fn validate_anthropic_key_present() {
+        let warnings = LlmClient::validate_credentials(Some(LlmProvider::Anthropic), true, false);
+        assert!(warnings.is_empty());
+    }
+
+    #[test]
+    fn validate_openai_missing_key() {
+        let warnings = LlmClient::validate_credentials(Some(LlmProvider::Openai), false, false);
+        assert_eq!(warnings.len(), 1);
+        assert!(warnings[0].contains("OPENAI_API_KEY"));
+    }
+
+    #[test]
+    fn validate_openai_key_present() {
+        let warnings = LlmClient::validate_credentials(Some(LlmProvider::Openai), false, true);
+        assert!(warnings.is_empty());
+    }
+
+    #[test]
+    fn validate_ollama_no_warning() {
+        assert!(
+            LlmClient::validate_credentials(Some(LlmProvider::Ollama), false, false).is_empty()
+        );
+    }
+
+    #[test]
+    fn validate_bedrock_no_warning() {
+        assert!(
+            LlmClient::validate_credentials(Some(LlmProvider::Bedrock), false, false).is_empty()
+        );
+    }
+
+    #[test]
+    fn validate_default_provider_warns_without_keys() {
+        let warnings = LlmClient::validate_credentials(None, false, false);
+        assert_eq!(warnings.len(), 1);
+        assert!(warnings[0].contains("no LLM API key"));
+    }
+
+    #[test]
+    fn validate_default_provider_silent_with_key() {
+        assert!(LlmClient::validate_credentials(None, true, false).is_empty());
+    }
+}
diff --git a/src/output/documents.rs b/src/output/documents.rs
index 6e5d628..708cef9 100644
--- a/src/output/documents.rs
+++ b/src/output/documents.rs
@@ -13,6 +13,10 @@ use crate::store::DocDetail;
 use crate::types::{Chunk, DocMeta};
 
 /// Format a paginated list of document metadata.
+///
+/// # Errors
+///
+/// Returns an error if JSON serialization fails (only possible in `Json` output mode).
 pub fn format_document_list(
     entries: &[DocMeta],
     total: usize,
@@ -157,6 +161,10 @@ fn format_document_list_cli(
 }
 
 /// Format a single document with its chunk content.
+///
+/// # Errors
+///
+/// Returns an error if JSON serialization fails (only possible in `Json` output mode).
 pub fn format_document(
     doc: &DocDetail,
     offset: usize,
diff --git a/src/output/info.rs b/src/output/info.rs
index 4b3ff4b..5711519 100644
--- a/src/output/info.rs
+++ b/src/output/info.rs
@@ -14,6 +14,10 @@ use crate::store::{StoreInfo, TopicStat};
 const TOPIC_DISPLAY_LIMIT: usize = 10;
 
 /// Format store statistics as CLI text, JSON, or MCP text.
+///
+/// # Errors
+///
+/// Returns an error if JSON serialization fails (only possible in `Json` output mode).
 pub fn format_store_info(
     info: &StoreInfo,
     store_entries: &[(PathBuf, u64)],
diff --git a/src/query.rs b/src/query.rs
index 1486bb3..cf2f541 100644
--- a/src/query.rs
+++ b/src/query.rs
@@ -196,6 +196,10 @@ pub fn resolve_filters(
 }
 
 /// Build a `SearchQuery` from `SearchArgs` and run it against the store set.
+///
+/// # Errors
+///
+/// Returns an error if the query is empty or the Tantivy search fails.
 pub fn execute_search(
     stores: &StoreSet,
     args: SearchArgs,
@@ -223,6 +227,10 @@ pub fn execute_search(
 }
 
 /// Full-text search across indexed chunks with optional filters.
+///
+/// # Errors
+///
+/// Returns an error if the search query fails or output formatting fails.
 pub fn search(stores: &StoreSet, args: SearchArgs, mode: OutputMode) -> Result<QueryResult> {
     let offset = args.pagination.offset;
     let (results, sq, total) = execute_search(stores, args)?;
@@ -231,6 +239,10 @@ pub fn search(stores: &StoreSet, args: SearchArgs, mode: OutputMode) -> Result<Q
 }
 
 /// List all topics in the store with document and chunk counts.
+///
+/// # Errors
+///
+/// Returns an error if the store query fails or output formatting fails.
 // `args` is taken by value because the store query consumes its fields;
 // accepting `&TopicsArgs` would require either cloning every field or
 // changing the store API to borrow, adding noise with no runtime benefit.
@@ -256,6 +268,10 @@ pub fn list_topics(stores: &StoreSet, args: TopicsArgs, mode: OutputMode) -> Res
 }
 
 /// Retrieve details for a single topic by name.
+///
+/// # Errors
+///
+/// Returns an error if the store query fails, the topic is not found, or output formatting fails.
 // Same by-value rationale as `list_topics`: fields are consumed by the query.
 #[allow(clippy::needless_pass_by_value)]
 pub fn get_topic(stores: &StoreSet, args: TopicArgs, mode: OutputMode) -> Result<String> {
@@ -276,6 +292,10 @@ pub fn get_topic(stores: &StoreSet, args: TopicArgs, mode: OutputMode) -> Result
 }
 
 /// List indexed documents with optional filtering and pagination.
+///
+/// # Errors
+///
+/// Returns an error if output formatting fails.
 pub fn list_documents(stores: &StoreSet, args: DocsArgs, mode: OutputMode) -> Result<QueryResult> {
     let limit = args.pagination.limit;
     let offset = args.pagination.offset;
@@ -298,6 +318,10 @@ pub fn list_documents(stores: &StoreSet, args: DocsArgs, mode: OutputMode) -> Re
 }
 
 /// Retrieve a single document's metadata and chunks by source path.
+///
+/// # Errors
+///
+/// Returns an error if the store query fails, the document is not found, or output formatting fails.
 #[allow(clippy::needless_pass_by_value)]
 pub fn get_document(stores: &StoreSet, args: ReadArgs, mode: OutputMode) -> Result<String> {
     let (limit, offset) = if args.full {
diff --git a/src/store/documents.rs b/src/store/documents.rs
index 02ee3b1..a7e0e46 100644
--- a/src/store/documents.rs
+++ b/src/store/documents.rs
@@ -75,13 +75,17 @@ impl Store {
         self.dirty.store(true, Ordering::Release);
     }
 
-    #[cfg(all(test, feature = "ingest"))]
+    #[cfg(feature = "ingest")]
     pub(crate) fn get_metadata(&self, key: &str) -> Option<String> {
         let meta = self.read_meta();
         meta.get(key).cloned()
     }
 
     /// Convenience wrapper: delete existing chunks, insert new ones, upsert document meta.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if deleting old chunks or inserting new chunks into the Tantivy index fails.
     pub fn replace_document(&self, source_id: &str, chunks: &[Chunk], meta: DocMeta) -> Result<()> {
         self.delete_chunks_by_source(source_id)?;
         if !chunks.is_empty() {
@@ -92,6 +96,10 @@ impl Store {
     }
 
     /// Fetch metadata and all chunks for a single document by source_id.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Tantivy index query fails.
     pub fn get_document(&self, source_id: &str) -> Result<Option<DocDetail>> {
         let Some(meta) = self.read_docs().get(source_id).cloned() else {
             return Ok(None);
diff --git a/src/store/meta_key.rs b/src/store/meta_key.rs
index aa99cd6..8ffe877 100644
--- a/src/store/meta_key.rs
+++ b/src/store/meta_key.rs
@@ -18,3 +18,7 @@ pub(crate) const PHRASE_SEARCH: &str = "phrase_search";
 pub(crate) const WRITER_HEAP_MB: &str = "writer_heap_mb";
 /// Stemming/stop-word language used to build the tokenizer pipeline.
 pub(crate) const LANGUAGE: &str = "language";
+/// Fingerprint of the chunker-relevant processing config (max/min chunk chars).
+/// Used to warn when a store was built with different chunking parameters.
+#[cfg(feature = "ingest")]
+pub(crate) const CHUNK_CONFIG: &str = "chunk_config";
diff --git a/src/store/mod.rs b/src/store/mod.rs
index 6533ec9..4ce752c 100644
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -68,6 +68,10 @@ pub struct Store {
 impl Store {
     /// Open a store for reading with default settings (phrase search on, default heap).
     /// The language is read from lore.meta if present, falling back to English.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Tantivy index at `path` cannot be opened.
     pub fn open_readonly(path: &Path) -> Result<Self> {
         use crate::types::IndexLanguage;
         let preloaded = load_meta_map(path);
@@ -80,6 +84,10 @@ impl Store {
     }
 
     /// Open or create a store at `path`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Tantivy index cannot be opened or created at `path`.
     pub fn open(
         path: &Path,
         phrase_search: bool,
diff --git a/src/store/multi.rs b/src/store/multi.rs
index 771ef2b..f9a58dd 100644
--- a/src/store/multi.rs
+++ b/src/store/multi.rs
@@ -6,8 +6,8 @@ use anyhow::Result;
 use crate::store::documents::resolve_name_from_terms;
 use crate::store::types::{avg_words_per_chunk, sort_chunks_natural};
 use crate::store::{
-    DocDetail, DocFilter, SearchHit, SearchQuery, SourceResolveResult, Store, StoreInfo,
-    TopicFilter, TopicResult, TopicSort, TopicStat,
+    DocDetail, DocFilter, SearchHit, SearchQuery, SearchSort, SourceResolveResult, Store,
+    StoreInfo, TopicFilter, TopicResult, TopicSort, TopicStat,
 };
 use crate::types::{Chunk, DocKind, DocMeta, SourceType};
 use crate::util;
@@ -22,6 +22,10 @@ pub struct StoreSet {
 
 impl StoreSet {
     /// Wrap multiple stores into a federated set.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `stores` is empty.
     pub fn new(stores: Vec<Store>) -> Result<Self> {
         anyhow::ensure!(!stores.is_empty(), "StoreSet requires at least one store");
         Ok(Self {
@@ -30,6 +34,10 @@ impl StoreSet {
     }
 
     /// Wrap a single store.
+    ///
+    /// # Panics
+    ///
+    /// Never panics; the internal `new` call with a single-element vec always succeeds.
     pub fn single(store: Store) -> Self {
         Self::new(vec![store]).expect("single-element vec is always non-empty")
     }
@@ -45,6 +53,10 @@ impl StoreSet {
     }
 
     /// Full-text search across all stores, merged by score and re-paginated.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any store's Tantivy search fails.
     pub fn search(
         &self,
         sq: &SearchQuery,
@@ -63,11 +75,18 @@ impl StoreSet {
 
         let mut all_hits: Vec<SearchHit> = Vec::new();
         let mut total: usize = 0;
+        let mut batch_ranges: Vec<(usize, usize)> = Vec::new();
 
         for store in &self.stores {
+            let start = all_hits.len();
             let (hits, store_total) = store.search(&overfetch)?;
             total = total.saturating_add(store_total);
             all_hits.extend(hits);
+            batch_ranges.push((start, all_hits.len()));
+        }
+
+        if matches!(sq.sort, SearchSort::Score) {
+            normalize_scores(&mut all_hits, &batch_ranges);
         }
 
         sq.sort.apply(&mut all_hits, sq.reverse);
@@ -136,6 +155,10 @@ impl StoreSet {
     }
 
     /// Retrieve topic details, merging chunks from all stores that have the topic.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any store's topic query fails.
     pub fn get_topic(&self, name: &str) -> Result<Option<TopicResult>> {
         if self.stores.len() == 1 {
             return self.stores[0].get_topic(name);
@@ -213,6 +236,10 @@ impl StoreSet {
     }
 
     /// Retrieve a document's metadata and chunks, resolving the source key first.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any store's document query fails.
     pub fn get_document(&self, source_id: &str) -> Result<Option<DocDetail>> {
         if self.stores.len() == 1 {
             return self.stores[0].get_document(source_id);
@@ -279,6 +306,32 @@ fn merge_counts<K: Eq + std::hash::Hash>(target: &mut HashMap<K, u64>, source: H
     }
 }
 
+/// Min-max normalize BM25 scores within each store batch to [0.0, 1.0].
+///
+/// Makes scores from different-sized corpora comparable before merging.
+fn normalize_scores(hits: &mut [SearchHit], batch_ranges: &[(usize, usize)]) {
+    for &(start, end) in batch_ranges {
+        let batch = &hits[start..end];
+        if batch.is_empty() {
+            continue;
+        }
+        let min = batch
+            .iter()
+            .fold(f64::INFINITY, |acc, h| acc.min(h.score.unwrap_or(0.0)));
+        let max = batch
+            .iter()
+            .fold(f64::NEG_INFINITY, |acc, h| acc.max(h.score.unwrap_or(0.0)));
+        let range = max - min;
+        for h in &mut hits[start..end] {
+            h.score = Some(if range == 0.0 {
+                1.0
+            } else {
+                (h.score.unwrap_or(0.0) - min) / range
+            });
+        }
+    }
+}
+
 /// Accumulate a `TopicStat` into a topic map, merging counts for duplicate names.
 fn merge_topic_stat(map: &mut HashMap<String, TopicStat>, stat: TopicStat) {
     let entry = map.entry(stat.name.clone()).or_insert_with(|| TopicStat {
@@ -482,4 +535,167 @@ mod tests {
 
         drop((d1, d2));
     }
+
+    #[test]
+    fn normalize_scores_multi_store() {
+        let (set, _d1, _d2) = two_store_set("t1", "t2");
+        let sq = SearchQuery::new("content");
+        let (hits, _) = set.search(&sq).unwrap();
+        for hit in &hits {
+            let score = hit.score.expect("score should be present");
+            assert!(
+                (0.0..=1.0).contains(&score),
+                "score {score} should be in [0.0, 1.0]"
+            );
+        }
+    }
+
+    #[test]
+    fn normalize_scores_single_hit_per_store() {
+        let (s1, d1) = temp_store();
+        s1.insert_chunks(&[test_chunk("/a.md", "unique alpha", "t1", 0)])
+            .unwrap();
+        s1.upsert_document(test_meta("/a.md", "t1"));
+        s1.commit().unwrap();
+
+        let (s2, d2) = temp_store();
+        s2.insert_chunks(&[test_chunk("/b.md", "unique beta", "t2", 0)])
+            .unwrap();
+        s2.upsert_document(test_meta("/b.md", "t2"));
+        s2.commit().unwrap();
+
+        let set = StoreSet::new(vec![s1, s2]).unwrap();
+        let sq = SearchQuery::new("unique");
+        let (hits, _) = set.search(&sq).unwrap();
+        assert_eq!(hits.len(), 2);
+        for hit in &hits {
+            assert_eq!(
+                hit.score,
+                Some(1.0),
+                "single hit per store should normalize to 1.0"
+            );
+        }
+
+        drop((d1, d2));
+    }
+
+    #[test]
+    fn normalize_scores_skipped_for_source_sort() {
+        let (set, _d1, _d2) = two_store_set("t1", "t2");
+
+        // Same query under both sorts. Each store holds a single matching chunk,
+        // so normalization (when it runs) pins every score to exactly 1.0.
+        let scored = set
+            .search(&SearchQuery {
+                query: "content".into(),
+                sort: SearchSort::Score,
+                ..SearchQuery::default()
+            })
+            .unwrap()
+            .0;
+        let by_source = set
+            .search(&SearchQuery {
+                query: "content".into(),
+                sort: SearchSort::Source,
+                ..SearchQuery::default()
+            })
+            .unwrap()
+            .0;
+
+        assert_eq!(scored.len(), 2, "query should match one chunk per store");
+        assert_eq!(by_source.len(), 2, "query should match one chunk per store");
+
+        // Score sort normalizes each single-hit store batch to exactly 1.0...
+        assert!(
+            scored.iter().all(|h| h.score == Some(1.0)),
+            "score sort should normalize single-hit stores to 1.0, got {:?}",
+            scored.iter().map(|h| h.score).collect::<Vec<_>>()
+        );
+        // ...whereas source sort skips normalization and preserves the raw BM25
+        // scores, which would otherwise be forced to 1.0 if normalization ran.
+        assert!(
+            by_source.iter().all(|h| h.score != Some(1.0)),
+            "source sort must preserve raw BM25 scores, got {:?}",
+            by_source.iter().map(|h| h.score).collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn score_sort_ties_break_by_source_not_store_order() {
+        // Two single-hit stores both normalize to exactly 1.0 -> a cross-store tie.
+        // Store insertion order is [zebra, apple] (deliberately NOT alphabetical),
+        // so a store-order tie-break would yield zebra first. The contract is that
+        // ties resolve by source path, independent of store declaration order.
+        let (s1, d1) = temp_store();
+        s1.insert_chunks(&[test_chunk("/zebra.md", "tie content", "t1", 0)])
+            .unwrap();
+        s1.upsert_document(test_meta("/zebra.md", "t1"));
+        s1.commit().unwrap();
+
+        let (s2, d2) = temp_store();
+        s2.insert_chunks(&[test_chunk("/apple.md", "tie content", "t2", 0)])
+            .unwrap();
+        s2.upsert_document(test_meta("/apple.md", "t2"));
+        s2.commit().unwrap();
+
+        let set = StoreSet::new(vec![s1, s2]).unwrap();
+        let (hits, _) = set.search(&SearchQuery::new("content")).unwrap();
+
+        assert_eq!(hits.len(), 2);
+        assert_eq!(
+            hits[0].score,
+            Some(1.0),
+            "both single-hit stores normalize to 1.0"
+        );
+        assert_eq!(hits[1].score, Some(1.0));
+        assert_eq!(
+            hits[0].chunk.source.as_ref(),
+            "/apple.md",
+            "tie must break by source path, not store insertion order"
+        );
+        assert_eq!(hits[1].chunk.source.as_ref(), "/zebra.md");
+
+        drop((d1, d2));
+    }
+
+    #[test]
+    fn normalize_preserves_relative_order() {
+        let (s1, d1) = temp_store();
+        s1.insert_chunks(&[
+            test_chunk("/a.md", "rust programming language guide", "t", 0),
+            test_chunk("/b.md", "rust rust rust", "t", 0),
+            test_chunk("/c.md", "brief rust", "t", 0),
+        ])
+        .unwrap();
+        s1.commit().unwrap();
+
+        let (s2, d2) = temp_store();
+        s2.insert_chunks(&[test_chunk("/d.md", "unrelated python", "t", 0)])
+            .unwrap();
+        s2.commit().unwrap();
+
+        let set = StoreSet::new(vec![s1, s2]).unwrap();
+        let sq = SearchQuery {
+            query: "rust".into(),
+            limit: 10,
+            ..SearchQuery::default()
+        };
+        let (hits, _) = set.search(&sq).unwrap();
+
+        let s1_scores: Vec<f64> = hits
+            .iter()
+            .filter(|h| h.chunk.source.starts_with('/'))
+            .filter(|h| h.chunk.source.as_ref() != "/d.md")
+            .map(|h| h.score.unwrap_or(0.0))
+            .collect();
+
+        for window in s1_scores.windows(2) {
+            assert!(
+                window[0] >= window[1],
+                "scores should be descending within a store: {s1_scores:?}"
+            );
+        }
+
+        drop((d1, d2));
+    }
 }
diff --git a/src/store/resolve.rs b/src/store/resolve.rs
index 81062b0..edd8738 100644
--- a/src/store/resolve.rs
+++ b/src/store/resolve.rs
@@ -15,6 +15,10 @@ impl Store {
     /// Iterates each segment's FAST `source` StrColumn over live docs,
     /// collecting unique ordinals per segment and computing source_id from
     /// each unique source path. Skips soft-deleted docs via `doc_ids_alive()`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a segment's FAST column cannot be accessed.
     pub fn index_source_keys(&self) -> Result<HashSet<SourceId>> {
         let searcher = self.reader.searcher();
         let mut keys = HashSet::<SourceId>::new();
@@ -47,6 +51,10 @@ impl Store {
     ///
     /// Runs a targeted `BooleanQuery` to find matching doc addresses, then
     /// reads `source` from the FAST StrColumn and computes `source_id`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Tantivy search or FAST field access fails.
     pub fn count_chunks_for_sources(&self, sources: &[&str]) -> Result<HashMap<SourceId, u64>> {
         if sources.is_empty() {
             return Ok(HashMap::new());
diff --git a/src/store/search.rs b/src/store/search.rs
index 5c3290d..286ecd6 100644
--- a/src/store/search.rs
+++ b/src/store/search.rs
@@ -23,6 +23,10 @@ impl Store {
     ///
     /// `total` is the exact Tantivy match count for the query+filters (excluding
     /// in-memory-only filters like source substring and `max_per_source`).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the query is empty or the Tantivy index search fails.
     pub fn search(
         &self,
         query: &SearchQuery,
@@ -204,6 +208,10 @@ impl Store {
     }
 
     /// Build a BooleanQuery that matches any of the given source strings.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `sources` is empty.
     pub fn source_term_query(&self, sources: &[impl AsRef<str>]) -> BooleanQuery {
         assert!(
             !sources.is_empty(),
@@ -225,6 +233,10 @@ impl Store {
 
     /// Fetch all chunks for a set of source paths. Returns chunks sorted by
     /// source then position.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Tantivy index search or document retrieval fails.
     pub fn chunks_for_sources(&self, sources: &[impl AsRef<str>]) -> Result<Vec<Chunk>> {
         if sources.is_empty() {
             return Ok(Vec::new());
diff --git a/src/store/test_helpers.rs b/src/store/test_helpers.rs
index 4cb9491..15872a8 100644
--- a/src/store/test_helpers.rs
+++ b/src/store/test_helpers.rs
@@ -6,11 +6,13 @@ use crate::store::SearchHit;
 use crate::types::{Chunk, DocKind, DocMeta, SourceId, SourceType};
 
 /// Open a store at an existing path (for tests that need the path to outlive the store).
+#[allow(clippy::missing_panics_doc)]
 pub fn open_test_store(path: &std::path::Path) -> store::Store {
     store::Store::open(path, true, 256, crate::types::IndexLanguage::default(), 100).unwrap()
 }
 
 /// Create a temporary store backed by a `TempDir`.
+#[allow(clippy::missing_panics_doc)]
 pub fn temp_store() -> (store::Store, tempfile::TempDir) {
     let dir = tempfile::tempdir().unwrap();
     let store = open_test_store(dir.path());
diff --git a/src/store/types.rs b/src/store/types.rs
index 5b10e43..d170aa5 100644
--- a/src/store/types.rs
+++ b/src/store/types.rs
@@ -162,16 +162,36 @@ pub enum SearchSort {
 
 impl SearchSort {
     /// Sort a slice of search hits in-place, optionally reversing.
+    ///
+    /// Every arm falls back to `tie_break` so the result is a deterministic
+    /// total order -- independent of store declaration order, merge order, and the
+    /// stability of the underlying sort algorithm.
     pub fn apply(self, hits: &mut [SearchHit], reverse: bool) {
         match self {
             SearchSort::Score => {
-                hits.sort_by(|a, b| b.score.unwrap_or(0.0).total_cmp(&a.score.unwrap_or(0.0)));
+                hits.sort_by(|a, b| {
+                    b.score
+                        .unwrap_or(0.0)
+                        .total_cmp(&a.score.unwrap_or(0.0))
+                        .then_with(|| tie_break(a, b))
+                });
             }
             SearchSort::Source => {
-                hits.sort_by(|a, b| a.chunk.source.cmp(&b.chunk.source));
+                hits.sort_by(|a, b| {
+                    a.chunk
+                        .source
+                        .cmp(&b.chunk.source)
+                        .then_with(|| tie_break(a, b))
+                });
             }
             SearchSort::Topic => {
-                hits.sort_by(|a, b| a.chunk.topic.as_deref().cmp(&b.chunk.topic.as_deref()));
+                hits.sort_by(|a, b| {
+                    a.chunk
+                        .topic
+                        .as_deref()
+                        .cmp(&b.chunk.topic.as_deref())
+                        .then_with(|| tie_break(a, b))
+                });
             }
         }
         if reverse {
@@ -180,6 +200,18 @@ impl SearchSort {
     }
 }
 
+/// Deterministic, store-order-independent tie-break for hits whose primary sort
+/// key is equal: orders by source path, then chunk index. Without it, equal-score
+/// hits -- common after per-store score normalization pins every store's top hit to
+/// 1.0 -- would fall back to incidental merge/input order, which is fragile under
+/// refactors (e.g. switching to `sort_unstable_by` or a parallel store fetch).
+fn tie_break(a: &SearchHit, b: &SearchHit) -> std::cmp::Ordering {
+    a.chunk
+        .source
+        .cmp(&b.chunk.source)
+        .then_with(|| a.chunk.chunk_index.cmp(&b.chunk.chunk_index))
+}
+
 /// Full-text search request with filters, pagination, and sort.
 #[derive(Clone)]
 pub struct SearchQuery {
diff --git a/src/store/writer.rs b/src/store/writer.rs
index b8910e1..49475bf 100644
--- a/src/store/writer.rs
+++ b/src/store/writer.rs
@@ -31,6 +31,10 @@ impl Store {
     /// Unlike `commit()`, this keeps the writer alive so that Tantivy's merge
     /// policy can compact segments in the background between commits (background
     /// compaction is managed by Tantivy, not controlled directly here).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Tantivy commit or reader reload fails.
     pub fn commit_index(&self) -> Result<()> {
         if !self.dirty.load(Ordering::Acquire) {
             return Ok(());
@@ -48,6 +52,10 @@ impl Store {
     /// Full commit: Tantivy index + sidecar files (`lore.meta`, `lore.docs`).
     /// Used for per-source commits and the final commit. Skips writing when
     /// no changes have been made since the last commit.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Tantivy commit or sidecar file writes fail.
     pub fn commit(&self) -> Result<()> {
         if !self.dirty.load(Ordering::Acquire) {
             return Ok(());
@@ -58,11 +66,23 @@ impl Store {
     /// Unconditional commit: always writes sidecar files regardless of dirty
     /// flags. Used on interrupt to guarantee persistence even if a prior
     /// commit already cleared the flags.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the Tantivy commit or sidecar file writes fail.
     pub fn force_commit(&self) -> Result<()> {
         self.commit_inner(true)
     }
 
     /// Add chunks to the Tantivy index.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the index writer cannot be initialised or adding a document fails.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the writer lock is in an unexpected state (should never occur in normal use).
     pub fn insert_chunks(&self, chunks: &[Chunk]) -> Result<()> {
         let guard = self.ensure_writer()?;
         let writer = guard.as_ref().expect("writer initialized by ensure_writer");
@@ -113,6 +133,14 @@ impl Store {
     }
 
     /// Delete all chunks for a given source ID.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the index writer cannot be initialised.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the writer lock is in an unexpected state (should never occur in normal use).
     pub fn delete_chunks_by_source(&self, source_id: &str) -> Result<()> {
         let term = tantivy::Term::from_field_text(self.fields.source_id, source_id);
         let guard = self.ensure_writer()?;
@@ -146,6 +174,10 @@ impl Store {
     /// Tantivy's GC can delete the old segment files after each merge commit.
     /// Holding the lock across rounds would not provide additional safety
     /// because each round is self-contained (commit + reader reload + GC).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if listing segments, merging, or committing fails.
     pub fn optimize(&self, on_progress: impl Fn(usize)) -> Result<()> {
         // Phase 1: smart merge -- collapse small segments while skipping the
         // dominant one to avoid a full rewrite when most data is already in a
diff --git a/src/util/fs.rs b/src/util/fs.rs
index c99da2c..c92598a 100644
--- a/src/util/fs.rs
+++ b/src/util/fs.rs
@@ -104,6 +104,10 @@ pub fn is_lexically_safe_path(path: &Path) -> bool {
 /// On Unix, if the destination file already exists its permissions are preserved on
 /// the new file after the rename. New files receive the default `NamedTempFile`
 /// permissions (typically 0o600).
+///
+/// # Errors
+///
+/// Returns an error if the parent directory cannot be created, the temp file cannot be written, or the rename fails.
 pub fn atomic_write(path: &Path, data: &[u8]) -> Result<()> {
     let parent = path.parent().context("no parent directory")?;
     std::fs::create_dir_all(parent).context("failed to create parent directory")?;
diff --git a/src/util/marker.rs b/src/util/marker.rs
index 796799f..f600afa 100644
--- a/src/util/marker.rs
+++ b/src/util/marker.rs
@@ -17,6 +17,10 @@ use crate::util::{atomic_write, blake3_hex};
 /// directory) this is acceptable: a crash here is no worse than the marker never
 /// being written.  If hard durability is required, callers must fsync the
 /// parent directory after this function returns.
+///
+/// # Errors
+///
+/// Returns an error if the marker file cannot be written.
 pub fn ensure_marker(root: &Path, file_name: &str, salt: &str) -> Result<()> {
     // `canonicalize` resolves symlinks so the hash is stable across relative-path
     // variations.  The fallback to the raw path (when the directory does not yet