diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..dcb2d27
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,21 @@
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+version: 2
+enable-beta-ecosystems: true # Julia ecosystem
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
+    ignore:
+      - dependency-name: "crate-ci/typos"
+        update-types: ["version-update:semver-patch", "version-update:semver-minor"]
+  - package-ecosystem: "julia"
+    directories:
+      - "/"
+      - "/docs"
+    schedule:
+      interval: "daily"
+    groups:
+      all-julia-packages:
+        patterns:
+          - "*"
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
new file mode 100644
index 0000000..4ec058a
--- /dev/null
+++ b/.github/workflows/CI.yml
@@ -0,0 +1,38 @@
+name: CI
+on:
+  push:
+    branches:
+      - main
+    tags: ['*']
+  pull_request:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        version:
+          - '1.10'
+          - '1.11'
+          - '1'
+        os:
+          - ubuntu-latest
+          - macos-latest
+          - windows-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: ${{ matrix.version }}
+      - uses: julia-actions/cache@v2
+      # PureKLU is not yet registered in General; develop it from the SciML repo so the
+      # dependency resolves on every Julia version (`[sources]` in Project.toml covers 1.11+,
+      # but this keeps 1.10 green too). Remove once PureKLU is registered.
+      - name: Develop unregistered PureKLU dependency
+        run: julia --color=yes --project=. -e 'using Pkg; Pkg.develop(url="https://github.com/SciML/PureKLU.jl")'
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml
new file mode 100644
index 0000000..d5dc283
--- /dev/null
+++ b/.github/workflows/FormatCheck.yml
@@ -0,0 +1,21 @@
+name: format-check
+
+on:
+  push:
+    branches:
+      - 'main'
+      - 'release-'
+    tags: '*'
+  pull_request:
+
+jobs:
+  runic:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: julia-actions/setup-julia@v3
+        with:
+          version: '1'
+      - uses: fredrikekre/runic-action@v1
+        with:
+          version: '1'
diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
new file mode 100644
index 0000000..a8e5983
--- /dev/null
+++ b/.github/workflows/SpellCheck.yml
@@ -0,0 +1,13 @@
+name: Spell Check
+
+on: [pull_request]
+
+jobs:
+  typos-check:
+    name: Spell Check with Typos
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v6
+      - name: Check spelling
+        uses: crate-ci/typos@v1.18.0
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
new file mode 100644
index 0000000..f49313b
--- /dev/null
+++ b/.github/workflows/TagBot.yml
@@ -0,0 +1,15 @@
+name: TagBot
+on:
+  issue_comment:
+    types:
+      - created
+  workflow_dispatch:
+jobs:
+  TagBot:
+    if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: JuliaRegistries/TagBot@v1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          ssh: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a617856
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.jl.cov
+*.jl.*.cov
+*.jl.mem
+/Manifest.toml
+/test/Manifest.toml
+/docs/build/
diff --git a/.typos.toml b/.typos.toml
new file mode 100644
index 0000000..0e35281
--- /dev/null
+++ b/.typos.toml
@@ -0,0 +1,7 @@
+# Identifiers carried over verbatim from the SuiteSparse C sources during the
+# port. They are variable names, not prose, so the spell checker should leave
+# them alone.
+[default.extend-words]
+pn = "pn"        # pointer-name index (AMD)
+lik = "lik"      # L[i,k] entry (solve kernels)
+hashi = "hashi"  # hash-bucket index (AMD supervariable hashing)
diff --git a/LICENSE b/LICENSE
index 545d94e..895883a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2026 SciML Open Source Scientific Machine Learning
+Copyright (c) 2026 Chris Rackauckas and contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/Project.toml b/Project.toml
new file mode 100644
index 0000000..7fb6ee2
--- /dev/null
+++ b/Project.toml
@@ -0,0 +1,48 @@
+name = "SparseWithDenseRowColMatrices"
+uuid = "80e09e16-2b43-4bb0-96aa-b2ce656e0cca"
+authors = ["Chris Rackauckas <accounts@chrisrackauckas.com> and contributors"]
+version = "0.1.0"
+
+[deps]
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
+PureKLU = "0c0d3e7f-3a8b-4f7e-b6f1-9a4d2e7c1f01"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[weakdeps]
+LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
+
+[extensions]
+SparseWithDenseRowColMatricesLinearSolveExt = "LinearSolve"
+
+# PureKLU is not yet registered in General; resolve it from the SciML repo. `[sources]`
+# is honored on Julia 1.11+; on 1.10 (and for registration) PureKLU must be added/registered
+# separately — see the CI workflow and README.
+[sources]
+PureKLU = {url = "https://github.com/SciML/PureKLU.jl"}
+
+[compat]
+Aqua = "0.8"
+ForwardDiff = "0.10, 1"
+LinearAlgebra = "1"
+LinearSolve = "3"
+PrecompileTools = "1"
+PureKLU = "0.1"
+Random = "1"
+SafeTestsets = "0.1"
+SparseArrays = "1"
+Test = "1"
+julia = "1.10"
+
+[extras]
+Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Aqua", "ForwardDiff", "LinearAlgebra", "LinearSolve", "Random", "SafeTestsets", "SparseArrays", "Test"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..dfdca25
--- /dev/null
+++ b/README.md
@@ -0,0 +1,268 @@
+# SparseWithDenseRowColMatrices.jl
+
+[![SciML Code Style](https://img.shields.io/static/v1?label=code%20style&message=SciML&color=9558b2&labelColor=389826)](https://github.com/SciML/SciMLStyle)
+
+A fast implementation of **"almost sparse" matrices** — a sparse bulk plus a low-rank dense
+correction — in Julia. It is the sparse-matrix analogue of
+[FastAlmostBandedMatrices.jl](https://github.com/SciML/FastAlmostBandedMatrices.jl): where
+that package handles a *banded* bulk with a few dense boundary rows (as in spectral
+collocation), this one handles a *general sparse* bulk with a low-rank correction (as in
+finite-element / finite-volume boundary-value problems, DAE systems, and KKT/bordered
+systems), built for use in [BoundaryValueDiffEq.jl](https://github.com/SciML/BoundaryValueDiffEq.jl).
+
+The sparse part is factored with [PureKLU.jl](https://github.com/SciML/PureKLU.jl) — a
+pure-Julia port of SuiteSparse's KLU (BTF + AMD + Gilbert–Peierls LU) — and the low-rank
+part is handled by the Sherman–Morrison–Woodbury identity, so a solve costs **one sparse LU
+solve plus a small `r × r` dense correction**. Because PureKLU is generic over
+`Union{Real, Complex}`, the whole pipeline works for `Float64`/`ComplexF64`, `BigFloat`, and
+`ForwardDiff.Dual` — i.e. you can autodiff straight through the solve.
+
+## The structure
+
+An `SparseWithDenseRowColMatrix` represents
+
+```
+A = S + U * V
+```
+
+* `S :: SparseMatrixCSC` — the `n × n` sparse bulk,
+* `U` — `n × r` dense (or a `SelectorMatrix` `[I_r; 0]` for the boundary-row case),
+* `V` — `r × n` dense, the low-rank "fill".
+
+with `r ≪ n` the *almost-sparse rank*.
+
+**Arrowhead / bordered matrices** are the canonical case: an arrowhead matrix (a sparse —
+often diagonal — bulk plus a dense last row *and* last column) is exactly `S + U*V` with
+`r = 2` (the border is rank-2), and a block-bordered system (sparse interior with a dense
+border of width `k`, as in KKT / saddle-point / domain-decomposition problems) is `r = 2k`
+over a block-diagonal bulk. Solving them this way avoids the fill-in a naive dense or
+unordered-sparse LU suffers on the dense border.
+
+## High-level example
+
+```julia
+using SparseWithDenseRowColMatrices, SparseArrays, LinearAlgebra
+
+n, r = 1000, 4
+
+# A diagonally-dominant sparse bulk
+S = sparse(4.0I, n, n)
+for i in 1:n-1; S[i+1, i] = -1.0; S[i, i+1] = -1.0; end
+
+# A general low-rank correction A = S + U*V …
+U = randn(n, r); V = randn(r, n)
+A = SparseWithDenseRowColMatrix(S, U, V)
+
+b = randn(n)
+x = A \ b                       # factor once, solve — Woodbury over a PureKLU LU of S
+F = factorize(A)                # keep the factorization for repeated solves
+x = F \ b
+
+# … or the boundary-condition form: `fill` is r dense rows placed at the top
+fill = randn(r, n)
+A2 = SparseWithDenseRowColMatrix(S, fill)               # adds `fill` to the top r rows
+A3 = SparseWithDenseRowColMatrix(S, fill; replace=true) # top r rows *become* `fill`
+```
+
+### Newton / time-stepping: refactor in place
+
+When the sparsity pattern is fixed and only the numeric values change (a Newton sweep on a
+BVP, say), reuse the symbolic analysis and preallocated workspace:
+
+```julia
+F = factorize(A)
+for step in 1:maxiters
+    newvals = jacobian_values!(...)            # same pattern as S, new nzval
+    refactor!(F, newvals)                      # reuses BTF/AMD analysis; no re-analysis
+    Δ = F \ residual
+    ...
+end
+```
+
+The `refactor!` + `\` loop is allocation-free apart from the tiny `O(r)` dense pivot vector.
+
+## Accessors (FastAlmostBandedMatrices parity)
+
+| `AlmostBandedMatrix` | `SparseWithDenseRowColMatrix`           |
+|----------------------|--------------------------------|
+| `bandpart`           | `sparsepart`                   |
+| `fillpart`           | `fillpart` (the `r × n` `V`)   |
+| `almostbandedrank`   | `denserank`             |
+| `exclusive_bandpart` | `exclusive_sparsepart`         |
+| —                    | `lowrankfactors` → `(U, V)`    |
+
+## Factorization strategy
+
+`factorize(A; strategy=:auto, refine=1, auto_fallback=true)`:
+
+* **`:woodbury`** (the fast path) — one PureKLU LU of `S`, precomputed `Z = S⁻¹U`, and a
+  dense `r × r` correction `C = I + V Z`. Requires `S` and `C` nonsingular. The Woodbury
+  forward error scales with `κ(S)·κ(C)`, so `refine` steps of (allocation-free) iterative
+  refinement are applied (default `1`).
+* **`:augmented`** (the robust path) — one sparse LU of the bordered system
+  `[S U; V -I]` of size `n + r`. Requires only `A` (not `S`) to be nonsingular, so it is the
+  correctness path when `S` is singular — common in BVPs where the interior stencil alone
+  does not pin a boundary degree of freedom.
+* **`:auto`** (default) — Woodbury, automatically falling back to the augmented system if
+  `S` is singular or `C` is ill-conditioned.
+
+## Benchmarks
+
+A realistic BVP/PDE-style system: a banded sparse interior (`bw = 2`) plus `r = 4` dense
+boundary rows. Factoring `S` with PureKLU and applying the rank-`r` Woodbury correction
+avoids both the `O(n³)` dense cost and the fill-in that the dense rows inflict on a
+general sparse `LU`. The cached solve and the symbolic-reuse `refactor!` (the Newton path)
+are sub-millisecond. (`SWDRC` below = this package.)
+
+```
+n = 5000, r = 4   (nnz(S) = 24994,  solve rel err vs dense ≈ 1e-12)
+  SWDRC         factorize + solve :   4.85 ms
+  dense  lu + solve               : 807.67 ms     (166× slower)
+  SuiteSparse lu on sparse(A)      :  24.71 ms     (5× slower; dense rows fill in)
+  SWDRC         ldiv!  (cached)    :   0.27 ms
+  SWDRC         refactor! (Newton) :   0.68 ms     (7× faster than re-factorizing)
+
+n = 10000, r = 4
+  SWDRC         factorize + solve :   9.45 ms
+  dense  lu + solve               : 3160.7 ms      (334× slower)
+  SuiteSparse lu on sparse(A)      :  79.69 ms      (8× slower)
+  SWDRC         refactor! (Newton) :   1.36 ms
+```
+
+(`julia -O2`, single thread; `@elapsed` best-of-30. The advantage holds whenever `S` has
+good sparse structure — banded, local, or low-fill; for a globally-scattered `S` with no
+good ordering, any sparse `LU` degrades and dense `lu` may win.)
+
+## What is fast
+
+* [x] Matrix–vector / matrix–matrix multiply (`mul!` vector path is allocation-free)
+* [x] Factorization and linear solve (`factorize`/`lu`, `\`, `ldiv!`)
+* [x] In-place refactorization with a fixed sparsity pattern (`refactor!`)
+* [x] Adjoint / transpose solves
+* [x] Generic element types (`BigFloat`, `ForwardDiff.Dual`, complex)
+
+`qr` is intentionally not provided — a general sparse `QR` causes far more fill than the
+LU-based KLU path; use `factorize`/`lu`.
+
+## When is this the right tool?
+
+This package only beats plain sparse LU when the matrix is genuinely **sparse plus a few
+dense rows/columns** — i.e. there exist `r ≪ n` rows/cols far denser than the rest, so
+peeling them into `U·V` leaves a sparse, low-fill bulk `S`. On a *uniformly* sparse matrix
+the low-rank part is empty (`r = 0`) and the method degenerates to plain `PureKLU.klu` with
+no benefit. Use [`recommend_lowrank_peel`](@ref) — a cheap, pattern-only (`O(nnz)`, no
+factorization) check — to decide:
+
+```julia
+rec = recommend_lowrank_peel(A)        # A::SparseMatrixCSC → a PeelRecommendation struct
+rec.recommended   # false → just use plain sparse LU (PureKLU.klu) with symbolic reuse
+                  # true  → wrap the rec.rank dense rows/cols as U·V and use this package
+rec               # `show` renders the human-readable reason (built lazily, not on construction)
+```
+
+`recommend_lowrank_peel` returns a descriptive [`PeelRecommendation`](@ref) struct — only
+`Bool`/`Int`/`Float64`/`Symbol` fields, so it costs nothing to construct; the explanation
+string is formatted only when you display it.
+
+### Cache the symbolic factorization, reuse it for many solves
+
+For a fixed sparsity pattern with changing values (Newton / time-stepping), the win is
+reusing the symbolic analysis (BTF + AMD ordering) instead of re-analyzing every step. The
+factorization object caches it; `lu`/`factorize` analyzes once, `ldiv!`/`\` solves any
+number of right-hand sides, and `lu!`/`refactor!` updates the values **reusing the cached
+symbolic analysis** (no re-analysis):
+
+```julia
+F = lu(A)                     # analyze (cache symbolic) + numeric factor
+x1 = F \ b1; x2 = F \ b2      # reuse for multiple solves
+for step in 1:maxiters
+    lu!(F, A_new)             # new values, same pattern → reuses cached symbolic (≈7× faster than re-`lu`)
+    Δ = F \ residual
+end
+```
+
+For a plain-sparse matrix (the `recommended == false` case) the same idea is `PureKLU.klu`
+once + `klu!`/`solve!` per step.
+
+### Fixed sparse base, changing low-rank correction — the Woodbury win
+
+The structure pays off most in the classic Sherman–Morrison–Woodbury setting: a **fixed**
+sparse base `S`, factored once, solved repeatedly under a **changing low-rank correction**
+`U·V` — varying boundary conditions / coupling, adding or removing a constraint, sensitivity
+or parameter sweeps, KKT / saddle-point systems with changing borders. Here the dense rows
+or columns are exactly what make a direct sparse `LU` of the assembled `S + U·V` fill in
+catastrophically; Woodbury sidesteps them by keeping only `S` factored.
+
+[`update_lowrank!`](@ref) updates the low-rank factors while **reusing `S`'s cached
+factorization** (no re-factorization of `S`, and `Z = S⁻¹U` is recomputed only if `U`
+changes):
+
+```julia
+F = factorize(A)                       # factor S once
+for step in 1:maxiters
+    update_lowrank!(F; V = V_new)      # fixed S, new dense rows/coupling → reuse S's factorization
+    x = F \ b
+end
+```
+
+Measured (`n = 20000`, `r = 8` dense rows, 25 solves, `S` fixed):
+
+```
+  Woodbury reuse via update_lowrank!  :   ~2.7 ms / solve
+  refactor! (re-factors S each step)  :   ~7.3 ms / solve   (wastes the S re-factorization)
+  re-KLU the assembled S + U·V         : ~1740   ms / solve   (~650× slower — dense rows fill)
+```
+
+Use `update_lowrank!` when only the low-rank part changes, `refactor!` when `S`'s values
+change (Newton), and `factorize` to (re)analyze a new pattern.
+
+## LinearSolve.jl integration
+
+Loading [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) activates a package
+extension that plugs `SparseWithDenseRowColMatrix` into LinearSolve's caching interface
+exactly like its `KLUFactorization`: the symbolic analysis is cached in the `LinearCache`,
+repeated `solve!`s reuse the factorization, and assigning the matrix new values of the
+**same sparsity pattern** triggers a numeric refactorization that reuses the cached symbolic
+analysis. It is also the default algorithm for this matrix type.
+
+```julia
+using SparseWithDenseRowColMatrices, LinearSolve
+
+prob  = LinearProblem(A, b)          # A::SparseWithDenseRowColMatrix
+cache = init(prob)                   # default alg = SparseWithDenseRowColFactorization(); analyzes once
+sol1  = solve!(cache)
+
+cache.A = A_new                      # same pattern, new values → reuses cached symbolic (refactor!)
+sol2  = solve!(cache)
+cache.b = b_new                      # new RHS only → reuses the cached numeric factorization
+sol3  = solve!(cache)
+```
+
+Pass `SparseWithDenseRowColFactorization(; reuse_symbolic, check_pattern, strategy, refine)`
+explicitly to `init`/`solve` to control the behavior (mirrors `KLUFactorization(; …)`). If
+`S` is singular it routes through the augmented fallback transparently, and `reuse_symbolic`
+/ `check_pattern` behave as for KLU. A singular `A` returns `ReturnCode.Infeasible` (it does
+not throw), again matching KLU. One caveat: once a singular `S` forces the augmented fallback,
+the cache keeps reusing it even if later `S` values are nonsingular — `init` a fresh cache to
+get back the faster Woodbury path.
+
+## Public API
+
+```
+SparseWithDenseRowColMatrix    SelectorMatrix
+sparsepart  fillpart  lowrankfactors  exclusive_sparsepart  denserank
+factorize   lu   \\   ldiv!   refactor!   lu!   update_lowrank!
+SparseWithDenseRowColWoodbury  SparseWithDenseRowColAugmented
+recommend_lowrank_peel   PeelRecommendation
+SparseWithDenseRowColFactorization   # LinearSolve.jl algorithm (extension)
+```
+
+## Installation
+
+PureKLU.jl is not yet in the General registry; install it from the SciML repository:
+
+```julia
+using Pkg
+Pkg.add(url="https://github.com/SciML/PureKLU.jl")
+Pkg.add(url="https://github.com/SciML/SparseWithDenseRowColMatrices.jl")
+```
diff --git a/ext/SparseWithDenseRowColMatricesLinearSolveExt.jl b/ext/SparseWithDenseRowColMatricesLinearSolveExt.jl
new file mode 100644
index 0000000..880d332
--- /dev/null
+++ b/ext/SparseWithDenseRowColMatricesLinearSolveExt.jl
@@ -0,0 +1,108 @@
+module SparseWithDenseRowColMatricesLinearSolveExt
+
+using SparseWithDenseRowColMatrices
+using SparseWithDenseRowColMatrices: SparseWithDenseRowColMatrix, SparseWithDenseRowColWoodbury,
+    SparseWithDenseRowColAugmented, denserank, refactor!, SparseWithDenseRowColFactorization
+using LinearSolve
+using LinearSolve: LinearCache, OperatorAssumptions, LinearVerbosity, AbstractSparseFactorization
+using LinearAlgebra: LinearAlgebra, ldiv!, issuccess, Factorization, SingularException, factorize
+
+# LinearSolve `@reexport using SciMLBase`, so SciMLBase is reachable through it.
+const SciMLBase = LinearSolve.SciMLBase
+
+# ------------------
+# Algorithm
+# ------------------
+# Concrete algorithm type lives in the extension; the user-facing constructor is the
+# `SparseWithDenseRowColFactorization` function exported by the package (a stub there, with
+# its method defined here so it errors helpfully when LinearSolve is not loaded).
+struct SWDRCFactorizationAlg <: AbstractSparseFactorization
+    reuse_symbolic::Bool
+    check_pattern::Bool
+    strategy::Symbol
+    refine::Int
+end
+
+function SparseWithDenseRowColMatrices.SparseWithDenseRowColFactorization(;
+        reuse_symbolic::Bool = true, check_pattern::Bool = true,
+        strategy::Symbol = :auto, refine::Integer = 1
+    )
+    return SWDRCFactorizationAlg(reuse_symbolic, check_pattern, strategy, Int(refine))
+end
+
+# Default algorithm LinearSolve picks for our matrix type, so `solve(LinearProblem(A, b))`
+# uses the caching path automatically (analogous to KLU being the default for sparse Float64).
+LinearSolve.defaultalg(::SparseWithDenseRowColMatrix, b, ::OperatorAssumptions{Bool}) =
+    SWDRCFactorizationAlg(true, true, :auto, 1)
+
+# ------------------
+# Cache
+# ------------------
+# Type-stable cacheval wrapper. The inner factorization can be a Woodbury *or* an Augmented
+# one — and can even switch between them across refactorizations if the values make `S`
+# (non)singular — so we box it behind a fixed wrapper type rather than letting the
+# `LinearCache` field type change underneath us.
+mutable struct SWDRCCacheval{T}
+    fact::Union{Nothing, Factorization{T}}
+end
+
+function LinearSolve.init_cacheval(
+        ::SWDRCFactorizationAlg, A::SparseWithDenseRowColMatrix{T}, b, u, Pl, Pr,
+        maxiters::Int, abstol, reltol,
+        verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions
+    ) where {T}
+    return SWDRCCacheval{T}(nothing)
+end
+
+# Refresh the cached factorization for the current `A`, reusing the cached symbolic analysis
+# when possible. `refactor!` reuses PureKLU's symbolic analysis (no re-analysis); it throws if
+# the pattern/rank changed or if the (Woodbury) `S` became singular — in those cases we fall
+# back to a full `factorize` (which can also switch Woodbury ↔ augmented).
+#
+# A genuinely singular `A` makes `factorize` throw `SingularException`. LinearSolve's contract
+# (cf. its KLU path, which uses `check=false`) is to return `ReturnCode.Infeasible` rather than
+# throw, so we catch it and store `nothing`; `solve!`'s `issuccess`/`!== nothing` guard then
+# maps that to `Infeasible`. (Direct `factorize`/`\` on the matrix still throws, as is idiomatic.)
+#
+# NOTE (sticky fallback): once a singular `S` forces the augmented path, a later nonsingular `S`
+# keeps reusing the augmented factorization (its `refactor!` succeeds for any nonsingular `A`),
+# so the cache does not switch back to the faster Woodbury path on its own. Re-`init` the cache
+# to recover the Woodbury path if `S` becomes reliably nonsingular again.
+function _refresh!(wrap::SWDRCCacheval, A::SparseWithDenseRowColMatrix, alg::SWDRCFactorizationAlg)
+    f = wrap.fact
+    if alg.reuse_symbolic && f isa Factorization &&
+            size(f) == size(A) && denserank(f) == denserank(A)
+        try
+            refactor!(f, A; check = alg.check_pattern)   # reuse cached symbolic analysis
+            return wrap
+        catch e
+            (e isa SingularException || e isa ArgumentError || e isa DimensionMismatch) || rethrow(e)
+        end
+    end
+    try
+        wrap.fact = factorize(A; strategy = alg.strategy, refine = alg.refine)
+    catch e
+        e isa SingularException || rethrow(e)
+        wrap.fact = nothing                              # singular A → solve! returns Infeasible
+    end
+    return wrap
+end
+
+function SciMLBase.solve!(cache::LinearCache, alg::SWDRCFactorizationAlg; kwargs...)
+    if cache.isfresh
+        wrap = LinearSolve.@get_cacheval(cache, :SWDRCFactorizationAlg)
+        _refresh!(wrap, cache.A, alg)
+        cache.isfresh = false
+    end
+    F = LinearSolve.@get_cacheval(cache, :SWDRCFactorizationAlg).fact
+    return if F !== nothing && issuccess(F)
+        y = ldiv!(cache.u, F, cache.b)
+        SciMLBase.build_linear_solution(alg, y, nothing, cache; retcode = SciMLBase.ReturnCode.Success)
+    else
+        SciMLBase.build_linear_solution(
+            alg, cache.u, nothing, cache; retcode = SciMLBase.ReturnCode.Infeasible
+        )
+    end
+end
+
+end # module
diff --git a/src/SelectorMatrix.jl b/src/SelectorMatrix.jl
new file mode 100644
index 0000000..d8d0b77
--- /dev/null
+++ b/src/SelectorMatrix.jl
@@ -0,0 +1,56 @@
+# ------------------
+# SelectorMatrix
+# ------------------
+
+"""
+    SelectorMatrix{T}(n, r)
+
+The `n × r` matrix `[I_r; 0]` — i.e. its first `r` rows are the `r × r` identity and the
+remaining `n - r` rows are zero. Used as the left low-rank factor `U` for the common
+"dense rows live at the top" case of an [`SparseWithDenseRowColMatrix`](@ref): then `U * V` places
+the `r × n` dense `fill` matrix `V` into the top `r` rows.
+
+Stored implicitly (two `Int`s), so the boundary-condition case never materializes an
+`n × r` dense `U` and the matrix–vector / factorization paths stay allocation-light.
+"""
+struct SelectorMatrix{T} <: AbstractMatrix{T}
+    n::Int
+    r::Int
+    function SelectorMatrix{T}(n::Integer, r::Integer) where {T}
+        r ≤ n || throw(ArgumentError("SelectorMatrix needs r ≤ n, got r=$r, n=$n"))
+        return new{T}(Int(n), Int(r))
+    end
+end
+SelectorMatrix(n::Integer, r::Integer) = SelectorMatrix{Bool}(n, r)
+
+Base.size(M::SelectorMatrix) = (M.n, M.r)
+
+Base.@propagate_inbounds function Base.getindex(M::SelectorMatrix{T}, i::Int, j::Int) where {T}
+    @boundscheck checkbounds(M, i, j)
+    return (i == j && j ≤ M.r) ? one(T) : zero(T)
+end
+
+Base.IndexStyle(::Type{<:SelectorMatrix}) = IndexCartesian()
+
+# A selector of one eltype is trivially convertible to another; used during promotion.
+Base.convert(::Type{SelectorMatrix{T}}, M::SelectorMatrix) where {T} = SelectorMatrix{T}(M.n, M.r)
+SelectorMatrix{T}(M::SelectorMatrix) where {T} = SelectorMatrix{T}(M.n, M.r)
+
+"""
+    materialize_U!(Z, U)
+
+Copy the left low-rank factor `U` into the preallocated dense buffer `Z` (`n × r`). This is
+the one place `U` is realized densely; specialized for [`SelectorMatrix`](@ref) so the
+common case writes a sparse identity pattern instead of going through generic `getindex`.
+"""
+function materialize_U!(Z::AbstractMatrix, U::AbstractMatrix)
+    copyto!(Z, U)
+    return Z
+end
+function materialize_U!(Z::AbstractMatrix{T}, U::SelectorMatrix) where {T}
+    fill!(Z, zero(T))
+    @inbounds for i in 1:U.r
+        Z[i, i] = one(T)
+    end
+    return Z
+end
diff --git a/src/SparseWithDenseRowColMatrices.jl b/src/SparseWithDenseRowColMatrices.jl
new file mode 100644
index 0000000..5119ff8
--- /dev/null
+++ b/src/SparseWithDenseRowColMatrices.jl
@@ -0,0 +1,49 @@
+module SparseWithDenseRowColMatrices
+
+using LinearAlgebra
+using LinearAlgebra: Factorization, SingularException, ldiv!, lu, lu!, mul!, opnorm,
+    issuccess, factorize
+using SparseArrays
+using SparseArrays: AbstractSparseMatrixCSC, SparseMatrixCSC, nonzeros, rowvals, getcolptr,
+    nnz, sparse
+using PureKLU
+
+import PrecompileTools: @setup_workload, @compile_workload
+
+include("SelectorMatrix.jl")
+include("matrix.jl")
+include("matvec.jl")
+include("woodbury.jl")
+include("augmented.jl")
+include("factorize.jl")
+include("detect.jl")
+
+export SparseWithDenseRowColMatrix, SelectorMatrix,
+    sparsepart, fillpart, lowrankfactors, exclusive_sparsepart, denserank,
+    SparseWithDenseRowColWoodbury, SparseWithDenseRowColAugmented, refactor!, update_lowrank!,
+    recommend_lowrank_peel, PeelRecommendation, SparseWithDenseRowColFactorization
+
+# ---------------------
+# Precompilation
+# ---------------------
+
+@setup_workload begin
+    n, r = 12, 2
+    for T in (Float64, ComplexF64)
+        S = sparse(T(2) * I, n, n)
+        @inbounds for i in 1:(n - 1)
+            S[i + 1, i] = -one(T)
+        end
+        fill = ones(T, r, n)
+        @compile_workload begin
+            A = SparseWithDenseRowColMatrix(S, fill)
+            b = ones(T, n)
+            A * b
+            F = factorize(A)
+            F \ b
+            refactor!(F, nonzeros(sparsepart(A)))
+        end
+    end
+end
+
+end # module
diff --git a/src/augmented.jl b/src/augmented.jl
new file mode 100644
index 0000000..6b10254
--- /dev/null
+++ b/src/augmented.jl
@@ -0,0 +1,157 @@
+# ------------------
+# Augmented factorization (fallback path)
+# ------------------
+#
+# Solve A x = b with A = S + U V via the equivalent bordered system
+#
+#     [ S   U ] [x]   [b]
+#     [ V  -I ] [y] = [0]
+#
+# factored as ONE sparse LU of size (n+r). This requires only `A` (not `S`) to be
+# nonsingular, so it is the correctness path when `S` is singular or the Woodbury
+# correction `C` is dangerously ill-conditioned. The `r` dense rows/columns of `U`/`V` add
+# fill comparable to FABM's QR cost — fine for small `r`, the regime where the low-rank
+# structure is worth exploiting at all.
+
+"""
+    SparseWithDenseRowColAugmented{T} <: LinearAlgebra.Factorization{T}
+
+Fallback factorization of an [`SparseWithDenseRowColMatrix`](@ref) `A = S + U V` via a single sparse
+LU of the bordered system `[S U; V -I]` of size `n + r`. Used automatically by
+[`factorize`](@ref) when `S` is singular or the Woodbury correction is ill-conditioned, and
+selectable explicitly with `strategy = :augmented`. Requires only `A` (not `S`) to be
+nonsingular.
+"""
+mutable struct SparseWithDenseRowColAugmented{T, KF} <: LinearAlgebra.Factorization{T}
+    Kaug::KF             # PureKLU.KLUFactorization of [S U; V -I]
+    n::Int
+    r::Int
+    rhs::Vector{T}       # length n+r work buffer ([b; 0] / solution)
+end
+
+Base.size(F::SparseWithDenseRowColAugmented) = (F.n, F.n)
+Base.size(F::SparseWithDenseRowColAugmented, i::Integer) = i ≤ 2 ? F.n : 1
+LinearAlgebra.issuccess(F::SparseWithDenseRowColAugmented) = LinearAlgebra.issuccess(F.Kaug)
+denserank(F::SparseWithDenseRowColAugmented) = F.r
+
+_sparse_block(U::AbstractMatrix{T}) where {T} = sparse(U)
+function _sparse_block(U::SelectorMatrix{T}) where {T}
+    r = U.r
+    return SparseMatrixCSC(U.n, r, collect(1:(r + 1)), collect(1:r), ones(T, r))
+end
+
+function _augmented_matrix(A::SparseWithDenseRowColMatrix{T}) where {T}
+    n = size(A, 1)
+    r = denserank(A)
+    S = _own_sparse(T, A.S)
+    if r == 0
+        return S
+    end
+    Usp = _sparse_block(A.U)               # n×r
+    Vsp = sparse(Matrix{T}(A.V))           # r×n
+    negI = SparseMatrixCSC{T, Int}(-one(T) * I, r, r)
+    return [S Usp; Vsp negI]
+end
+
+function _augmented(A::SparseWithDenseRowColMatrix{T}) where {T}
+    n = size(A, 1)
+    r = denserank(A)
+    Maug = _augmented_matrix(A)
+    Kaug = PureKLU.klu(Maug)               # throws SingularException if A is singular
+    return SparseWithDenseRowColAugmented{T, typeof(Kaug)}(Kaug, n, r, Vector{T}(undef, n + r))
+end
+
+function LinearAlgebra.ldiv!(F::SparseWithDenseRowColAugmented{T}, b::AbstractVector) where {T}
+    length(b) == F.n || throw(DimensionMismatch())
+    rhs = F.rhs
+    @inbounds copyto!(view(rhs, 1:F.n), b)
+    @inbounds for i in (F.n + 1):(F.n + F.r)
+        rhs[i] = zero(T)
+    end
+    PureKLU.solve!(F.Kaug, rhs)
+    @inbounds copyto!(b, view(rhs, 1:F.n))
+    return b
+end
+
+function LinearAlgebra.ldiv!(F::SparseWithDenseRowColAugmented, B::AbstractMatrix)
+    size(B, 1) == F.n || throw(DimensionMismatch())
+    for c in axes(B, 2)
+        ldiv!(F, view(B, :, c))
+    end
+    return B
+end
+
+LinearAlgebra.ldiv!(y::AbstractVector, F::SparseWithDenseRowColAugmented, b::AbstractVector) =
+    ldiv!(F, copyto!(y, b))
+LinearAlgebra.ldiv!(Y::AbstractMatrix, F::SparseWithDenseRowColAugmented, B::AbstractMatrix) =
+    ldiv!(F, copyto!(Y, B))
+
+# Adjoint / transpose solve. The transposed bordered system Mᵀ = [Sᵀ Vᵀ; Uᵀ -I] has Schur
+# complement Sᵀ + Vᵀ Uᵀ = (S + U V)ᵀ = Aᵀ (and likewise Mᴴ ↔ Aᴴ), so solving Mᵀ[x;y]=[b;0]
+# (resp. Mᴴ) with the SAME factorization's transpose/adjoint solve yields x = A⁻ᵀ b (resp. A⁻ᴴ b).
+for (Wrap, kluop) in ((AdjointFact, :(F.Kaug')), (TransposeFact, :(transpose(F.Kaug))))
+    @eval function LinearAlgebra.ldiv!(
+            Fw::$Wrap{<:Any, <:SparseWithDenseRowColAugmented{T}}, b::AbstractVector
+        ) where {T}
+        F = parent(Fw)
+        length(b) == F.n || throw(DimensionMismatch())
+        rhs = F.rhs
+        @inbounds copyto!(view(rhs, 1:F.n), b)
+        @inbounds for i in (F.n + 1):(F.n + F.r)
+            rhs[i] = zero(T)
+        end
+        PureKLU.solve!($kluop, rhs)
+        @inbounds copyto!(b, view(rhs, 1:F.n))
+        return b
+    end
+    @eval function LinearAlgebra.ldiv!(Fw::$Wrap{<:Any, <:SparseWithDenseRowColAugmented}, B::AbstractMatrix)
+        for c in axes(B, 2)
+            ldiv!(Fw, view(B, :, c))
+        end
+        return B
+    end
+end
+
+# Complex RHS over a real augmented factorization (real/imag split; see woodbury.jl).
+LinearAlgebra.ldiv!(F::SparseWithDenseRowColAugmented{<:Real}, b::AbstractVector{<:Complex}) =
+    _ldiv_realfact_complex!(F, b)
+LinearAlgebra.ldiv!(Fw::AdjointFact{<:Any, <:SparseWithDenseRowColAugmented{<:Real}}, b::AbstractVector{<:Complex}) =
+    _ldiv_realfact_complex!(Fw, b)
+LinearAlgebra.ldiv!(Fw::TransposeFact{<:Any, <:SparseWithDenseRowColAugmented{<:Real}}, b::AbstractVector{<:Complex}) =
+    _ldiv_realfact_complex!(Fw, b)
+
+"""
+    refactor!(F::SparseWithDenseRowColAugmented, A::SparseWithDenseRowColMatrix; check=true) -> F
+
+Rebuild the augmented system with new numeric values and refactor. When the augmented
+sparsity pattern is unchanged this reuses PureKLU's symbolic analysis (`klu!`); otherwise it
+re-analyzes. The augmented path is correctness-first, not the optimized refactor path.
+"""
+function refactor!(F::SparseWithDenseRowColAugmented{T}, A::SparseWithDenseRowColMatrix; check::Bool = true) where {T}
+    denserank(A) == F.r && size(A, 1) == F.n ||
+        throw(DimensionMismatch("shape changed; rebuild with `factorize`."))
+    Maug = _augmented_matrix(A)
+    if length(SparseArrays.nonzeros(Maug)) == length(F.Kaug.nzval) &&
+            (
+            !check || (
+                SparseArrays.getcolptr(Maug) == increment(F.Kaug.colptr) &&
+                    SparseArrays.rowvals(Maug) == increment(F.Kaug.rowval)
+            )
+        )
+        PureKLU.klu!(F.Kaug, SparseArrays.nonzeros(Maug))
+    else
+        F.Kaug = PureKLU.klu(Maug)
+    end
+    return F
+end
+
+# 1-based copy of PureKLU's internal 0-based index arrays, for the pattern check above.
+increment(v::AbstractVector{<:Integer}) = v .+ one(eltype(v))
+
+# The augmented fallback embeds U/V inside one sparse LU and keeps no separate factorization
+# of S to reuse, so the Woodbury low-rank fast path does not apply here.
+function update_lowrank!(::SparseWithDenseRowColAugmented; kwargs...)
+    throw(ArgumentError("update_lowrank! is the Woodbury fast path (it reuses S's factorization); \
+        the augmented fallback (singular S) has no separate S factorization to reuse — \
+        update with `refactor!` or rebuild with `factorize`."))
+end
diff --git a/src/detect.jl b/src/detect.jl
new file mode 100644
index 0000000..9fe6abd
--- /dev/null
+++ b/src/detect.jl
@@ -0,0 +1,170 @@
+# ------------------
+# Appropriateness detector
+# ------------------
+
+"""
+    PeelRecommendation
+
+The result of [`recommend_lowrank_peel`](@ref): whether wrapping a sparse matrix as a
+[`SparseWithDenseRowColMatrix`](@ref) (and using the Woodbury path) is worthwhile, together
+with the cheap pattern diagnostics that drove the decision. It carries **no eagerly-built
+message string** — construction is allocation-free (only `Bool`/`Int`/`Float64`/`Symbol`
+fields); the human-readable explanation is produced lazily by `show`.
+
+# Fields
+* `recommended :: Bool`   — use a `SparseWithDenseRowColMatrix` (`true`) or plain sparse LU (`false`)
+* `rank :: Int`           — proposed low-rank correction size `r` (dense rows + dense cols)
+* `code :: Symbol`        — reason category, one of `:recommended`, `:uniformly_sparse`,
+  `:rank_too_large`, `:negligible_fill`, `:too_small`, `:not_square`, `:empty`
+* `n :: Int`, `nnz :: Int`
+* `dense_rows :: Int`, `dense_cols :: Int`
+* `max_row_nnz :: Int`, `max_col_nnz :: Int`
+* `row_threshold :: Float64`, `col_threshold :: Float64`
+* `peeled_fraction :: Float64`   — share of the nonzeros held by the dense rows/cols
+
+`Bool(rec)` and `rec.recommended` both give the verdict; `rec.rank` the proposed `r`.
+"""
+struct PeelRecommendation
+    recommended::Bool
+    rank::Int
+    code::Symbol
+    n::Int
+    nnz::Int
+    dense_rows::Int
+    dense_cols::Int
+    max_row_nnz::Int
+    max_col_nnz::Int
+    row_threshold::Float64
+    col_threshold::Float64
+    peeled_fraction::Float64
+end
+
+Base.Bool(rec::PeelRecommendation) = rec.recommended
+
+# Lazily render the explanation from the stored diagnostics (no string built until shown).
+function _peel_reason(rec::PeelRecommendation)
+    c = rec.code
+    if c === :not_square
+        return "matrix is not square ($(rec.n) rows); the peel method targets square systems"
+    elseif c === :too_small
+        return "n=$(rec.n) is too small for a low-rank peel to matter; use plain sparse LU"
+    elseif c === :empty
+        return "matrix is structurally empty; use plain sparse LU"
+    elseif c === :uniformly_sparse
+        return "uniformly sparse: max $(rec.max_row_nnz) nnz/row, max $(rec.max_col_nnz) nnz/col " *
+            "(thresholds $(round(rec.row_threshold; digits = 1)) / $(round(rec.col_threshold; digits = 1))); " *
+            "no dense rows/cols — use plain sparse LU"
+    elseif c === :rank_too_large
+        return "found r=$(rec.rank) dense rows/cols ($(rec.dense_rows) rows + $(rec.dense_cols) cols); " *
+            "the low-rank correction is too large relative to n=$(rec.n) — use plain sparse LU"
+    elseif c === :negligible_fill
+        return "found r=$(rec.rank) dense rows/cols but they hold only " *
+            "$(round(100 * rec.peeled_fraction; digits = 1))% of the nonzeros; the bulk barely changes — use plain sparse LU"
+    elseif c === :recommended
+        return "found $(rec.dense_rows) dense row(s) (max $(rec.max_row_nnz) nnz) and $(rec.dense_cols) dense col(s) " *
+            "(max $(rec.max_col_nnz) nnz); peeling rank r=$(rec.rank) removes ~$(round(100 * rec.peeled_fraction; digits = 1))% of the nnz, " *
+            "leaving a sparse bulk → SparseWithDenseRowColMatrix recommended"
+    else
+        return "unknown"
+    end
+end
+
+function Base.show(io::IO, rec::PeelRecommendation)
+    return print(io, "PeelRecommendation(recommended=", rec.recommended, ", rank=", rec.rank, ", :", rec.code, ")")
+end
+function Base.show(io::IO, ::MIME"text/plain", rec::PeelRecommendation)
+    println(
+        io, "PeelRecommendation: ", rec.recommended ? "use SparseWithDenseRowColMatrix" : "use plain sparse LU",
+        " (rank r=", rec.rank, ")"
+    )
+    return print(io, "  ", _peel_reason(rec))
+end
+
+"""
+    recommend_lowrank_peel(A::AbstractSparseMatrixCSC;
+                           row_factor=8.0, col_factor=8.0, maxrank=32,
+                           min_n=64, abs_floor=4) -> PeelRecommendation
+
+Cheap, pattern-only (`O(nnz)`, never factorizes) heuristic that decides whether wrapping `A`
+as a [`SparseWithDenseRowColMatrix`](@ref) and solving with the Woodbury path is worth it,
+versus just using plain sparse LU (e.g. `PureKLU.klu`). Returns a [`PeelRecommendation`](@ref)
+(a descriptive struct — no allocating message string is built unless you `show` it).
+
+The method pays off **iff** a small number `r ≪ n` of rows and/or columns are *much* denser
+than the rest, so peeling them into the low-rank correction `U*V` leaves a genuinely sparse,
+low-fill bulk `S`. On a uniformly sparse matrix the low-rank part is empty and the method
+degenerates to plain LU — then `recommended` is `false`.
+
+A row (column) is a dense candidate when its stored-entry count exceeds both
+`row_factor * median(nnz per row)` (resp. `col_factor`) and the absolute floor `abs_floor`.
+The recommendation also requires the candidates to be few (`r ≤ min(maxrank, n ÷ 8)`) and to
+hold a meaningful share (≥ 5%) of the nonzeros.
+
+# Example
+```julia
+julia> rec = recommend_lowrank_peel(A);
+
+julia> rec.recommended, rec.rank
+(false, 0)
+```
+"""
+function recommend_lowrank_peel(
+        A::SparseArrays.AbstractSparseMatrixCSC;
+        row_factor::Real = 8.0, col_factor::Real = 8.0,
+        maxrank::Int = 32, min_n::Int = 64, abs_floor::Int = 4
+    )
+    n, m = size(A)
+    nnzA = SparseArrays.nnz(A)
+
+    n == m || return PeelRecommendation(false, 0, :not_square, n, nnzA, 0, 0, 0, 0, 0.0, 0.0, 0.0)
+    n < min_n && return PeelRecommendation(false, 0, :too_small, n, nnzA, 0, 0, 0, 0, 0.0, 0.0, 0.0)
+    nnzA == 0 && return PeelRecommendation(false, 0, :empty, n, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0)
+
+    colptr = SparseArrays.getcolptr(A)
+    rows = SparseArrays.rowvals(A)
+    colcnt = diff(colptr)                         # nnz per column, O(n)
+    rowcnt = zeros(Int, n)
+    @inbounds for k in 1:nnzA
+        rowcnt[rows[k]] += 1
+    end
+
+    row_thresh = max(row_factor * max(_median_int(rowcnt), 1.0), float(abs_floor))
+    col_thresh = max(col_factor * max(_median_int(colcnt), 1.0), float(abs_floor))
+    dense_rows = count(>(row_thresh), rowcnt)
+    dense_cols = count(>(col_thresh), colcnt)
+    r = dense_rows + dense_cols
+    maxrow, maxcol = maximum(rowcnt), maximum(colcnt)
+
+    r == 0 && return PeelRecommendation(
+        false, 0, :uniformly_sparse, n, nnzA, 0, 0, maxrow, maxcol, row_thresh, col_thresh, 0.0
+    )
+
+    rank_cap = min(maxrank, n ÷ 8)
+    r > rank_cap && return PeelRecommendation(
+        false, r, :rank_too_large, n, nnzA, dense_rows, dense_cols, maxrow, maxcol, row_thresh, col_thresh, 0.0
+    )
+
+    peeled = 0
+    @inbounds for j in eachindex(colcnt)
+        colcnt[j] > col_thresh && (peeled += colcnt[j])
+    end
+    @inbounds for i in eachindex(rowcnt)
+        rowcnt[i] > row_thresh && (peeled += rowcnt[i])
+    end
+    frac = peeled / nnzA
+    frac < 0.05 && return PeelRecommendation(
+        false, r, :negligible_fill, n, nnzA, dense_rows, dense_cols, maxrow, maxcol, row_thresh, col_thresh, frac
+    )
+
+    return PeelRecommendation(
+        true, r, :recommended, n, nnzA, dense_rows, dense_cols, maxrow, maxcol, row_thresh, col_thresh, frac
+    )
+end
+
+# Median of a small integer count vector, without depending on Statistics.
+function _median_int(v::AbstractVector{<:Integer})
+    isempty(v) && return 0.0
+    s = sort(v)
+    L = length(s)
+    return isodd(L) ? float(s[(L + 1) ÷ 2]) : (s[L ÷ 2] + s[L ÷ 2 + 1]) / 2
+end
diff --git a/src/factorize.jl b/src/factorize.jl
new file mode 100644
index 0000000..3eebbfa
--- /dev/null
+++ b/src/factorize.jl
@@ -0,0 +1,99 @@
+# ------------------
+# Top-level factorize / lu / \  dispatch
+# ------------------
+
+const SparseWithDenseRowColFactorization{T} = Union{SparseWithDenseRowColWoodbury{T}, SparseWithDenseRowColAugmented{T}}
+
+"""
+    factorize(A::SparseWithDenseRowColMatrix; strategy=:auto, refine=1, auto_fallback=true) -> F
+
+Factorize `A = S + U V` for repeated solves. Returns an [`SparseWithDenseRowColWoodbury`](@ref)
+(the fast path: one sparse LU of `S` plus a small dense correction) or an
+[`SparseWithDenseRowColAugmented`](@ref) (the robust bordered-system path).
+
+* `strategy = :auto` (default): use Woodbury, automatically falling back to the augmented
+  system if `S` is singular or the `r × r` correction is ill-conditioned.
+* `strategy = :woodbury`: force Woodbury; warn on ill-conditioning, error on singular `S`.
+* `strategy = :augmented`: force the bordered-system path.
+* `refine`: iterative-refinement steps for the Woodbury solve (default `1`; ignored by the
+  augmented path).
+* `auto_fallback`: whether `:auto` may switch to the augmented path.
+
+The returned `F` **caches the symbolic analysis** (BTF + AMD ordering of `S`). Solve any
+number of right-hand sides with `F \\ b` / `ldiv!(F, b)`, and update the numeric values for a
+new matrix of the **same sparsity pattern** with [`refactor!`](@ref) / [`lu!`](@ref) — that
+reuses the cached symbolic analysis (no re-analysis), which is the ≈7× per-step win in a
+Newton / time-stepping loop. Only call `factorize`/`lu` afresh when the pattern changes.
+"""
+function LinearAlgebra.factorize(
+        A::SparseWithDenseRowColMatrix; strategy::Symbol = :auto, refine::Integer = 1,
+        auto_fallback::Bool = true
+    )
+    strategy in (:auto, :woodbury, :augmented) ||
+        throw(ArgumentError("strategy must be :auto, :woodbury or :augmented; got :$strategy"))
+
+    strategy === :augmented && return _augmented(A)
+
+    F = try
+        _woodbury(A; refine)
+    catch e
+        if e isa SingularException && strategy === :auto && auto_fallback
+            return _augmented(A)
+        end
+        rethrow(e)
+    end
+
+    if F.illconditioned
+        if strategy === :auto && auto_fallback
+            return _augmented(A)
+        else
+            @warn "SparseWithDenseRowColWoodbury: the r×r correction matrix C is ill-conditioned; the \
+                   Woodbury solve may lose accuracy. Consider `strategy = :augmented`."
+        end
+    end
+    return F
+end
+
+"""
+    lu(A::SparseWithDenseRowColMatrix; kwargs...) -> F
+
+Alias for [`factorize`](@ref). Named `lu` (not `qr`) because the sparse bulk is factored by
+an LU-based solver (PureKLU); `qr` is intentionally not provided.
+"""
+LinearAlgebra.lu(A::SparseWithDenseRowColMatrix; kwargs...) = factorize(A; kwargs...)
+
+"""
+    lu!(F, A::SparseWithDenseRowColMatrix; kwargs...) -> F
+
+Alias for [`refactor!`](@ref) — refactor `F` in place with the new values of `A`.
+"""
+LinearAlgebra.lu!(F::SparseWithDenseRowColFactorization, A::SparseWithDenseRowColMatrix; kwargs...) =
+    refactor!(F, A; kwargs...)
+
+function LinearAlgebra.qr(::SparseWithDenseRowColMatrix)
+    throw(ArgumentError("SparseWithDenseRowColMatrix is solved by an LU-based factorization; use `factorize`/`lu`, not `qr`."))
+end
+
+Base.:\(A::SparseWithDenseRowColMatrix, b::AbstractVecOrMat) = factorize(A) \ b
+
+"""
+    SparseWithDenseRowColFactorization(; reuse_symbolic=true, check_pattern=true,
+                                        strategy=:auto, refine=1)
+
+A [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) algorithm that solves
+`SparseWithDenseRowColMatrix` systems through LinearSolve's caching interface, mirroring its
+`KLUFactorization`: the symbolic analysis is cached in the `LinearCache`, repeated `solve!`s
+reuse it, and updating the matrix to new values of the **same sparsity pattern** triggers a
+numeric refactorization (via [`refactor!`](@ref)) that reuses the cached symbolic analysis.
+It is also the default algorithm `LinearSolve` picks for a `SparseWithDenseRowColMatrix`, so
+`solve(LinearProblem(A, b))` uses it automatically.
+
+* `reuse_symbolic` — reuse the cached symbolic analysis across value changes (set `false` if
+  the sparsity pattern may change between solves).
+* `check_pattern` — validate the pattern on each refactor (`false` skips the check; may error
+  if the pattern actually changes).
+* `strategy`, `refine` — forwarded to [`factorize`](@ref).
+
+Only available when `LinearSolve` is loaded (provided by a package extension).
+"""
+function SparseWithDenseRowColFactorization end
diff --git a/src/matrix.jl b/src/matrix.jl
new file mode 100644
index 0000000..851b30f
--- /dev/null
+++ b/src/matrix.jl
@@ -0,0 +1,243 @@
+# ------------------
+# SparseWithDenseRowColMatrix
+# ------------------
+
+"""
+    SparseWithDenseRowColMatrix(S::AbstractSparseMatrixCSC, U::AbstractMatrix, V::AbstractMatrix)
+    SparseWithDenseRowColMatrix(S::AbstractSparseMatrixCSC, fill::AbstractMatrix; replace=false)
+    SparseWithDenseRowColMatrix{T}(S, U, V)
+
+A square matrix represented as a **sparse part plus a low-rank dense correction**
+
+    A = S + U * V
+
+where `S` is an `n × n` `SparseMatrixCSC`, `U` is `n × r` and `V` is `r × n` dense, with
+`r ≪ n` the *almost-sparse rank*. This is the sparse-matrix analogue of an
+`AlmostBandedMatrix`: the banded "bulk" is replaced by a general sparse bulk, and the
+low-rank `fill` (which in the banded case sits in the top rows as boundary conditions) is
+kept as the explicit outer product `U * V` rather than folded into a band.
+
+The structure is exactly the one the Sherman–Morrison–Woodbury identity exploits, so
+[`factorize`](@ref)/`\\` solve `A x = b` by factoring only the sparse `S` (with
+[`PureKLU`](https://github.com/SciML/PureKLU.jl)) and applying a small `r × r` dense
+correction — see [`SparseWithDenseRowColWoodbury`](@ref). When `S` is singular but `A` is not, an
+[`SparseWithDenseRowColAugmented`](@ref) fallback is used automatically.
+
+# Constructors
+
+* `SparseWithDenseRowColMatrix(S, U, V)` — the general additive low-rank form `A = S + U*V`.
+* `SparseWithDenseRowColMatrix(S, fill)` — the boundary-condition convenience: `fill` is an `r × n`
+  dense block placed in the **top `r` rows** via an implicit [`SelectorMatrix`](@ref)
+  `U = [I_r; 0]`.
+    * `replace = false` (default): `fill` is *added* to the top `r` rows, `A = S + [I_r;0]*fill`.
+    * `replace = true`: the top `r` rows of `A` *become* `fill` (FABM-style row replacement),
+      implemented as `V = fill - S[1:r, :]` so that `S` itself is left untouched and stays
+      nonsingular for the Woodbury path.
+
+```jldoctest
+julia> using SparseWithDenseRowColMatrices, SparseArrays
+
+julia> S = sparse(1.0I, 4, 4); fill = reshape(Float64[1,2,3,4], 1, 4);
+
+julia> A = SparseWithDenseRowColMatrix(S, fill);   # rank-1 correction in row 1
+
+julia> Matrix(A)[1, :]
+4-element Vector{Float64}:
+ 2.0
+ 2.0
+ 3.0
+ 4.0
+```
+"""
+struct SparseWithDenseRowColMatrix{
+        T,
+        TS <: SparseArrays.AbstractSparseMatrixCSC{T},
+        TU <: AbstractMatrix{T},
+        TV <: AbstractMatrix{T},
+    } <: AbstractMatrix{T}
+    S::TS
+    U::TU
+    V::TV
+
+    function SparseWithDenseRowColMatrix{T, TS, TU, TV}(S, U, V) where {T, TS, TU, TV}
+        n = size(S, 1)
+        size(S, 2) == n ||
+            throw(ArgumentError("the sparse part of an SparseWithDenseRowColMatrix must be square; got size $(size(S))"))
+        size(U, 1) == n ||
+            throw(DimensionMismatch("size(U, 1) = $(size(U, 1)) must equal n = $n"))
+        size(V, 2) == n ||
+            throw(DimensionMismatch("size(V, 2) = $(size(V, 2)) must equal n = $n"))
+        size(U, 2) == size(V, 1) ||
+            throw(DimensionMismatch("low-rank factor mismatch: size(U, 2) = $(size(U, 2)) ≠ size(V, 1) = $(size(V, 1))"))
+        return new{T, TS, TU, TV}(S, U, V)
+    end
+end
+
+_to_eltype(::Type{T}, A::AbstractMatrix{T}) where {T} = A
+_to_eltype(::Type{T}, A::AbstractMatrix) where {T} = convert(AbstractMatrix{T}, A)
+_to_eltype(::Type{T}, A::SelectorMatrix) where {T} = SelectorMatrix{T}(A.n, A.r)
+
+function SparseWithDenseRowColMatrix(
+        S::SparseArrays.AbstractSparseMatrixCSC, U::AbstractMatrix, V::AbstractMatrix
+    )
+    T = promote_type(eltype(S), eltype(U), eltype(V))
+    return SparseWithDenseRowColMatrix{T}(S, U, V)
+end
+
+function SparseWithDenseRowColMatrix{T}(
+        S::SparseArrays.AbstractSparseMatrixCSC, U::AbstractMatrix, V::AbstractMatrix
+    ) where {T}
+    Sc, Uc, Vc = _to_eltype(T, S), _to_eltype(T, U), _to_eltype(T, V)
+    return SparseWithDenseRowColMatrix{T, typeof(Sc), typeof(Uc), typeof(Vc)}(Sc, Uc, Vc)
+end
+
+# Boundary-condition convenience: `fill` is an r×n block in the top r rows via a selector.
+function SparseWithDenseRowColMatrix(
+        S::SparseArrays.AbstractSparseMatrixCSC, fill::AbstractMatrix; replace::Bool = false
+    )
+    n = size(S, 2)
+    r = size(fill, 1)
+    size(fill, 2) == n ||
+        throw(DimensionMismatch("fill has $(size(fill, 2)) columns but the sparse part has $n"))
+    T = promote_type(eltype(S), eltype(fill))
+    U = SelectorMatrix{T}(n, r)
+    V = if replace
+        # A's top r rows become `fill`; leave S (hence its nonsingularity) untouched by
+        # absorbing the original top rows into the correction.
+        Vd = Matrix{T}(fill)
+        @inbounds for j in 1:n, i in 1:r
+            Vd[i, j] -= S[i, j]
+        end
+        Vd
+    else
+        _to_eltype(T, fill)
+    end
+    return SparseWithDenseRowColMatrix{T}(S, U, V)
+end
+
+# ------------------
+# Accessors (FABM parity)
+# ------------------
+
+"""
+    sparsepart(A::SparseWithDenseRowColMatrix)
+
+The sparse bulk `S` (the analogue of `bandpart` for an `AlmostBandedMatrix`).
+"""
+@inline sparsepart(A::SparseWithDenseRowColMatrix) = A.S
+
+"""
+    fillpart(A::SparseWithDenseRowColMatrix)
+
+The `r × n` right low-rank factor `V` — the dense "fill" rows (the analogue of `fillpart`
+for an `AlmostBandedMatrix`).
+"""
+@inline fillpart(A::SparseWithDenseRowColMatrix) = A.V
+
+"""
+    lowrankfactors(A::SparseWithDenseRowColMatrix) -> (U, V)
+
+The pair `(U, V)` whose product `U * V` is the low-rank correction added to the sparse part.
+"""
+@inline lowrankfactors(A::SparseWithDenseRowColMatrix) = (A.U, A.V)
+
+"""
+    denserank(A::SparseWithDenseRowColMatrix)
+
+The rank `r` of the low-rank correction (analogue of `almostbandedrank`).
+"""
+@inline denserank(A::SparseWithDenseRowColMatrix) = size(A.V, 1)
+
+"""
+    exclusive_sparsepart(A::SparseWithDenseRowColMatrix)
+
+A view of the rows of the sparse part that the low-rank correction does **not** touch.
+Only defined when the left factor is a [`SelectorMatrix`](@ref) (the boundary-condition
+case), where the correction is confined to the top `r` rows; otherwise the correction is
+not row-localized and this throws.
+"""
+function exclusive_sparsepart(A::SparseWithDenseRowColMatrix)
+    A.U isa SelectorMatrix ||
+        throw(ArgumentError("exclusive_sparsepart is only defined when the low-rank left factor is a SelectorMatrix; use `lowrankfactors` instead."))
+    r = denserank(A)
+    return @view A.S[(r + 1):end, :]
+end
+
+# ------------------
+# AbstractArray interface
+# ------------------
+
+@inline Base.size(A::SparseWithDenseRowColMatrix) = size(A.S)
+@inline Base.eltype(::SparseWithDenseRowColMatrix{T}) where {T} = T
+Base.IndexStyle(::Type{<:SparseWithDenseRowColMatrix}) = IndexCartesian()
+
+@inline _lowrank_entry(U::SelectorMatrix, V, i::Int, j::Int) =
+    i ≤ U.r ? @inbounds(V[i, j]) : zero(eltype(V))
+@inline function _lowrank_entry(U, V, i::Int, j::Int)
+    acc = zero(promote_type(eltype(U), eltype(V)))
+    @inbounds for k in 1:size(U, 2)
+        acc += U[i, k] * V[k, j]
+    end
+    return acc
+end
+
+Base.@propagate_inbounds function Base.getindex(A::SparseWithDenseRowColMatrix, i::Int, j::Int)
+    @boundscheck checkbounds(A, i, j)
+    return @inbounds A.S[i, j] + _lowrank_entry(A.U, A.V, i, j)
+end
+
+"""
+    setindex!(A::SparseWithDenseRowColMatrix, v, i, j)
+
+Set the assembled entry `A[i,j]` to `v`, so that `A[i,j]` reads back `v` (the
+`AbstractArray` round-trip). The write is absorbed into the **sparse part** by storing
+`S[i,j] = v - (U*V)[i,j]`; the low-rank correction `U*V` is left unchanged. To edit the
+low-rank part instead, mutate `fillpart(A)` / `lowrankfactors(A)` directly.
+
+Note this can introduce a stored entry into `S` at `(i,j)` (as any sparse `setindex!`
+does), and on a low-rank-covered row stores the value minus the correction.
+"""
+Base.@propagate_inbounds function Base.setindex!(A::SparseWithDenseRowColMatrix, v, i::Int, j::Int)
+    @boundscheck checkbounds(A, i, j)
+    @inbounds A.S[i, j] = v - _lowrank_entry(A.U, A.V, i, j)
+    return v
+end
+
+function Base.similar(A::SparseWithDenseRowColMatrix, ::Type{T}) where {T}
+    return SparseWithDenseRowColMatrix{T}(similar(A.S, T), _similar_U(A.U, T), similar(A.V, T))
+end
+_similar_U(U::SelectorMatrix, ::Type{T}) where {T} = SelectorMatrix{T}(U.n, U.r)
+_similar_U(U, ::Type{T}) where {T} = similar(U, T)
+
+function Base.copy(A::SparseWithDenseRowColMatrix)
+    return SparseWithDenseRowColMatrix(copy(A.S), _copy_U(A.U), copy(A.V))
+end
+_copy_U(U::SelectorMatrix{T}) where {T} = SelectorMatrix{T}(U.n, U.r)
+_copy_U(U) = copy(U)
+
+function Base.fill!(A::SparseWithDenseRowColMatrix, x)
+    fill!(SparseArrays.nonzeros(A.S), x)  # structural nonzeros only — keeps the pattern
+    A.U isa SelectorMatrix || fill!(A.U, x)
+    fill!(A.V, x)
+    return A
+end
+
+function Base.Matrix(A::SparseWithDenseRowColMatrix{T}) where {T}
+    M = Matrix(A.S)
+    mul!(M, _densify(A.U), A.V, one(T), one(T))
+    return M
+end
+_densify(U::AbstractMatrix) = U
+_densify(U::SelectorMatrix{T}) where {T} = materialize_U!(Matrix{T}(undef, U.n, U.r), U)
+
+# ------------------
+# Pretty printing
+# ------------------
+
+function Base.array_summary(io::IO, A::SparseWithDenseRowColMatrix{T}, inds::Tuple{Vararg{Base.OneTo}}) where {T}
+    print(
+        io, Base.dims2string(length.(inds)), " SparseWithDenseRowColMatrix{$T} with ",
+        SparseArrays.nnz(A.S), " stored entries and fill rank ", denserank(A)
+    )
+    return
+end
diff --git a/src/matvec.jl b/src/matvec.jl
new file mode 100644
index 0000000..9359f9a
--- /dev/null
+++ b/src/matvec.jl
@@ -0,0 +1,117 @@
+# ------------------
+# SelectorMatrix products  ( U = [I_r; 0] )
+# ------------------
+# U*w places w (length r / r rows) into the top r entries/rows and zeros the rest.
+# Split into vector / matrix methods so each is strictly more specific than the generic
+# LinearAlgebra `mul!` (which would otherwise be ambiguous on the SelectorMatrix argument).
+
+function _selector_mul!(y, U::SelectorMatrix, w, α::Number, β::Number)
+    size(w, 1) == U.r ||
+        throw(DimensionMismatch("SelectorMatrix has $(U.r) columns, operand has $(size(w, 1)) rows"))
+    size(y, 1) == U.n ||
+        throw(DimensionMismatch("output has $(size(y, 1)) rows, expected $(U.n)"))
+    r, n = U.r, U.n
+    if iszero(β)
+        @inbounds for c in axes(w, 2)
+            for i in 1:r
+                y[i, c] = α * w[i, c]
+            end
+            for i in (r + 1):n
+                y[i, c] = zero(eltype(y))
+            end
+        end
+    else
+        @inbounds for c in axes(w, 2)
+            for i in 1:r
+                y[i, c] = α * w[i, c] + β * y[i, c]
+            end
+            for i in (r + 1):n
+                y[i, c] = β * y[i, c]
+            end
+        end
+    end
+    return y
+end
+
+# Only the *vector* method is registered as `LinearAlgebra.mul!` (it is strictly more
+# specific than the generic matvec and so unambiguous, and it is what the polymorphic
+# `_applyA!` calls when `U` may be a selector). The matrix case is reached only through the
+# internal calls below, which dispatch to `_selector_mul!` directly — registering it as
+# `mul!` would clash with LinearAlgebra's `mul!(::AbstractMatrix, ::AbstractMatrix, ::Diagonal/Triangular)`.
+LinearAlgebra.mul!(y::AbstractVector, U::SelectorMatrix, w::AbstractVector, α::Number, β::Number) =
+    _selector_mul!(y, U, w, α, β)
+
+function Base.:*(U::SelectorMatrix{T}, w::AbstractVector) where {T}
+    y = Vector{promote_type(T, eltype(w))}(undef, U.n)
+    return _selector_mul!(y, U, w, true, false)
+end
+function Base.:*(U::SelectorMatrix{T}, W::AbstractMatrix) where {T}
+    Y = Matrix{promote_type(T, eltype(W))}(undef, U.n, size(W, 2))
+    return _selector_mul!(Y, U, W, true, false)
+end
+
+# ------------------
+# SparseWithDenseRowColMatrix * vector / matrix:  A = S + U*V
+# ------------------
+
+# `y .+= c * U[:, k]` — alloc-free for both the selector and dense cases.
+@inline function _axpy_col!(y::AbstractVector, U::SelectorMatrix, k::Int, c)
+    @inbounds y[k] += c
+    return y
+end
+@inline function _axpy_col!(y::AbstractVector, U::AbstractMatrix, k::Int, c)
+    @inbounds for i in eachindex(y)
+        y[i] += c * U[i, k]
+    end
+    return y
+end
+
+"""
+    mul!(y, A::SparseWithDenseRowColMatrix, x, α, β)
+
+Five-argument multiply `y .= α*(A*x) + β*y`. The vector path is allocation-free: the
+sparse `S*x` uses `SparseArrays`' in-place kernel and the rank-`r` correction is applied as
+`r` scaled column updates without forming the intermediate `V*x`.
+"""
+function LinearAlgebra.mul!(
+        y::AbstractVector, A::SparseWithDenseRowColMatrix, x::AbstractVector, α::Number, β::Number
+    )
+    mul!(y, A.S, x, α, β)
+    U, V = A.U, A.V
+    @inbounds for k in axes(V, 1)
+        sk = zero(promote_type(eltype(V), eltype(x)))
+        for j in eachindex(x)
+            sk += V[k, j] * x[j]
+        end
+        _axpy_col!(y, U, k, α * sk)
+    end
+    return y
+end
+LinearAlgebra.mul!(y::AbstractVector, A::SparseWithDenseRowColMatrix, x::AbstractVector) =
+    mul!(y, A, x, true, false)
+
+function Base.:*(A::SparseWithDenseRowColMatrix, x::AbstractVector)
+    T = promote_type(eltype(A), eltype(x))
+    y = Vector{T}(undef, size(A, 1))
+    return mul!(y, A, x, one(T), zero(T))
+end
+
+# Matrix RHS: forming the r×ncols block V*X is cheap (r ≪ n) and not the solve hot path.
+function LinearAlgebra.mul!(
+        Y::AbstractMatrix, A::SparseWithDenseRowColMatrix, X::AbstractMatrix, α::Number, β::Number
+    )
+    mul!(Y, A.S, X, α, β)
+    W = A.V * X                       # r × ncols
+    _add_lowrank!(Y, A.U, W, α)       # Y .+= α * U*(V*X)
+    return Y
+end
+_add_lowrank!(Y, U::SelectorMatrix, W, α) = _selector_mul!(Y, U, W, α, true)
+_add_lowrank!(Y, U, W, α) = mul!(Y, U, W, α, true)
+LinearAlgebra.mul!(Y::AbstractMatrix, A::SparseWithDenseRowColMatrix, X::AbstractMatrix) =
+    mul!(Y, A, X, true, false)
+
+function Base.:*(A::SparseWithDenseRowColMatrix, X::AbstractMatrix)
+    T = promote_type(eltype(A), eltype(X))
+    Y = Matrix{T}(undef, size(A, 1), size(X, 2))
+    return mul!(Y, A, X, one(T), zero(T))
+end
diff --git a/src/woodbury.jl b/src/woodbury.jl
new file mode 100644
index 0000000..1e5a6ea
--- /dev/null
+++ b/src/woodbury.jl
@@ -0,0 +1,341 @@
+# ------------------
+# Woodbury factorization (primary path)
+# ------------------
+#
+#   A = S + U V
+#   A⁻¹ b = S⁻¹b − Z C⁻¹ (V S⁻¹b),   Z = S⁻¹U (n×r),   C = I_r + V Z (r×r)
+#
+# Factor S once with PureKLU; precompute Z (one multi-RHS solve) and lu(C). Each subsequent
+# solve costs one sparse solve + an r×r dense solve. `refactor!` reuses S's symbolic
+# analysis (PureKLU `klu!`) so a Newton sweep (same pattern, new values) never re-analyzes.
+
+"""
+    SparseWithDenseRowColWoodbury{T} <: LinearAlgebra.Factorization{T}
+
+Sherman–Morrison–Woodbury factorization of an [`SparseWithDenseRowColMatrix`](@ref) `A = S + U V`,
+built from a single [`PureKLU`](https://github.com/SciML/PureKLU.jl) LU factorization of the
+sparse part `S` plus a dense `r × r` correction `C = I + V S⁻¹U`. Created by
+[`factorize`](@ref)/[`lu`](@ref) (the default strategy when `S` is nonsingular). Solve with
+`\\`/`ldiv!`; update in place for a new set of numeric values with [`refactor!`](@ref).
+
+Valid only when both `S` and `C` are nonsingular; [`factorize`](@ref) falls back to
+[`SparseWithDenseRowColAugmented`](@ref) otherwise. The forward error of the Woodbury solve scales
+with `κ(S)·κ(C)` rather than `κ(A)`, so one step of iterative refinement (`refine` keyword,
+default `1`) is applied to restore backward stability — this is allocation-free, reusing the
+cached factors.
+"""
+mutable struct SparseWithDenseRowColWoodbury{T, KF, TS, TU, TV, CF} <: LinearAlgebra.Factorization{T}
+    Sfact::KF            # PureKLU.KLUFactorization of S (symbolic reused across refactors)
+    Sown::TS             # owned SparseMatrixCSC copy of S — values for the residual matvec
+    U::TU                # n×r
+    V::TV                # r×n
+    Z::Matrix{T}         # n×r = S⁻¹U, doubles as the multi-RHS solve! buffer (unit stride)
+    C::Matrix{T}         # r×r scratch holding (after lu!) the LU factors of I + V*Z
+    Cfact::CF            # lu!(C)
+    ywork::Vector{T}     # length n — holds S⁻¹b
+    tr::Vector{T}        # length r — V*y and C⁻¹(·)
+    res::Vector{T}       # length n — structured residual / correction
+    borig::Vector{T}     # length n — saved RHS for iterative refinement
+    r::Int
+    refine::Int
+    illconditioned::Bool
+end
+
+function SparseWithDenseRowColWoodbury(
+        Sfact, Sown, U, V, Z::Matrix{T}, C, Cfact, ywork, tr, res, borig, r, refine, ill
+    ) where {T}
+    return SparseWithDenseRowColWoodbury{
+        T, typeof(Sfact), typeof(Sown), typeof(U), typeof(V), typeof(Cfact),
+    }(Sfact, Sown, U, V, Z, C, Cfact, ywork, tr, res, borig, r, refine, ill)
+end
+
+Base.size(F::SparseWithDenseRowColWoodbury) = (size(F.Sown, 1), size(F.Sown, 2))
+Base.size(F::SparseWithDenseRowColWoodbury, i::Integer) = size(F.Sown, i)
+LinearAlgebra.issuccess(F::SparseWithDenseRowColWoodbury) =
+    LinearAlgebra.issuccess(F.Sfact) && LinearAlgebra.issuccess(F.Cfact)
+denserank(F::SparseWithDenseRowColWoodbury) = F.r
+
+# Underlying-float type used to pick conditioning / refinement tolerances.
+_tol_float(::Type{T}) where {T <: AbstractFloat} = T
+_tol_float(::Type{Complex{T}}) where {T <: AbstractFloat} = T
+_tol_float(::Type{T}) where {T} = Float64    # ForwardDiff.Dual, Rational, …
+
+# Build a freshly-owned SparseMatrixCSC{T,Ti} (Ti a KLU index type) we are free to mutate.
+# `Int` is always a valid KLU index type on a given build (Int32 on 32-bit, Int64 on 64-bit),
+# so it is the safe fallback when S's index type is not accepted by PureKLU on this platform.
+function _own_sparse(::Type{T}, S::SparseArrays.AbstractSparseMatrixCSC) where {T}
+    Si = SparseMatrixCSC(S)
+    Ti = eltype(SparseArrays.getcolptr(Si))
+    if Ti <: PureKLU.KLUITypes
+        return eltype(Si) === T ? copy(Si) : SparseMatrixCSC{T, Ti}(Si)
+    else
+        return SparseMatrixCSC{T, Int}(Si)
+    end
+end
+
+# Z ← S⁻¹U  (reuses Sfact's current numeric factorization).
+function _recompute_Z!(F::SparseWithDenseRowColWoodbury)
+    materialize_U!(F.Z, F.U)
+    PureKLU.solve!(F.Sfact, F.Z)
+    return F
+end
+# C ← lu(I + V*Z)  (assumes F.Z is current).
+function _recompute_C!(F::SparseWithDenseRowColWoodbury{T}) where {T}
+    if F.r > 0
+        mul!(F.C, F.V, F.Z)
+        @inbounds for i in 1:F.r
+            F.C[i, i] += one(T)
+        end
+    end
+    F.Cfact = lu!(F.C; check = false)
+    LinearAlgebra.issuccess(F.Cfact) || throw(SingularException(0))
+    return F
+end
+# Z then C. Assumes Sfact has just been (re)factored.
+_recompute_correction!(F::SparseWithDenseRowColWoodbury) = _recompute_C!(_recompute_Z!(F))
+
+function _woodbury(A::SparseWithDenseRowColMatrix{T}; refine::Integer = 1) where {T}
+    n = size(A, 1)
+    r = denserank(A)
+    Sown = _own_sparse(T, A.S)
+    Sfact = PureKLU.klu(Sown)              # throws SingularException if S is singular
+    # Own copies of the low-rank factors (S is already copied via `_own_sparse`); this keeps
+    # the factorization independent of later in-place mutation of the input's U/V, matching
+    # the augmented path. SelectorMatrix is immutable, so `_copy_U` just rebuilds it.
+    U, V = _copy_U(A.U), copy(A.V)
+    Z = Matrix{T}(undef, n, r)
+    materialize_U!(Z, U)
+    PureKLU.solve!(Sfact, Z)               # Z = S⁻¹U  (one multi-RHS solve)
+    C = Matrix{T}(undef, r, r)
+    nrmC = zero(real(T))
+    if r > 0
+        mul!(C, V, Z)
+        @inbounds for i in 1:r
+            C[i, i] += one(T)
+        end
+        nrmC = opnorm(C, 1)
+    end
+    Cfact = lu!(C; check = false)
+    LinearAlgebra.issuccess(Cfact) || throw(SingularException(0))
+    ill = r > 0 && nrmC * opnorm(inv(Cfact), 1) > inv(sqrt(eps(_tol_float(T))))
+    return SparseWithDenseRowColWoodbury(
+        Sfact, Sown, U, V, Z, C, Cfact,
+        Vector{T}(undef, n), Vector{T}(undef, r), Vector{T}(undef, n), Vector{T}(undef, n),
+        r, Int(refine), ill
+    )
+end
+
+# ------------------
+# Solve
+# ------------------
+
+# b := A⁻¹ b via Woodbury (no refinement). Allocation-free.
+@views function _woodbury_solve!(F::SparseWithDenseRowColWoodbury{T}, b::AbstractVector) where {T}
+    copyto!(F.ywork, b)
+    PureKLU.solve!(F.Sfact, F.ywork)          # ywork = S⁻¹b
+    if F.r > 0
+        mul!(F.tr, F.V, F.ywork)              # tr = V S⁻¹b
+        ldiv!(F.Cfact, F.tr)                  # tr = C⁻¹ V S⁻¹b
+        copyto!(b, F.ywork)
+        mul!(b, F.Z, F.tr, -one(T), one(T))   # b = S⁻¹b - Z C⁻¹ V S⁻¹b
+    else
+        copyto!(b, F.ywork)
+    end
+    return b
+end
+
+# res := A*x using the owned sparse part and the low-rank factors. Allocation-free.
+function _applyA!(F::SparseWithDenseRowColWoodbury{T}, res::AbstractVector, x::AbstractVector) where {T}
+    mul!(res, F.Sown, x)
+    if F.r > 0
+        mul!(F.tr, F.V, x)
+        mul!(res, F.U, F.tr, one(T), one(T))
+    end
+    return res
+end
+
+function LinearAlgebra.ldiv!(F::SparseWithDenseRowColWoodbury{T}, b::AbstractVector) where {T}
+    length(b) == size(F, 1) || throw(DimensionMismatch())
+    F.refine > 0 && copyto!(F.borig, b)
+    _woodbury_solve!(F, b)
+    @inbounds for _ in 1:F.refine
+        _applyA!(F, F.res, b)                 # res = A x
+        @. F.res = F.borig - F.res            # res = b - A x   (structured residual)
+        _woodbury_solve!(F, F.res)            # res = A⁻¹ residual
+        @. b = b + F.res                      # x += correction
+    end
+    return b
+end
+
+# Matrix RHS: solve column by column (reuses the alloc-free vector path + refinement).
+function LinearAlgebra.ldiv!(F::SparseWithDenseRowColWoodbury, B::AbstractMatrix)
+    size(B, 1) == size(F, 1) || throw(DimensionMismatch())
+    for c in axes(B, 2)
+        ldiv!(F, view(B, :, c))
+    end
+    return B
+end
+
+# Out-of-place forms (split vector/matrix so each is strictly more specific than the generic
+# `ldiv!(::AbstractVecOrMat, ::Factorization, ::AbstractVecOrMat)`). `\` is provided by
+# LinearAlgebra's generic `Factorization` fallback, which routes here via `ldiv!` and also
+# supplies the complex-RHS-on-real-factorization split.
+LinearAlgebra.ldiv!(y::AbstractVector, F::SparseWithDenseRowColWoodbury, b::AbstractVector) =
+    ldiv!(F, copyto!(y, b))
+LinearAlgebra.ldiv!(Y::AbstractMatrix, F::SparseWithDenseRowColWoodbury, B::AbstractMatrix) =
+    ldiv!(F, copyto!(Y, B))
+
+# ------------------
+# Adjoint / transpose solve  (Aᴴ = Sᴴ + Vᴴ Uᴴ):  A⁻ᴴ b = S⁻ᴴ(b − Vᴴ C⁻ᴴ Zᴴ b)
+# ------------------
+# `F'` / `transpose(F)` on a Factorization wrap it in Adjoint/TransposeFactorization (newer
+# Julia) or Adjoint/Transpose (older). `\` for those wrappers is provided generically by
+# LinearAlgebra and routes to the `ldiv!` methods below.
+
+const AdjointFact =
+    isdefined(LinearAlgebra, :AdjointFactorization) ? LinearAlgebra.AdjointFactorization : LinearAlgebra.Adjoint
+const TransposeFact =
+    isdefined(LinearAlgebra, :TransposeFactorization) ? LinearAlgebra.TransposeFactorization : LinearAlgebra.Transpose
+
+for (Wrap, op, klu_adj) in (
+        (AdjointFact, :adjoint, :(F.Sfact')),
+        (TransposeFact, :transpose, :(transpose(F.Sfact))),
+    )
+    @eval function LinearAlgebra.ldiv!(
+            Fw::$Wrap{<:Any, <:SparseWithDenseRowColWoodbury{T}}, b::AbstractVector
+        ) where {T}
+        F = parent(Fw)
+        length(b) == size(F, 1) || throw(DimensionMismatch())
+        if F.r > 0
+            mul!(F.tr, $op(F.Z), b)            # tr = Zᴴ b
+            ldiv!($op(F.Cfact), F.tr)          # tr = C⁻ᴴ tr
+            mul!(F.res, $op(F.V), F.tr)        # res = Vᴴ tr
+            @. F.res = b - F.res
+        else
+            copyto!(F.res, b)
+        end
+        PureKLU.solve!($klu_adj, F.res)        # res = S⁻ᴴ(…)
+        copyto!(b, F.res)
+        return b
+    end
+    @eval function LinearAlgebra.ldiv!(
+            Fw::$Wrap{<:Any, <:SparseWithDenseRowColWoodbury}, B::AbstractMatrix
+        )
+        for c in axes(B, 2)
+            ldiv!(Fw, view(B, :, c))
+        end
+        return B
+    end
+end
+
+# ------------------
+# Complex RHS over a real factorization: solve real and imaginary parts separately, since
+# the cached scratch buffers are real-typed. Mirrors LinearAlgebra's dense LU and PureKLU.
+# ------------------
+
+function _ldiv_realfact_complex!(Ft, b::AbstractVector{<:Complex})
+    br = real.(b)
+    bi = imag.(b)
+    ldiv!(Ft, br)
+    ldiv!(Ft, bi)
+    @inbounds @. b = complex(br, bi)
+    return b
+end
+
+LinearAlgebra.ldiv!(F::SparseWithDenseRowColWoodbury{<:Real}, b::AbstractVector{<:Complex}) =
+    _ldiv_realfact_complex!(F, b)
+LinearAlgebra.ldiv!(Fw::AdjointFact{<:Any, <:SparseWithDenseRowColWoodbury{<:Real}}, b::AbstractVector{<:Complex}) =
+    _ldiv_realfact_complex!(Fw, b)
+LinearAlgebra.ldiv!(Fw::TransposeFact{<:Any, <:SparseWithDenseRowColWoodbury{<:Real}}, b::AbstractVector{<:Complex}) =
+    _ldiv_realfact_complex!(Fw, b)
+
+# ------------------
+# Refactor (Newton hot path): reuse symbolic analysis, new numeric values
+# ------------------
+
+"""
+    refactor!(F::SparseWithDenseRowColWoodbury, A::SparseWithDenseRowColMatrix; check=true) -> F
+    refactor!(F::SparseWithDenseRowColWoodbury, S::AbstractSparseMatrixCSC; fill=nothing, check=true) -> F
+    refactor!(F::SparseWithDenseRowColWoodbury, Snzval::AbstractVector; fill=nothing) -> F
+
+Update the factorization `F` in place with new numeric values that share the **same
+sparsity pattern** as the matrix `F` was built from, reusing PureKLU's symbolic analysis and
+preallocated workspace (no re-analysis). This is the hot path for Newton iterations on a
+boundary-value problem, where the Jacobian's pattern is fixed and only its values change.
+
+The cheapest form takes the raw `nzval` vector of the new sparse part (no pattern check —
+the caller guarantees the pattern is unchanged); pass `fill` to also update the low-rank
+right factor `V`. The `SparseMatrixCSC`/`SparseWithDenseRowColMatrix` forms validate the pattern when
+`check=true`.
+"""
+function refactor!(F::SparseWithDenseRowColWoodbury, Snzval::AbstractVector; fill = nothing)
+    nz = SparseArrays.nonzeros(F.Sown)
+    length(Snzval) == length(nz) ||
+        throw(DimensionMismatch("new nzval has length $(length(Snzval)); pattern has $(length(nz))"))
+    copyto!(nz, Snzval)
+    PureKLU.klu!(F.Sfact, nz)                  # reuse symbolic + workspace
+    fill !== nothing && copyto!(F.V, fill)
+    _recompute_correction!(F)
+    return F
+end
+
+function refactor!(
+        F::SparseWithDenseRowColWoodbury, S::SparseArrays.AbstractSparseMatrixCSC;
+        fill = nothing, check::Bool = true
+    )
+    if check
+        _same_pattern(F.Sown, S) ||
+            throw(ArgumentError("refactor! requires the new sparse part to have the same sparsity pattern."))
+    end
+    return refactor!(F, SparseArrays.nonzeros(S); fill)
+end
+
+function refactor!(F::SparseWithDenseRowColWoodbury, A::SparseWithDenseRowColMatrix; check::Bool = true)
+    size(A.V) == size(F.V) ||
+        throw(DimensionMismatch("the fill rank/shape changed; rebuild with `factorize`."))
+    if F.U isa SelectorMatrix
+        # A selector U has no stored values to update; refuse to silently ignore a switch to
+        # a different (e.g. dense) left factor, which would feed a stale U to the solve.
+        (A.U isa SelectorMatrix && size(A.U) == size(F.U)) ||
+            throw(ArgumentError("refactor! cannot change the left factor U from a SelectorMatrix to a dense/incompatible one; rebuild with `factorize`."))
+    else
+        copyto!(F.U, A.U)
+    end
+    return refactor!(F, A.S; fill = A.V, check)
+end
+
+function _same_pattern(A::SparseArrays.AbstractSparseMatrixCSC, B::SparseArrays.AbstractSparseMatrixCSC)
+    return size(A) == size(B) &&
+        SparseArrays.getcolptr(A) == SparseArrays.getcolptr(B) &&
+        SparseArrays.rowvals(A) == SparseArrays.rowvals(B)
+end
+
+"""
+    update_lowrank!(F::SparseWithDenseRowColWoodbury; U=nothing, V=nothing) -> F
+
+Update only the low-rank factors of `F`, **reusing the cached factorization of the sparse
+part `S`** (no re-factorization of `S`). This is the Sherman–Morrison–Woodbury fast path for
+a fixed sparse base solved repeatedly under a *changing low-rank correction* — varying
+boundary conditions / coupling, adding or removing a constraint, sensitivity or parameter
+sweeps. Factor `S` once with [`factorize`](@ref), then per step `update_lowrank!(F; V=…)`
+and solve.
+
+Passing `V` (the `r × n` dense factor) recomputes only the `r × r` correction `C = I + V Z`.
+Passing `U` additionally recomputes `Z = S⁻¹U` (one multi-RHS solve against the *cached* `S`
+factorization), so omit `U` when the left factor is unchanged. The sparsity pattern and rank
+are fixed; to change `S`'s values use [`refactor!`](@ref), and to change the rank rebuild
+with [`factorize`](@ref).
+"""
+function update_lowrank!(F::SparseWithDenseRowColWoodbury; U = nothing, V = nothing)
+    if U !== nothing
+        if F.U isa SelectorMatrix
+            (U isa SelectorMatrix && size(U) == size(F.U)) ||
+                throw(ArgumentError("the left factor U is a SelectorMatrix; pass a matching SelectorMatrix or rebuild with `factorize`."))
+        else
+            copyto!(F.U, U)
+        end
+        _recompute_Z!(F)                      # reuse S's cached factorization — no klu!(S)
+    end
+    V !== nothing && copyto!(F.V, V)
+    return _recompute_C!(F)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
new file mode 100644
index 0000000..fc11674
--- /dev/null
+++ b/test/runtests.jl
@@ -0,0 +1,43 @@
+using SafeTestsets, Test
+
+@testset "SparseWithDenseRowColMatrices" begin
+    @safetestset "Constructors & accessors" begin
+        include("test_matrix.jl")
+    end
+    @safetestset "Matrix–vector / matrix multiply" begin
+        include("test_matvec.jl")
+    end
+    @safetestset "Woodbury factorization & solve" begin
+        include("test_woodbury.jl")
+    end
+    @safetestset "Augmented fallback" begin
+        include("test_augmented.jl")
+    end
+    @safetestset "Refactorization (Newton path)" begin
+        include("test_refactor.jl")
+    end
+    @safetestset "Generic eltypes" begin
+        include("test_generic_eltypes.jl")
+    end
+    @safetestset "Edge cases & allocations" begin
+        include("test_edgecases.jl")
+    end
+    @safetestset "Appropriateness detector" begin
+        include("test_detect.jl")
+    end
+    @safetestset "LinearSolve caching integration" begin
+        include("test_linearsolve.jl")
+    end
+    @safetestset "Arrow / bordered matrices" begin
+        include("test_arrow.jl")
+    end
+    @safetestset "Aqua quality assurance" begin
+        using Aqua, SparseWithDenseRowColMatrices
+        # `ambiguities = false` (as in FastAlmostBandedMatrices.jl): the only remaining
+        # ambiguities are inert cross-products of our matrix types with LinearAlgebra's
+        # structured matrices (`Diagonal`/`Bidiagonal`/`Tridiagonal`/`Triangular`) in
+        # `*`/`mul!` — combinations that never arise in use, and where both candidate
+        # methods compute the same product anyway.
+        Aqua.test_all(SparseWithDenseRowColMatrices; ambiguities = false, piracies = true)
+    end
+end
diff --git a/test/test_arrow.jl b/test/test_arrow.jl
new file mode 100644
index 0000000..ff77f2d
--- /dev/null
+++ b/test/test_arrow.jl
@@ -0,0 +1,85 @@
+include("testutils.jl")
+using Test, SparseArrays, LinearAlgebra, LinearSolve
+
+# A classic arrowhead matrix = diagonal bulk + dense last row + dense last column, which is a
+# rank-2 dense-row/col correction: A = Diagonal(d) + U*V with U (n×2), V (2×n).
+function arrowhead(n; seed = 1)
+    rng = MersenneTwister(seed)
+    d = 3 .+ rand(rng, n)
+    c = randn(rng, n - 1)               # last column entries (rows 1:n-1)
+    row = randn(rng, n - 1)             # last row entries (cols 1:n-1)
+    full = Matrix(Diagonal(d))
+    full[1:(n - 1), n] .= c
+    full[n, 1:(n - 1)] .= row
+    S = sparse(Diagonal(d))
+    U = zeros(n, 2); U[1:(n - 1), 1] .= c; U[n, 2] = 1.0
+    V = zeros(2, n); V[1, n] = 1.0; V[2, 1:(n - 1)] .= row
+    return SparseWithDenseRowColMatrix(S, U, V), full
+end
+
+# Block/bordered arrow: sparse interior B + dense border (E columns, F rows, C corner) of
+# width k, which is a rank-2k correction over the block-diagonal bulk [B 0; 0 C].
+function block_arrow(n, k; seed = 1)
+    rng = MersenneTwister(seed); m = n - k
+    B = sparse(SymTridiagonal(fill(4.0, m), fill(-1.0, m - 1)))
+    E = randn(rng, m, k); Fb = randn(rng, k, m); C = randn(rng, k, k) + k * I
+    full = [Matrix(B) E; Fb C]
+    S = blockdiag(B, sparse(C))
+    U = zeros(n, 2k); U[1:m, 1:k] .= E; for j in 1:k
+        U[m + j, k + j] = 1.0
+    end
+    V = zeros(2k, n); for j in 1:k
+        V[j, m + j] = 1.0
+    end; V[(k + 1):2k, 1:m] .= Fb
+    return SparseWithDenseRowColMatrix(S, U, V), full
+end
+
+@testset "arrowhead: structure, solve, refactor" begin
+    n = 400
+    A, full = arrowhead(n)
+    b = randn(n)
+    @test denserank(A) == 2
+    @test Matrix(A) ≈ full
+    @test A * b ≈ full * b
+    @test relerr(A \ b, full \ b) < 1.0e-9
+    F = factorize(A)
+    @test F isa SparseWithDenseRowColWoodbury        # diagonal bulk is nonsingular → fast path
+    @test relerr(F \ b, full \ b) < 1.0e-9
+    # Newton-style refactor: same arrow pattern, new values
+    A2, full2 = arrowhead(n; seed = 2)
+    refactor!(F, A2)
+    @test relerr(F \ b, full2 \ b) < 1.0e-9
+end
+
+@testset "detector recommends peeling an arrowhead" begin
+    _, full = arrowhead(200)
+    rec = recommend_lowrank_peel(sparse(full))
+    @test rec.recommended
+    @test rec.rank == 2
+    @test rec.dense_rows == 1 && rec.dense_cols == 1
+end
+
+@testset "block / bordered arrow (border width k → rank 2k)" begin
+    for k in (1, 3, 5)
+        n = 250
+        A, full = block_arrow(n, k)
+        b = randn(n)
+        @test denserank(A) == 2k
+        @test Matrix(A) ≈ full
+        @test relerr(A \ b, full \ b) < 1.0e-9
+    end
+end
+
+@testset "arrowhead through the LinearSolve caching interface" begin
+    n = 300
+    A, full = arrowhead(n; seed = 7)
+    b = randn(n)
+    cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization())
+    @test relerr(solve!(cache).u, full \ b) < 1.0e-9
+    # new arrow values, same pattern → reuses cached symbolic
+    A2, full2 = arrowhead(n; seed = 8)
+    cache.A = A2
+    @test relerr(solve!(cache).u, full2 \ b) < 1.0e-9
+    # default solve also picks it up
+    @test relerr(solve(LinearProblem(A, b)).u, full \ b) < 1.0e-9
+end
diff --git a/test/test_augmented.jl b/test/test_augmented.jl
new file mode 100644
index 0000000..7af4e40
--- /dev/null
+++ b/test/test_augmented.jl
@@ -0,0 +1,75 @@
+include("testutils.jl")
+using Test, LinearAlgebra
+
+# Build a matrix whose sparse part S is singular (a zero row) but whose assembled A is not,
+# because the low-rank fill supplies that row. This is the canonical BVP situation (an
+# interior stencil that does not by itself pin down a boundary degree of freedom).
+function singularS_nonsingularA(n, r; seed = 1)
+    rng = MersenneTwister(seed)
+    S = sparse(3.0 * I, n, n)
+    for i in 1:(n - 1)
+        S[i + 1, i] = -1.0
+    end
+    for j in 1:n            # zero out the first r rows of S → S singular
+        for i in 1:r
+            S[i, j] = 0.0
+        end
+    end
+    dropzeros!(S)
+    F = zeros(r, n)          # dense rows that make A nonsingular
+    for i in 1:r
+        F[i, i] = 1.0
+        F[i, i + r] = 0.3
+    end
+    A = SparseWithDenseRowColMatrix(S, F; replace = true)
+    return A
+end
+
+@testset ":auto falls back to augmented when S is singular" begin
+    A = singularS_nonsingularA(50, 2)
+    @test_throws SingularException factorize(A; strategy = :woodbury, auto_fallback = false)
+    F = factorize(A)                       # :auto
+    @test F isa SparseWithDenseRowColAugmented
+    @test issuccess(F)
+    b = randn(50)
+    @test relerr(F \ b, Matrix(A) \ b) < 1.0e-9
+end
+
+@testset "augmented adjoint & transpose solve" begin
+    A = singularS_nonsingularA(50, 2; seed = 3)
+    F = factorize(A)
+    @test F isa SparseWithDenseRowColAugmented
+    M = Matrix(A)
+    b = randn(50)
+    @test relerr(F' \ b, M' \ b) < 1.0e-9
+    @test relerr(transpose(F) \ b, transpose(M) \ b) < 1.0e-9
+    x = copy(b)
+    ldiv!(F', x)
+    @test relerr(x, M' \ b) < 1.0e-9
+end
+
+@testset "forced :augmented agrees on a nonsingular S" begin
+    A = rand_sparsedense(60, 3; seed = 5)
+    M = Matrix(A)
+    b = randn(60)
+    F = factorize(A; strategy = :augmented)
+    @test F isa SparseWithDenseRowColAugmented
+    @test relerr(F \ b, M \ b) < 1.0e-9
+    B = randn(60, 3)
+    @test relerr(F \ B, M \ B) < 1.0e-9
+end
+
+@testset "singular A surfaces SingularException" begin
+    n, r = 20, 1
+    S = spzeros(n, n)                      # A will be rank-deficient
+    F = zeros(r, n)
+    F[1, 1] = 1.0
+    A = SparseWithDenseRowColMatrix(S, F; replace = true)
+    @test_throws SingularException factorize(A; strategy = :augmented)
+    @test_throws SingularException factorize(A)   # :auto can't rescue a singular A
+end
+
+@testset "invalid strategy" begin
+    A = rand_sparsedense(20, 1)
+    @test_throws ArgumentError factorize(A; strategy = :nonsense)
+end
diff --git a/test/test_detect.jl b/test/test_detect.jl
new file mode 100644
index 0000000..c5ca1a9
--- /dev/null
+++ b/test/test_detect.jl
@@ -0,0 +1,67 @@
+include("testutils.jl")
+using Test, SparseArrays, LinearAlgebra
+
+@testset "uniformly sparse → not recommended" begin
+    # Mimics the user's DAE Jacobians: n≥64, a few nnz per row/col, no dense rows/cols.
+    n = 200
+    S = sparse(2.0 * I, n, n)
+    for i in 1:(n - 1)
+        S[i + 1, i] = -1.0; S[i, i + 1] = -1.0
+    end
+    for _ in 1:(2n)                       # scatter a few extra entries, still ≤ ~8/row
+        i = rand(1:n); j = rand(1:n); S[i, j] = 0.3
+    end
+    rec = recommend_lowrank_peel(S)
+    @test rec isa PeelRecommendation
+    @test rec.recommended == false
+    @test Bool(rec) == false
+    @test rec.rank == 0
+    @test rec.code === :uniformly_sparse
+    @test occursin("uniformly sparse", sprint(show, MIME"text/plain"(), rec))
+end
+
+@testset "tridiagonal + dense rows → recommended" begin
+    n, k = 2000, 4
+    A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1))
+    A[1:k, :] .= 1.0                      # make the top k rows fully dense
+    rec = recommend_lowrank_peel(A)
+    @test rec.recommended == true
+    @test rec.rank == k
+    @test rec.dense_rows == k
+    @test rec.code === :recommended
+end
+
+@testset "tridiagonal + dense columns → recommended" begin
+    n, k = 2000, 3
+    A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1))
+    A[:, 1:k] .= 1.0                      # k dense columns
+    rec = recommend_lowrank_peel(A)
+    @test rec.recommended == true
+    @test rec.rank == k
+    @test rec.dense_cols == k
+end
+
+@testset "plain tridiagonal → not recommended" begin
+    n = 2000
+    A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1))
+    rec = recommend_lowrank_peel(A)
+    @test rec.recommended == false
+    @test rec.rank == 0
+end
+
+@testset "guards" begin
+    @test recommend_lowrank_peel(sparse(1.0I, 10, 10)).code === :too_small
+    @test recommend_lowrank_peel(sparse(1.0I, 80, 100)).code === :not_square
+end
+
+@testset "construction is allocation-light (no eager message string)" begin
+    n = 200
+    A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1))
+    recommend_lowrank_peel(A)             # warm up
+    a = @allocated recommend_lowrank_peel(A)
+    # only the O(n) rowcnt/colcnt scratch + the small struct; the reason text is never built
+    # on construction (only by `show`). Generous bound — just a regression guard.
+    @test a < 16 * n * sizeof(Int)
+    # the descriptive text is available on demand via show
+    @test !isempty(sprint(show, MIME"text/plain"(), recommend_lowrank_peel(A)))
+end
diff --git a/test/test_edgecases.jl b/test/test_edgecases.jl
new file mode 100644
index 0000000..c4f3e38
--- /dev/null
+++ b/test/test_edgecases.jl
@@ -0,0 +1,97 @@
+include("testutils.jl")
+using Test, LinearAlgebra, SparseArrays
+
+@testset "r = 0 degenerate (pure sparse)" begin
+    n = 20
+    S = sparse(2.0 * I, n, n)
+    for i in 1:(n - 1)
+        S[i + 1, i] = -0.5
+    end
+    A = SparseWithDenseRowColMatrix(S, Matrix{Float64}(undef, n, 0), Matrix{Float64}(undef, 0, n))
+    @test denserank(A) == 0
+    @test Matrix(A) ≈ Matrix(S)
+    b = randn(n)
+    F = factorize(A)
+    @test relerr(F \ b, Matrix(S) \ b) < 1.0e-9
+    @test A * b ≈ S * b
+end
+
+@testset "Int32 index type" begin
+    n, r = 25, 2
+    S = SparseMatrixCSC{Float64, Int32}(sparse(3.0 * I, n, n))
+    for i in 1:(n - 1)
+        S[i + 1, i] = -1.0
+    end
+    A = SparseWithDenseRowColMatrix(S, randn(n, r), randn(r, n) .* 0.1)
+    b = randn(n)
+    @test relerr(factorize(A) \ b, Matrix(A) \ b) < 1.0e-9
+end
+
+@testset "ldiv! is allocation-free (refine = 0 and refine = 1)" begin
+    for refine in (0, 1), sel in (true, false)
+        A = rand_sparsedense(50, 3; selector = sel, seed = 42)
+        F = factorize(A; refine = refine)
+        b = randn(50)
+        ldiv!(F, copy(b))                       # warm up
+        @test (@allocated ldiv!(F, b)) == 0
+    end
+end
+
+@testset "refactor! allocates only the tiny dense pivot" begin
+    A = rand_sparsedense(50, 3; seed = 43)
+    F = factorize(A)
+    nz = copy(sparsepart(A).nzval)
+    refactor!(F, nz)                            # warm up
+    a = @allocated refactor!(F, nz)
+    @test a < 2048                              # O(r) pivot vector + LU wrapper only
+end
+
+@testset "complex RHS in-place ldiv! over a real factorization" begin
+    A = rand_sparsedense(40, 2; seed = 7)        # real (Float64) factorization
+    M = Matrix(A)
+    F = factorize(A)
+    @test eltype(F) <: Real
+    bc = randn(ComplexF64, 40)
+    x = copy(bc); ldiv!(F, x)
+    @test relerr(x, M \ bc) < 1.0e-9
+    xa = copy(bc); ldiv!(F', xa)
+    @test relerr(xa, M' \ bc) < 1.0e-9
+    Bc = randn(ComplexF64, 40, 3)
+    Xc = copy(Bc); ldiv!(F, Xc)
+    @test relerr(Xc, M \ Bc) < 1.0e-9
+
+    # augmented (singular S) path with a complex RHS
+    S = sparse(3.0 * I, 30, 30)
+    for i in 1:29
+        S[i + 1, i] = -1.0
+    end
+    for j in 1:30
+        S[1, j] = 0.0
+    end
+    dropzeros!(S)
+    fr = zeros(1, 30); fr[1, 1] = 1.0; fr[1, 2] = 0.3
+    Aaug = SparseWithDenseRowColMatrix(S, fr; replace = true)
+    Fa = factorize(Aaug)
+    @test Fa isa SparseWithDenseRowColAugmented
+    bc2 = randn(ComplexF64, 30)
+    xa2 = copy(bc2); ldiv!(Fa, xa2)
+    @test relerr(xa2, Matrix(Aaug) \ bc2) < 1.0e-9
+end
+
+@testset "issuccess / size reflect state" begin
+    A = rand_sparsedense(30, 2)
+    F = factorize(A)
+    @test issuccess(F)
+    @test size(F, 1) == 30 == size(F, 2)
+
+    Asing = SparseWithDenseRowColMatrix(
+        spzeros(10, 10),
+        (z = zeros(1, 10); z[1, 1] = 1.0; z); replace = true
+    )
+    @test_throws SingularException factorize(Asing)
+end
+
+@testset "qr is intentionally unsupported" begin
+    A = rand_sparsedense(20, 2)
+    @test_throws ArgumentError qr(A)
+end
diff --git a/test/test_generic_eltypes.jl b/test/test_generic_eltypes.jl
new file mode 100644
index 0000000..73e9b29
--- /dev/null
+++ b/test/test_generic_eltypes.jl
@@ -0,0 +1,79 @@
+include("testutils.jl")
+using Test, LinearAlgebra, SparseArrays, ForwardDiff
+
+@testset "Float32 / BigFloat / Complex solve & refactor" begin
+    for (T, tol) in (
+            (Float32, 1.0f-3), (BigFloat, 1.0e-60), (ComplexF64, 1.0e-9),
+            (Complex{BigFloat}, 1.0e-60),
+        )
+        n, r = 30, 2
+        rng = MersenneTwister(1)
+        S = sparse(T(4) * I, n, n)
+        for i in 1:(n - 1)
+            S[i + 1, i] = T(-1)
+            S[i, i + 1] = T(-1)
+        end
+        U = randn(rng, T, n, r)
+        V = randn(rng, T, r, n) .* T(0.1)
+        A = SparseWithDenseRowColMatrix(S, U, V)
+        M = Matrix(A)
+        b = randn(rng, T, n)
+
+        F = factorize(A)
+        @test eltype(F \ b) == T
+        @test relerr(F \ b, M \ b) < tol
+
+        # refactor stays in-type
+        S2 = copy(S); S2.nzval .*= T(2)
+        A2 = SparseWithDenseRowColMatrix(S2, U, V)
+        refactor!(F, S2.nzval)
+        @test relerr(F \ b, Matrix(A2) \ b) < tol
+    end
+end
+
+@testset "ForwardDiff: autodiff through the sparse + low-rank solve" begin
+    n, r = 25, 2
+    function objective(p)
+        T = eltype(p)
+        S = sparse(T(4) * p[1] * I, n, n)
+        for i in 1:(n - 1)
+            S[i + 1, i] = T(-1)
+        end
+        U = T.(reshape(collect(1.0:(n * r)), n, r)) ./ (n * r) .* p[2]
+        V = T.(reshape(collect(1.0:(r * n)), r, n)) ./ (r * n)
+        b = T.(collect(1.0:n))
+        return sum(factorize(SparseWithDenseRowColMatrix(S, U, V)) \ b)
+    end
+
+    p0 = [1.0, 1.0]
+    g = ForwardDiff.gradient(objective, p0)
+    h = 1.0e-6
+    fd = [
+        (objective([p0[1] + h, p0[2]]) - objective([p0[1] - h, p0[2]])) / 2h,
+        (objective([p0[1], p0[2] + h]) - objective([p0[1], p0[2] - h])) / 2h,
+    ]
+    @test relerr(g, fd) < 1.0e-5
+    @test all(isfinite, g)
+end
+
+@testset "ForwardDiff through the augmented fallback" begin
+    n, r = 20, 1
+    function objective(p)
+        T = eltype(p)
+        S = sparse(T(2) * I, n, n)
+        for i in 1:(n - 1)
+            S[i + 1, i] = T(-1)
+        end
+        for j in 1:n
+            S[1, j] = zero(T)
+        end     # S singular → augmented path
+        Fl = zeros(T, r, n); Fl[1, 1] = p[1]
+        A = SparseWithDenseRowColMatrix(S, Fl; replace = true)
+        b = T.(collect(1.0:n))
+        return sum(factorize(A) \ b)
+    end
+    g = ForwardDiff.derivative(objective, 1.5)
+    h = 1.0e-6
+    fd = (objective(1.5 + h) - objective(1.5 - h)) / 2h
+    @test isapprox(g, fd; rtol = 1.0e-4)
+end
diff --git a/test/test_linearsolve.jl b/test/test_linearsolve.jl
new file mode 100644
index 0000000..5b879e8
--- /dev/null
+++ b/test/test_linearsolve.jl
@@ -0,0 +1,93 @@
+include("testutils.jl")
+using Test, LinearAlgebra, SparseArrays, LinearSolve
+
+n, r = 60, 3
+
+@testset "default solve picks the caching algorithm" begin
+    A = rand_sparsedense(n, r; seed = 21)
+    b = randn(n)
+    sol = solve(LinearProblem(A, b))
+    @test sol.u ≈ Matrix(A) \ b
+end
+
+@testset "cache reuse: symbolic cached across value/RHS changes" begin
+    A = rand_sparsedense(n, r; seed = 22)
+    b = randn(n)
+    cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization())
+    s1 = solve!(cache)
+    @test s1.u ≈ Matrix(A) \ b
+
+    # new values, SAME sparsity pattern → reuse the cached symbolic analysis (refactor!)
+    A2 = SparseWithDenseRowColMatrix(
+        SparseMatrixCSC(
+            size(A)..., copy(sparsepart(A).colptr), copy(sparsepart(A).rowval),
+            sparsepart(A).nzval .* 1.5 .+ 0.01
+        ),
+        lowrankfactors(A)[1], Matrix(fillpart(A)) .+ 0.2
+    )
+    cache.A = A2
+    s2 = solve!(cache)
+    @test s2.u ≈ Matrix(A2) \ b
+
+    # new RHS only → reuse the cached numeric factorization
+    b2 = randn(n)
+    cache.b = b2
+    s3 = solve!(cache)
+    @test s3.u ≈ Matrix(A2) \ b2
+end
+
+@testset "reuse_symbolic = false rebuilds each solve" begin
+    A = rand_sparsedense(n, r; seed = 23)
+    b = randn(n)
+    cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization(reuse_symbolic = false))
+    @test solve!(cache).u ≈ Matrix(A) \ b
+    A2 = SparseWithDenseRowColMatrix(
+        SparseMatrixCSC(
+            size(A)..., copy(sparsepart(A).colptr), copy(sparsepart(A).rowval),
+            sparsepart(A).nzval .* 2
+        ),
+        lowrankfactors(A)[1], Matrix(fillpart(A))
+    )
+    cache.A = A2
+    @test solve!(cache).u ≈ Matrix(A2) \ b
+end
+
+@testset "singular A returns Infeasible, does not throw (KLU parity)" begin
+    n = 20
+    S = sparse(2.0 * I, n, n)
+    S[5, 5] = 0.0                         # singular S, and no fill to fix it → singular A
+    A = SparseWithDenseRowColMatrix(S, zeros(n, 1), zeros(1, n))
+    b = randn(n)
+    sol = solve(LinearProblem(A, b))      # must NOT throw
+    @test sol.retcode == ReturnCode.Infeasible
+    # also in the cached reuse loop
+    cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization())
+    @test solve!(cache).retcode == ReturnCode.Infeasible
+end
+
+@testset "singular S routes through the augmented fallback under caching" begin
+    # S singular (zeroed top rows), A nonsingular via the dense fill rows
+    nn, rr = 50, 2
+    S = sparse(3.0 * I, nn, nn)
+    for i in 1:(nn - 1)
+        S[i + 1, i] = -1.0
+    end
+    for j in 1:nn, i in 1:rr
+        S[i, j] = 0.0
+    end
+    dropzeros!(S)
+    F = zeros(rr, nn); for i in 1:rr
+        F[i, i] = 1.0; F[i, i + rr] = 0.3
+    end
+    A = SparseWithDenseRowColMatrix(S, F; replace = true)
+    b = randn(nn)
+    cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization())
+    @test solve!(cache).u ≈ Matrix(A) \ b
+    # refactor with new interior values (same pattern), still singular S → still correct
+    A2 = SparseWithDenseRowColMatrix(
+        SparseMatrixCSC(size(A.S)..., copy(A.S.colptr), copy(A.S.rowval), A.S.nzval .* 1.3),
+        A.U, A.V
+    )
+    cache.A = A2
+    @test solve!(cache).u ≈ Matrix(A2) \ b
+end
diff --git a/test/test_matrix.jl b/test/test_matrix.jl
new file mode 100644
index 0000000..337dd56
--- /dev/null
+++ b/test/test_matrix.jl
@@ -0,0 +1,109 @@
+include("testutils.jl")
+using Test
+
+n, r = 30, 3
+
+@testset "general A = S + U*V construction" begin
+    A = rand_sparsedense(n, r)
+    @test A isa SparseWithDenseRowColMatrix
+    @test size(A) == (n, n)
+    @test eltype(A) == Float64
+    @test denserank(A) == r
+    @test sparsepart(A) === A.S
+    @test fillpart(A) === A.V
+    @test lowrankfactors(A) == (A.U, A.V)
+
+    # getindex equals the dense reference everywhere
+    M = Matrix(A.S) + Matrix(A.U) * Matrix(A.V)
+    @test Matrix(A) ≈ M
+    @test all(A[i, j] ≈ M[i, j] for i in 1:n, j in 1:n)
+end
+
+@testset "eltype promotion" begin
+    S = sparse(2I, 5, 5)              # SparseMatrixCSC{Int}
+    U = ones(Float64, 5, 1)
+    V = ones(Float64, 1, 5)
+    A = SparseWithDenseRowColMatrix(S, U, V)
+    @test eltype(A) == Float64
+    @test sparsepart(A) isa SparseMatrixCSC{Float64}
+    @test eltype(A * ones(5)) == Float64
+end
+
+@testset "size / shape validation" begin
+    S = sparse(1.0I, 5, 5)
+    @test_throws DimensionMismatch SparseWithDenseRowColMatrix(S, ones(4, 1), ones(1, 5))
+    @test_throws DimensionMismatch SparseWithDenseRowColMatrix(S, ones(5, 1), ones(1, 4))
+    @test_throws DimensionMismatch SparseWithDenseRowColMatrix(S, ones(5, 2), ones(1, 5))
+    @test_throws ArgumentError SparseWithDenseRowColMatrix(sparse(1.0I, 5, 6), ones(5, 1), ones(1, 6))
+end
+
+@testset "SelectorMatrix convenience (fill)" begin
+    S = sparse(2.0I, n, n)
+    @inbounds for i in 1:(n - 1)
+        S[i + 1, i] = -0.5
+    end
+    F = randn(r, n)
+
+    A_add = SparseWithDenseRowColMatrix(S, F; replace = false)
+    @test A_add.U isa SelectorMatrix
+    @test fillpart(A_add) == F
+    Madd = Matrix(S)
+    Madd[1:r, :] .+= F
+    @test Matrix(A_add) ≈ Madd
+
+    Arep = SparseWithDenseRowColMatrix(S, F; replace = true)
+    Mrep = Matrix(S)
+    Mrep[1:r, :] = F
+    @test Matrix(Arep) ≈ Mrep
+    @test Matrix(Arep)[1:r, :] ≈ F
+    @test Matrix(Arep)[(r + 1):n, :] ≈ Matrix(S)[(r + 1):n, :]
+end
+
+@testset "exclusive_sparsepart" begin
+    Asel = rand_sparsedense(n, r; selector = true)
+    @test exclusive_sparsepart(Asel) == Matrix(sparsepart(Asel))[(r + 1):n, :]
+    Agen = rand_sparsedense(n, r; selector = false)
+    @test_throws ArgumentError exclusive_sparsepart(Agen)
+end
+
+@testset "getindex / setindex! round-trips the assembled value" begin
+    A = rand_sparsedense(n, r; selector = true)
+    # covered row (low-rank correction present): assembled value must read back
+    A[1, 1] = 100.0
+    @test A[1, 1] ≈ 100.0
+    @test sparsepart(A)[1, 1] ≈ 100.0 - fillpart(A)[1, 1]
+    # uncovered row: value goes straight into S
+    A[5, 5] = 7.0
+    @test A[5, 5] ≈ 7.0
+    @test sparsepart(A)[5, 5] ≈ 7.0
+    # dense U as well
+    Ad = rand_sparsedense(n, r; selector = false)
+    Ad[2, 4] = -3.0
+    @test Ad[2, 4] ≈ -3.0
+end
+
+@testset "similar / copy / fill! / Matrix" begin
+    A = rand_sparsedense(n, r)
+    @test similar(A) isa SparseWithDenseRowColMatrix{Float64}
+    @test similar(A, Float32) isa SparseWithDenseRowColMatrix{Float32}
+    @test similar(A, Float32, n, n) isa Matrix{Float32}   # dims form → plain Array (FABM parity)
+
+    B = copy(A)
+    @test B isa SparseWithDenseRowColMatrix
+    @test Matrix(B) ≈ Matrix(A)
+    # mutating the copy does not touch the original
+    B.V[1, 1] += 1
+    @test !(Matrix(B) ≈ Matrix(A))
+
+    C = copy(A)
+    fill!(C, 0.0)
+    @test iszero(Matrix(C))
+    @test nnz(sparsepart(C)) == nnz(sparsepart(A))   # pattern preserved
+end
+
+@testset "SelectorMatrix basics" begin
+    Sel = SelectorMatrix{Float64}(6, 2)
+    @test size(Sel) == (6, 2)
+    @test Matrix(Sel) == [Matrix(1.0I, 2, 2); zeros(4, 2)]
+    @test_throws ArgumentError SelectorMatrix{Float64}(2, 5)
+end
diff --git a/test/test_matvec.jl b/test/test_matvec.jl
new file mode 100644
index 0000000..41ea800
--- /dev/null
+++ b/test/test_matvec.jl
@@ -0,0 +1,57 @@
+include("testutils.jl")
+using Test, LinearAlgebra
+
+n, r = 40, 3
+
+@testset "A*x and mul! vs dense (dense U)" begin
+    A = rand_sparsedense(n, r; selector = false)
+    M = Matrix(A)
+    x = randn(n)
+    @test A * x ≈ M * x
+
+    y = randn(n)
+    α, β = 1.7, -0.4
+    yref = α .* (M * x) .+ β .* y
+    mul!(y, A, x, α, β)
+    @test y ≈ yref
+end
+
+@testset "A*x and mul! vs dense (selector U)" begin
+    A = rand_sparsedense(n, r; selector = true)
+    M = Matrix(A)
+    x = randn(n)
+    @test A * x ≈ M * x
+    y = zeros(n)
+    mul!(y, A, x)
+    @test y ≈ M * x
+end
+
+@testset "matrix RHS A*X" begin
+    A = rand_sparsedense(n, r)
+    M = Matrix(A)
+    X = randn(n, 5)
+    @test A * X ≈ M * X
+    Y = randn(n, 5)
+    α, β = 0.5, 2.0
+    Yref = α .* (M * X) .+ β .* Y
+    mul!(Y, A, X, α, β)
+    @test Y ≈ Yref
+end
+
+@testset "mul! is allocation-free (vector, selector & dense)" begin
+    for sel in (true, false)
+        A = rand_sparsedense(n, r; selector = sel)
+        x = randn(n)
+        y = zeros(n)
+        mul!(y, A, x, 1.0, 0.0)            # warm up
+        @test (@allocated mul!(y, A, x, 1.0, 0.0)) == 0
+    end
+end
+
+@testset "SelectorMatrix product" begin
+    U = SelectorMatrix{Float64}(7, 3)
+    w = [1.0, 2.0, 3.0]
+    @test U * w == [1.0, 2.0, 3.0, 0, 0, 0, 0]
+    W = reshape(1.0:6.0, 3, 2)
+    @test U * W == [Matrix(W); zeros(4, 2)]
+end
diff --git a/test/test_refactor.jl b/test/test_refactor.jl
new file mode 100644
index 0000000..ea390fc
--- /dev/null
+++ b/test/test_refactor.jl
@@ -0,0 +1,161 @@
+include("testutils.jl")
+using Test, LinearAlgebra, SparseArrays
+
+n, r = 60, 3
+
+@testset "refactor! with raw nzval (values only, same pattern)" begin
+    A = rand_sparsedense(n, r; seed = 11)
+    F = factorize(A)
+    b = randn(n)
+
+    S2 = copy(sparsepart(A))
+    S2.nzval .*= 1.6
+    S2.nzval .+= 0.01
+    A2 = SparseWithDenseRowColMatrix(S2, lowrankfactors(A)...)
+    refactor!(F, S2.nzval)
+    @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9
+end
+
+@testset "refactor! with SparseMatrixCSC (pattern checked) + new fill" begin
+    A = rand_sparsedense(n, r; seed = 12, selector = true)
+    F = factorize(A)
+    b = randn(n)
+
+    S2 = copy(sparsepart(A))
+    S2.nzval .= S2.nzval .* 2 .- 0.05
+    newfill = randn(r, n)
+    refactor!(F, S2; fill = newfill)
+    A2 = SparseWithDenseRowColMatrix(S2, SelectorMatrix{Float64}(n, r), newfill)
+    @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9
+end
+
+@testset "refactor! with SparseWithDenseRowColMatrix (dense U updated)" begin
+    A = rand_sparsedense(n, r; seed = 13, selector = false)
+    F = factorize(A)
+    b = randn(n)
+
+    S2 = copy(sparsepart(A)); S2.nzval .*= 1.3
+    U2 = Matrix(A.U) .+ 0.2 .* randn(n, r)
+    V2 = Matrix(A.V) .+ 0.2 .* randn(r, n)
+    A2 = SparseWithDenseRowColMatrix(S2, U2, V2)
+    refactor!(F, A2)
+    @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9
+end
+
+@testset "Newton-style sequence reusing one factorization" begin
+    A = rand_sparsedense(n, r; seed = 14)
+    F = factorize(A)
+    nzpattern = copy(sparsepart(A).nzval)
+    for k in 1:6
+        newnz = nzpattern .* (1 + 0.1k) .+ 0.001k
+        S2 = SparseMatrixCSC(size(A)..., copy(sparsepart(A).colptr), copy(sparsepart(A).rowval), newnz)
+        A2 = SparseWithDenseRowColMatrix(S2, lowrankfactors(A)...)
+        refactor!(F, newnz)
+        b = randn(n)
+        @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9
+    end
+end
+
+@testset "update_lowrank!: fixed S, varying low-rank correction" begin
+    A = rand_sparsedense(n, r; seed = 41, selector = false)
+    F = factorize(A)
+    S = sparsepart(A); U0 = lowrankfactors(A)[1]
+    # vary only V (S and U fixed) — the common Sherman–Morrison–Woodbury reuse case
+    for _ in 1:4
+        V2 = randn(r, n)
+        update_lowrank!(F; V = V2)
+        A2 = SparseWithDenseRowColMatrix(copy(S), U0, V2)
+        b = randn(n)
+        @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9
+        @test relerr(F \ b, factorize(A2) \ b) < 1.0e-9     # matches a fresh full factorization
+    end
+    # vary U and V together (still reusing S's factorization)
+    U2 = randn(n, r); V3 = randn(r, n)
+    update_lowrank!(F; U = U2, V = V3)
+    A3 = SparseWithDenseRowColMatrix(copy(S), U2, V3)
+    b = randn(n)
+    @test relerr(F \ b, Matrix(A3) \ b) < 1.0e-9
+end
+
+@testset "update_lowrank! with a SelectorMatrix U" begin
+    A = rand_sparsedense(n, r; seed = 42, selector = true)   # selector U (dense top rows)
+    F = factorize(A)
+    V2 = randn(r, n)
+    update_lowrank!(F; V = V2)                                # update the dense rows only
+    A2 = SparseWithDenseRowColMatrix(copy(sparsepart(A)), SelectorMatrix{Float64}(n, r), V2)
+    b = randn(n)
+    @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9
+    @test_throws ArgumentError update_lowrank!(F; U = randn(n, r))   # can't swap selector → dense
+end
+
+@testset "refactor! rejects an incompatible U change" begin
+    A = rand_sparsedense(40, 2; seed = 9, selector = true)   # selector U
+    F = factorize(A)
+    Adense = SparseWithDenseRowColMatrix(copy(sparsepart(A)), randn(40, 2), Matrix(fillpart(A)))
+    @test_throws ArgumentError refactor!(F, Adense)
+end
+
+@testset "factorization is independent of later input mutation" begin
+    A = rand_sparsedense(50, 2; seed = 10, selector = false)
+    M = Matrix(A)
+    b = randn(50)
+    F = factorize(A)
+    x1 = F \ b
+    A.V .+= 10.0                 # mutate the inputs after factorize …
+    A.U .+= 5.0
+    A.S.nzval .*= 2.0
+    x2 = F \ b                   # … F owns copies, so its answer is unchanged
+    @test x1 ≈ x2
+    @test relerr(x1, M \ b) < 1.0e-9
+end
+
+@testset "symbolic cache reuse via lu / lu! / ldiv!" begin
+    # Analyze once (cache symbolic), reuse for many solves and for new values (same pattern).
+    A = rand_sparsedense(n, r; seed = 31)
+    F = lu(A)                                  # analyze (cache symbolic) + numeric factor
+    for b in (randn(n), randn(n), randn(n))    # reuse the cached factorization for many RHS
+        @test relerr(ldiv!(F, copy(b)), Matrix(A) \ b) < 1.0e-9
+    end
+    A2 = rand_sparsedense(n, r; seed = 31)     # same pattern, new values
+    A2.S.nzval .= A.S.nzval .* 1.5 .+ 0.01
+    A2.V .= A.V .+ 0.2
+    lu!(F, A2)                                 # == refactor!(F, A2); reuses cached symbolic
+    b = randn(n)
+    @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9
+    @test relerr(F \ b, lu(A2) \ b) < 1.0e-9     # same answer as a fresh full factorization
+end
+
+@testset "mismatched pattern is rejected" begin
+    A = rand_sparsedense(n, r; seed = 15)
+    F = factorize(A)
+    Sbad = copy(sparsepart(A))
+    Sbad[n, 1] = 123.0                     # introduces a new structural entry
+    @test_throws ArgumentError refactor!(F, Sbad)
+    @test_throws DimensionMismatch refactor!(F, randn(length(sparsepart(A).nzval) + 1))
+end
+
+@testset "augmented refactor!" begin
+    rng = MersenneTwister(20)
+    nn, rr = 40, 2
+    S = sparse(3.0I, nn, nn)
+    for i in 1:(nn - 1)
+        S[i + 1, i] = -1.0
+    end
+    for j in 1:nn, i in 1:rr
+        S[i, j] = 0.0
+    end
+    dropzeros!(S)
+    Fl = zeros(rr, nn); for i in 1:rr
+        Fl[i, i] = 1.0
+    end
+    A = SparseWithDenseRowColMatrix(S, Fl; replace = true)
+    F = factorize(A)
+    @test F isa SparseWithDenseRowColAugmented
+    b = randn(nn)
+
+    # change interior values only (pattern of augmented system unchanged)
+    S2 = copy(sparsepart(A)); S2.nzval .*= 1.4
+    A2 = SparseWithDenseRowColMatrix(S2, A.U, A.V)
+    refactor!(F, A2)
+    @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9
+end
diff --git a/test/test_woodbury.jl b/test/test_woodbury.jl
new file mode 100644
index 0000000..bad26b0
--- /dev/null
+++ b/test/test_woodbury.jl
@@ -0,0 +1,80 @@
+include("testutils.jl")
+using Test, LinearAlgebra
+
+@testset "factorize selects Woodbury for nonsingular S" begin
+    A = rand_sparsedense(60, 4)
+    F = factorize(A)
+    @test F isa SparseWithDenseRowColWoodbury
+    @test issuccess(F)
+    @test size(F) == (60, 60)
+end
+
+@testset "solve vs dense (dense & selector U, several sizes/ranks)" begin
+    for (n, r, sel) in ((50, 1, false), (80, 4, false), (60, 3, true), (120, 6, true))
+        A = rand_sparsedense(n, r; selector = sel, seed = n + r)
+        M = Matrix(A)
+        b = randn(n)
+        F = factorize(A)
+        @test relerr(F \ b, M \ b) < 1.0e-9
+        @test relerr(A \ b, M \ b) < 1.0e-9              # \ on the matrix directly
+        # multi-RHS
+        B = randn(n, 4)
+        @test relerr(F \ B, M \ B) < 1.0e-9
+        # ldiv! in place
+        x = copy(b)
+        ldiv!(F, x)
+        @test relerr(x, M \ b) < 1.0e-9
+    end
+end
+
+@testset "adjoint & transpose solve" begin
+    A = rand_sparsedense(70, 3; seed = 99)
+    M = Matrix(A)
+    b = randn(70)
+    F = factorize(A)
+    @test relerr(F' \ b, M' \ b) < 1.0e-9
+    @test relerr(transpose(F) \ b, transpose(M) \ b) < 1.0e-9
+    B = randn(70, 3)
+    @test relerr(F' \ B, M' \ B) < 1.0e-9
+end
+
+@testset "complex adjoint ≠ transpose" begin
+    n, r = 40, 2
+    rng = MersenneTwister(3)
+    S = sparse((3.0 + 0im) * I, n, n)
+    for i in 1:(n - 1)
+        S[i + 1, i] = -1.0 + 0.2im
+    end
+    U = randn(rng, ComplexF64, n, r)
+    V = randn(rng, ComplexF64, r, n) .* 0.1
+    A = SparseWithDenseRowColMatrix(S, U, V)
+    M = Matrix(A)
+    b = randn(ComplexF64, n)
+    F = factorize(A)
+    @test relerr(F \ b, M \ b) < 1.0e-9
+    @test relerr(F' \ b, M' \ b) < 1.0e-9
+    @test relerr(transpose(F) \ b, transpose(M) \ b) < 1.0e-9
+end
+
+@testset "iterative refinement helps when S is far worse conditioned than A" begin
+    n, r = 60, 1
+    S = sparse(1.0 * I, n, n)
+    S[1, 1] = 1.0e-9                      # S nearly singular, κ(S) ≈ 1e9
+    for i in 1:(n - 1)
+        S[i + 1, i] = -0.1
+    end
+    U = SelectorMatrix{Float64}(n, r)   # correction lives in row 1
+    V = zeros(1, n)
+    V[1, 1] = 1.0                       # makes A's (1,1) ≈ 1 → A well conditioned
+    A = SparseWithDenseRowColMatrix(S, U, V)
+    M = Matrix(A)
+    b = randn(n)
+    xref = M \ b
+
+    F0 = factorize(A; strategy = :woodbury, refine = 0, auto_fallback = false)
+    F2 = factorize(A; strategy = :woodbury, refine = 3, auto_fallback = false)
+    e0 = relerr(F0 \ b, xref)
+    e2 = relerr(F2 \ b, xref)
+    @test e2 ≤ e0                       # refinement does not hurt
+    @test e2 < 1.0e-6                     # and recovers accuracy
+end
diff --git a/test/testutils.jl b/test/testutils.jl
new file mode 100644
index 0000000..1eb7e1a
--- /dev/null
+++ b/test/testutils.jl
@@ -0,0 +1,25 @@
+using SparseWithDenseRowColMatrices, SparseArrays, LinearAlgebra, Random
+
+# Dense oracle for an SparseWithDenseRowColMatrix.
+densify(A::SparseWithDenseRowColMatrix) = Matrix(A)
+
+relerr(x, y) = norm(x .- y) / norm(y)
+
+# A diagonally-dominant (well-conditioned) sparse bulk with some off-band sparsity, plus an
+# r×n low-rank correction. `selector=true` uses the boundary-condition SelectorMatrix for U.
+function rand_sparsedense(n, r; seed = 1, T = Float64, selector = false, scale = 0.1)
+    rng = MersenneTwister(seed)
+    S = sparse(T(4) * I, n, n)
+    @inbounds for i in 1:(n - 1)
+        S[i + 1, i] = T(-1)
+        S[i, i + 1] = T(-1)
+    end
+    for _ in 1:(2n)            # a few off-band entries → genuinely sparse, not banded
+        i = rand(rng, 1:n)
+        j = rand(rng, 1:n)
+        S[i, j] += T(rand(rng) * scale)
+    end
+    U = selector ? SelectorMatrix{T}(n, r) : T.(randn(rng, n, r))
+    V = T.(randn(rng, r, n)) .* T(scale)
+    return SparseWithDenseRowColMatrix(S, U, V)
+end