diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..dcb2d27 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,21 @@ +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates +version: 2 +enable-beta-ecosystems: true # Julia ecosystem +updates: + - package-ecosystem: "github-actions" + directory: "/" # Location of package manifests + schedule: + interval: "weekly" + ignore: + - dependency-name: "crate-ci/typos" + update-types: ["version-update:semver-patch", "version-update:semver-minor"] + - package-ecosystem: "julia" + directories: + - "/" + - "/docs" + schedule: + interval: "daily" + groups: + all-julia-packages: + patterns: + - "*" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..4ec058a --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,38 @@ +name: CI +on: + push: + branches: + - main + tags: ['*'] + pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.10' + - '1.11' + - '1' + os: + - ubuntu-latest + - macos-latest + - windows-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + - uses: julia-actions/cache@v2 + # PureKLU is not yet registered in General; develop it from the SciML repo so the + # dependency resolves on every Julia version (`[sources]` in Project.toml covers 1.11+, + # but this keeps 1.10 green too). Remove once PureKLU is registered. + - name: Develop unregistered PureKLU dependency + run: julia --color=yes --project=. -e 'using Pkg; Pkg.develop(url="https://github.com/SciML/PureKLU.jl")' + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml new file mode 100644 index 0000000..d5dc283 --- /dev/null +++ b/.github/workflows/FormatCheck.yml @@ -0,0 +1,21 @@ +name: format-check + +on: + push: + branches: + - 'main' + - 'release-' + tags: '*' + pull_request: + +jobs: + runic: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: julia-actions/setup-julia@v3 + with: + version: '1' + - uses: fredrikekre/runic-action@v1 + with: + version: '1' diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml new file mode 100644 index 0000000..a8e5983 --- /dev/null +++ b/.github/workflows/SpellCheck.yml @@ -0,0 +1,13 @@ +name: Spell Check + +on: [pull_request] + +jobs: + typos-check: + name: Spell Check with Typos + runs-on: ubuntu-latest + steps: + - name: Checkout Actions Repository + uses: actions/checkout@v6 + - name: Check spelling + uses: crate-ci/typos@v1.18.0 diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 0000000..f49313b --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,15 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a617856 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.jl.cov +*.jl.*.cov +*.jl.mem +/Manifest.toml +/test/Manifest.toml +/docs/build/ diff --git a/.typos.toml b/.typos.toml new file mode 100644 index 0000000..0e35281 --- /dev/null +++ b/.typos.toml @@ -0,0 +1,7 @@ +# Identifiers carried over verbatim from the SuiteSparse C sources during the +# port. They are variable names, not prose, so the spell checker should leave +# them alone. +[default.extend-words] +pn = "pn" # pointer-name index (AMD) +lik = "lik" # L[i,k] entry (solve kernels) +hashi = "hashi" # hash-bucket index (AMD supervariable hashing) diff --git a/LICENSE b/LICENSE index 545d94e..895883a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2026 SciML Open Source Scientific Machine Learning +Copyright (c) 2026 Chris Rackauckas and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..7fb6ee2 --- /dev/null +++ b/Project.toml @@ -0,0 +1,48 @@ +name = "SparseWithDenseRowColMatrices" +uuid = "80e09e16-2b43-4bb0-96aa-b2ce656e0cca" +authors = ["Chris Rackauckas and contributors"] +version = "0.1.0" + +[deps] +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +PureKLU = "0c0d3e7f-3a8b-4f7e-b6f1-9a4d2e7c1f01" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[weakdeps] +LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" + +[extensions] +SparseWithDenseRowColMatricesLinearSolveExt = "LinearSolve" + +# PureKLU is not yet registered in General; resolve it from the SciML repo. `[sources]` +# is honored on Julia 1.11+; on 1.10 (and for registration) PureKLU must be added/registered +# separately — see the CI workflow and README. +[sources] +PureKLU = {url = "https://github.com/SciML/PureKLU.jl"} + +[compat] +Aqua = "0.8" +ForwardDiff = "0.10, 1" +LinearAlgebra = "1" +LinearSolve = "3" +PrecompileTools = "1" +PureKLU = "0.1" +Random = "1" +SafeTestsets = "0.1" +SparseArrays = "1" +Test = "1" +julia = "1.10" + +[extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Aqua", "ForwardDiff", "LinearAlgebra", "LinearSolve", "Random", "SafeTestsets", "SparseArrays", "Test"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..dfdca25 --- /dev/null +++ b/README.md @@ -0,0 +1,268 @@ +# SparseWithDenseRowColMatrices.jl + +[![SciML Code Style](https://img.shields.io/static/v1?label=code%20style&message=SciML&color=9558b2&labelColor=389826)](https://github.com/SciML/SciMLStyle) + +A fast implementation of **"almost sparse" matrices** — a sparse bulk plus a low-rank dense +correction — in Julia. It is the sparse-matrix analogue of +[FastAlmostBandedMatrices.jl](https://github.com/SciML/FastAlmostBandedMatrices.jl): where +that package handles a *banded* bulk with a few dense boundary rows (as in spectral +collocation), this one handles a *general sparse* bulk with a low-rank correction (as in +finite-element / finite-volume boundary-value problems, DAE systems, and KKT/bordered +systems), built for use in [BoundaryValueDiffEq.jl](https://github.com/SciML/BoundaryValueDiffEq.jl). + +The sparse part is factored with [PureKLU.jl](https://github.com/SciML/PureKLU.jl) — a +pure-Julia port of SuiteSparse's KLU (BTF + AMD + Gilbert–Peierls LU) — and the low-rank +part is handled by the Sherman–Morrison–Woodbury identity, so a solve costs **one sparse LU +solve plus a small `r × r` dense correction**. Because PureKLU is generic over +`Union{Real, Complex}`, the whole pipeline works for `Float64`/`ComplexF64`, `BigFloat`, and +`ForwardDiff.Dual` — i.e. you can autodiff straight through the solve. + +## The structure + +An `SparseWithDenseRowColMatrix` represents + +``` +A = S + U * V +``` + +* `S :: SparseMatrixCSC` — the `n × n` sparse bulk, +* `U` — `n × r` dense (or a `SelectorMatrix` `[I_r; 0]` for the boundary-row case), +* `V` — `r × n` dense, the low-rank "fill". + +with `r ≪ n` the *almost-sparse rank*. + +**Arrowhead / bordered matrices** are the canonical case: an arrowhead matrix (a sparse — +often diagonal — bulk plus a dense last row *and* last column) is exactly `S + U*V` with +`r = 2` (the border is rank-2), and a block-bordered system (sparse interior with a dense +border of width `k`, as in KKT / saddle-point / domain-decomposition problems) is `r = 2k` +over a block-diagonal bulk. Solving them this way avoids the fill-in a naive dense or +unordered-sparse LU suffers on the dense border. + +## High-level example + +```julia +using SparseWithDenseRowColMatrices, SparseArrays, LinearAlgebra + +n, r = 1000, 4 + +# A diagonally-dominant sparse bulk +S = sparse(4.0I, n, n) +for i in 1:n-1; S[i+1, i] = -1.0; S[i, i+1] = -1.0; end + +# A general low-rank correction A = S + U*V … +U = randn(n, r); V = randn(r, n) +A = SparseWithDenseRowColMatrix(S, U, V) + +b = randn(n) +x = A \ b # factor once, solve — Woodbury over a PureKLU LU of S +F = factorize(A) # keep the factorization for repeated solves +x = F \ b + +# … or the boundary-condition form: `fill` is r dense rows placed at the top +fill = randn(r, n) +A2 = SparseWithDenseRowColMatrix(S, fill) # adds `fill` to the top r rows +A3 = SparseWithDenseRowColMatrix(S, fill; replace=true) # top r rows *become* `fill` +``` + +### Newton / time-stepping: refactor in place + +When the sparsity pattern is fixed and only the numeric values change (a Newton sweep on a +BVP, say), reuse the symbolic analysis and preallocated workspace: + +```julia +F = factorize(A) +for step in 1:maxiters + newvals = jacobian_values!(...) # same pattern as S, new nzval + refactor!(F, newvals) # reuses BTF/AMD analysis; no re-analysis + Δ = F \ residual + ... +end +``` + +The `refactor!` + `\` loop is allocation-free apart from the tiny `O(r)` dense pivot vector. + +## Accessors (FastAlmostBandedMatrices parity) + +| `AlmostBandedMatrix` | `SparseWithDenseRowColMatrix` | +|----------------------|--------------------------------| +| `bandpart` | `sparsepart` | +| `fillpart` | `fillpart` (the `r × n` `V`) | +| `almostbandedrank` | `denserank` | +| `exclusive_bandpart` | `exclusive_sparsepart` | +| — | `lowrankfactors` → `(U, V)` | + +## Factorization strategy + +`factorize(A; strategy=:auto, refine=1, auto_fallback=true)`: + +* **`:woodbury`** (the fast path) — one PureKLU LU of `S`, precomputed `Z = S⁻¹U`, and a + dense `r × r` correction `C = I + V Z`. Requires `S` and `C` nonsingular. The Woodbury + forward error scales with `κ(S)·κ(C)`, so `refine` steps of (allocation-free) iterative + refinement are applied (default `1`). +* **`:augmented`** (the robust path) — one sparse LU of the bordered system + `[S U; V -I]` of size `n + r`. Requires only `A` (not `S`) to be nonsingular, so it is the + correctness path when `S` is singular — common in BVPs where the interior stencil alone + does not pin a boundary degree of freedom. +* **`:auto`** (default) — Woodbury, automatically falling back to the augmented system if + `S` is singular or `C` is ill-conditioned. + +## Benchmarks + +A realistic BVP/PDE-style system: a banded sparse interior (`bw = 2`) plus `r = 4` dense +boundary rows. Factoring `S` with PureKLU and applying the rank-`r` Woodbury correction +avoids both the `O(n³)` dense cost and the fill-in that the dense rows inflict on a +general sparse `LU`. The cached solve and the symbolic-reuse `refactor!` (the Newton path) +are sub-millisecond. (`SWDRC` below = this package.) + +``` +n = 5000, r = 4 (nnz(S) = 24994, solve rel err vs dense ≈ 1e-12) + SWDRC factorize + solve : 4.85 ms + dense lu + solve : 807.67 ms (166× slower) + SuiteSparse lu on sparse(A) : 24.71 ms (5× slower; dense rows fill in) + SWDRC ldiv! (cached) : 0.27 ms + SWDRC refactor! (Newton) : 0.68 ms (7× faster than re-factorizing) + +n = 10000, r = 4 + SWDRC factorize + solve : 9.45 ms + dense lu + solve : 3160.7 ms (334× slower) + SuiteSparse lu on sparse(A) : 79.69 ms (8× slower) + SWDRC refactor! (Newton) : 1.36 ms +``` + +(`julia -O2`, single thread; `@elapsed` best-of-30. The advantage holds whenever `S` has +good sparse structure — banded, local, or low-fill; for a globally-scattered `S` with no +good ordering, any sparse `LU` degrades and dense `lu` may win.) + +## What is fast + +* [x] Matrix–vector / matrix–matrix multiply (`mul!` vector path is allocation-free) +* [x] Factorization and linear solve (`factorize`/`lu`, `\`, `ldiv!`) +* [x] In-place refactorization with a fixed sparsity pattern (`refactor!`) +* [x] Adjoint / transpose solves +* [x] Generic element types (`BigFloat`, `ForwardDiff.Dual`, complex) + +`qr` is intentionally not provided — a general sparse `QR` causes far more fill than the +LU-based KLU path; use `factorize`/`lu`. + +## When is this the right tool? + +This package only beats plain sparse LU when the matrix is genuinely **sparse plus a few +dense rows/columns** — i.e. there exist `r ≪ n` rows/cols far denser than the rest, so +peeling them into `U·V` leaves a sparse, low-fill bulk `S`. On a *uniformly* sparse matrix +the low-rank part is empty (`r = 0`) and the method degenerates to plain `PureKLU.klu` with +no benefit. Use [`recommend_lowrank_peel`](@ref) — a cheap, pattern-only (`O(nnz)`, no +factorization) check — to decide: + +```julia +rec = recommend_lowrank_peel(A) # A::SparseMatrixCSC → a PeelRecommendation struct +rec.recommended # false → just use plain sparse LU (PureKLU.klu) with symbolic reuse + # true → wrap the rec.rank dense rows/cols as U·V and use this package +rec # `show` renders the human-readable reason (built lazily, not on construction) +``` + +`recommend_lowrank_peel` returns a descriptive [`PeelRecommendation`](@ref) struct — only +`Bool`/`Int`/`Float64`/`Symbol` fields, so it costs nothing to construct; the explanation +string is formatted only when you display it. + +### Cache the symbolic factorization, reuse it for many solves + +For a fixed sparsity pattern with changing values (Newton / time-stepping), the win is +reusing the symbolic analysis (BTF + AMD ordering) instead of re-analyzing every step. The +factorization object caches it; `lu`/`factorize` analyzes once, `ldiv!`/`\` solves any +number of right-hand sides, and `lu!`/`refactor!` updates the values **reusing the cached +symbolic analysis** (no re-analysis): + +```julia +F = lu(A) # analyze (cache symbolic) + numeric factor +x1 = F \ b1; x2 = F \ b2 # reuse for multiple solves +for step in 1:maxiters + lu!(F, A_new) # new values, same pattern → reuses cached symbolic (≈7× faster than re-`lu`) + Δ = F \ residual +end +``` + +For a plain-sparse matrix (the `recommended == false` case) the same idea is `PureKLU.klu` +once + `klu!`/`solve!` per step. + +### Fixed sparse base, changing low-rank correction — the Woodbury win + +The structure pays off most in the classic Sherman–Morrison–Woodbury setting: a **fixed** +sparse base `S`, factored once, solved repeatedly under a **changing low-rank correction** +`U·V` — varying boundary conditions / coupling, adding or removing a constraint, sensitivity +or parameter sweeps, KKT / saddle-point systems with changing borders. Here the dense rows +or columns are exactly what make a direct sparse `LU` of the assembled `S + U·V` fill in +catastrophically; Woodbury sidesteps them by keeping only `S` factored. + +[`update_lowrank!`](@ref) updates the low-rank factors while **reusing `S`'s cached +factorization** (no re-factorization of `S`, and `Z = S⁻¹U` is recomputed only if `U` +changes): + +```julia +F = factorize(A) # factor S once +for step in 1:maxiters + update_lowrank!(F; V = V_new) # fixed S, new dense rows/coupling → reuse S's factorization + x = F \ b +end +``` + +Measured (`n = 20000`, `r = 8` dense rows, 25 solves, `S` fixed): + +``` + Woodbury reuse via update_lowrank! : ~2.7 ms / solve + refactor! (re-factors S each step) : ~7.3 ms / solve (wastes the S re-factorization) + re-KLU the assembled S + U·V : ~1740 ms / solve (~650× slower — dense rows fill) +``` + +Use `update_lowrank!` when only the low-rank part changes, `refactor!` when `S`'s values +change (Newton), and `factorize` to (re)analyze a new pattern. + +## LinearSolve.jl integration + +Loading [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) activates a package +extension that plugs `SparseWithDenseRowColMatrix` into LinearSolve's caching interface +exactly like its `KLUFactorization`: the symbolic analysis is cached in the `LinearCache`, +repeated `solve!`s reuse the factorization, and assigning the matrix new values of the +**same sparsity pattern** triggers a numeric refactorization that reuses the cached symbolic +analysis. It is also the default algorithm for this matrix type. + +```julia +using SparseWithDenseRowColMatrices, LinearSolve + +prob = LinearProblem(A, b) # A::SparseWithDenseRowColMatrix +cache = init(prob) # default alg = SparseWithDenseRowColFactorization(); analyzes once +sol1 = solve!(cache) + +cache.A = A_new # same pattern, new values → reuses cached symbolic (refactor!) +sol2 = solve!(cache) +cache.b = b_new # new RHS only → reuses the cached numeric factorization +sol3 = solve!(cache) +``` + +Pass `SparseWithDenseRowColFactorization(; reuse_symbolic, check_pattern, strategy, refine)` +explicitly to `init`/`solve` to control the behavior (mirrors `KLUFactorization(; …)`). If +`S` is singular it routes through the augmented fallback transparently, and `reuse_symbolic` +/ `check_pattern` behave as for KLU. A singular `A` returns `ReturnCode.Infeasible` (it does +not throw), again matching KLU. One caveat: once a singular `S` forces the augmented fallback, +the cache keeps reusing it even if later `S` values are nonsingular — `init` a fresh cache to +get back the faster Woodbury path. + +## Public API + +``` +SparseWithDenseRowColMatrix SelectorMatrix +sparsepart fillpart lowrankfactors exclusive_sparsepart denserank +factorize lu \\ ldiv! refactor! lu! update_lowrank! +SparseWithDenseRowColWoodbury SparseWithDenseRowColAugmented +recommend_lowrank_peel PeelRecommendation +SparseWithDenseRowColFactorization # LinearSolve.jl algorithm (extension) +``` + +## Installation + +PureKLU.jl is not yet in the General registry; install it from the SciML repository: + +```julia +using Pkg +Pkg.add(url="https://github.com/SciML/PureKLU.jl") +Pkg.add(url="https://github.com/SciML/SparseWithDenseRowColMatrices.jl") +``` diff --git a/ext/SparseWithDenseRowColMatricesLinearSolveExt.jl b/ext/SparseWithDenseRowColMatricesLinearSolveExt.jl new file mode 100644 index 0000000..880d332 --- /dev/null +++ b/ext/SparseWithDenseRowColMatricesLinearSolveExt.jl @@ -0,0 +1,108 @@ +module SparseWithDenseRowColMatricesLinearSolveExt + +using SparseWithDenseRowColMatrices +using SparseWithDenseRowColMatrices: SparseWithDenseRowColMatrix, SparseWithDenseRowColWoodbury, + SparseWithDenseRowColAugmented, denserank, refactor!, SparseWithDenseRowColFactorization +using LinearSolve +using LinearSolve: LinearCache, OperatorAssumptions, LinearVerbosity, AbstractSparseFactorization +using LinearAlgebra: LinearAlgebra, ldiv!, issuccess, Factorization, SingularException, factorize + +# LinearSolve `@reexport using SciMLBase`, so SciMLBase is reachable through it. +const SciMLBase = LinearSolve.SciMLBase + +# ------------------ +# Algorithm +# ------------------ +# Concrete algorithm type lives in the extension; the user-facing constructor is the +# `SparseWithDenseRowColFactorization` function exported by the package (a stub there, with +# its method defined here so it errors helpfully when LinearSolve is not loaded). +struct SWDRCFactorizationAlg <: AbstractSparseFactorization + reuse_symbolic::Bool + check_pattern::Bool + strategy::Symbol + refine::Int +end + +function SparseWithDenseRowColMatrices.SparseWithDenseRowColFactorization(; + reuse_symbolic::Bool = true, check_pattern::Bool = true, + strategy::Symbol = :auto, refine::Integer = 1 + ) + return SWDRCFactorizationAlg(reuse_symbolic, check_pattern, strategy, Int(refine)) +end + +# Default algorithm LinearSolve picks for our matrix type, so `solve(LinearProblem(A, b))` +# uses the caching path automatically (analogous to KLU being the default for sparse Float64). +LinearSolve.defaultalg(::SparseWithDenseRowColMatrix, b, ::OperatorAssumptions{Bool}) = + SWDRCFactorizationAlg(true, true, :auto, 1) + +# ------------------ +# Cache +# ------------------ +# Type-stable cacheval wrapper. The inner factorization can be a Woodbury *or* an Augmented +# one — and can even switch between them across refactorizations if the values make `S` +# (non)singular — so we box it behind a fixed wrapper type rather than letting the +# `LinearCache` field type change underneath us. +mutable struct SWDRCCacheval{T} + fact::Union{Nothing, Factorization{T}} +end + +function LinearSolve.init_cacheval( + ::SWDRCFactorizationAlg, A::SparseWithDenseRowColMatrix{T}, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, + verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions + ) where {T} + return SWDRCCacheval{T}(nothing) +end + +# Refresh the cached factorization for the current `A`, reusing the cached symbolic analysis +# when possible. `refactor!` reuses PureKLU's symbolic analysis (no re-analysis); it throws if +# the pattern/rank changed or if the (Woodbury) `S` became singular — in those cases we fall +# back to a full `factorize` (which can also switch Woodbury ↔ augmented). +# +# A genuinely singular `A` makes `factorize` throw `SingularException`. LinearSolve's contract +# (cf. its KLU path, which uses `check=false`) is to return `ReturnCode.Infeasible` rather than +# throw, so we catch it and store `nothing`; `solve!`'s `issuccess`/`!== nothing` guard then +# maps that to `Infeasible`. (Direct `factorize`/`\` on the matrix still throws, as is idiomatic.) +# +# NOTE (sticky fallback): once a singular `S` forces the augmented path, a later nonsingular `S` +# keeps reusing the augmented factorization (its `refactor!` succeeds for any nonsingular `A`), +# so the cache does not switch back to the faster Woodbury path on its own. Re-`init` the cache +# to recover the Woodbury path if `S` becomes reliably nonsingular again. +function _refresh!(wrap::SWDRCCacheval, A::SparseWithDenseRowColMatrix, alg::SWDRCFactorizationAlg) + f = wrap.fact + if alg.reuse_symbolic && f isa Factorization && + size(f) == size(A) && denserank(f) == denserank(A) + try + refactor!(f, A; check = alg.check_pattern) # reuse cached symbolic analysis + return wrap + catch e + (e isa SingularException || e isa ArgumentError || e isa DimensionMismatch) || rethrow(e) + end + end + try + wrap.fact = factorize(A; strategy = alg.strategy, refine = alg.refine) + catch e + e isa SingularException || rethrow(e) + wrap.fact = nothing # singular A → solve! returns Infeasible + end + return wrap +end + +function SciMLBase.solve!(cache::LinearCache, alg::SWDRCFactorizationAlg; kwargs...) + if cache.isfresh + wrap = LinearSolve.@get_cacheval(cache, :SWDRCFactorizationAlg) + _refresh!(wrap, cache.A, alg) + cache.isfresh = false + end + F = LinearSolve.@get_cacheval(cache, :SWDRCFactorizationAlg).fact + return if F !== nothing && issuccess(F) + y = ldiv!(cache.u, F, cache.b) + SciMLBase.build_linear_solution(alg, y, nothing, cache; retcode = SciMLBase.ReturnCode.Success) + else + SciMLBase.build_linear_solution( + alg, cache.u, nothing, cache; retcode = SciMLBase.ReturnCode.Infeasible + ) + end +end + +end # module diff --git a/src/SelectorMatrix.jl b/src/SelectorMatrix.jl new file mode 100644 index 0000000..d8d0b77 --- /dev/null +++ b/src/SelectorMatrix.jl @@ -0,0 +1,56 @@ +# ------------------ +# SelectorMatrix +# ------------------ + +""" + SelectorMatrix{T}(n, r) + +The `n × r` matrix `[I_r; 0]` — i.e. its first `r` rows are the `r × r` identity and the +remaining `n - r` rows are zero. Used as the left low-rank factor `U` for the common +"dense rows live at the top" case of an [`SparseWithDenseRowColMatrix`](@ref): then `U * V` places +the `r × n` dense `fill` matrix `V` into the top `r` rows. + +Stored implicitly (two `Int`s), so the boundary-condition case never materializes an +`n × r` dense `U` and the matrix–vector / factorization paths stay allocation-light. +""" +struct SelectorMatrix{T} <: AbstractMatrix{T} + n::Int + r::Int + function SelectorMatrix{T}(n::Integer, r::Integer) where {T} + r ≤ n || throw(ArgumentError("SelectorMatrix needs r ≤ n, got r=$r, n=$n")) + return new{T}(Int(n), Int(r)) + end +end +SelectorMatrix(n::Integer, r::Integer) = SelectorMatrix{Bool}(n, r) + +Base.size(M::SelectorMatrix) = (M.n, M.r) + +Base.@propagate_inbounds function Base.getindex(M::SelectorMatrix{T}, i::Int, j::Int) where {T} + @boundscheck checkbounds(M, i, j) + return (i == j && j ≤ M.r) ? one(T) : zero(T) +end + +Base.IndexStyle(::Type{<:SelectorMatrix}) = IndexCartesian() + +# A selector of one eltype is trivially convertible to another; used during promotion. +Base.convert(::Type{SelectorMatrix{T}}, M::SelectorMatrix) where {T} = SelectorMatrix{T}(M.n, M.r) +SelectorMatrix{T}(M::SelectorMatrix) where {T} = SelectorMatrix{T}(M.n, M.r) + +""" + materialize_U!(Z, U) + +Copy the left low-rank factor `U` into the preallocated dense buffer `Z` (`n × r`). This is +the one place `U` is realized densely; specialized for [`SelectorMatrix`](@ref) so the +common case writes a sparse identity pattern instead of going through generic `getindex`. +""" +function materialize_U!(Z::AbstractMatrix, U::AbstractMatrix) + copyto!(Z, U) + return Z +end +function materialize_U!(Z::AbstractMatrix{T}, U::SelectorMatrix) where {T} + fill!(Z, zero(T)) + @inbounds for i in 1:U.r + Z[i, i] = one(T) + end + return Z +end diff --git a/src/SparseWithDenseRowColMatrices.jl b/src/SparseWithDenseRowColMatrices.jl new file mode 100644 index 0000000..5119ff8 --- /dev/null +++ b/src/SparseWithDenseRowColMatrices.jl @@ -0,0 +1,49 @@ +module SparseWithDenseRowColMatrices + +using LinearAlgebra +using LinearAlgebra: Factorization, SingularException, ldiv!, lu, lu!, mul!, opnorm, + issuccess, factorize +using SparseArrays +using SparseArrays: AbstractSparseMatrixCSC, SparseMatrixCSC, nonzeros, rowvals, getcolptr, + nnz, sparse +using PureKLU + +import PrecompileTools: @setup_workload, @compile_workload + +include("SelectorMatrix.jl") +include("matrix.jl") +include("matvec.jl") +include("woodbury.jl") +include("augmented.jl") +include("factorize.jl") +include("detect.jl") + +export SparseWithDenseRowColMatrix, SelectorMatrix, + sparsepart, fillpart, lowrankfactors, exclusive_sparsepart, denserank, + SparseWithDenseRowColWoodbury, SparseWithDenseRowColAugmented, refactor!, update_lowrank!, + recommend_lowrank_peel, PeelRecommendation, SparseWithDenseRowColFactorization + +# --------------------- +# Precompilation +# --------------------- + +@setup_workload begin + n, r = 12, 2 + for T in (Float64, ComplexF64) + S = sparse(T(2) * I, n, n) + @inbounds for i in 1:(n - 1) + S[i + 1, i] = -one(T) + end + fill = ones(T, r, n) + @compile_workload begin + A = SparseWithDenseRowColMatrix(S, fill) + b = ones(T, n) + A * b + F = factorize(A) + F \ b + refactor!(F, nonzeros(sparsepart(A))) + end + end +end + +end # module diff --git a/src/augmented.jl b/src/augmented.jl new file mode 100644 index 0000000..6b10254 --- /dev/null +++ b/src/augmented.jl @@ -0,0 +1,157 @@ +# ------------------ +# Augmented factorization (fallback path) +# ------------------ +# +# Solve A x = b with A = S + U V via the equivalent bordered system +# +# [ S U ] [x] [b] +# [ V -I ] [y] = [0] +# +# factored as ONE sparse LU of size (n+r). This requires only `A` (not `S`) to be +# nonsingular, so it is the correctness path when `S` is singular or the Woodbury +# correction `C` is dangerously ill-conditioned. The `r` dense rows/columns of `U`/`V` add +# fill comparable to FABM's QR cost — fine for small `r`, the regime where the low-rank +# structure is worth exploiting at all. + +""" + SparseWithDenseRowColAugmented{T} <: LinearAlgebra.Factorization{T} + +Fallback factorization of an [`SparseWithDenseRowColMatrix`](@ref) `A = S + U V` via a single sparse +LU of the bordered system `[S U; V -I]` of size `n + r`. Used automatically by +[`factorize`](@ref) when `S` is singular or the Woodbury correction is ill-conditioned, and +selectable explicitly with `strategy = :augmented`. Requires only `A` (not `S`) to be +nonsingular. +""" +mutable struct SparseWithDenseRowColAugmented{T, KF} <: LinearAlgebra.Factorization{T} + Kaug::KF # PureKLU.KLUFactorization of [S U; V -I] + n::Int + r::Int + rhs::Vector{T} # length n+r work buffer ([b; 0] / solution) +end + +Base.size(F::SparseWithDenseRowColAugmented) = (F.n, F.n) +Base.size(F::SparseWithDenseRowColAugmented, i::Integer) = i ≤ 2 ? F.n : 1 +LinearAlgebra.issuccess(F::SparseWithDenseRowColAugmented) = LinearAlgebra.issuccess(F.Kaug) +denserank(F::SparseWithDenseRowColAugmented) = F.r + +_sparse_block(U::AbstractMatrix{T}) where {T} = sparse(U) +function _sparse_block(U::SelectorMatrix{T}) where {T} + r = U.r + return SparseMatrixCSC(U.n, r, collect(1:(r + 1)), collect(1:r), ones(T, r)) +end + +function _augmented_matrix(A::SparseWithDenseRowColMatrix{T}) where {T} + n = size(A, 1) + r = denserank(A) + S = _own_sparse(T, A.S) + if r == 0 + return S + end + Usp = _sparse_block(A.U) # n×r + Vsp = sparse(Matrix{T}(A.V)) # r×n + negI = SparseMatrixCSC{T, Int}(-one(T) * I, r, r) + return [S Usp; Vsp negI] +end + +function _augmented(A::SparseWithDenseRowColMatrix{T}) where {T} + n = size(A, 1) + r = denserank(A) + Maug = _augmented_matrix(A) + Kaug = PureKLU.klu(Maug) # throws SingularException if A is singular + return SparseWithDenseRowColAugmented{T, typeof(Kaug)}(Kaug, n, r, Vector{T}(undef, n + r)) +end + +function LinearAlgebra.ldiv!(F::SparseWithDenseRowColAugmented{T}, b::AbstractVector) where {T} + length(b) == F.n || throw(DimensionMismatch()) + rhs = F.rhs + @inbounds copyto!(view(rhs, 1:F.n), b) + @inbounds for i in (F.n + 1):(F.n + F.r) + rhs[i] = zero(T) + end + PureKLU.solve!(F.Kaug, rhs) + @inbounds copyto!(b, view(rhs, 1:F.n)) + return b +end + +function LinearAlgebra.ldiv!(F::SparseWithDenseRowColAugmented, B::AbstractMatrix) + size(B, 1) == F.n || throw(DimensionMismatch()) + for c in axes(B, 2) + ldiv!(F, view(B, :, c)) + end + return B +end + +LinearAlgebra.ldiv!(y::AbstractVector, F::SparseWithDenseRowColAugmented, b::AbstractVector) = + ldiv!(F, copyto!(y, b)) +LinearAlgebra.ldiv!(Y::AbstractMatrix, F::SparseWithDenseRowColAugmented, B::AbstractMatrix) = + ldiv!(F, copyto!(Y, B)) + +# Adjoint / transpose solve. The transposed bordered system Mᵀ = [Sᵀ Vᵀ; Uᵀ -I] has Schur +# complement Sᵀ + Vᵀ Uᵀ = (S + U V)ᵀ = Aᵀ (and likewise Mᴴ ↔ Aᴴ), so solving Mᵀ[x;y]=[b;0] +# (resp. Mᴴ) with the SAME factorization's transpose/adjoint solve yields x = A⁻ᵀ b (resp. A⁻ᴴ b). +for (Wrap, kluop) in ((AdjointFact, :(F.Kaug')), (TransposeFact, :(transpose(F.Kaug)))) + @eval function LinearAlgebra.ldiv!( + Fw::$Wrap{<:Any, <:SparseWithDenseRowColAugmented{T}}, b::AbstractVector + ) where {T} + F = parent(Fw) + length(b) == F.n || throw(DimensionMismatch()) + rhs = F.rhs + @inbounds copyto!(view(rhs, 1:F.n), b) + @inbounds for i in (F.n + 1):(F.n + F.r) + rhs[i] = zero(T) + end + PureKLU.solve!($kluop, rhs) + @inbounds copyto!(b, view(rhs, 1:F.n)) + return b + end + @eval function LinearAlgebra.ldiv!(Fw::$Wrap{<:Any, <:SparseWithDenseRowColAugmented}, B::AbstractMatrix) + for c in axes(B, 2) + ldiv!(Fw, view(B, :, c)) + end + return B + end +end + +# Complex RHS over a real augmented factorization (real/imag split; see woodbury.jl). +LinearAlgebra.ldiv!(F::SparseWithDenseRowColAugmented{<:Real}, b::AbstractVector{<:Complex}) = + _ldiv_realfact_complex!(F, b) +LinearAlgebra.ldiv!(Fw::AdjointFact{<:Any, <:SparseWithDenseRowColAugmented{<:Real}}, b::AbstractVector{<:Complex}) = + _ldiv_realfact_complex!(Fw, b) +LinearAlgebra.ldiv!(Fw::TransposeFact{<:Any, <:SparseWithDenseRowColAugmented{<:Real}}, b::AbstractVector{<:Complex}) = + _ldiv_realfact_complex!(Fw, b) + +""" + refactor!(F::SparseWithDenseRowColAugmented, A::SparseWithDenseRowColMatrix; check=true) -> F + +Rebuild the augmented system with new numeric values and refactor. When the augmented +sparsity pattern is unchanged this reuses PureKLU's symbolic analysis (`klu!`); otherwise it +re-analyzes. The augmented path is correctness-first, not the optimized refactor path. +""" +function refactor!(F::SparseWithDenseRowColAugmented{T}, A::SparseWithDenseRowColMatrix; check::Bool = true) where {T} + denserank(A) == F.r && size(A, 1) == F.n || + throw(DimensionMismatch("shape changed; rebuild with `factorize`.")) + Maug = _augmented_matrix(A) + if length(SparseArrays.nonzeros(Maug)) == length(F.Kaug.nzval) && + ( + !check || ( + SparseArrays.getcolptr(Maug) == increment(F.Kaug.colptr) && + SparseArrays.rowvals(Maug) == increment(F.Kaug.rowval) + ) + ) + PureKLU.klu!(F.Kaug, SparseArrays.nonzeros(Maug)) + else + F.Kaug = PureKLU.klu(Maug) + end + return F +end + +# 1-based copy of PureKLU's internal 0-based index arrays, for the pattern check above. +increment(v::AbstractVector{<:Integer}) = v .+ one(eltype(v)) + +# The augmented fallback embeds U/V inside one sparse LU and keeps no separate factorization +# of S to reuse, so the Woodbury low-rank fast path does not apply here. +function update_lowrank!(::SparseWithDenseRowColAugmented; kwargs...) + throw(ArgumentError("update_lowrank! is the Woodbury fast path (it reuses S's factorization); \ + the augmented fallback (singular S) has no separate S factorization to reuse — \ + update with `refactor!` or rebuild with `factorize`.")) +end diff --git a/src/detect.jl b/src/detect.jl new file mode 100644 index 0000000..9fe6abd --- /dev/null +++ b/src/detect.jl @@ -0,0 +1,170 @@ +# ------------------ +# Appropriateness detector +# ------------------ + +""" + PeelRecommendation + +The result of [`recommend_lowrank_peel`](@ref): whether wrapping a sparse matrix as a +[`SparseWithDenseRowColMatrix`](@ref) (and using the Woodbury path) is worthwhile, together +with the cheap pattern diagnostics that drove the decision. It carries **no eagerly-built +message string** — construction is allocation-free (only `Bool`/`Int`/`Float64`/`Symbol` +fields); the human-readable explanation is produced lazily by `show`. + +# Fields +* `recommended :: Bool` — use a `SparseWithDenseRowColMatrix` (`true`) or plain sparse LU (`false`) +* `rank :: Int` — proposed low-rank correction size `r` (dense rows + dense cols) +* `code :: Symbol` — reason category, one of `:recommended`, `:uniformly_sparse`, + `:rank_too_large`, `:negligible_fill`, `:too_small`, `:not_square`, `:empty` +* `n :: Int`, `nnz :: Int` +* `dense_rows :: Int`, `dense_cols :: Int` +* `max_row_nnz :: Int`, `max_col_nnz :: Int` +* `row_threshold :: Float64`, `col_threshold :: Float64` +* `peeled_fraction :: Float64` — share of the nonzeros held by the dense rows/cols + +`Bool(rec)` and `rec.recommended` both give the verdict; `rec.rank` the proposed `r`. +""" +struct PeelRecommendation + recommended::Bool + rank::Int + code::Symbol + n::Int + nnz::Int + dense_rows::Int + dense_cols::Int + max_row_nnz::Int + max_col_nnz::Int + row_threshold::Float64 + col_threshold::Float64 + peeled_fraction::Float64 +end + +Base.Bool(rec::PeelRecommendation) = rec.recommended + +# Lazily render the explanation from the stored diagnostics (no string built until shown). +function _peel_reason(rec::PeelRecommendation) + c = rec.code + if c === :not_square + return "matrix is not square ($(rec.n) rows); the peel method targets square systems" + elseif c === :too_small + return "n=$(rec.n) is too small for a low-rank peel to matter; use plain sparse LU" + elseif c === :empty + return "matrix is structurally empty; use plain sparse LU" + elseif c === :uniformly_sparse + return "uniformly sparse: max $(rec.max_row_nnz) nnz/row, max $(rec.max_col_nnz) nnz/col " * + "(thresholds $(round(rec.row_threshold; digits = 1)) / $(round(rec.col_threshold; digits = 1))); " * + "no dense rows/cols — use plain sparse LU" + elseif c === :rank_too_large + return "found r=$(rec.rank) dense rows/cols ($(rec.dense_rows) rows + $(rec.dense_cols) cols); " * + "the low-rank correction is too large relative to n=$(rec.n) — use plain sparse LU" + elseif c === :negligible_fill + return "found r=$(rec.rank) dense rows/cols but they hold only " * + "$(round(100 * rec.peeled_fraction; digits = 1))% of the nonzeros; the bulk barely changes — use plain sparse LU" + elseif c === :recommended + return "found $(rec.dense_rows) dense row(s) (max $(rec.max_row_nnz) nnz) and $(rec.dense_cols) dense col(s) " * + "(max $(rec.max_col_nnz) nnz); peeling rank r=$(rec.rank) removes ~$(round(100 * rec.peeled_fraction; digits = 1))% of the nnz, " * + "leaving a sparse bulk → SparseWithDenseRowColMatrix recommended" + else + return "unknown" + end +end + +function Base.show(io::IO, rec::PeelRecommendation) + return print(io, "PeelRecommendation(recommended=", rec.recommended, ", rank=", rec.rank, ", :", rec.code, ")") +end +function Base.show(io::IO, ::MIME"text/plain", rec::PeelRecommendation) + println( + io, "PeelRecommendation: ", rec.recommended ? "use SparseWithDenseRowColMatrix" : "use plain sparse LU", + " (rank r=", rec.rank, ")" + ) + return print(io, " ", _peel_reason(rec)) +end + +""" + recommend_lowrank_peel(A::AbstractSparseMatrixCSC; + row_factor=8.0, col_factor=8.0, maxrank=32, + min_n=64, abs_floor=4) -> PeelRecommendation + +Cheap, pattern-only (`O(nnz)`, never factorizes) heuristic that decides whether wrapping `A` +as a [`SparseWithDenseRowColMatrix`](@ref) and solving with the Woodbury path is worth it, +versus just using plain sparse LU (e.g. `PureKLU.klu`). Returns a [`PeelRecommendation`](@ref) +(a descriptive struct — no allocating message string is built unless you `show` it). + +The method pays off **iff** a small number `r ≪ n` of rows and/or columns are *much* denser +than the rest, so peeling them into the low-rank correction `U*V` leaves a genuinely sparse, +low-fill bulk `S`. On a uniformly sparse matrix the low-rank part is empty and the method +degenerates to plain LU — then `recommended` is `false`. + +A row (column) is a dense candidate when its stored-entry count exceeds both +`row_factor * median(nnz per row)` (resp. `col_factor`) and the absolute floor `abs_floor`. +The recommendation also requires the candidates to be few (`r ≤ min(maxrank, n ÷ 8)`) and to +hold a meaningful share (≥ 5%) of the nonzeros. + +# Example +```julia +julia> rec = recommend_lowrank_peel(A); + +julia> rec.recommended, rec.rank +(false, 0) +``` +""" +function recommend_lowrank_peel( + A::SparseArrays.AbstractSparseMatrixCSC; + row_factor::Real = 8.0, col_factor::Real = 8.0, + maxrank::Int = 32, min_n::Int = 64, abs_floor::Int = 4 + ) + n, m = size(A) + nnzA = SparseArrays.nnz(A) + + n == m || return PeelRecommendation(false, 0, :not_square, n, nnzA, 0, 0, 0, 0, 0.0, 0.0, 0.0) + n < min_n && return PeelRecommendation(false, 0, :too_small, n, nnzA, 0, 0, 0, 0, 0.0, 0.0, 0.0) + nnzA == 0 && return PeelRecommendation(false, 0, :empty, n, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0) + + colptr = SparseArrays.getcolptr(A) + rows = SparseArrays.rowvals(A) + colcnt = diff(colptr) # nnz per column, O(n) + rowcnt = zeros(Int, n) + @inbounds for k in 1:nnzA + rowcnt[rows[k]] += 1 + end + + row_thresh = max(row_factor * max(_median_int(rowcnt), 1.0), float(abs_floor)) + col_thresh = max(col_factor * max(_median_int(colcnt), 1.0), float(abs_floor)) + dense_rows = count(>(row_thresh), rowcnt) + dense_cols = count(>(col_thresh), colcnt) + r = dense_rows + dense_cols + maxrow, maxcol = maximum(rowcnt), maximum(colcnt) + + r == 0 && return PeelRecommendation( + false, 0, :uniformly_sparse, n, nnzA, 0, 0, maxrow, maxcol, row_thresh, col_thresh, 0.0 + ) + + rank_cap = min(maxrank, n ÷ 8) + r > rank_cap && return PeelRecommendation( + false, r, :rank_too_large, n, nnzA, dense_rows, dense_cols, maxrow, maxcol, row_thresh, col_thresh, 0.0 + ) + + peeled = 0 + @inbounds for j in eachindex(colcnt) + colcnt[j] > col_thresh && (peeled += colcnt[j]) + end + @inbounds for i in eachindex(rowcnt) + rowcnt[i] > row_thresh && (peeled += rowcnt[i]) + end + frac = peeled / nnzA + frac < 0.05 && return PeelRecommendation( + false, r, :negligible_fill, n, nnzA, dense_rows, dense_cols, maxrow, maxcol, row_thresh, col_thresh, frac + ) + + return PeelRecommendation( + true, r, :recommended, n, nnzA, dense_rows, dense_cols, maxrow, maxcol, row_thresh, col_thresh, frac + ) +end + +# Median of a small integer count vector, without depending on Statistics. +function _median_int(v::AbstractVector{<:Integer}) + isempty(v) && return 0.0 + s = sort(v) + L = length(s) + return isodd(L) ? float(s[(L + 1) ÷ 2]) : (s[L ÷ 2] + s[L ÷ 2 + 1]) / 2 +end diff --git a/src/factorize.jl b/src/factorize.jl new file mode 100644 index 0000000..3eebbfa --- /dev/null +++ b/src/factorize.jl @@ -0,0 +1,99 @@ +# ------------------ +# Top-level factorize / lu / \ dispatch +# ------------------ + +const SparseWithDenseRowColFactorization{T} = Union{SparseWithDenseRowColWoodbury{T}, SparseWithDenseRowColAugmented{T}} + +""" + factorize(A::SparseWithDenseRowColMatrix; strategy=:auto, refine=1, auto_fallback=true) -> F + +Factorize `A = S + U V` for repeated solves. Returns an [`SparseWithDenseRowColWoodbury`](@ref) +(the fast path: one sparse LU of `S` plus a small dense correction) or an +[`SparseWithDenseRowColAugmented`](@ref) (the robust bordered-system path). + +* `strategy = :auto` (default): use Woodbury, automatically falling back to the augmented + system if `S` is singular or the `r × r` correction is ill-conditioned. +* `strategy = :woodbury`: force Woodbury; warn on ill-conditioning, error on singular `S`. +* `strategy = :augmented`: force the bordered-system path. +* `refine`: iterative-refinement steps for the Woodbury solve (default `1`; ignored by the + augmented path). +* `auto_fallback`: whether `:auto` may switch to the augmented path. + +The returned `F` **caches the symbolic analysis** (BTF + AMD ordering of `S`). Solve any +number of right-hand sides with `F \\ b` / `ldiv!(F, b)`, and update the numeric values for a +new matrix of the **same sparsity pattern** with [`refactor!`](@ref) / [`lu!`](@ref) — that +reuses the cached symbolic analysis (no re-analysis), which is the ≈7× per-step win in a +Newton / time-stepping loop. Only call `factorize`/`lu` afresh when the pattern changes. +""" +function LinearAlgebra.factorize( + A::SparseWithDenseRowColMatrix; strategy::Symbol = :auto, refine::Integer = 1, + auto_fallback::Bool = true + ) + strategy in (:auto, :woodbury, :augmented) || + throw(ArgumentError("strategy must be :auto, :woodbury or :augmented; got :$strategy")) + + strategy === :augmented && return _augmented(A) + + F = try + _woodbury(A; refine) + catch e + if e isa SingularException && strategy === :auto && auto_fallback + return _augmented(A) + end + rethrow(e) + end + + if F.illconditioned + if strategy === :auto && auto_fallback + return _augmented(A) + else + @warn "SparseWithDenseRowColWoodbury: the r×r correction matrix C is ill-conditioned; the \ + Woodbury solve may lose accuracy. Consider `strategy = :augmented`." + end + end + return F +end + +""" + lu(A::SparseWithDenseRowColMatrix; kwargs...) -> F + +Alias for [`factorize`](@ref). Named `lu` (not `qr`) because the sparse bulk is factored by +an LU-based solver (PureKLU); `qr` is intentionally not provided. +""" +LinearAlgebra.lu(A::SparseWithDenseRowColMatrix; kwargs...) = factorize(A; kwargs...) + +""" + lu!(F, A::SparseWithDenseRowColMatrix; kwargs...) -> F + +Alias for [`refactor!`](@ref) — refactor `F` in place with the new values of `A`. +""" +LinearAlgebra.lu!(F::SparseWithDenseRowColFactorization, A::SparseWithDenseRowColMatrix; kwargs...) = + refactor!(F, A; kwargs...) + +function LinearAlgebra.qr(::SparseWithDenseRowColMatrix) + throw(ArgumentError("SparseWithDenseRowColMatrix is solved by an LU-based factorization; use `factorize`/`lu`, not `qr`.")) +end + +Base.:\(A::SparseWithDenseRowColMatrix, b::AbstractVecOrMat) = factorize(A) \ b + +""" + SparseWithDenseRowColFactorization(; reuse_symbolic=true, check_pattern=true, + strategy=:auto, refine=1) + +A [LinearSolve.jl](https://github.com/SciML/LinearSolve.jl) algorithm that solves +`SparseWithDenseRowColMatrix` systems through LinearSolve's caching interface, mirroring its +`KLUFactorization`: the symbolic analysis is cached in the `LinearCache`, repeated `solve!`s +reuse it, and updating the matrix to new values of the **same sparsity pattern** triggers a +numeric refactorization (via [`refactor!`](@ref)) that reuses the cached symbolic analysis. +It is also the default algorithm `LinearSolve` picks for a `SparseWithDenseRowColMatrix`, so +`solve(LinearProblem(A, b))` uses it automatically. + +* `reuse_symbolic` — reuse the cached symbolic analysis across value changes (set `false` if + the sparsity pattern may change between solves). +* `check_pattern` — validate the pattern on each refactor (`false` skips the check; may error + if the pattern actually changes). +* `strategy`, `refine` — forwarded to [`factorize`](@ref). + +Only available when `LinearSolve` is loaded (provided by a package extension). +""" +function SparseWithDenseRowColFactorization end diff --git a/src/matrix.jl b/src/matrix.jl new file mode 100644 index 0000000..851b30f --- /dev/null +++ b/src/matrix.jl @@ -0,0 +1,243 @@ +# ------------------ +# SparseWithDenseRowColMatrix +# ------------------ + +""" + SparseWithDenseRowColMatrix(S::AbstractSparseMatrixCSC, U::AbstractMatrix, V::AbstractMatrix) + SparseWithDenseRowColMatrix(S::AbstractSparseMatrixCSC, fill::AbstractMatrix; replace=false) + SparseWithDenseRowColMatrix{T}(S, U, V) + +A square matrix represented as a **sparse part plus a low-rank dense correction** + + A = S + U * V + +where `S` is an `n × n` `SparseMatrixCSC`, `U` is `n × r` and `V` is `r × n` dense, with +`r ≪ n` the *almost-sparse rank*. This is the sparse-matrix analogue of an +`AlmostBandedMatrix`: the banded "bulk" is replaced by a general sparse bulk, and the +low-rank `fill` (which in the banded case sits in the top rows as boundary conditions) is +kept as the explicit outer product `U * V` rather than folded into a band. + +The structure is exactly the one the Sherman–Morrison–Woodbury identity exploits, so +[`factorize`](@ref)/`\\` solve `A x = b` by factoring only the sparse `S` (with +[`PureKLU`](https://github.com/SciML/PureKLU.jl)) and applying a small `r × r` dense +correction — see [`SparseWithDenseRowColWoodbury`](@ref). When `S` is singular but `A` is not, an +[`SparseWithDenseRowColAugmented`](@ref) fallback is used automatically. + +# Constructors + +* `SparseWithDenseRowColMatrix(S, U, V)` — the general additive low-rank form `A = S + U*V`. +* `SparseWithDenseRowColMatrix(S, fill)` — the boundary-condition convenience: `fill` is an `r × n` + dense block placed in the **top `r` rows** via an implicit [`SelectorMatrix`](@ref) + `U = [I_r; 0]`. + * `replace = false` (default): `fill` is *added* to the top `r` rows, `A = S + [I_r;0]*fill`. + * `replace = true`: the top `r` rows of `A` *become* `fill` (FABM-style row replacement), + implemented as `V = fill - S[1:r, :]` so that `S` itself is left untouched and stays + nonsingular for the Woodbury path. + +```jldoctest +julia> using SparseWithDenseRowColMatrices, SparseArrays + +julia> S = sparse(1.0I, 4, 4); fill = reshape(Float64[1,2,3,4], 1, 4); + +julia> A = SparseWithDenseRowColMatrix(S, fill); # rank-1 correction in row 1 + +julia> Matrix(A)[1, :] +4-element Vector{Float64}: + 2.0 + 2.0 + 3.0 + 4.0 +``` +""" +struct SparseWithDenseRowColMatrix{ + T, + TS <: SparseArrays.AbstractSparseMatrixCSC{T}, + TU <: AbstractMatrix{T}, + TV <: AbstractMatrix{T}, + } <: AbstractMatrix{T} + S::TS + U::TU + V::TV + + function SparseWithDenseRowColMatrix{T, TS, TU, TV}(S, U, V) where {T, TS, TU, TV} + n = size(S, 1) + size(S, 2) == n || + throw(ArgumentError("the sparse part of an SparseWithDenseRowColMatrix must be square; got size $(size(S))")) + size(U, 1) == n || + throw(DimensionMismatch("size(U, 1) = $(size(U, 1)) must equal n = $n")) + size(V, 2) == n || + throw(DimensionMismatch("size(V, 2) = $(size(V, 2)) must equal n = $n")) + size(U, 2) == size(V, 1) || + throw(DimensionMismatch("low-rank factor mismatch: size(U, 2) = $(size(U, 2)) ≠ size(V, 1) = $(size(V, 1))")) + return new{T, TS, TU, TV}(S, U, V) + end +end + +_to_eltype(::Type{T}, A::AbstractMatrix{T}) where {T} = A +_to_eltype(::Type{T}, A::AbstractMatrix) where {T} = convert(AbstractMatrix{T}, A) +_to_eltype(::Type{T}, A::SelectorMatrix) where {T} = SelectorMatrix{T}(A.n, A.r) + +function SparseWithDenseRowColMatrix( + S::SparseArrays.AbstractSparseMatrixCSC, U::AbstractMatrix, V::AbstractMatrix + ) + T = promote_type(eltype(S), eltype(U), eltype(V)) + return SparseWithDenseRowColMatrix{T}(S, U, V) +end + +function SparseWithDenseRowColMatrix{T}( + S::SparseArrays.AbstractSparseMatrixCSC, U::AbstractMatrix, V::AbstractMatrix + ) where {T} + Sc, Uc, Vc = _to_eltype(T, S), _to_eltype(T, U), _to_eltype(T, V) + return SparseWithDenseRowColMatrix{T, typeof(Sc), typeof(Uc), typeof(Vc)}(Sc, Uc, Vc) +end + +# Boundary-condition convenience: `fill` is an r×n block in the top r rows via a selector. +function SparseWithDenseRowColMatrix( + S::SparseArrays.AbstractSparseMatrixCSC, fill::AbstractMatrix; replace::Bool = false + ) + n = size(S, 2) + r = size(fill, 1) + size(fill, 2) == n || + throw(DimensionMismatch("fill has $(size(fill, 2)) columns but the sparse part has $n")) + T = promote_type(eltype(S), eltype(fill)) + U = SelectorMatrix{T}(n, r) + V = if replace + # A's top r rows become `fill`; leave S (hence its nonsingularity) untouched by + # absorbing the original top rows into the correction. + Vd = Matrix{T}(fill) + @inbounds for j in 1:n, i in 1:r + Vd[i, j] -= S[i, j] + end + Vd + else + _to_eltype(T, fill) + end + return SparseWithDenseRowColMatrix{T}(S, U, V) +end + +# ------------------ +# Accessors (FABM parity) +# ------------------ + +""" + sparsepart(A::SparseWithDenseRowColMatrix) + +The sparse bulk `S` (the analogue of `bandpart` for an `AlmostBandedMatrix`). +""" +@inline sparsepart(A::SparseWithDenseRowColMatrix) = A.S + +""" + fillpart(A::SparseWithDenseRowColMatrix) + +The `r × n` right low-rank factor `V` — the dense "fill" rows (the analogue of `fillpart` +for an `AlmostBandedMatrix`). +""" +@inline fillpart(A::SparseWithDenseRowColMatrix) = A.V + +""" + lowrankfactors(A::SparseWithDenseRowColMatrix) -> (U, V) + +The pair `(U, V)` whose product `U * V` is the low-rank correction added to the sparse part. +""" +@inline lowrankfactors(A::SparseWithDenseRowColMatrix) = (A.U, A.V) + +""" + denserank(A::SparseWithDenseRowColMatrix) + +The rank `r` of the low-rank correction (analogue of `almostbandedrank`). +""" +@inline denserank(A::SparseWithDenseRowColMatrix) = size(A.V, 1) + +""" + exclusive_sparsepart(A::SparseWithDenseRowColMatrix) + +A view of the rows of the sparse part that the low-rank correction does **not** touch. +Only defined when the left factor is a [`SelectorMatrix`](@ref) (the boundary-condition +case), where the correction is confined to the top `r` rows; otherwise the correction is +not row-localized and this throws. +""" +function exclusive_sparsepart(A::SparseWithDenseRowColMatrix) + A.U isa SelectorMatrix || + throw(ArgumentError("exclusive_sparsepart is only defined when the low-rank left factor is a SelectorMatrix; use `lowrankfactors` instead.")) + r = denserank(A) + return @view A.S[(r + 1):end, :] +end + +# ------------------ +# AbstractArray interface +# ------------------ + +@inline Base.size(A::SparseWithDenseRowColMatrix) = size(A.S) +@inline Base.eltype(::SparseWithDenseRowColMatrix{T}) where {T} = T +Base.IndexStyle(::Type{<:SparseWithDenseRowColMatrix}) = IndexCartesian() + +@inline _lowrank_entry(U::SelectorMatrix, V, i::Int, j::Int) = + i ≤ U.r ? @inbounds(V[i, j]) : zero(eltype(V)) +@inline function _lowrank_entry(U, V, i::Int, j::Int) + acc = zero(promote_type(eltype(U), eltype(V))) + @inbounds for k in 1:size(U, 2) + acc += U[i, k] * V[k, j] + end + return acc +end + +Base.@propagate_inbounds function Base.getindex(A::SparseWithDenseRowColMatrix, i::Int, j::Int) + @boundscheck checkbounds(A, i, j) + return @inbounds A.S[i, j] + _lowrank_entry(A.U, A.V, i, j) +end + +""" + setindex!(A::SparseWithDenseRowColMatrix, v, i, j) + +Set the assembled entry `A[i,j]` to `v`, so that `A[i,j]` reads back `v` (the +`AbstractArray` round-trip). The write is absorbed into the **sparse part** by storing +`S[i,j] = v - (U*V)[i,j]`; the low-rank correction `U*V` is left unchanged. To edit the +low-rank part instead, mutate `fillpart(A)` / `lowrankfactors(A)` directly. + +Note this can introduce a stored entry into `S` at `(i,j)` (as any sparse `setindex!` +does), and on a low-rank-covered row stores the value minus the correction. +""" +Base.@propagate_inbounds function Base.setindex!(A::SparseWithDenseRowColMatrix, v, i::Int, j::Int) + @boundscheck checkbounds(A, i, j) + @inbounds A.S[i, j] = v - _lowrank_entry(A.U, A.V, i, j) + return v +end + +function Base.similar(A::SparseWithDenseRowColMatrix, ::Type{T}) where {T} + return SparseWithDenseRowColMatrix{T}(similar(A.S, T), _similar_U(A.U, T), similar(A.V, T)) +end +_similar_U(U::SelectorMatrix, ::Type{T}) where {T} = SelectorMatrix{T}(U.n, U.r) +_similar_U(U, ::Type{T}) where {T} = similar(U, T) + +function Base.copy(A::SparseWithDenseRowColMatrix) + return SparseWithDenseRowColMatrix(copy(A.S), _copy_U(A.U), copy(A.V)) +end +_copy_U(U::SelectorMatrix{T}) where {T} = SelectorMatrix{T}(U.n, U.r) +_copy_U(U) = copy(U) + +function Base.fill!(A::SparseWithDenseRowColMatrix, x) + fill!(SparseArrays.nonzeros(A.S), x) # structural nonzeros only — keeps the pattern + A.U isa SelectorMatrix || fill!(A.U, x) + fill!(A.V, x) + return A +end + +function Base.Matrix(A::SparseWithDenseRowColMatrix{T}) where {T} + M = Matrix(A.S) + mul!(M, _densify(A.U), A.V, one(T), one(T)) + return M +end +_densify(U::AbstractMatrix) = U +_densify(U::SelectorMatrix{T}) where {T} = materialize_U!(Matrix{T}(undef, U.n, U.r), U) + +# ------------------ +# Pretty printing +# ------------------ + +function Base.array_summary(io::IO, A::SparseWithDenseRowColMatrix{T}, inds::Tuple{Vararg{Base.OneTo}}) where {T} + print( + io, Base.dims2string(length.(inds)), " SparseWithDenseRowColMatrix{$T} with ", + SparseArrays.nnz(A.S), " stored entries and fill rank ", denserank(A) + ) + return +end diff --git a/src/matvec.jl b/src/matvec.jl new file mode 100644 index 0000000..9359f9a --- /dev/null +++ b/src/matvec.jl @@ -0,0 +1,117 @@ +# ------------------ +# SelectorMatrix products ( U = [I_r; 0] ) +# ------------------ +# U*w places w (length r / r rows) into the top r entries/rows and zeros the rest. +# Split into vector / matrix methods so each is strictly more specific than the generic +# LinearAlgebra `mul!` (which would otherwise be ambiguous on the SelectorMatrix argument). + +function _selector_mul!(y, U::SelectorMatrix, w, α::Number, β::Number) + size(w, 1) == U.r || + throw(DimensionMismatch("SelectorMatrix has $(U.r) columns, operand has $(size(w, 1)) rows")) + size(y, 1) == U.n || + throw(DimensionMismatch("output has $(size(y, 1)) rows, expected $(U.n)")) + r, n = U.r, U.n + if iszero(β) + @inbounds for c in axes(w, 2) + for i in 1:r + y[i, c] = α * w[i, c] + end + for i in (r + 1):n + y[i, c] = zero(eltype(y)) + end + end + else + @inbounds for c in axes(w, 2) + for i in 1:r + y[i, c] = α * w[i, c] + β * y[i, c] + end + for i in (r + 1):n + y[i, c] = β * y[i, c] + end + end + end + return y +end + +# Only the *vector* method is registered as `LinearAlgebra.mul!` (it is strictly more +# specific than the generic matvec and so unambiguous, and it is what the polymorphic +# `_applyA!` calls when `U` may be a selector). The matrix case is reached only through the +# internal calls below, which dispatch to `_selector_mul!` directly — registering it as +# `mul!` would clash with LinearAlgebra's `mul!(::AbstractMatrix, ::AbstractMatrix, ::Diagonal/Triangular)`. +LinearAlgebra.mul!(y::AbstractVector, U::SelectorMatrix, w::AbstractVector, α::Number, β::Number) = + _selector_mul!(y, U, w, α, β) + +function Base.:*(U::SelectorMatrix{T}, w::AbstractVector) where {T} + y = Vector{promote_type(T, eltype(w))}(undef, U.n) + return _selector_mul!(y, U, w, true, false) +end +function Base.:*(U::SelectorMatrix{T}, W::AbstractMatrix) where {T} + Y = Matrix{promote_type(T, eltype(W))}(undef, U.n, size(W, 2)) + return _selector_mul!(Y, U, W, true, false) +end + +# ------------------ +# SparseWithDenseRowColMatrix * vector / matrix: A = S + U*V +# ------------------ + +# `y .+= c * U[:, k]` — alloc-free for both the selector and dense cases. +@inline function _axpy_col!(y::AbstractVector, U::SelectorMatrix, k::Int, c) + @inbounds y[k] += c + return y +end +@inline function _axpy_col!(y::AbstractVector, U::AbstractMatrix, k::Int, c) + @inbounds for i in eachindex(y) + y[i] += c * U[i, k] + end + return y +end + +""" + mul!(y, A::SparseWithDenseRowColMatrix, x, α, β) + +Five-argument multiply `y .= α*(A*x) + β*y`. The vector path is allocation-free: the +sparse `S*x` uses `SparseArrays`' in-place kernel and the rank-`r` correction is applied as +`r` scaled column updates without forming the intermediate `V*x`. +""" +function LinearAlgebra.mul!( + y::AbstractVector, A::SparseWithDenseRowColMatrix, x::AbstractVector, α::Number, β::Number + ) + mul!(y, A.S, x, α, β) + U, V = A.U, A.V + @inbounds for k in axes(V, 1) + sk = zero(promote_type(eltype(V), eltype(x))) + for j in eachindex(x) + sk += V[k, j] * x[j] + end + _axpy_col!(y, U, k, α * sk) + end + return y +end +LinearAlgebra.mul!(y::AbstractVector, A::SparseWithDenseRowColMatrix, x::AbstractVector) = + mul!(y, A, x, true, false) + +function Base.:*(A::SparseWithDenseRowColMatrix, x::AbstractVector) + T = promote_type(eltype(A), eltype(x)) + y = Vector{T}(undef, size(A, 1)) + return mul!(y, A, x, one(T), zero(T)) +end + +# Matrix RHS: forming the r×ncols block V*X is cheap (r ≪ n) and not the solve hot path. +function LinearAlgebra.mul!( + Y::AbstractMatrix, A::SparseWithDenseRowColMatrix, X::AbstractMatrix, α::Number, β::Number + ) + mul!(Y, A.S, X, α, β) + W = A.V * X # r × ncols + _add_lowrank!(Y, A.U, W, α) # Y .+= α * U*(V*X) + return Y +end +_add_lowrank!(Y, U::SelectorMatrix, W, α) = _selector_mul!(Y, U, W, α, true) +_add_lowrank!(Y, U, W, α) = mul!(Y, U, W, α, true) +LinearAlgebra.mul!(Y::AbstractMatrix, A::SparseWithDenseRowColMatrix, X::AbstractMatrix) = + mul!(Y, A, X, true, false) + +function Base.:*(A::SparseWithDenseRowColMatrix, X::AbstractMatrix) + T = promote_type(eltype(A), eltype(X)) + Y = Matrix{T}(undef, size(A, 1), size(X, 2)) + return mul!(Y, A, X, one(T), zero(T)) +end diff --git a/src/woodbury.jl b/src/woodbury.jl new file mode 100644 index 0000000..1e5a6ea --- /dev/null +++ b/src/woodbury.jl @@ -0,0 +1,341 @@ +# ------------------ +# Woodbury factorization (primary path) +# ------------------ +# +# A = S + U V +# A⁻¹ b = S⁻¹b − Z C⁻¹ (V S⁻¹b), Z = S⁻¹U (n×r), C = I_r + V Z (r×r) +# +# Factor S once with PureKLU; precompute Z (one multi-RHS solve) and lu(C). Each subsequent +# solve costs one sparse solve + an r×r dense solve. `refactor!` reuses S's symbolic +# analysis (PureKLU `klu!`) so a Newton sweep (same pattern, new values) never re-analyzes. + +""" + SparseWithDenseRowColWoodbury{T} <: LinearAlgebra.Factorization{T} + +Sherman–Morrison–Woodbury factorization of an [`SparseWithDenseRowColMatrix`](@ref) `A = S + U V`, +built from a single [`PureKLU`](https://github.com/SciML/PureKLU.jl) LU factorization of the +sparse part `S` plus a dense `r × r` correction `C = I + V S⁻¹U`. Created by +[`factorize`](@ref)/[`lu`](@ref) (the default strategy when `S` is nonsingular). Solve with +`\\`/`ldiv!`; update in place for a new set of numeric values with [`refactor!`](@ref). + +Valid only when both `S` and `C` are nonsingular; [`factorize`](@ref) falls back to +[`SparseWithDenseRowColAugmented`](@ref) otherwise. The forward error of the Woodbury solve scales +with `κ(S)·κ(C)` rather than `κ(A)`, so one step of iterative refinement (`refine` keyword, +default `1`) is applied to restore backward stability — this is allocation-free, reusing the +cached factors. +""" +mutable struct SparseWithDenseRowColWoodbury{T, KF, TS, TU, TV, CF} <: LinearAlgebra.Factorization{T} + Sfact::KF # PureKLU.KLUFactorization of S (symbolic reused across refactors) + Sown::TS # owned SparseMatrixCSC copy of S — values for the residual matvec + U::TU # n×r + V::TV # r×n + Z::Matrix{T} # n×r = S⁻¹U, doubles as the multi-RHS solve! buffer (unit stride) + C::Matrix{T} # r×r scratch holding (after lu!) the LU factors of I + V*Z + Cfact::CF # lu!(C) + ywork::Vector{T} # length n — holds S⁻¹b + tr::Vector{T} # length r — V*y and C⁻¹(·) + res::Vector{T} # length n — structured residual / correction + borig::Vector{T} # length n — saved RHS for iterative refinement + r::Int + refine::Int + illconditioned::Bool +end + +function SparseWithDenseRowColWoodbury( + Sfact, Sown, U, V, Z::Matrix{T}, C, Cfact, ywork, tr, res, borig, r, refine, ill + ) where {T} + return SparseWithDenseRowColWoodbury{ + T, typeof(Sfact), typeof(Sown), typeof(U), typeof(V), typeof(Cfact), + }(Sfact, Sown, U, V, Z, C, Cfact, ywork, tr, res, borig, r, refine, ill) +end + +Base.size(F::SparseWithDenseRowColWoodbury) = (size(F.Sown, 1), size(F.Sown, 2)) +Base.size(F::SparseWithDenseRowColWoodbury, i::Integer) = size(F.Sown, i) +LinearAlgebra.issuccess(F::SparseWithDenseRowColWoodbury) = + LinearAlgebra.issuccess(F.Sfact) && LinearAlgebra.issuccess(F.Cfact) +denserank(F::SparseWithDenseRowColWoodbury) = F.r + +# Underlying-float type used to pick conditioning / refinement tolerances. +_tol_float(::Type{T}) where {T <: AbstractFloat} = T +_tol_float(::Type{Complex{T}}) where {T <: AbstractFloat} = T +_tol_float(::Type{T}) where {T} = Float64 # ForwardDiff.Dual, Rational, … + +# Build a freshly-owned SparseMatrixCSC{T,Ti} (Ti a KLU index type) we are free to mutate. +# `Int` is always a valid KLU index type on a given build (Int32 on 32-bit, Int64 on 64-bit), +# so it is the safe fallback when S's index type is not accepted by PureKLU on this platform. +function _own_sparse(::Type{T}, S::SparseArrays.AbstractSparseMatrixCSC) where {T} + Si = SparseMatrixCSC(S) + Ti = eltype(SparseArrays.getcolptr(Si)) + if Ti <: PureKLU.KLUITypes + return eltype(Si) === T ? copy(Si) : SparseMatrixCSC{T, Ti}(Si) + else + return SparseMatrixCSC{T, Int}(Si) + end +end + +# Z ← S⁻¹U (reuses Sfact's current numeric factorization). +function _recompute_Z!(F::SparseWithDenseRowColWoodbury) + materialize_U!(F.Z, F.U) + PureKLU.solve!(F.Sfact, F.Z) + return F +end +# C ← lu(I + V*Z) (assumes F.Z is current). +function _recompute_C!(F::SparseWithDenseRowColWoodbury{T}) where {T} + if F.r > 0 + mul!(F.C, F.V, F.Z) + @inbounds for i in 1:F.r + F.C[i, i] += one(T) + end + end + F.Cfact = lu!(F.C; check = false) + LinearAlgebra.issuccess(F.Cfact) || throw(SingularException(0)) + return F +end +# Z then C. Assumes Sfact has just been (re)factored. +_recompute_correction!(F::SparseWithDenseRowColWoodbury) = _recompute_C!(_recompute_Z!(F)) + +function _woodbury(A::SparseWithDenseRowColMatrix{T}; refine::Integer = 1) where {T} + n = size(A, 1) + r = denserank(A) + Sown = _own_sparse(T, A.S) + Sfact = PureKLU.klu(Sown) # throws SingularException if S is singular + # Own copies of the low-rank factors (S is already copied via `_own_sparse`); this keeps + # the factorization independent of later in-place mutation of the input's U/V, matching + # the augmented path. SelectorMatrix is immutable, so `_copy_U` just rebuilds it. + U, V = _copy_U(A.U), copy(A.V) + Z = Matrix{T}(undef, n, r) + materialize_U!(Z, U) + PureKLU.solve!(Sfact, Z) # Z = S⁻¹U (one multi-RHS solve) + C = Matrix{T}(undef, r, r) + nrmC = zero(real(T)) + if r > 0 + mul!(C, V, Z) + @inbounds for i in 1:r + C[i, i] += one(T) + end + nrmC = opnorm(C, 1) + end + Cfact = lu!(C; check = false) + LinearAlgebra.issuccess(Cfact) || throw(SingularException(0)) + ill = r > 0 && nrmC * opnorm(inv(Cfact), 1) > inv(sqrt(eps(_tol_float(T)))) + return SparseWithDenseRowColWoodbury( + Sfact, Sown, U, V, Z, C, Cfact, + Vector{T}(undef, n), Vector{T}(undef, r), Vector{T}(undef, n), Vector{T}(undef, n), + r, Int(refine), ill + ) +end + +# ------------------ +# Solve +# ------------------ + +# b := A⁻¹ b via Woodbury (no refinement). Allocation-free. +@views function _woodbury_solve!(F::SparseWithDenseRowColWoodbury{T}, b::AbstractVector) where {T} + copyto!(F.ywork, b) + PureKLU.solve!(F.Sfact, F.ywork) # ywork = S⁻¹b + if F.r > 0 + mul!(F.tr, F.V, F.ywork) # tr = V S⁻¹b + ldiv!(F.Cfact, F.tr) # tr = C⁻¹ V S⁻¹b + copyto!(b, F.ywork) + mul!(b, F.Z, F.tr, -one(T), one(T)) # b = S⁻¹b - Z C⁻¹ V S⁻¹b + else + copyto!(b, F.ywork) + end + return b +end + +# res := A*x using the owned sparse part and the low-rank factors. Allocation-free. +function _applyA!(F::SparseWithDenseRowColWoodbury{T}, res::AbstractVector, x::AbstractVector) where {T} + mul!(res, F.Sown, x) + if F.r > 0 + mul!(F.tr, F.V, x) + mul!(res, F.U, F.tr, one(T), one(T)) + end + return res +end + +function LinearAlgebra.ldiv!(F::SparseWithDenseRowColWoodbury{T}, b::AbstractVector) where {T} + length(b) == size(F, 1) || throw(DimensionMismatch()) + F.refine > 0 && copyto!(F.borig, b) + _woodbury_solve!(F, b) + @inbounds for _ in 1:F.refine + _applyA!(F, F.res, b) # res = A x + @. F.res = F.borig - F.res # res = b - A x (structured residual) + _woodbury_solve!(F, F.res) # res = A⁻¹ residual + @. b = b + F.res # x += correction + end + return b +end + +# Matrix RHS: solve column by column (reuses the alloc-free vector path + refinement). +function LinearAlgebra.ldiv!(F::SparseWithDenseRowColWoodbury, B::AbstractMatrix) + size(B, 1) == size(F, 1) || throw(DimensionMismatch()) + for c in axes(B, 2) + ldiv!(F, view(B, :, c)) + end + return B +end + +# Out-of-place forms (split vector/matrix so each is strictly more specific than the generic +# `ldiv!(::AbstractVecOrMat, ::Factorization, ::AbstractVecOrMat)`). `\` is provided by +# LinearAlgebra's generic `Factorization` fallback, which routes here via `ldiv!` and also +# supplies the complex-RHS-on-real-factorization split. +LinearAlgebra.ldiv!(y::AbstractVector, F::SparseWithDenseRowColWoodbury, b::AbstractVector) = + ldiv!(F, copyto!(y, b)) +LinearAlgebra.ldiv!(Y::AbstractMatrix, F::SparseWithDenseRowColWoodbury, B::AbstractMatrix) = + ldiv!(F, copyto!(Y, B)) + +# ------------------ +# Adjoint / transpose solve (Aᴴ = Sᴴ + Vᴴ Uᴴ): A⁻ᴴ b = S⁻ᴴ(b − Vᴴ C⁻ᴴ Zᴴ b) +# ------------------ +# `F'` / `transpose(F)` on a Factorization wrap it in Adjoint/TransposeFactorization (newer +# Julia) or Adjoint/Transpose (older). `\` for those wrappers is provided generically by +# LinearAlgebra and routes to the `ldiv!` methods below. + +const AdjointFact = + isdefined(LinearAlgebra, :AdjointFactorization) ? LinearAlgebra.AdjointFactorization : LinearAlgebra.Adjoint +const TransposeFact = + isdefined(LinearAlgebra, :TransposeFactorization) ? LinearAlgebra.TransposeFactorization : LinearAlgebra.Transpose + +for (Wrap, op, klu_adj) in ( + (AdjointFact, :adjoint, :(F.Sfact')), + (TransposeFact, :transpose, :(transpose(F.Sfact))), + ) + @eval function LinearAlgebra.ldiv!( + Fw::$Wrap{<:Any, <:SparseWithDenseRowColWoodbury{T}}, b::AbstractVector + ) where {T} + F = parent(Fw) + length(b) == size(F, 1) || throw(DimensionMismatch()) + if F.r > 0 + mul!(F.tr, $op(F.Z), b) # tr = Zᴴ b + ldiv!($op(F.Cfact), F.tr) # tr = C⁻ᴴ tr + mul!(F.res, $op(F.V), F.tr) # res = Vᴴ tr + @. F.res = b - F.res + else + copyto!(F.res, b) + end + PureKLU.solve!($klu_adj, F.res) # res = S⁻ᴴ(…) + copyto!(b, F.res) + return b + end + @eval function LinearAlgebra.ldiv!( + Fw::$Wrap{<:Any, <:SparseWithDenseRowColWoodbury}, B::AbstractMatrix + ) + for c in axes(B, 2) + ldiv!(Fw, view(B, :, c)) + end + return B + end +end + +# ------------------ +# Complex RHS over a real factorization: solve real and imaginary parts separately, since +# the cached scratch buffers are real-typed. Mirrors LinearAlgebra's dense LU and PureKLU. +# ------------------ + +function _ldiv_realfact_complex!(Ft, b::AbstractVector{<:Complex}) + br = real.(b) + bi = imag.(b) + ldiv!(Ft, br) + ldiv!(Ft, bi) + @inbounds @. b = complex(br, bi) + return b +end + +LinearAlgebra.ldiv!(F::SparseWithDenseRowColWoodbury{<:Real}, b::AbstractVector{<:Complex}) = + _ldiv_realfact_complex!(F, b) +LinearAlgebra.ldiv!(Fw::AdjointFact{<:Any, <:SparseWithDenseRowColWoodbury{<:Real}}, b::AbstractVector{<:Complex}) = + _ldiv_realfact_complex!(Fw, b) +LinearAlgebra.ldiv!(Fw::TransposeFact{<:Any, <:SparseWithDenseRowColWoodbury{<:Real}}, b::AbstractVector{<:Complex}) = + _ldiv_realfact_complex!(Fw, b) + +# ------------------ +# Refactor (Newton hot path): reuse symbolic analysis, new numeric values +# ------------------ + +""" + refactor!(F::SparseWithDenseRowColWoodbury, A::SparseWithDenseRowColMatrix; check=true) -> F + refactor!(F::SparseWithDenseRowColWoodbury, S::AbstractSparseMatrixCSC; fill=nothing, check=true) -> F + refactor!(F::SparseWithDenseRowColWoodbury, Snzval::AbstractVector; fill=nothing) -> F + +Update the factorization `F` in place with new numeric values that share the **same +sparsity pattern** as the matrix `F` was built from, reusing PureKLU's symbolic analysis and +preallocated workspace (no re-analysis). This is the hot path for Newton iterations on a +boundary-value problem, where the Jacobian's pattern is fixed and only its values change. + +The cheapest form takes the raw `nzval` vector of the new sparse part (no pattern check — +the caller guarantees the pattern is unchanged); pass `fill` to also update the low-rank +right factor `V`. The `SparseMatrixCSC`/`SparseWithDenseRowColMatrix` forms validate the pattern when +`check=true`. +""" +function refactor!(F::SparseWithDenseRowColWoodbury, Snzval::AbstractVector; fill = nothing) + nz = SparseArrays.nonzeros(F.Sown) + length(Snzval) == length(nz) || + throw(DimensionMismatch("new nzval has length $(length(Snzval)); pattern has $(length(nz))")) + copyto!(nz, Snzval) + PureKLU.klu!(F.Sfact, nz) # reuse symbolic + workspace + fill !== nothing && copyto!(F.V, fill) + _recompute_correction!(F) + return F +end + +function refactor!( + F::SparseWithDenseRowColWoodbury, S::SparseArrays.AbstractSparseMatrixCSC; + fill = nothing, check::Bool = true + ) + if check + _same_pattern(F.Sown, S) || + throw(ArgumentError("refactor! requires the new sparse part to have the same sparsity pattern.")) + end + return refactor!(F, SparseArrays.nonzeros(S); fill) +end + +function refactor!(F::SparseWithDenseRowColWoodbury, A::SparseWithDenseRowColMatrix; check::Bool = true) + size(A.V) == size(F.V) || + throw(DimensionMismatch("the fill rank/shape changed; rebuild with `factorize`.")) + if F.U isa SelectorMatrix + # A selector U has no stored values to update; refuse to silently ignore a switch to + # a different (e.g. dense) left factor, which would feed a stale U to the solve. + (A.U isa SelectorMatrix && size(A.U) == size(F.U)) || + throw(ArgumentError("refactor! cannot change the left factor U from a SelectorMatrix to a dense/incompatible one; rebuild with `factorize`.")) + else + copyto!(F.U, A.U) + end + return refactor!(F, A.S; fill = A.V, check) +end + +function _same_pattern(A::SparseArrays.AbstractSparseMatrixCSC, B::SparseArrays.AbstractSparseMatrixCSC) + return size(A) == size(B) && + SparseArrays.getcolptr(A) == SparseArrays.getcolptr(B) && + SparseArrays.rowvals(A) == SparseArrays.rowvals(B) +end + +""" + update_lowrank!(F::SparseWithDenseRowColWoodbury; U=nothing, V=nothing) -> F + +Update only the low-rank factors of `F`, **reusing the cached factorization of the sparse +part `S`** (no re-factorization of `S`). This is the Sherman–Morrison–Woodbury fast path for +a fixed sparse base solved repeatedly under a *changing low-rank correction* — varying +boundary conditions / coupling, adding or removing a constraint, sensitivity or parameter +sweeps. Factor `S` once with [`factorize`](@ref), then per step `update_lowrank!(F; V=…)` +and solve. + +Passing `V` (the `r × n` dense factor) recomputes only the `r × r` correction `C = I + V Z`. +Passing `U` additionally recomputes `Z = S⁻¹U` (one multi-RHS solve against the *cached* `S` +factorization), so omit `U` when the left factor is unchanged. The sparsity pattern and rank +are fixed; to change `S`'s values use [`refactor!`](@ref), and to change the rank rebuild +with [`factorize`](@ref). +""" +function update_lowrank!(F::SparseWithDenseRowColWoodbury; U = nothing, V = nothing) + if U !== nothing + if F.U isa SelectorMatrix + (U isa SelectorMatrix && size(U) == size(F.U)) || + throw(ArgumentError("the left factor U is a SelectorMatrix; pass a matching SelectorMatrix or rebuild with `factorize`.")) + else + copyto!(F.U, U) + end + _recompute_Z!(F) # reuse S's cached factorization — no klu!(S) + end + V !== nothing && copyto!(F.V, V) + return _recompute_C!(F) +end diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..fc11674 --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,43 @@ +using SafeTestsets, Test + +@testset "SparseWithDenseRowColMatrices" begin + @safetestset "Constructors & accessors" begin + include("test_matrix.jl") + end + @safetestset "Matrix–vector / matrix multiply" begin + include("test_matvec.jl") + end + @safetestset "Woodbury factorization & solve" begin + include("test_woodbury.jl") + end + @safetestset "Augmented fallback" begin + include("test_augmented.jl") + end + @safetestset "Refactorization (Newton path)" begin + include("test_refactor.jl") + end + @safetestset "Generic eltypes" begin + include("test_generic_eltypes.jl") + end + @safetestset "Edge cases & allocations" begin + include("test_edgecases.jl") + end + @safetestset "Appropriateness detector" begin + include("test_detect.jl") + end + @safetestset "LinearSolve caching integration" begin + include("test_linearsolve.jl") + end + @safetestset "Arrow / bordered matrices" begin + include("test_arrow.jl") + end + @safetestset "Aqua quality assurance" begin + using Aqua, SparseWithDenseRowColMatrices + # `ambiguities = false` (as in FastAlmostBandedMatrices.jl): the only remaining + # ambiguities are inert cross-products of our matrix types with LinearAlgebra's + # structured matrices (`Diagonal`/`Bidiagonal`/`Tridiagonal`/`Triangular`) in + # `*`/`mul!` — combinations that never arise in use, and where both candidate + # methods compute the same product anyway. + Aqua.test_all(SparseWithDenseRowColMatrices; ambiguities = false, piracies = true) + end +end diff --git a/test/test_arrow.jl b/test/test_arrow.jl new file mode 100644 index 0000000..ff77f2d --- /dev/null +++ b/test/test_arrow.jl @@ -0,0 +1,85 @@ +include("testutils.jl") +using Test, SparseArrays, LinearAlgebra, LinearSolve + +# A classic arrowhead matrix = diagonal bulk + dense last row + dense last column, which is a +# rank-2 dense-row/col correction: A = Diagonal(d) + U*V with U (n×2), V (2×n). +function arrowhead(n; seed = 1) + rng = MersenneTwister(seed) + d = 3 .+ rand(rng, n) + c = randn(rng, n - 1) # last column entries (rows 1:n-1) + row = randn(rng, n - 1) # last row entries (cols 1:n-1) + full = Matrix(Diagonal(d)) + full[1:(n - 1), n] .= c + full[n, 1:(n - 1)] .= row + S = sparse(Diagonal(d)) + U = zeros(n, 2); U[1:(n - 1), 1] .= c; U[n, 2] = 1.0 + V = zeros(2, n); V[1, n] = 1.0; V[2, 1:(n - 1)] .= row + return SparseWithDenseRowColMatrix(S, U, V), full +end + +# Block/bordered arrow: sparse interior B + dense border (E columns, F rows, C corner) of +# width k, which is a rank-2k correction over the block-diagonal bulk [B 0; 0 C]. +function block_arrow(n, k; seed = 1) + rng = MersenneTwister(seed); m = n - k + B = sparse(SymTridiagonal(fill(4.0, m), fill(-1.0, m - 1))) + E = randn(rng, m, k); Fb = randn(rng, k, m); C = randn(rng, k, k) + k * I + full = [Matrix(B) E; Fb C] + S = blockdiag(B, sparse(C)) + U = zeros(n, 2k); U[1:m, 1:k] .= E; for j in 1:k + U[m + j, k + j] = 1.0 + end + V = zeros(2k, n); for j in 1:k + V[j, m + j] = 1.0 + end; V[(k + 1):2k, 1:m] .= Fb + return SparseWithDenseRowColMatrix(S, U, V), full +end + +@testset "arrowhead: structure, solve, refactor" begin + n = 400 + A, full = arrowhead(n) + b = randn(n) + @test denserank(A) == 2 + @test Matrix(A) ≈ full + @test A * b ≈ full * b + @test relerr(A \ b, full \ b) < 1.0e-9 + F = factorize(A) + @test F isa SparseWithDenseRowColWoodbury # diagonal bulk is nonsingular → fast path + @test relerr(F \ b, full \ b) < 1.0e-9 + # Newton-style refactor: same arrow pattern, new values + A2, full2 = arrowhead(n; seed = 2) + refactor!(F, A2) + @test relerr(F \ b, full2 \ b) < 1.0e-9 +end + +@testset "detector recommends peeling an arrowhead" begin + _, full = arrowhead(200) + rec = recommend_lowrank_peel(sparse(full)) + @test rec.recommended + @test rec.rank == 2 + @test rec.dense_rows == 1 && rec.dense_cols == 1 +end + +@testset "block / bordered arrow (border width k → rank 2k)" begin + for k in (1, 3, 5) + n = 250 + A, full = block_arrow(n, k) + b = randn(n) + @test denserank(A) == 2k + @test Matrix(A) ≈ full + @test relerr(A \ b, full \ b) < 1.0e-9 + end +end + +@testset "arrowhead through the LinearSolve caching interface" begin + n = 300 + A, full = arrowhead(n; seed = 7) + b = randn(n) + cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization()) + @test relerr(solve!(cache).u, full \ b) < 1.0e-9 + # new arrow values, same pattern → reuses cached symbolic + A2, full2 = arrowhead(n; seed = 8) + cache.A = A2 + @test relerr(solve!(cache).u, full2 \ b) < 1.0e-9 + # default solve also picks it up + @test relerr(solve(LinearProblem(A, b)).u, full \ b) < 1.0e-9 +end diff --git a/test/test_augmented.jl b/test/test_augmented.jl new file mode 100644 index 0000000..7af4e40 --- /dev/null +++ b/test/test_augmented.jl @@ -0,0 +1,75 @@ +include("testutils.jl") +using Test, LinearAlgebra + +# Build a matrix whose sparse part S is singular (a zero row) but whose assembled A is not, +# because the low-rank fill supplies that row. This is the canonical BVP situation (an +# interior stencil that does not by itself pin down a boundary degree of freedom). +function singularS_nonsingularA(n, r; seed = 1) + rng = MersenneTwister(seed) + S = sparse(3.0 * I, n, n) + for i in 1:(n - 1) + S[i + 1, i] = -1.0 + end + for j in 1:n # zero out the first r rows of S → S singular + for i in 1:r + S[i, j] = 0.0 + end + end + dropzeros!(S) + F = zeros(r, n) # dense rows that make A nonsingular + for i in 1:r + F[i, i] = 1.0 + F[i, i + r] = 0.3 + end + A = SparseWithDenseRowColMatrix(S, F; replace = true) + return A +end + +@testset ":auto falls back to augmented when S is singular" begin + A = singularS_nonsingularA(50, 2) + @test_throws SingularException factorize(A; strategy = :woodbury, auto_fallback = false) + F = factorize(A) # :auto + @test F isa SparseWithDenseRowColAugmented + @test issuccess(F) + b = randn(50) + @test relerr(F \ b, Matrix(A) \ b) < 1.0e-9 +end + +@testset "augmented adjoint & transpose solve" begin + A = singularS_nonsingularA(50, 2; seed = 3) + F = factorize(A) + @test F isa SparseWithDenseRowColAugmented + M = Matrix(A) + b = randn(50) + @test relerr(F' \ b, M' \ b) < 1.0e-9 + @test relerr(transpose(F) \ b, transpose(M) \ b) < 1.0e-9 + x = copy(b) + ldiv!(F', x) + @test relerr(x, M' \ b) < 1.0e-9 +end + +@testset "forced :augmented agrees on a nonsingular S" begin + A = rand_sparsedense(60, 3; seed = 5) + M = Matrix(A) + b = randn(60) + F = factorize(A; strategy = :augmented) + @test F isa SparseWithDenseRowColAugmented + @test relerr(F \ b, M \ b) < 1.0e-9 + B = randn(60, 3) + @test relerr(F \ B, M \ B) < 1.0e-9 +end + +@testset "singular A surfaces SingularException" begin + n, r = 20, 1 + S = spzeros(n, n) # A will be rank-deficient + F = zeros(r, n) + F[1, 1] = 1.0 + A = SparseWithDenseRowColMatrix(S, F; replace = true) + @test_throws SingularException factorize(A; strategy = :augmented) + @test_throws SingularException factorize(A) # :auto can't rescue a singular A +end + +@testset "invalid strategy" begin + A = rand_sparsedense(20, 1) + @test_throws ArgumentError factorize(A; strategy = :nonsense) +end diff --git a/test/test_detect.jl b/test/test_detect.jl new file mode 100644 index 0000000..c5ca1a9 --- /dev/null +++ b/test/test_detect.jl @@ -0,0 +1,67 @@ +include("testutils.jl") +using Test, SparseArrays, LinearAlgebra + +@testset "uniformly sparse → not recommended" begin + # Mimics the user's DAE Jacobians: n≥64, a few nnz per row/col, no dense rows/cols. + n = 200 + S = sparse(2.0 * I, n, n) + for i in 1:(n - 1) + S[i + 1, i] = -1.0; S[i, i + 1] = -1.0 + end + for _ in 1:(2n) # scatter a few extra entries, still ≤ ~8/row + i = rand(1:n); j = rand(1:n); S[i, j] = 0.3 + end + rec = recommend_lowrank_peel(S) + @test rec isa PeelRecommendation + @test rec.recommended == false + @test Bool(rec) == false + @test rec.rank == 0 + @test rec.code === :uniformly_sparse + @test occursin("uniformly sparse", sprint(show, MIME"text/plain"(), rec)) +end + +@testset "tridiagonal + dense rows → recommended" begin + n, k = 2000, 4 + A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1)) + A[1:k, :] .= 1.0 # make the top k rows fully dense + rec = recommend_lowrank_peel(A) + @test rec.recommended == true + @test rec.rank == k + @test rec.dense_rows == k + @test rec.code === :recommended +end + +@testset "tridiagonal + dense columns → recommended" begin + n, k = 2000, 3 + A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1)) + A[:, 1:k] .= 1.0 # k dense columns + rec = recommend_lowrank_peel(A) + @test rec.recommended == true + @test rec.rank == k + @test rec.dense_cols == k +end + +@testset "plain tridiagonal → not recommended" begin + n = 2000 + A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1)) + rec = recommend_lowrank_peel(A) + @test rec.recommended == false + @test rec.rank == 0 +end + +@testset "guards" begin + @test recommend_lowrank_peel(sparse(1.0I, 10, 10)).code === :too_small + @test recommend_lowrank_peel(sparse(1.0I, 80, 100)).code === :not_square +end + +@testset "construction is allocation-light (no eager message string)" begin + n = 200 + A = spdiagm(-1 => fill(-1.0, n - 1), 0 => fill(2.0, n), 1 => fill(-1.0, n - 1)) + recommend_lowrank_peel(A) # warm up + a = @allocated recommend_lowrank_peel(A) + # only the O(n) rowcnt/colcnt scratch + the small struct; the reason text is never built + # on construction (only by `show`). Generous bound — just a regression guard. + @test a < 16 * n * sizeof(Int) + # the descriptive text is available on demand via show + @test !isempty(sprint(show, MIME"text/plain"(), recommend_lowrank_peel(A))) +end diff --git a/test/test_edgecases.jl b/test/test_edgecases.jl new file mode 100644 index 0000000..c4f3e38 --- /dev/null +++ b/test/test_edgecases.jl @@ -0,0 +1,97 @@ +include("testutils.jl") +using Test, LinearAlgebra, SparseArrays + +@testset "r = 0 degenerate (pure sparse)" begin + n = 20 + S = sparse(2.0 * I, n, n) + for i in 1:(n - 1) + S[i + 1, i] = -0.5 + end + A = SparseWithDenseRowColMatrix(S, Matrix{Float64}(undef, n, 0), Matrix{Float64}(undef, 0, n)) + @test denserank(A) == 0 + @test Matrix(A) ≈ Matrix(S) + b = randn(n) + F = factorize(A) + @test relerr(F \ b, Matrix(S) \ b) < 1.0e-9 + @test A * b ≈ S * b +end + +@testset "Int32 index type" begin + n, r = 25, 2 + S = SparseMatrixCSC{Float64, Int32}(sparse(3.0 * I, n, n)) + for i in 1:(n - 1) + S[i + 1, i] = -1.0 + end + A = SparseWithDenseRowColMatrix(S, randn(n, r), randn(r, n) .* 0.1) + b = randn(n) + @test relerr(factorize(A) \ b, Matrix(A) \ b) < 1.0e-9 +end + +@testset "ldiv! is allocation-free (refine = 0 and refine = 1)" begin + for refine in (0, 1), sel in (true, false) + A = rand_sparsedense(50, 3; selector = sel, seed = 42) + F = factorize(A; refine = refine) + b = randn(50) + ldiv!(F, copy(b)) # warm up + @test (@allocated ldiv!(F, b)) == 0 + end +end + +@testset "refactor! allocates only the tiny dense pivot" begin + A = rand_sparsedense(50, 3; seed = 43) + F = factorize(A) + nz = copy(sparsepart(A).nzval) + refactor!(F, nz) # warm up + a = @allocated refactor!(F, nz) + @test a < 2048 # O(r) pivot vector + LU wrapper only +end + +@testset "complex RHS in-place ldiv! over a real factorization" begin + A = rand_sparsedense(40, 2; seed = 7) # real (Float64) factorization + M = Matrix(A) + F = factorize(A) + @test eltype(F) <: Real + bc = randn(ComplexF64, 40) + x = copy(bc); ldiv!(F, x) + @test relerr(x, M \ bc) < 1.0e-9 + xa = copy(bc); ldiv!(F', xa) + @test relerr(xa, M' \ bc) < 1.0e-9 + Bc = randn(ComplexF64, 40, 3) + Xc = copy(Bc); ldiv!(F, Xc) + @test relerr(Xc, M \ Bc) < 1.0e-9 + + # augmented (singular S) path with a complex RHS + S = sparse(3.0 * I, 30, 30) + for i in 1:29 + S[i + 1, i] = -1.0 + end + for j in 1:30 + S[1, j] = 0.0 + end + dropzeros!(S) + fr = zeros(1, 30); fr[1, 1] = 1.0; fr[1, 2] = 0.3 + Aaug = SparseWithDenseRowColMatrix(S, fr; replace = true) + Fa = factorize(Aaug) + @test Fa isa SparseWithDenseRowColAugmented + bc2 = randn(ComplexF64, 30) + xa2 = copy(bc2); ldiv!(Fa, xa2) + @test relerr(xa2, Matrix(Aaug) \ bc2) < 1.0e-9 +end + +@testset "issuccess / size reflect state" begin + A = rand_sparsedense(30, 2) + F = factorize(A) + @test issuccess(F) + @test size(F, 1) == 30 == size(F, 2) + + Asing = SparseWithDenseRowColMatrix( + spzeros(10, 10), + (z = zeros(1, 10); z[1, 1] = 1.0; z); replace = true + ) + @test_throws SingularException factorize(Asing) +end + +@testset "qr is intentionally unsupported" begin + A = rand_sparsedense(20, 2) + @test_throws ArgumentError qr(A) +end diff --git a/test/test_generic_eltypes.jl b/test/test_generic_eltypes.jl new file mode 100644 index 0000000..73e9b29 --- /dev/null +++ b/test/test_generic_eltypes.jl @@ -0,0 +1,79 @@ +include("testutils.jl") +using Test, LinearAlgebra, SparseArrays, ForwardDiff + +@testset "Float32 / BigFloat / Complex solve & refactor" begin + for (T, tol) in ( + (Float32, 1.0f-3), (BigFloat, 1.0e-60), (ComplexF64, 1.0e-9), + (Complex{BigFloat}, 1.0e-60), + ) + n, r = 30, 2 + rng = MersenneTwister(1) + S = sparse(T(4) * I, n, n) + for i in 1:(n - 1) + S[i + 1, i] = T(-1) + S[i, i + 1] = T(-1) + end + U = randn(rng, T, n, r) + V = randn(rng, T, r, n) .* T(0.1) + A = SparseWithDenseRowColMatrix(S, U, V) + M = Matrix(A) + b = randn(rng, T, n) + + F = factorize(A) + @test eltype(F \ b) == T + @test relerr(F \ b, M \ b) < tol + + # refactor stays in-type + S2 = copy(S); S2.nzval .*= T(2) + A2 = SparseWithDenseRowColMatrix(S2, U, V) + refactor!(F, S2.nzval) + @test relerr(F \ b, Matrix(A2) \ b) < tol + end +end + +@testset "ForwardDiff: autodiff through the sparse + low-rank solve" begin + n, r = 25, 2 + function objective(p) + T = eltype(p) + S = sparse(T(4) * p[1] * I, n, n) + for i in 1:(n - 1) + S[i + 1, i] = T(-1) + end + U = T.(reshape(collect(1.0:(n * r)), n, r)) ./ (n * r) .* p[2] + V = T.(reshape(collect(1.0:(r * n)), r, n)) ./ (r * n) + b = T.(collect(1.0:n)) + return sum(factorize(SparseWithDenseRowColMatrix(S, U, V)) \ b) + end + + p0 = [1.0, 1.0] + g = ForwardDiff.gradient(objective, p0) + h = 1.0e-6 + fd = [ + (objective([p0[1] + h, p0[2]]) - objective([p0[1] - h, p0[2]])) / 2h, + (objective([p0[1], p0[2] + h]) - objective([p0[1], p0[2] - h])) / 2h, + ] + @test relerr(g, fd) < 1.0e-5 + @test all(isfinite, g) +end + +@testset "ForwardDiff through the augmented fallback" begin + n, r = 20, 1 + function objective(p) + T = eltype(p) + S = sparse(T(2) * I, n, n) + for i in 1:(n - 1) + S[i + 1, i] = T(-1) + end + for j in 1:n + S[1, j] = zero(T) + end # S singular → augmented path + Fl = zeros(T, r, n); Fl[1, 1] = p[1] + A = SparseWithDenseRowColMatrix(S, Fl; replace = true) + b = T.(collect(1.0:n)) + return sum(factorize(A) \ b) + end + g = ForwardDiff.derivative(objective, 1.5) + h = 1.0e-6 + fd = (objective(1.5 + h) - objective(1.5 - h)) / 2h + @test isapprox(g, fd; rtol = 1.0e-4) +end diff --git a/test/test_linearsolve.jl b/test/test_linearsolve.jl new file mode 100644 index 0000000..5b879e8 --- /dev/null +++ b/test/test_linearsolve.jl @@ -0,0 +1,93 @@ +include("testutils.jl") +using Test, LinearAlgebra, SparseArrays, LinearSolve + +n, r = 60, 3 + +@testset "default solve picks the caching algorithm" begin + A = rand_sparsedense(n, r; seed = 21) + b = randn(n) + sol = solve(LinearProblem(A, b)) + @test sol.u ≈ Matrix(A) \ b +end + +@testset "cache reuse: symbolic cached across value/RHS changes" begin + A = rand_sparsedense(n, r; seed = 22) + b = randn(n) + cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization()) + s1 = solve!(cache) + @test s1.u ≈ Matrix(A) \ b + + # new values, SAME sparsity pattern → reuse the cached symbolic analysis (refactor!) + A2 = SparseWithDenseRowColMatrix( + SparseMatrixCSC( + size(A)..., copy(sparsepart(A).colptr), copy(sparsepart(A).rowval), + sparsepart(A).nzval .* 1.5 .+ 0.01 + ), + lowrankfactors(A)[1], Matrix(fillpart(A)) .+ 0.2 + ) + cache.A = A2 + s2 = solve!(cache) + @test s2.u ≈ Matrix(A2) \ b + + # new RHS only → reuse the cached numeric factorization + b2 = randn(n) + cache.b = b2 + s3 = solve!(cache) + @test s3.u ≈ Matrix(A2) \ b2 +end + +@testset "reuse_symbolic = false rebuilds each solve" begin + A = rand_sparsedense(n, r; seed = 23) + b = randn(n) + cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization(reuse_symbolic = false)) + @test solve!(cache).u ≈ Matrix(A) \ b + A2 = SparseWithDenseRowColMatrix( + SparseMatrixCSC( + size(A)..., copy(sparsepart(A).colptr), copy(sparsepart(A).rowval), + sparsepart(A).nzval .* 2 + ), + lowrankfactors(A)[1], Matrix(fillpart(A)) + ) + cache.A = A2 + @test solve!(cache).u ≈ Matrix(A2) \ b +end + +@testset "singular A returns Infeasible, does not throw (KLU parity)" begin + n = 20 + S = sparse(2.0 * I, n, n) + S[5, 5] = 0.0 # singular S, and no fill to fix it → singular A + A = SparseWithDenseRowColMatrix(S, zeros(n, 1), zeros(1, n)) + b = randn(n) + sol = solve(LinearProblem(A, b)) # must NOT throw + @test sol.retcode == ReturnCode.Infeasible + # also in the cached reuse loop + cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization()) + @test solve!(cache).retcode == ReturnCode.Infeasible +end + +@testset "singular S routes through the augmented fallback under caching" begin + # S singular (zeroed top rows), A nonsingular via the dense fill rows + nn, rr = 50, 2 + S = sparse(3.0 * I, nn, nn) + for i in 1:(nn - 1) + S[i + 1, i] = -1.0 + end + for j in 1:nn, i in 1:rr + S[i, j] = 0.0 + end + dropzeros!(S) + F = zeros(rr, nn); for i in 1:rr + F[i, i] = 1.0; F[i, i + rr] = 0.3 + end + A = SparseWithDenseRowColMatrix(S, F; replace = true) + b = randn(nn) + cache = init(LinearProblem(A, b), SparseWithDenseRowColFactorization()) + @test solve!(cache).u ≈ Matrix(A) \ b + # refactor with new interior values (same pattern), still singular S → still correct + A2 = SparseWithDenseRowColMatrix( + SparseMatrixCSC(size(A.S)..., copy(A.S.colptr), copy(A.S.rowval), A.S.nzval .* 1.3), + A.U, A.V + ) + cache.A = A2 + @test solve!(cache).u ≈ Matrix(A2) \ b +end diff --git a/test/test_matrix.jl b/test/test_matrix.jl new file mode 100644 index 0000000..337dd56 --- /dev/null +++ b/test/test_matrix.jl @@ -0,0 +1,109 @@ +include("testutils.jl") +using Test + +n, r = 30, 3 + +@testset "general A = S + U*V construction" begin + A = rand_sparsedense(n, r) + @test A isa SparseWithDenseRowColMatrix + @test size(A) == (n, n) + @test eltype(A) == Float64 + @test denserank(A) == r + @test sparsepart(A) === A.S + @test fillpart(A) === A.V + @test lowrankfactors(A) == (A.U, A.V) + + # getindex equals the dense reference everywhere + M = Matrix(A.S) + Matrix(A.U) * Matrix(A.V) + @test Matrix(A) ≈ M + @test all(A[i, j] ≈ M[i, j] for i in 1:n, j in 1:n) +end + +@testset "eltype promotion" begin + S = sparse(2I, 5, 5) # SparseMatrixCSC{Int} + U = ones(Float64, 5, 1) + V = ones(Float64, 1, 5) + A = SparseWithDenseRowColMatrix(S, U, V) + @test eltype(A) == Float64 + @test sparsepart(A) isa SparseMatrixCSC{Float64} + @test eltype(A * ones(5)) == Float64 +end + +@testset "size / shape validation" begin + S = sparse(1.0I, 5, 5) + @test_throws DimensionMismatch SparseWithDenseRowColMatrix(S, ones(4, 1), ones(1, 5)) + @test_throws DimensionMismatch SparseWithDenseRowColMatrix(S, ones(5, 1), ones(1, 4)) + @test_throws DimensionMismatch SparseWithDenseRowColMatrix(S, ones(5, 2), ones(1, 5)) + @test_throws ArgumentError SparseWithDenseRowColMatrix(sparse(1.0I, 5, 6), ones(5, 1), ones(1, 6)) +end + +@testset "SelectorMatrix convenience (fill)" begin + S = sparse(2.0I, n, n) + @inbounds for i in 1:(n - 1) + S[i + 1, i] = -0.5 + end + F = randn(r, n) + + A_add = SparseWithDenseRowColMatrix(S, F; replace = false) + @test A_add.U isa SelectorMatrix + @test fillpart(A_add) == F + Madd = Matrix(S) + Madd[1:r, :] .+= F + @test Matrix(A_add) ≈ Madd + + Arep = SparseWithDenseRowColMatrix(S, F; replace = true) + Mrep = Matrix(S) + Mrep[1:r, :] = F + @test Matrix(Arep) ≈ Mrep + @test Matrix(Arep)[1:r, :] ≈ F + @test Matrix(Arep)[(r + 1):n, :] ≈ Matrix(S)[(r + 1):n, :] +end + +@testset "exclusive_sparsepart" begin + Asel = rand_sparsedense(n, r; selector = true) + @test exclusive_sparsepart(Asel) == Matrix(sparsepart(Asel))[(r + 1):n, :] + Agen = rand_sparsedense(n, r; selector = false) + @test_throws ArgumentError exclusive_sparsepart(Agen) +end + +@testset "getindex / setindex! round-trips the assembled value" begin + A = rand_sparsedense(n, r; selector = true) + # covered row (low-rank correction present): assembled value must read back + A[1, 1] = 100.0 + @test A[1, 1] ≈ 100.0 + @test sparsepart(A)[1, 1] ≈ 100.0 - fillpart(A)[1, 1] + # uncovered row: value goes straight into S + A[5, 5] = 7.0 + @test A[5, 5] ≈ 7.0 + @test sparsepart(A)[5, 5] ≈ 7.0 + # dense U as well + Ad = rand_sparsedense(n, r; selector = false) + Ad[2, 4] = -3.0 + @test Ad[2, 4] ≈ -3.0 +end + +@testset "similar / copy / fill! / Matrix" begin + A = rand_sparsedense(n, r) + @test similar(A) isa SparseWithDenseRowColMatrix{Float64} + @test similar(A, Float32) isa SparseWithDenseRowColMatrix{Float32} + @test similar(A, Float32, n, n) isa Matrix{Float32} # dims form → plain Array (FABM parity) + + B = copy(A) + @test B isa SparseWithDenseRowColMatrix + @test Matrix(B) ≈ Matrix(A) + # mutating the copy does not touch the original + B.V[1, 1] += 1 + @test !(Matrix(B) ≈ Matrix(A)) + + C = copy(A) + fill!(C, 0.0) + @test iszero(Matrix(C)) + @test nnz(sparsepart(C)) == nnz(sparsepart(A)) # pattern preserved +end + +@testset "SelectorMatrix basics" begin + Sel = SelectorMatrix{Float64}(6, 2) + @test size(Sel) == (6, 2) + @test Matrix(Sel) == [Matrix(1.0I, 2, 2); zeros(4, 2)] + @test_throws ArgumentError SelectorMatrix{Float64}(2, 5) +end diff --git a/test/test_matvec.jl b/test/test_matvec.jl new file mode 100644 index 0000000..41ea800 --- /dev/null +++ b/test/test_matvec.jl @@ -0,0 +1,57 @@ +include("testutils.jl") +using Test, LinearAlgebra + +n, r = 40, 3 + +@testset "A*x and mul! vs dense (dense U)" begin + A = rand_sparsedense(n, r; selector = false) + M = Matrix(A) + x = randn(n) + @test A * x ≈ M * x + + y = randn(n) + α, β = 1.7, -0.4 + yref = α .* (M * x) .+ β .* y + mul!(y, A, x, α, β) + @test y ≈ yref +end + +@testset "A*x and mul! vs dense (selector U)" begin + A = rand_sparsedense(n, r; selector = true) + M = Matrix(A) + x = randn(n) + @test A * x ≈ M * x + y = zeros(n) + mul!(y, A, x) + @test y ≈ M * x +end + +@testset "matrix RHS A*X" begin + A = rand_sparsedense(n, r) + M = Matrix(A) + X = randn(n, 5) + @test A * X ≈ M * X + Y = randn(n, 5) + α, β = 0.5, 2.0 + Yref = α .* (M * X) .+ β .* Y + mul!(Y, A, X, α, β) + @test Y ≈ Yref +end + +@testset "mul! is allocation-free (vector, selector & dense)" begin + for sel in (true, false) + A = rand_sparsedense(n, r; selector = sel) + x = randn(n) + y = zeros(n) + mul!(y, A, x, 1.0, 0.0) # warm up + @test (@allocated mul!(y, A, x, 1.0, 0.0)) == 0 + end +end + +@testset "SelectorMatrix product" begin + U = SelectorMatrix{Float64}(7, 3) + w = [1.0, 2.0, 3.0] + @test U * w == [1.0, 2.0, 3.0, 0, 0, 0, 0] + W = reshape(1.0:6.0, 3, 2) + @test U * W == [Matrix(W); zeros(4, 2)] +end diff --git a/test/test_refactor.jl b/test/test_refactor.jl new file mode 100644 index 0000000..ea390fc --- /dev/null +++ b/test/test_refactor.jl @@ -0,0 +1,161 @@ +include("testutils.jl") +using Test, LinearAlgebra, SparseArrays + +n, r = 60, 3 + +@testset "refactor! with raw nzval (values only, same pattern)" begin + A = rand_sparsedense(n, r; seed = 11) + F = factorize(A) + b = randn(n) + + S2 = copy(sparsepart(A)) + S2.nzval .*= 1.6 + S2.nzval .+= 0.01 + A2 = SparseWithDenseRowColMatrix(S2, lowrankfactors(A)...) + refactor!(F, S2.nzval) + @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9 +end + +@testset "refactor! with SparseMatrixCSC (pattern checked) + new fill" begin + A = rand_sparsedense(n, r; seed = 12, selector = true) + F = factorize(A) + b = randn(n) + + S2 = copy(sparsepart(A)) + S2.nzval .= S2.nzval .* 2 .- 0.05 + newfill = randn(r, n) + refactor!(F, S2; fill = newfill) + A2 = SparseWithDenseRowColMatrix(S2, SelectorMatrix{Float64}(n, r), newfill) + @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9 +end + +@testset "refactor! with SparseWithDenseRowColMatrix (dense U updated)" begin + A = rand_sparsedense(n, r; seed = 13, selector = false) + F = factorize(A) + b = randn(n) + + S2 = copy(sparsepart(A)); S2.nzval .*= 1.3 + U2 = Matrix(A.U) .+ 0.2 .* randn(n, r) + V2 = Matrix(A.V) .+ 0.2 .* randn(r, n) + A2 = SparseWithDenseRowColMatrix(S2, U2, V2) + refactor!(F, A2) + @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9 +end + +@testset "Newton-style sequence reusing one factorization" begin + A = rand_sparsedense(n, r; seed = 14) + F = factorize(A) + nzpattern = copy(sparsepart(A).nzval) + for k in 1:6 + newnz = nzpattern .* (1 + 0.1k) .+ 0.001k + S2 = SparseMatrixCSC(size(A)..., copy(sparsepart(A).colptr), copy(sparsepart(A).rowval), newnz) + A2 = SparseWithDenseRowColMatrix(S2, lowrankfactors(A)...) + refactor!(F, newnz) + b = randn(n) + @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9 + end +end + +@testset "update_lowrank!: fixed S, varying low-rank correction" begin + A = rand_sparsedense(n, r; seed = 41, selector = false) + F = factorize(A) + S = sparsepart(A); U0 = lowrankfactors(A)[1] + # vary only V (S and U fixed) — the common Sherman–Morrison–Woodbury reuse case + for _ in 1:4 + V2 = randn(r, n) + update_lowrank!(F; V = V2) + A2 = SparseWithDenseRowColMatrix(copy(S), U0, V2) + b = randn(n) + @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9 + @test relerr(F \ b, factorize(A2) \ b) < 1.0e-9 # matches a fresh full factorization + end + # vary U and V together (still reusing S's factorization) + U2 = randn(n, r); V3 = randn(r, n) + update_lowrank!(F; U = U2, V = V3) + A3 = SparseWithDenseRowColMatrix(copy(S), U2, V3) + b = randn(n) + @test relerr(F \ b, Matrix(A3) \ b) < 1.0e-9 +end + +@testset "update_lowrank! with a SelectorMatrix U" begin + A = rand_sparsedense(n, r; seed = 42, selector = true) # selector U (dense top rows) + F = factorize(A) + V2 = randn(r, n) + update_lowrank!(F; V = V2) # update the dense rows only + A2 = SparseWithDenseRowColMatrix(copy(sparsepart(A)), SelectorMatrix{Float64}(n, r), V2) + b = randn(n) + @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9 + @test_throws ArgumentError update_lowrank!(F; U = randn(n, r)) # can't swap selector → dense +end + +@testset "refactor! rejects an incompatible U change" begin + A = rand_sparsedense(40, 2; seed = 9, selector = true) # selector U + F = factorize(A) + Adense = SparseWithDenseRowColMatrix(copy(sparsepart(A)), randn(40, 2), Matrix(fillpart(A))) + @test_throws ArgumentError refactor!(F, Adense) +end + +@testset "factorization is independent of later input mutation" begin + A = rand_sparsedense(50, 2; seed = 10, selector = false) + M = Matrix(A) + b = randn(50) + F = factorize(A) + x1 = F \ b + A.V .+= 10.0 # mutate the inputs after factorize … + A.U .+= 5.0 + A.S.nzval .*= 2.0 + x2 = F \ b # … F owns copies, so its answer is unchanged + @test x1 ≈ x2 + @test relerr(x1, M \ b) < 1.0e-9 +end + +@testset "symbolic cache reuse via lu / lu! / ldiv!" begin + # Analyze once (cache symbolic), reuse for many solves and for new values (same pattern). + A = rand_sparsedense(n, r; seed = 31) + F = lu(A) # analyze (cache symbolic) + numeric factor + for b in (randn(n), randn(n), randn(n)) # reuse the cached factorization for many RHS + @test relerr(ldiv!(F, copy(b)), Matrix(A) \ b) < 1.0e-9 + end + A2 = rand_sparsedense(n, r; seed = 31) # same pattern, new values + A2.S.nzval .= A.S.nzval .* 1.5 .+ 0.01 + A2.V .= A.V .+ 0.2 + lu!(F, A2) # == refactor!(F, A2); reuses cached symbolic + b = randn(n) + @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9 + @test relerr(F \ b, lu(A2) \ b) < 1.0e-9 # same answer as a fresh full factorization +end + +@testset "mismatched pattern is rejected" begin + A = rand_sparsedense(n, r; seed = 15) + F = factorize(A) + Sbad = copy(sparsepart(A)) + Sbad[n, 1] = 123.0 # introduces a new structural entry + @test_throws ArgumentError refactor!(F, Sbad) + @test_throws DimensionMismatch refactor!(F, randn(length(sparsepart(A).nzval) + 1)) +end + +@testset "augmented refactor!" begin + rng = MersenneTwister(20) + nn, rr = 40, 2 + S = sparse(3.0I, nn, nn) + for i in 1:(nn - 1) + S[i + 1, i] = -1.0 + end + for j in 1:nn, i in 1:rr + S[i, j] = 0.0 + end + dropzeros!(S) + Fl = zeros(rr, nn); for i in 1:rr + Fl[i, i] = 1.0 + end + A = SparseWithDenseRowColMatrix(S, Fl; replace = true) + F = factorize(A) + @test F isa SparseWithDenseRowColAugmented + b = randn(nn) + + # change interior values only (pattern of augmented system unchanged) + S2 = copy(sparsepart(A)); S2.nzval .*= 1.4 + A2 = SparseWithDenseRowColMatrix(S2, A.U, A.V) + refactor!(F, A2) + @test relerr(F \ b, Matrix(A2) \ b) < 1.0e-9 +end diff --git a/test/test_woodbury.jl b/test/test_woodbury.jl new file mode 100644 index 0000000..bad26b0 --- /dev/null +++ b/test/test_woodbury.jl @@ -0,0 +1,80 @@ +include("testutils.jl") +using Test, LinearAlgebra + +@testset "factorize selects Woodbury for nonsingular S" begin + A = rand_sparsedense(60, 4) + F = factorize(A) + @test F isa SparseWithDenseRowColWoodbury + @test issuccess(F) + @test size(F) == (60, 60) +end + +@testset "solve vs dense (dense & selector U, several sizes/ranks)" begin + for (n, r, sel) in ((50, 1, false), (80, 4, false), (60, 3, true), (120, 6, true)) + A = rand_sparsedense(n, r; selector = sel, seed = n + r) + M = Matrix(A) + b = randn(n) + F = factorize(A) + @test relerr(F \ b, M \ b) < 1.0e-9 + @test relerr(A \ b, M \ b) < 1.0e-9 # \ on the matrix directly + # multi-RHS + B = randn(n, 4) + @test relerr(F \ B, M \ B) < 1.0e-9 + # ldiv! in place + x = copy(b) + ldiv!(F, x) + @test relerr(x, M \ b) < 1.0e-9 + end +end + +@testset "adjoint & transpose solve" begin + A = rand_sparsedense(70, 3; seed = 99) + M = Matrix(A) + b = randn(70) + F = factorize(A) + @test relerr(F' \ b, M' \ b) < 1.0e-9 + @test relerr(transpose(F) \ b, transpose(M) \ b) < 1.0e-9 + B = randn(70, 3) + @test relerr(F' \ B, M' \ B) < 1.0e-9 +end + +@testset "complex adjoint ≠ transpose" begin + n, r = 40, 2 + rng = MersenneTwister(3) + S = sparse((3.0 + 0im) * I, n, n) + for i in 1:(n - 1) + S[i + 1, i] = -1.0 + 0.2im + end + U = randn(rng, ComplexF64, n, r) + V = randn(rng, ComplexF64, r, n) .* 0.1 + A = SparseWithDenseRowColMatrix(S, U, V) + M = Matrix(A) + b = randn(ComplexF64, n) + F = factorize(A) + @test relerr(F \ b, M \ b) < 1.0e-9 + @test relerr(F' \ b, M' \ b) < 1.0e-9 + @test relerr(transpose(F) \ b, transpose(M) \ b) < 1.0e-9 +end + +@testset "iterative refinement helps when S is far worse conditioned than A" begin + n, r = 60, 1 + S = sparse(1.0 * I, n, n) + S[1, 1] = 1.0e-9 # S nearly singular, κ(S) ≈ 1e9 + for i in 1:(n - 1) + S[i + 1, i] = -0.1 + end + U = SelectorMatrix{Float64}(n, r) # correction lives in row 1 + V = zeros(1, n) + V[1, 1] = 1.0 # makes A's (1,1) ≈ 1 → A well conditioned + A = SparseWithDenseRowColMatrix(S, U, V) + M = Matrix(A) + b = randn(n) + xref = M \ b + + F0 = factorize(A; strategy = :woodbury, refine = 0, auto_fallback = false) + F2 = factorize(A; strategy = :woodbury, refine = 3, auto_fallback = false) + e0 = relerr(F0 \ b, xref) + e2 = relerr(F2 \ b, xref) + @test e2 ≤ e0 # refinement does not hurt + @test e2 < 1.0e-6 # and recovers accuracy +end diff --git a/test/testutils.jl b/test/testutils.jl new file mode 100644 index 0000000..1eb7e1a --- /dev/null +++ b/test/testutils.jl @@ -0,0 +1,25 @@ +using SparseWithDenseRowColMatrices, SparseArrays, LinearAlgebra, Random + +# Dense oracle for an SparseWithDenseRowColMatrix. +densify(A::SparseWithDenseRowColMatrix) = Matrix(A) + +relerr(x, y) = norm(x .- y) / norm(y) + +# A diagonally-dominant (well-conditioned) sparse bulk with some off-band sparsity, plus an +# r×n low-rank correction. `selector=true` uses the boundary-condition SelectorMatrix for U. +function rand_sparsedense(n, r; seed = 1, T = Float64, selector = false, scale = 0.1) + rng = MersenneTwister(seed) + S = sparse(T(4) * I, n, n) + @inbounds for i in 1:(n - 1) + S[i + 1, i] = T(-1) + S[i, i + 1] = T(-1) + end + for _ in 1:(2n) # a few off-band entries → genuinely sparse, not banded + i = rand(rng, 1:n) + j = rand(rng, 1:n) + S[i, j] += T(rand(rng) * scale) + end + U = selector ? SelectorMatrix{T}(n, r) : T.(randn(rng, n, r)) + V = T.(randn(rng, r, n)) .* T(scale) + return SparseWithDenseRowColMatrix(S, U, V) +end