diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index a18a727..50f03a7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -20,7 +20,7 @@ jobs: arch: - x64 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} diff --git a/Project.toml b/Project.toml index 23ecd8e..9b4671f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,19 +1,26 @@ name = "CachedInterpolations" uuid = "b9709bfb-d23d-5560-8276-8c35c4b76823" +version = "1.0.0" authors = ["Tim Holy "] -version = "0.1.1" [deps] Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [compat] +Aqua = "0.8" +Documenter = "1" +ExplicitImports = "1" Interpolations = "0.14 - 1" StaticArrays = "1" +Test = "1" julia = "1.10" [extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Aqua", "Documenter", "ExplicitImports", "Test"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..f54218f --- /dev/null +++ b/README.md @@ -0,0 +1,90 @@ +# CachedInterpolations.jl + +[![CI](https://github.com/HolyLab/CachedInterpolations.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/HolyLab/CachedInterpolations.jl/actions/workflows/CI.yml) +[![codecov](https://codecov.io/gh/HolyLab/CachedInterpolations.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/HolyLab/CachedInterpolations.jl) +[![version](https://juliahub.com/docs/General/CachedInterpolations/stable/version.svg)](https://juliahub.com/ui/Packages/General/CachedInterpolations) + +Performance-optimized quadratic B-spline interpolation for large or memory-mapped arrays. + +## The problem + +When `P` is a large array stored on disk as a memory-mapped file, evaluating +`P[y1, y2, i1, i2]` at many `(i1, i2)` tile positions with slowly-varying +floating-point coordinates `(y1, y2)` incurs repeated disk I/O — even when the +same 3×3 neighborhood of `P` is accessed on consecutive iterations. + +## The solution + +`CachedInterpolations` wraps `P` in an array of lightweight +`CachedInterpolation` objects — one per tile `(i1, i2)`. Each object caches +the 3×3×... block of `P` centered on the most-recent evaluation point. +When the interpolating coordinates change only slightly between calls (e.g., +in a gradient-descent loop), the cache is reused and disk I/O is avoided. + +## Installation + +```julia +using Pkg +Pkg.add("CachedInterpolations") +``` + +## Usage + +```julia +using CachedInterpolations + +# A 3×2 array: two tiles of length 3 each +A = [0.0 0.0; 1.0 2.0; 0.0 0.0] # size (3, 2), N=1 interpolating dim, 1 tiling dim + +itps = cachedinterpolators(A, 1) # returns a length-2 Array of CachedInterpolation + +itps[1](2.0) # quadratic-interpolated value at position 2.0 in tile 1 → 0.75 +itps[2](2.0) # same position in tile 2 → 1.5 +``` + +Repeated calls near the same coordinate reuse the cached 3-element block and +avoid re-reading from `A` (or from disk, if `A` is memory-mapped). + +### Centered coordinate axes + +Pass an `origin` tuple to shift the visible axis so that a convenient index +(e.g., the center of the array) maps to zero: + +```julia +A = reshape([0.0; 1.0; 0.0], (3, 1)) +itps = cachedinterpolators(A, 1, (2,)) # index 2 → coordinate 0 + +axes(itps[1]) # (-1:1,) +itps[1](0.0) # 0.75 — the peak, now addressed as 0 +itps[1](-0.5) # off-center +``` + +### Gradient evaluation + +`Interpolations.gradient` works on a `CachedInterpolation` with the same call +signature as on a regular `Interpolations` interpolant: + +```julia +using Interpolations +g = Interpolations.gradient(itps[1], 0.0) # SVector of partial derivatives +``` + +> **Note:** `gradient` assumes you have already called `itp(y...)` at the same +> coordinate to populate the cache. Calling `gradient` without a prior `itp(y...)` +> at the same location returns incorrect results. + +### Working with memory-mapped arrays + +```julia +using Mmap + +# Memory-map a large on-disk array +open("bigdata.bin") do io + P = Mmap.mmap(io, Array{Float64, 4}, (256, 256, 100, 50)) + itps = cachedinterpolators(P, 2) # first 2 dims interpolating, last 2 tiling + + # Evaluate at floating-point coords across all tiles — cache avoids re-reading + # the same 3×3 block when coords change slowly + result = [itps[i, j](128.0, 128.0) for i in 1:100, j in 1:50] +end +``` diff --git a/src/CachedInterpolations.jl b/src/CachedInterpolations.jl index 50410e1..66382df 100644 --- a/src/CachedInterpolations.jl +++ b/src/CachedInterpolations.jl @@ -1,7 +1,8 @@ module CachedInterpolations -using Interpolations, StaticArrays +using Interpolations: Interpolations, AbstractInterpolation, BSpline, InPlace, NoInterp, OnCell, Quadratic using Interpolations: weightedindexes, value_weights, gradient_weights, InterpGetindex +using StaticArrays: StaticArrays, SVector import Base: getindex export CachedInterpolation, cachedinterpolators @@ -19,12 +20,12 @@ quadratic interpolation of a large multidimensional array. The first end ``` where `x_1`, `x_2` are floating-point indexes and `i_1`, `i_2` are -integer indexes. A CachedInterpolation simulates an array-of-arrays +integer indexes. A `CachedInterpolation` simulates an array-of-arrays interface for this task, while in reality using only a single underlying `P` array. The performance enhancement comes from caching: when `P` is bigger -than the computer's memory, `P` may be a mmapped-file, and direct +than the computer's memory, `P` may be a memory-mapped file, and direct access to values of `P` will therefore be limited by disk I/O. If one has a task in which all elements of `B` have to be evaluated repeatedly for different values of `y_1`, `y_2`, but that these values @@ -42,17 +43,39 @@ Create an array-of-interpolating arrays with the function CachedInterpolations """ -A single `CachedInterpolation` represents a "movable" -3-by-3-by... view of `P[:, :, i_1, i_2]` for a specific `(i_1, -i_2)`. An array of these objects thus implements an array-of-arrays -interface. Create them with `cachedinterpolators`. + CachedInterpolation + +A single interpolating tile over a tiling array. A `CachedInterpolation` +represents a "movable" 3×3×... view of `P[:, :, i_1, i_2]` for a specific +`(i_1, i_2)` pair. Calling `itp(y_1, y_2)` returns the scalar +quadratic-interpolated value at position `(y_1, y_2)` (floating-point or +integer coordinates), caching the surrounding 3×3×... block of `P` to avoid +redundant I/O when coordinates change by only a small amount between calls. +Integer-index access is also available via `itp[y_1, y_2]`. + +Create arrays of `CachedInterpolation`s with [`cachedinterpolators`](@ref). + +# Examples +```jldoctest +julia> A = reshape([0.0; 1.0; 0.0], (3, 1)); + +julia> itps = cachedinterpolators(A, 1); + +julia> itp = itps[1]; + +julia> itp(2.0) # quadratic-interpolated peak of [0, 1, 0] +0.75 + +julia> itp(1.5) # off-center +0.5 +``` """ mutable struct CachedInterpolation{T, N, M, O, K} <: AbstractInterpolation{T, N, BSpline{Quadratic{InPlace}}} # Note: M = N+K - coefs::Array{T, M} # tiled array of 3x3x... buffers - parent::Array{T, M} # the overall array (`P` in the documentation above) + const coefs::Array{T, M} # tiled array of 3x3x... buffers + const parent::Array{T, M} # the overall array (`P` in the documentation above) center::NTuple{N, Int} # rounded (y_1, y_2) of prev. eval for this tile - tileindex::CartesianIndex{K} + const tileindex::CartesianIndex{K} end const splitN = Base.IteratorsMD.split @@ -65,26 +88,48 @@ Base.axes(itp::CachedInterpolation{T, N, M, O}, d) where {T, N, M, O} = d <= N ? axes(itp.parent, d) .- O[d] : Base.OneTo(1) """ + cachedinterpolators(parent::Array, N) + cachedinterpolators(parent::Array, N, origin) -`itps = cachedinterpolators(parent, N, [origin=(0,0,...)])` creates an -array-of-CachedInterpolation arrays from an underlying `parent` array, -where the first `N` dimensions of `parent` are interpolating. The -equivalent of -``` - parent[y_1, y_2, i_1, i_2] -``` -becomes -``` - itp = itps[i_1, i_2] - itp[y_1, y_2] +Create an `Array` of [`CachedInterpolation`](@ref) objects from the dense array +`parent`, where the first `N` dimensions of `parent` are interpolating and the +remaining dimensions are tiling. `parent` must be a dense `Array`; subtypes of +`AbstractArray` such as `SubArray` or `SharedArray` are not supported. + +The expression `parent[y_1, y_2, i_1, i_2]` becomes: + +```julia +itp = itps[i_1, i_2] +itp(y_1, y_2) ``` -and `itp` caches the needed values of `parent[:,:,i_1,i_2]` centered -around `y_1, y_2`. -Optionally specify the `origin` argument to offset the `y_i` -coordinates within a tile. For example, one can mimic a -`CenterIndexedArray` when `size(parent, d)` is odd for the first `N` -dimensions and you supply `origin = (div(size(parent,1)+1, 2), ...)`. +Each `itp` caches a 3×3×... block of `parent` centered on the most-recent +evaluation point, so repeated calls with nearby coordinates avoid redundant +memory or disk I/O (useful when `parent` is a memory-mapped file). + +The optional `origin` argument is a tuple of `N` integers (default: all zeros) +that offsets the coordinate system. With a nonzero `origin`, dimension `d` is +addressed as `y_d + origin[d]` in `parent`, shifting the user-visible axes by +`-origin`. For example, setting `origin = (div(size(parent,d)+1, 2) for d in +1:N)` centers the axes at zero when the interpolating dimensions have odd size. + +# Examples +```jldoctest +julia> A = reshape([0.0; 1.0; 0.0], (3, 1)); + +julia> itps = cachedinterpolators(A, 1); + +julia> itps[1](2.0) # quadratic-interpolated peak of [0, 1, 0] +0.75 + +julia> itps_c = cachedinterpolators(A, 1, (2,)); # origin at index 2 + +julia> axes(itps_c[1]) # coordinates shifted; 0 now addresses the peak +(-1:1,) + +julia> itps_c[1](0.0) # peak via centered coordinates +0.75 +``` """ function cachedinterpolators(parent::Array{T, M}, N, origin = ntuple(d -> 0, N)) where {T, M} 0 <= N <= M || error("N must be between 0 and $M") @@ -127,13 +172,6 @@ end return icoefs[wis...] end -struct CoefsWrapper{N, A} - coefs::A -end - -Base.size(itp::CoefsWrapper{N}) where {N} = splitN(size(itp.coefs), Val(N))[1] -Base.size(itp::CoefsWrapper{N}, d) where {N} = d <= N ? size(itp.coefs, d) : 1 - # FIXME: this function cheats dangerously, because it does _not_ # update the cache. This is equivalent to the assumption you've called # getindex for the current `(x_1, x_2, ...)` location before calling @@ -155,10 +193,6 @@ end ### Potential deprecations # if AbstractInterpolation <: AbstractArray goes away, this can be deprecated -getindex(itp::CachedInterpolation{T, N, M, O, K}, xs::Vararg{Int, N}) where {T, N, M, O, K} = itp(xs...) - -### Deprecations - -@deprecate getindex(itp::CachedInterpolation{T, N, M, O, K}, xs::Vararg{Number, N}) where {T, N, M, O, K} itp(xs...) +getindex(itp::CachedInterpolation{T, N, M, O, K}, xs::Vararg{Integer, N}) where {T, N, M, O, K} = itp(xs...) end # module diff --git a/test/runtests.jl b/test/runtests.jl index 6b54245..36fd355 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,5 @@ import CachedInterpolations -using Interpolations, Test +using Aqua, Documenter, ExplicitImports, Interpolations, Test @testset "CachedInterpolations" begin A = reshape([0; 1; 0], (3, 1)) @@ -25,6 +25,14 @@ using Interpolations, Test @test Interpolations.gradient(C[1, 2, 2], 3.2, 3.8) === Interpolations.gradient(Ai, 3.2, 3.8, 1, 2, 2) + # gradient! + g = zeros(2) + Interpolations.gradient!(g, C[1, 2, 2], 3.2, 3.8) + @test g ≈ collect(Interpolations.gradient(C[1, 2, 2], 3.2, 3.8)) + + # getindex with Integer arguments + @test C[1, 2, 2][3, 4] == C[1, 2, 2](3.0, 4.0) + # With origin C = CachedInterpolations.cachedinterpolators(A, 2, (4, 4)) @test C[1, 2, 2](-0.8, 0.8) ≈ Ai(3.2, 4.8, 1, 2, 2) @@ -32,10 +40,36 @@ using Interpolations, Test @test C[1, 2, 2](-0.8, -0.2) ≈ Ai(3.2, 3.8, 1, 2, 2) @test Interpolations.gradient(C[1, 2, 2], -0.8, -0.2) ≈ Interpolations.gradient(Ai, 3.2, 3.8, 1, 2, 2) + # axes with origin offset + c_orig = C[1, 1, 1] + @test axes(c_orig) == (-3:3, -3:3) + @test axes(c_orig, 1) == -3:3 + @test axes(c_orig, 3) == Base.OneTo(1) + # Check for Float32 with Float64 indexes, since that's the # default mismatch case A = rand(Float32, 7, 7, 2, 2, 3) Ai = interpolate!(A, (Q, Q, NoInterp(), NoInterp(), NoInterp())) C = CachedInterpolations.cachedinterpolators(A, 2, (4, 4)) @test @inferred(C[1, 2, 2](-0.8, 0.8)) ≈ Ai(3.2, 4.8, 1, 2, 2) + + @testset "Doctests" begin + DocMeta.setdocmeta!(CachedInterpolations, :DocTestSetup, :(using CachedInterpolations); recursive=true) + doctest(CachedInterpolations; manual=false) + end + + @testset "Aqua" begin + Aqua.test_all(CachedInterpolations) + end + + @testset "ExplicitImports" begin + # weightedindexes, value_weights, gradient_weights, InterpGetindex, gradient, gradient! + # are non-public Interpolations internals required by this package's implementation. + # Base.IteratorsMD.split is a non-public Base internal with no public alternative. + test_explicit_imports( + CachedInterpolations; + all_explicit_imports_are_public=(; ignore=(:weightedindexes, :value_weights, :gradient_weights, :InterpGetindex)), + all_qualified_accesses_are_public=(; ignore=(:IteratorsMD, :gradient, :gradient!, :split, :OneTo)), + ) + end end