From 0ece3a0d6be1c9cccfa5dba197b524db5fd22010 Mon Sep 17 00:00:00 2001 From: Herman Sletmoen Date: Sat, 25 Apr 2026 14:42:25 +0200 Subject: [PATCH 1/8] Add Filon integration type for efficient integration weighted by spherical Bessel functions --- src/SymBoltz.jl | 4 +- src/filon.jl | 127 ++++++++++++++++++++++++++++++++++++++++++++++++ src/solve.jl | 1 - 3 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 src/filon.jl diff --git a/src/SymBoltz.jl b/src/SymBoltz.jl index 27e5b754..56604fa9 100644 --- a/src/SymBoltz.jl +++ b/src/SymBoltz.jl @@ -11,6 +11,7 @@ using OhMyThreads using Base.Threads using Setfield using StaticArrays +using NumericalIntegration # TODO: generate gravity equations # TODO: modified gravity: coupled quintessence; DGP, parametrized framework, EFT of LSS, ... @@ -40,6 +41,7 @@ include("models/curvature.jl") include("models/inflation.jl") include("models/cosmologies.jl") include("solve.jl") +include("filon.jl") include("observables/distances.jl") include("observables/fourier.jl") include("observables/angular.jl") @@ -51,7 +53,7 @@ export CosmologyProblem, CosmologySolution export background, perturbations, expandeq export solve, solvebg, solvept, remake, issuccess, parameter_updater export parameters_Planck18 -export spectrum_primordial, spectrum_matter, spectrum_matter_nonlinear, spectrum_cmb, correlation_function, variance_matter, stddev_matter, los_integrate, source_grid, source_grid_adaptive, sound_horizon, distance_luminosity, SphericalBesselCache +export spectrum_primordial, spectrum_matter, spectrum_matter_nonlinear, spectrum_cmb, correlation_function, variance_matter, stddev_matter, los_integrate, source_grid, source_grid_adaptive, sound_horizon, distance_luminosity, SphericalBesselCache, SphericalBesselIntegralCache export express_derivatives using PrecompileTools: @compile_workload diff --git a/src/filon.jl b/src/filon.jl new file mode 100644 index 00000000..4b99a20f --- /dev/null +++ b/src/filon.jl @@ -0,0 +1,127 @@ +using Bessels + +struct SphericalBesselIntegralCache{T <: AbstractFloat} + l::Vector{Int} + x::Vector{T} # uniform grid + I0::Matrix{T} # ∫dx jₗ(x) x⁰ cumulatively to this x-point + I1::Matrix{T} # ∫dx jₗ(x) x¹ cumulatively to this x-point + dI0::Matrix{T} # ∫dx jₗ(x) x⁰ from this to next x-point (0 for last point) + dI1::Matrix{T} # ∫dx jₗ(x) x¹ from this to next x-point (0 for last point) + invdx::T # 1/dx (constant because grid is uniform) + tmp1::Vector{T} # temporary workspace per l + tmp2::Vector{T} # temporary workspace per l + tmp3::Vector{T} # temporary workspace per l + tmp4::Vector{T} # temporary workspace per l +end + +# TODO: adaptively grid size from tolerance parameter? can compare to sin +function SphericalBesselIntegralCache(ls, x1, x2; mindx_grid = 2π/32, mindx_integral = 2π/128, method = TrapezoidalEven()) + Nx = 1 + Int(ceil((x2 - x1) / mindx_grid)) + xs = range(x1, x2, length=Nx) + dx = step(xs) + xs = collect(xs) + + mindx_integral ≤ dx ≤ mindx_grid || error("Integration grid must be finer than interpolation grid") + + T = typeof(x1) + I0s = zeros(T, length(ls), length(xs)) # TODO: add dummy for final+1 lookup? + I1s = zeros(T, length(ls), length(xs)) + dI0s = zeros(T, length(ls), length(xs)) + dI1s = zeros(T, length(ls), length(xs)) + + Nsubx = 1 + Int(ceil((xs[2] - xs[1]) / mindx_integral)) + subis = zeros(T, Nsubx) + + @inbounds for il in eachindex(ls) + l = ls[il] + for ix in 1:length(xs)-1 + subxs = range(xs[ix], xs[ix+1]; length = Nsubx) # fine integration grid + subis .= sphericalbesselj.(l, subxs) # jₗ(x) x⁰ + dI0s[il, ix] = NumericalIntegration.integrate(subxs, subis, method) # ∫ jₗ(x) x⁰ dx + subis .*= subxs # jₗ(x) x¹ + dI1s[il, ix] = NumericalIntegration.integrate(subxs, subis, method) # ∫ jₗ(x) x¹ dx + I0s[il, ix+1] = I0s[il, ix] + dI0s[il, ix] # cumulative + I1s[il, ix+1] = I1s[il, ix] + dI1s[il, ix] # cumulative + end + end + + tmp1 = zeros(T, length(ls)) + tmp2 = zeros(T, length(ls)) + tmp3 = zeros(T, length(ls)) + tmp4 = zeros(T, length(ls)) + return SphericalBesselIntegralCache{T}(ls, xs, I0s, I1s, dI0s, dI1s, 1.0/dx, tmp1, tmp2, tmp3, tmp4) +end + +function Base.show(io::IO, jlint::SphericalBesselIntegralCache) + print(io, "jₗ(x) integration cache ") + print(io, "for $(jlint.l[begin]) ≤ l ≤ $(jlint.l[end]) and ") + print(io, "$(jlint.x[begin]) ≤ x ≤ $(jlint.x[end]) ") + print(io, "($(Base.format_bytes(Base.summarysize(jlint))))\n") +end + +# TODO: Hermite interpolation, have derivative for free: could use both (I0, w) and (I1, w*x)? https://en.wikipedia.org/wiki/Cubic_Hermite_spline +Base.@propagate_inbounds function (jlint::SphericalBesselIntegralCache)(out0::AbstractArray{<:AbstractFloat}, out1::AbstractArray{<:AbstractFloat}, x) + x1 = jlint.x[begin] + ix = 1 + unsafe_trunc(Int, (x-x1)*jlint.invdx) # O(1) uniform grid lookup (corresponding to left point) # TODO: floor(Int, instead? + x₋ = jlint.x[ix] + w = (x - x₋) * jlint.invdx + ibase = (ix - 1) * length(jlint.l) + @inbounds @fastmath #=@simd=# for il in eachindex(out0) + i = ibase + il + I0 = jlint.I0[i] + I1 = jlint.I1[i] + dI0 = jlint.dI0[i] # TODO: don't look up; keep stored for prev/next point in main integrate loop + dI1 = jlint.dI1[i] + out0[il] = muladd(w, dI0, I0) # TODO: avoid doing this so many times; make function that computes I(b)-I(a) instead + out1[il] = muladd(w, dI1, I1) + end + return nothing +end + +function integrate(out, jlint::SphericalBesselIntegralCache, xs::AbstractArray, ys::AbstractArray) + xs[2] > xs[1] || error("x-domain is not strictly increasing") + xs[begin] ≥ jlint.x[begin] || error("$(xs[begin]) is outside integral cache left bound $(jlint.x[begin])") + xs[end] ≤ jlint.x[end] || error("$(xs[end]) is outside integral cache right bound $(jlint.x[end])") + out .= 0.0 + I0₋, I0₊, I1₋, I1₊ = jlint.tmp1, jlint.tmp2, jlint.tmp3, jlint.tmp4 # TODO: thread-unsafe + i₋ = 1 + x₋ = xs[i₋] + y₋ = ys[i₋] + jlint(I0₋, I1₋, x₋) + @inbounds @fastmath for i₊ in 2:length(xs) # TODO: write out and unroll loop to look up jlint(x) once per x and l without setindex? + x₊ = xs[i₊] + y₊ = ys[i₊] + A = (y₊ - y₋) / (x₊ - x₋) # A in y = Ax + B + B = y₋ - A*x₋ # B in y = Ax + B + # TODO: quadratic; C? + jlint(I0₊, I1₊, x₊) # TODO: write out this to avoid l-indexing? + #=@simd=# for il in eachindex(out) + I0 = I0₊[il] - I0₋[il] # ∫dx jₗ(x) x⁰ from x₋ to x₊ + I1 = I1₊[il] - I1₋[il] # ∫dx jₗ(x) x¹ from x₋ to x₊ # TODO: interpolate differences instead? + out[il] = muladd(A, I1, muladd(B, I0, out[il])) # i.e. out[il] += A*I1 + B*I0 + end + I0₋, I1₋, I0₊, I1₊ = I0₊, I1₊, I0₋, I1₋ # pointer-like swap for next iteration + i₋, x₋, y₋ = i₊, x₊, y₊ # pass to next iteration + end + return out +end + +# Convenience dispatches +function integrate(out, jlint::SphericalBesselIntegralCache, f::Function, x1, x2; kw...) + xs = range(x1, x2; kw...) + ys = f.(xs) + return integrate(out, jlint, xs, ys) +end +function integrate(out, jlint::SphericalBesselIntegralCache, f::Function, x2; kw...) + x1 = jlint.x[begin] + return integrate(out, jlint, f, x1, x2; kw...) +end +function integrate(out, jlint::SphericalBesselIntegralCache, f::Function; kw...) + x2 = jlint.x[end] + return integrate(out, jlint, f, x2; kw...) +end +function integrate(jlint::SphericalBesselIntegralCache, args...; kw...) + out = zeros(eltype(jlint.I0), length(jlint.l)) + integrate(out, jlint, args...; kw...) + return out +end \ No newline at end of file diff --git a/src/solve.jl b/src/solve.jl index 5d2aa058..48dfbb36 100644 --- a/src/solve.jl +++ b/src/solve.jl @@ -9,7 +9,6 @@ import OhMyThreads: TaskLocalValue import SymbolicIndexingInterface import SymbolicIndexingInterface: getsym, setsym_oop, parameter_values using RecursiveFactorization # makes RFLUFactorization() available as linear solver: https://docs.sciml.ai/LinearSolve/stable/tutorials/accelerating_choices/ -import NumericalIntegration: cumul_integrate using SparseArrays import NonlinearSolve.BracketingNonlinearSolve: AbstractBracketingAlgorithm From da33e25a10cd98a63e03afe4190e827d4b4dea66 Mon Sep 17 00:00:00 2001 From: Herman Sletmoen Date: Sat, 25 Apr 2026 18:02:09 +0200 Subject: [PATCH 2/8] Test Filon integration --- test/runtests.jl | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index f1ed8014..5a47a155 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -717,3 +717,37 @@ end ] @test isequal(expandeq(eqs, D(ρ); protect = Set(ℋ)), -4ℋ*ρ) end + +@testset "Filon integration" begin + ls = 0:100 + jlint = SymBoltz.SphericalBesselIntegralCache(ls, 0.0, 100.0; mindx_grid = 2π/128, mindx_integral = 2π/1024, method = SymBoltz.SimpsonEven()) + + # Test analytical result exactly at grid points + I1s = jlint.I1[1, :] + I1s_anal = 1 .- cos.(jlint.x) + @test all(isapprox.(I1s, I1s_anal; atol = 1e-10)) + + # Test analytical result exactly between grid points (where interpolation error should be largest) + xmid = (jlint.x[begin:end-1] .+ jlint.x[begin+1:end]) ./ 2 + I1s = map(x -> SymBoltz.integrate(jlint, [0.0, x], [0.0, x])[begin], xmid) + I1s_anal = 1 .- cos.(xmid) + @test all(isapprox.(I1s, I1s_anal; atol = 1e-3)) + + # Test that integrals interpolate almost exactly at grid points + I0s = map(x -> SymBoltz.integrate(jlint, [0.0, x], [1.0, 1.0])[begin], jlint.x[2:end]) + I1s = map(x -> SymBoltz.integrate(jlint, [0.0, x], [0.0, x])[begin], jlint.x[2:end]) + @test all(isapprox.(I0s, jlint.I0[1, 2:end]; atol = 1e-15)) + @test all(isapprox.(I1s, jlint.I1[1, 2:end]; atol = 1e-13)) + + # Test against finer brute-force integral + f = x -> SymBoltz.sphericalbesselj(0, x/10.0) + + xs = range(jlint.x[begin], jlint.x[end], length = 1000) + ys = f.(xs) .* SymBoltz.sphericalbesselj.(transpose(ls), xs) + IA = SymBoltz.NumericalIntegration.integrate(xs, ys, SymBoltz.SimpsonEven()) + + xs = range(jlint.x[begin], jlint.x[end], length = 100) + ys = f.(xs) + IB = SymBoltz.integrate(jlint, xs, ys) + @test all(isapprox.(IA, IB; atol = 1e-3)) +end From 61c4cbbc84f30c3a7a5cea17426c410097a94ae9 Mon Sep 17 00:00:00 2001 From: Herman Sletmoen Date: Sat, 25 Apr 2026 23:23:16 +0200 Subject: [PATCH 3/8] Add dispatch for LOS integration with Filon integral cache --- src/observables/angular.jl | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/observables/angular.jl b/src/observables/angular.jl index 969fce38..8a138c26 100644 --- a/src/observables/angular.jl +++ b/src/observables/angular.jl @@ -171,6 +171,28 @@ function los_integrate(sol::CosmologySolution, ls::AbstractVector, τs::Abstract return los_integrate(Ss, ls, τs, ks, jl; kwargs...) end +function los_integrate(Ss::AbstractArray{T, 3}, ls::AbstractVector, τs::AbstractVector, ks::AbstractVector, jlint::SphericalBesselIntegralCache; kw...) where {T <: Real} + @assert size(Ss, 1) == 1 + Ss = @view Ss[1, :, :] + τ0 = τs[end] + reverse!(τs) # reverse, i.e. integrate back in time (jlint set up to handle increasing x) + reverse!(Ss, dims=1) + Is = similar(Ss, length(ls), length(ks)) + xs = similar(τs) + for ik in eachindex(ks) + k = ks[ik] + xs .= k .* (τ0 .- τs) + ys = @view Ss[:, ik] + is = @view Is[:, ik] + SymBoltz.integrate(is, jlint, xs, ys) + is ./= k # handle change of var! + end + Is = transpose(Is) + reverse!(τs) # undo + reverse!(Ss, dims=1) # undo + return reshape(Is, (1, size(Is, 1), size(Is, 2))) +end + # TODO: integrate splines instead of trapz! https://discourse.julialang.org/t/how-to-speed-up-the-numerical-integration-with-interpolation/96223/5 @doc raw""" spectrum_cmb(ΘlAs::AbstractMatrix, ΘlBs::AbstractMatrix, P0s::AbstractVector, ls::AbstractVector, ks::AbstractVector; integrator = TrapezoidalRule(), normalization = :Cl, thread = true) @@ -209,7 +231,7 @@ function spectrum_cmb(ΘlAs::AbstractMatrix, ΘlBs::AbstractMatrix, P0s::Abstrac end """ - spectrum_cmb(modes::AbstractVector{<:Symbol}, prob::CosmologyProblem, jl::SphericalBesselCache; normalization = :Cl, unit = nothing, kτ0s = 0.1*jl.l[begin]:2π/2:10*jl.l[end], xs = 0.0:0.0008:1.0, l_limber = 50, integrator = TrapezoidalRule(), bgopts = (alg = bgalg(prob), reltol = 1e-7, abstol = 1e-7), ptopts = (alg = ptalg(prob),, reltol = 1e-5, abstol = 1e-5), sourceopts = (rtol = 1e-3, atol = 0.9), coarse_length = 9, thread = true, verbose = false, kwargs...) + spectrum_cmb(modes::AbstractVector{<:Symbol}, prob::CosmologyProblem, jl::Union{SphericalBesselCache, SphericalBesselIntegralCache}; normalization = :Cl, unit = nothing, kτ0s = 0.1*jl.l[begin]:2π/2:10*jl.l[end], xs = 0.0:0.0008:1.0, l_limber = 50, integrator = TrapezoidalRule(), bgopts = (alg = bgalg(prob), reltol = 1e-7, abstol = 1e-7), ptopts = (alg = ptalg(prob),, reltol = 1e-5, abstol = 1e-5), sourceopts = (rtol = 1e-3, atol = 0.9), coarse_length = 9, thread = true, verbose = false, kwargs...) Compute angular CMB power spectra ``Cₗᴬᴮ`` at angular wavenumbers `ls` from the cosmological problem `prob`. The requested `modes` are specified as a vector of symbols in the form `:AB`, where `A` and `B` are `T` (temperature), `E` (E-mode polarization) or `ψ` (lensing). @@ -235,7 +257,7 @@ modes = [:TT, :TE, :ψψ, :ψT] Dls = spectrum_cmb(modes, prob, jl; normalization = :Dl, unit = u"μK") ``` """ -function spectrum_cmb(modes::AbstractVector{<:Symbol}, prob::CosmologyProblem, jl::SphericalBesselCache; normalization = :Cl, unit = nothing, kτ0s = 0.1*jl.l[begin]:2π/2:10*jl.l[end], xs = 0.0:0.0008:1.0, l_limber = 50, integrator = TrapezoidalRule(), bgopts = (alg = bgalg(prob), reltol = 1e-7, abstol = 1e-7), ptopts = (alg = ptalg(prob), reltol = 1e-5, abstol = 1e-5), sourceopts = (rtol = 1e-3, atol = 0.9), coarse_length = 9, thread = true, verbose = false, kwargs...) +function spectrum_cmb(modes::AbstractVector{<:Symbol}, prob::CosmologyProblem, jl::Union{SphericalBesselCache, SphericalBesselIntegralCache}; normalization = :Cl, unit = nothing, kτ0s = 0.1*jl.l[begin]:2π/2:10*jl.l[end], xs = 0.0:0.0008:1.0, l_limber = 50, integrator = TrapezoidalRule(), bgopts = (alg = bgalg(prob), reltol = 1e-7, abstol = 1e-7), ptopts = (alg = ptalg(prob), reltol = 1e-5, abstol = 1e-5), sourceopts = (rtol = 1e-3, atol = 0.9), coarse_length = 9, thread = true, verbose = false, kwargs...) ls = jl.l sol = solve(prob; bgopts, verbose) τ0 = getsym(sol, prob.M.τ0)(sol) From 18bd7bcb4a25acae36edffa8a51ad48ec82948f3 Mon Sep 17 00:00:00 2001 From: Herman Sletmoen Date: Mon, 27 Apr 2026 10:11:39 +0200 Subject: [PATCH 4/8] Only store cumulative integrals and subtract to get difference on each interval --- src/filon.jl | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/filon.jl b/src/filon.jl index 4b99a20f..b9b7d77d 100644 --- a/src/filon.jl +++ b/src/filon.jl @@ -5,8 +5,6 @@ struct SphericalBesselIntegralCache{T <: AbstractFloat} x::Vector{T} # uniform grid I0::Matrix{T} # ∫dx jₗ(x) x⁰ cumulatively to this x-point I1::Matrix{T} # ∫dx jₗ(x) x¹ cumulatively to this x-point - dI0::Matrix{T} # ∫dx jₗ(x) x⁰ from this to next x-point (0 for last point) - dI1::Matrix{T} # ∫dx jₗ(x) x¹ from this to next x-point (0 for last point) invdx::T # 1/dx (constant because grid is uniform) tmp1::Vector{T} # temporary workspace per l tmp2::Vector{T} # temporary workspace per l @@ -26,8 +24,6 @@ function SphericalBesselIntegralCache(ls, x1, x2; mindx_grid = 2π/32, mindx_int T = typeof(x1) I0s = zeros(T, length(ls), length(xs)) # TODO: add dummy for final+1 lookup? I1s = zeros(T, length(ls), length(xs)) - dI0s = zeros(T, length(ls), length(xs)) - dI1s = zeros(T, length(ls), length(xs)) Nsubx = 1 + Int(ceil((xs[2] - xs[1]) / mindx_integral)) subis = zeros(T, Nsubx) @@ -37,11 +33,9 @@ function SphericalBesselIntegralCache(ls, x1, x2; mindx_grid = 2π/32, mindx_int for ix in 1:length(xs)-1 subxs = range(xs[ix], xs[ix+1]; length = Nsubx) # fine integration grid subis .= sphericalbesselj.(l, subxs) # jₗ(x) x⁰ - dI0s[il, ix] = NumericalIntegration.integrate(subxs, subis, method) # ∫ jₗ(x) x⁰ dx + I0s[il, ix+1] = I0s[il, ix] + NumericalIntegration.integrate(subxs, subis, method) # cumulative ∫ jₗ(x) x⁰ dx subis .*= subxs # jₗ(x) x¹ - dI1s[il, ix] = NumericalIntegration.integrate(subxs, subis, method) # ∫ jₗ(x) x¹ dx - I0s[il, ix+1] = I0s[il, ix] + dI0s[il, ix] # cumulative - I1s[il, ix+1] = I1s[il, ix] + dI1s[il, ix] # cumulative + I1s[il, ix+1] = I1s[il, ix] + NumericalIntegration.integrate(subxs, subis, method) # cumulative ∫ jₗ(x) x¹ dx end end @@ -49,7 +43,7 @@ function SphericalBesselIntegralCache(ls, x1, x2; mindx_grid = 2π/32, mindx_int tmp2 = zeros(T, length(ls)) tmp3 = zeros(T, length(ls)) tmp4 = zeros(T, length(ls)) - return SphericalBesselIntegralCache{T}(ls, xs, I0s, I1s, dI0s, dI1s, 1.0/dx, tmp1, tmp2, tmp3, tmp4) + return SphericalBesselIntegralCache{T}(ls, xs, I0s, I1s, 1.0/dx, tmp1, tmp2, tmp3, tmp4) end function Base.show(io::IO, jlint::SphericalBesselIntegralCache) @@ -62,18 +56,17 @@ end # TODO: Hermite interpolation, have derivative for free: could use both (I0, w) and (I1, w*x)? https://en.wikipedia.org/wiki/Cubic_Hermite_spline Base.@propagate_inbounds function (jlint::SphericalBesselIntegralCache)(out0::AbstractArray{<:AbstractFloat}, out1::AbstractArray{<:AbstractFloat}, x) x1 = jlint.x[begin] - ix = 1 + unsafe_trunc(Int, (x-x1)*jlint.invdx) # O(1) uniform grid lookup (corresponding to left point) # TODO: floor(Int, instead? - x₋ = jlint.x[ix] + ix₋ = 1 + unsafe_trunc(Int, (x-x1)*jlint.invdx) # O(1) uniform grid lookup (corresponding to left point) # TODO: floor(Int, instead? + ix₊ = min(ix₋ + 1, length(jlint.x)) + x₋ = jlint.x[ix₋] w = (x - x₋) * jlint.invdx - ibase = (ix - 1) * length(jlint.l) @inbounds @fastmath #=@simd=# for il in eachindex(out0) - i = ibase + il - I0 = jlint.I0[i] - I1 = jlint.I1[i] - dI0 = jlint.dI0[i] # TODO: don't look up; keep stored for prev/next point in main integrate loop - dI1 = jlint.dI1[i] - out0[il] = muladd(w, dI0, I0) # TODO: avoid doing this so many times; make function that computes I(b)-I(a) instead - out1[il] = muladd(w, dI1, I1) + I0₋ = jlint.I0[il, ix₋] + I1₋ = jlint.I1[il, ix₋] + I0₊ = jlint.I0[il, ix₊] + I1₊ = jlint.I1[il, ix₊] + out0[il] = muladd(w, I0₊ - I0₋, I0₋) + out1[il] = muladd(w, I1₊ - I1₋, I1₋) end return nothing end From f6d18a0a75651ba90e03c67628e12747f03e784f Mon Sep 17 00:00:00 2001 From: Herman Sletmoen Date: Mon, 27 Apr 2026 11:55:41 +0200 Subject: [PATCH 5/8] Use Hermite interpolation for Filon integrals --- src/filon.jl | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/filon.jl b/src/filon.jl index b9b7d77d..458500a7 100644 --- a/src/filon.jl +++ b/src/filon.jl @@ -3,6 +3,7 @@ using Bessels struct SphericalBesselIntegralCache{T <: AbstractFloat} l::Vector{Int} x::Vector{T} # uniform grid + w::Matrix{T} # jₗ(x) at this x-point I0::Matrix{T} # ∫dx jₗ(x) x⁰ cumulatively to this x-point I1::Matrix{T} # ∫dx jₗ(x) x¹ cumulatively to this x-point invdx::T # 1/dx (constant because grid is uniform) @@ -13,7 +14,7 @@ struct SphericalBesselIntegralCache{T <: AbstractFloat} end # TODO: adaptively grid size from tolerance parameter? can compare to sin -function SphericalBesselIntegralCache(ls, x1, x2; mindx_grid = 2π/32, mindx_integral = 2π/128, method = TrapezoidalEven()) +function SphericalBesselIntegralCache(ls, x1 = 0.0, x2 = 10*ls[end]; mindx_grid = 2π/26, mindx_integral = 2π/2048, method = SimpsonEven()) Nx = 1 + Int(ceil((x2 - x1) / mindx_grid)) xs = range(x1, x2, length=Nx) dx = step(xs) @@ -24,6 +25,7 @@ function SphericalBesselIntegralCache(ls, x1, x2; mindx_grid = 2π/32, mindx_int T = typeof(x1) I0s = zeros(T, length(ls), length(xs)) # TODO: add dummy for final+1 lookup? I1s = zeros(T, length(ls), length(xs)) + ws = zeros(T, length(ls), length(xs)) Nsubx = 1 + Int(ceil((xs[2] - xs[1]) / mindx_integral)) subis = zeros(T, Nsubx) @@ -37,13 +39,14 @@ function SphericalBesselIntegralCache(ls, x1, x2; mindx_grid = 2π/32, mindx_int subis .*= subxs # jₗ(x) x¹ I1s[il, ix+1] = I1s[il, ix] + NumericalIntegration.integrate(subxs, subis, method) # cumulative ∫ jₗ(x) x¹ dx end + ws[il, :] = sphericalbesselj.(l, xs) end tmp1 = zeros(T, length(ls)) tmp2 = zeros(T, length(ls)) tmp3 = zeros(T, length(ls)) tmp4 = zeros(T, length(ls)) - return SphericalBesselIntegralCache{T}(ls, xs, I0s, I1s, 1.0/dx, tmp1, tmp2, tmp3, tmp4) + return SphericalBesselIntegralCache{T}(ls, xs, ws, I0s, I1s, 1.0/dx, tmp1, tmp2, tmp3, tmp4) end function Base.show(io::IO, jlint::SphericalBesselIntegralCache) @@ -54,24 +57,36 @@ function Base.show(io::IO, jlint::SphericalBesselIntegralCache) end # TODO: Hermite interpolation, have derivative for free: could use both (I0, w) and (I1, w*x)? https://en.wikipedia.org/wiki/Cubic_Hermite_spline -Base.@propagate_inbounds function (jlint::SphericalBesselIntegralCache)(out0::AbstractArray{<:AbstractFloat}, out1::AbstractArray{<:AbstractFloat}, x) +Base.@propagate_inbounds @fastmath function (jlint::SphericalBesselIntegralCache)(out0::AbstractArray{<:AbstractFloat}, out1::AbstractArray{<:AbstractFloat}, x) x1 = jlint.x[begin] - ix₋ = 1 + unsafe_trunc(Int, (x-x1)*jlint.invdx) # O(1) uniform grid lookup (corresponding to left point) # TODO: floor(Int, instead? + ix₋ = 1 + unsafe_trunc(Int, (x - x1) * jlint.invdx) # O(1) uniform grid lookup (corresponding to left point) # TODO: floor(Int, instead? ix₊ = min(ix₋ + 1, length(jlint.x)) x₋ = jlint.x[ix₋] - w = (x - x₋) * jlint.invdx - @inbounds @fastmath #=@simd=# for il in eachindex(out0) + x₊ = jlint.x[ix₊] + dx = x₊ - x₋ + t = (x - x₋) * jlint.invdx + h00 = (1 + 2t) * (1 - t)^2 # https://en.wikipedia.org/wiki/Cubic_Hermite_spline + h01 = t^2 * (3 - 2t) + h10 = t * (1 - t)^2 * dx # derivatives scaled by dx to map from x to t ∈ [0, 1] + h11 = t^2 * (t - 1) * dx # derivatives scaled by dx to map from x to t ∈ [0, 1] + @inbounds #=@simd=# for il in eachindex(out0) + w₋ = jlint.w[il, ix₋] I0₋ = jlint.I0[il, ix₋] I1₋ = jlint.I1[il, ix₋] + I0₋′ = w₋ + I1₋′ = w₋ * x₋ + w₊ = jlint.w[il, ix₊] I0₊ = jlint.I0[il, ix₊] I1₊ = jlint.I1[il, ix₊] - out0[il] = muladd(w, I0₊ - I0₋, I0₋) - out1[il] = muladd(w, I1₊ - I1₋, I1₋) + I0₊′ = w₊ + I1₊′ = w₊ * x₊ + out0[il] = h00 * I0₋ + h10 * I0₋′ + h01 * I0₊ + h11 * I0₊′ + out1[il] = h00 * I1₋ + h10 * I1₋′ + h01 * I1₊ + h11 * I1₊′ end return nothing end -function integrate(out, jlint::SphericalBesselIntegralCache, xs::AbstractArray, ys::AbstractArray) +@fastmath function integrate(out, jlint::SphericalBesselIntegralCache, xs::AbstractArray, ys::AbstractArray) xs[2] > xs[1] || error("x-domain is not strictly increasing") xs[begin] ≥ jlint.x[begin] || error("$(xs[begin]) is outside integral cache left bound $(jlint.x[begin])") xs[end] ≤ jlint.x[end] || error("$(xs[end]) is outside integral cache right bound $(jlint.x[end])") @@ -81,7 +96,8 @@ function integrate(out, jlint::SphericalBesselIntegralCache, xs::AbstractArray, x₋ = xs[i₋] y₋ = ys[i₋] jlint(I0₋, I1₋, x₋) - @inbounds @fastmath for i₊ in 2:length(xs) # TODO: write out and unroll loop to look up jlint(x) once per x and l without setindex? + @inbounds while i₋ < length(xs) # TODO: write out and unroll loop to look up jlint(x) once per x and l without setindex? + i₊ = i₋ + 1 x₊ = xs[i₊] y₊ = ys[i₊] A = (y₊ - y₋) / (x₊ - x₋) # A in y = Ax + B @@ -91,7 +107,7 @@ function integrate(out, jlint::SphericalBesselIntegralCache, xs::AbstractArray, #=@simd=# for il in eachindex(out) I0 = I0₊[il] - I0₋[il] # ∫dx jₗ(x) x⁰ from x₋ to x₊ I1 = I1₊[il] - I1₋[il] # ∫dx jₗ(x) x¹ from x₋ to x₊ # TODO: interpolate differences instead? - out[il] = muladd(A, I1, muladd(B, I0, out[il])) # i.e. out[il] += A*I1 + B*I0 + out[il] += A*I1 + B*I0 end I0₋, I1₋, I0₊, I1₊ = I0₊, I1₊, I0₋, I1₋ # pointer-like swap for next iteration i₋, x₋, y₋ = i₊, x₊, y₊ # pass to next iteration From 4383217657c8a9867928f211d0733e6230ebeb13 Mon Sep 17 00:00:00 2001 From: Herman Sletmoen Date: Mon, 27 Apr 2026 12:07:26 +0200 Subject: [PATCH 6/8] Update Filon integration tests --- test/runtests.jl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 5a47a155..e3c5cc6a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -719,10 +719,10 @@ end end @testset "Filon integration" begin - ls = 0:100 - jlint = SymBoltz.SphericalBesselIntegralCache(ls, 0.0, 100.0; mindx_grid = 2π/128, mindx_integral = 2π/1024, method = SymBoltz.SimpsonEven()) + ls = 0:10:1000 + jlint = SymBoltz.SphericalBesselIntegralCache(ls, 0.0, 1000.0; mindx_grid = 2π/26, mindx_integral = 2π/2048, method = SymBoltz.SimpsonEven()) - # Test analytical result exactly at grid points + # Test analytical result exactly at grid points (depends on mindx_integral) I1s = jlint.I1[1, :] I1s_anal = 1 .- cos.(jlint.x) @test all(isapprox.(I1s, I1s_anal; atol = 1e-10)) @@ -731,23 +731,23 @@ end xmid = (jlint.x[begin:end-1] .+ jlint.x[begin+1:end]) ./ 2 I1s = map(x -> SymBoltz.integrate(jlint, [0.0, x], [0.0, x])[begin], xmid) I1s_anal = 1 .- cos.(xmid) - @test all(isapprox.(I1s, I1s_anal; atol = 1e-3)) + @test all(isapprox.(I1s, I1s_anal; atol = 1e-5)) - # Test that integrals interpolate almost exactly at grid points + # Test that integrals interpolate exactly at grid points I0s = map(x -> SymBoltz.integrate(jlint, [0.0, x], [1.0, 1.0])[begin], jlint.x[2:end]) I1s = map(x -> SymBoltz.integrate(jlint, [0.0, x], [0.0, x])[begin], jlint.x[2:end]) - @test all(isapprox.(I0s, jlint.I0[1, 2:end]; atol = 1e-15)) - @test all(isapprox.(I1s, jlint.I1[1, 2:end]; atol = 1e-13)) + @test all(isequal.(I0s, jlint.I0[1, 2:end])) + @test all(isequal.(I1s, jlint.I1[1, 2:end])) # Test against finer brute-force integral f = x -> SymBoltz.sphericalbesselj(0, x/10.0) - xs = range(jlint.x[begin], jlint.x[end], length = 1000) + xs = range(jlint.x[begin], jlint.x[end], step = 2π/1024) ys = f.(xs) .* SymBoltz.sphericalbesselj.(transpose(ls), xs) - IA = SymBoltz.NumericalIntegration.integrate(xs, ys, SymBoltz.SimpsonEven()) + IA = SymBoltz.NumericalIntegration.integrate(xs, ys, SymBoltz.TrapezoidalEven()) - xs = range(jlint.x[begin], jlint.x[end], length = 100) + xs = range(jlint.x[begin], jlint.x[end], step = 2π/128) ys = f.(xs) IB = SymBoltz.integrate(jlint, xs, ys) - @test all(isapprox.(IA, IB; atol = 1e-3)) + @test all(isapprox.(IA, IB; atol = 1e-6)) end From bd9e5efdc3aee548f1a21ceb855980a7063622c1 Mon Sep 17 00:00:00 2001 From: Herman Sletmoen Date: Mon, 27 Apr 2026 13:52:53 +0200 Subject: [PATCH 7/8] Make Filon integration multithreaded --- src/filon.jl | 5 +++-- src/observables/angular.jl | 17 ++++++++++++----- test/runtests.jl | 10 ++++++++++ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/filon.jl b/src/filon.jl index 458500a7..e5bb9b98 100644 --- a/src/filon.jl +++ b/src/filon.jl @@ -86,12 +86,13 @@ Base.@propagate_inbounds @fastmath function (jlint::SphericalBesselIntegralCache return nothing end -@fastmath function integrate(out, jlint::SphericalBesselIntegralCache, xs::AbstractArray, ys::AbstractArray) +@fastmath function integrate(out, jlint::SphericalBesselIntegralCache, xs::AbstractArray, ys::AbstractArray, I0₋ = jlint.tmp1, I0₊ = jlint.tmp2, I1₋ = jlint.tmp3, I1₊ = jlint.tmp4; thread = false) xs[2] > xs[1] || error("x-domain is not strictly increasing") xs[begin] ≥ jlint.x[begin] || error("$(xs[begin]) is outside integral cache left bound $(jlint.x[begin])") xs[end] ≤ jlint.x[end] || error("$(xs[end]) is outside integral cache right bound $(jlint.x[end])") + size(I0₋) == size(I0₊) == size(I1₋) == size(I1₊) == size(jlint.l) || error("Workspaces have wrong size") + thread && I0₋ === jlint.tmp1 && I0₊ === jlint.tmp2 && I1₋ === jlint.tmp3 && I1₊ === jlint.tmp4 && error("Multithreading requires task-local workspaces") out .= 0.0 - I0₋, I0₊, I1₋, I1₊ = jlint.tmp1, jlint.tmp2, jlint.tmp3, jlint.tmp4 # TODO: thread-unsafe i₋ = 1 x₋ = xs[i₋] y₋ = ys[i₋] diff --git a/src/observables/angular.jl b/src/observables/angular.jl index 8a138c26..42202a37 100644 --- a/src/observables/angular.jl +++ b/src/observables/angular.jl @@ -171,20 +171,27 @@ function los_integrate(sol::CosmologySolution, ls::AbstractVector, τs::Abstract return los_integrate(Ss, ls, τs, ks, jl; kwargs...) end -function los_integrate(Ss::AbstractArray{T, 3}, ls::AbstractVector, τs::AbstractVector, ks::AbstractVector, jlint::SphericalBesselIntegralCache; kw...) where {T <: Real} +function los_integrate(Ss::AbstractArray{T, 3}, ls::AbstractVector, τs::AbstractVector, ks::AbstractVector, jlint::SphericalBesselIntegralCache; thread = true, kw...) where {T <: Real} @assert size(Ss, 1) == 1 Ss = @view Ss[1, :, :] τ0 = τs[end] reverse!(τs) # reverse, i.e. integrate back in time (jlint set up to handle increasing x) reverse!(Ss, dims=1) - Is = similar(Ss, length(ls), length(ks)) - xs = similar(τs) - for ik in eachindex(ks) + Is = zeros(eltype(Ss), length(ls), length(ks)) + @localize Is @tasks for ik in eachindex(ks) # parallellize independent loop iterations + @set scheduler = thread ? :dynamic : :serial + @local begin # define task-local values (declared once for all loop iterations) + xs = similar(τs) + tmp1 = similar(Ss, length(ls)) + tmp2 = similar(Ss, length(ls)) + tmp3 = similar(Ss, length(ls)) + tmp4 = similar(Ss, length(ls)) + end k = ks[ik] xs .= k .* (τ0 .- τs) ys = @view Ss[:, ik] is = @view Is[:, ik] - SymBoltz.integrate(is, jlint, xs, ys) + SymBoltz.integrate(is, jlint, xs, ys, tmp1, tmp2, tmp3, tmp4) is ./= k # handle change of var! end Is = transpose(Is) diff --git a/test/runtests.jl b/test/runtests.jl index e3c5cc6a..bece9de0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -750,4 +750,14 @@ end ys = f.(xs) IB = SymBoltz.integrate(jlint, xs, ys) @test all(isapprox.(IA, IB; atol = 1e-6)) + + # Multithreading / workspaces + tmp1 = zeros(length(jlint.l)) + tmp2 = zeros(length(jlint.l)) + tmp3 = zeros(length(jlint.l)) + tmp4 = zeros(length(jlint.l)) + @test SymBoltz.integrate(jlint, [0.0, π], [0.0, π], tmp1, tmp2, tmp3, tmp4; thread = true)[begin] ≈ 2.0 + tmp3 = zeros(length(jlint.l)+1) + @test_throws "wrong size" SymBoltz.integrate(jlint, [0.0, π], [0.0, π], tmp1, tmp2, tmp3, tmp4) + @test_throws "requires task-local" SymBoltz.integrate(jlint, [0.0, 1.0], [0.0, 1.0]; thread = true) end From 9e1c5f5f2dc1f063cbb7f78d031daef1652adf2e Mon Sep 17 00:00:00 2001 From: Herman Sletmoen Date: Mon, 27 Apr 2026 13:58:53 +0200 Subject: [PATCH 8/8] Compute CMB power spectrum with Filon integration in CLASS comparison --- docs/src/comparison.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/src/comparison.md b/docs/src/comparison.md index 499388de..ebeefba6 100644 --- a/docs/src/comparison.md +++ b/docs/src/comparison.md @@ -413,7 +413,8 @@ end l = 20:20:2000 # CLASS default is lmax = 2500 Dl1 = Dl_class([:TT, :TE, :EE, :phiphi, :TPhi, :Ephi], l, pars) jl = SphericalBesselCache(l) -Dl2 = Dl_symboltz([:TT, :TE, :EE, :ψψ, :ψT, :ψE], jl, pars) +jlint = SphericalBesselIntegralCache(l) +Dl2 = Dl_symboltz([:TT, :TE, :EE, :ψψ, :ψT, :ψE], jlint, pars) plot_compare(l, l, Dl1[:, 1], Dl2[:, 1], "l", "Dₗ(TT)"; tol = 2e-12) ``` ```@example class