I have this simple example in which I compare the performance of Reactant with that of CUDA.jl. However, the Reactant version is about 10x slower.
using LinearAlgebra
using CUDA
using Reactant
using BenchmarkTools
# Problem setup: real element type and vector length shared by every backend.
T = Float32
N = 1_000_000

# Host (CPU) data. The coefficients are sqrt(1), sqrt(2), ..., sqrt(N - 1);
# swap in `rand(T, N - 1)` for random coefficients instead.
coeffs = T[sqrt(T(k)) for k in 1:(N - 1)]
v = rand(Complex{T}, N)
w = Vector{Complex{T}}(undef, N)  # output buffer, contents uninitialized

# CUDA.jl copies of the inputs, plus an output buffer of matching type and size.
coeffs_gpu = CuArray(coeffs)
v_gpu = CuArray(v)
w_gpu = similar(v_gpu)

# Reactant copies of the same three host arrays.
coeffs_reactant = Reactant.to_rarray(coeffs);
v_reactant = Reactant.to_rarray(v)
w_reactant = Reactant.to_rarray(w)
"""
    myfunc(w, coeffs, v)

Write `coeffs[i] * v[i + 1]` into `w[i]` for `i = 1, ..., length(v) - 1` and return `w`.

`w` is modified in place. Entries of `w` beyond index `length(v) - 1` are left untouched.
"""
function myfunc(w, coeffs, v)
    n = length(v)
    # Non-copying windows: the first n - 1 slots of `w`, and `v` shifted by one.
    dest = view(w, 1:(n - 1))
    shifted = view(v, 2:n)
    dest .= coeffs .* shifted
    return w
end
# Benchmarks for the three backends.
#
# CUDA.jl launches kernels asynchronously: without synchronization the benchmark
# only measures the launch overhead, not the time the GPU needs to finish the work.
# Both GPU measurements therefore block until the device is done, so that they are
# comparable with each other and with the CPU run. The timings quoted in the
# trailing comments are the originally reported numbers, taken WITHOUT
# synchronization; re-run to obtain comparable figures.
@benchmark myfunc($w, $coeffs, $v) # 325.731 μs (CPU)
@benchmark CUDA.@sync myfunc($w_gpu, $coeffs_gpu, $v_gpu) # was 5.384 μs without CUDA.@sync
# `sync=true` makes the compiled function wait for the computation to complete
# before returning.
myfunc_compiled = @compile sync=true myfunc(w_reactant, coeffs_reactant, v_reactant)
@benchmark myfunc_compiled($w_reactant, $coeffs_reactant, $v_reactant) # was 49.838 μs without sync=true
As far as I can tell, Reactant is running on the GPU:
julia> Reactant.devices()
1-element Vector{Reactant.XLA.PJRT.Device}:
Reactant.XLA.PJRT.Device(Ptr{Nothing}(0x000000002b6aede0), "CUDA:0 NVIDIA GeForce RTX 4090")
zzz
I have this simple example in which I compare the performance of Reactant with that of CUDA.jl. However, the Reactant version is about 10x slower.
As far as I can tell, Reactant is running on the GPU.