Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 21 additions & 27 deletions dp-crypto/benches/cpu_vs_gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,15 @@

use ark_bn254::{Bn254, Fr};
use ark_ff::AdditiveGroup;
use ark_std::rand::Rng;
use ark_std::rand::thread_rng;
use divan::Bencher;
#[cfg(feature = "cuda")]
use dp_crypto::arkyper::HyperKZGGpu;
use dp_crypto::{
arkyper::{
transcript::blake3::Blake3Transcript,
CommitmentScheme, HyperKZG,
},
arkyper::{CommitmentScheme, HyperKZG, transcript::blake3::Blake3Transcript},
poly::dense::DensePolynomial,
};
#[cfg(feature = "cuda")]
use dp_crypto::arkyper::HyperKZGGpu;
use ark_std::rand::Rng;

fn main() {
divan::main();
Expand Down Expand Up @@ -58,9 +55,7 @@ mod batch_commit {
let (pp, _) = HyperKZG::<Bn254>::test_setup(&mut thread_rng(), LOG_N);
(pp, polys)
})
.bench_local_values(|(pp, polys)| {
HyperKZG::<Bn254>::batch_commit(&pp, &polys).unwrap()
})
.bench_local_values(|(pp, polys)| HyperKZG::<Bn254>::batch_commit(&pp, &polys).unwrap())
}

#[divan::bench]
Expand All @@ -71,9 +66,7 @@ mod batch_commit {
let (pp, _) = HyperKZGGpu::<Bn254>::test_setup(&mut thread_rng(), LOG_N);
(pp, polys)
})
.bench_local_values(|(pp, polys)| {
HyperKZGGpu::<Bn254>::batch_commit(&pp, &polys).unwrap()
})
.bench_local_values(|(pp, polys)| HyperKZGGpu::<Bn254>::batch_commit(&pp, &polys).unwrap())
}
}

Expand All @@ -86,36 +79,37 @@ mod batch_open {
use super::*;

/// Build a single combined polynomial with small (≤53-bit) coefficients.
fn make_open_input<CS: CommitmentScheme<Field = Fr>>() -> (CS::ProverSetup, DensePolynomial<'static, Fr>, Vec<Fr>, Blake3Transcript) {
fn make_open_input<CS: CommitmentScheme<Field = Fr>>() -> (
CS::ProverSetup,
DensePolynomial<'static, Fr>,
Vec<Fr>,
Blake3Transcript,
) {
let polys = make_polys();
let (pp, _) = CS::test_setup(&mut thread_rng(), LOG_N);
let point: Vec<Fr> = (0..LOG_N).map(|i| Fr::from(i as u64)).collect();
// Use small challenges so the linear combination stays ≤53-bit.
let challenges: Vec<Fr> = (1..=polys.len())
.map(|i| Fr::from(i as u64))
.collect();
let poly = DensePolynomial::linear_combination(
&polys.iter().collect::<Vec<_>>(),
&challenges,
);
let challenges: Vec<Fr> = (1..=polys.len()).map(|i| Fr::from(i as u64)).collect();
let poly =
DensePolynomial::linear_combination(&polys.iter().collect::<Vec<_>>(), &challenges);
let transcript = Blake3Transcript::new(b"bench_open");
(pp, poly, point, transcript)
}

#[divan::bench]
fn cpu(b: Bencher) {
b.with_inputs(make_open_input::<HyperKZG<Bn254>>)
.bench_local_values(|(pp, poly, point, mut transcript)| {
HyperKZG::<Bn254>::open(&pp, &poly, &point, &Fr::ZERO, &mut transcript).unwrap()
})
.bench_local_values(|(pp, poly, point, mut transcript)| {
HyperKZG::<Bn254>::open(&pp, &poly, &point, &Fr::ZERO, &mut transcript).unwrap()
})
}

#[divan::bench]
#[cfg(feature = "cuda")]
fn gpu(b: Bencher) {
b.with_inputs(make_open_input::<HyperKZGGpu<Bn254>>)
.bench_local_values(|(pp, poly, point, mut transcript)| {
HyperKZGGpu::<Bn254>::prove(&pp, &poly, &point, None, &mut transcript).unwrap()
})
.bench_local_values(|(pp, poly, point, mut transcript)| {
HyperKZGGpu::<Bn254>::prove(&pp, &poly, &point, None, &mut transcript).unwrap()
})
}
}
18 changes: 7 additions & 11 deletions dp-crypto/benches/msm_bitlength.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
//! ```

use ark_bn254::Fr;
use ark_std::rand::SeedableRng;
use ark_std::UniformRand;
use ark_std::rand::SeedableRng;
use divan::Bencher;

fn main() {
Expand All @@ -34,9 +34,9 @@ fn generate_small_scalars(n: usize, max_bits: u32, rng: &mut impl ark_std::rand:
#[divan::bench_group(sample_count = 5, sample_size = 1)]
mod gpu_msm_bitlength {
use super::*;
use dp_crypto::arkyper::gpu_msm::{convert_bases_to_gpu, convert_scalars_to_bigint, GPU_MSM};
use dp_crypto::arkyper::{HyperKZGSRS, HyperKZGProverKey};
use ark_bn254::Bn254;
use dp_crypto::arkyper::gpu_msm::{GPU_MSM, convert_bases_to_gpu, convert_scalars_to_bigint};
use dp_crypto::arkyper::{HyperKZGProverKey, HyperKZGSRS};
use std::sync::Arc;

/// MSM with 53-bit scalars (typical for polynomial evaluations from fix_var).
Expand Down Expand Up @@ -86,9 +86,9 @@ mod gpu_msm_bitlength {
#[divan::bench_group(sample_count = 5, sample_size = 1)]
mod cpu_msm_bitlength {
use super::*;
use ark_ec::VariableBaseMSM;
use ark_bn254::{Bn254, G1Projective};
use dp_crypto::arkyper::{HyperKZGSRS, HyperKZGProverKey};
use ark_ec::VariableBaseMSM;
use dp_crypto::arkyper::{HyperKZGProverKey, HyperKZGSRS};

#[divan::bench(args = SIZES)]
fn msm_53bit_scalars(b: Bencher, log_n: usize) {
Expand All @@ -100,9 +100,7 @@ mod cpu_msm_bitlength {
let bases = &pk.g1_powers()[..n];
let scalars = generate_small_scalars(n, 53, &mut rng);

b.bench_local(|| {
G1Projective::msm(bases, &scalars).expect("CPU MSM failed")
})
b.bench_local(|| G1Projective::msm(bases, &scalars).expect("CPU MSM failed"))
}

#[divan::bench(args = SIZES)]
Expand All @@ -115,8 +113,6 @@ mod cpu_msm_bitlength {
let bases = &pk.g1_powers()[..n];
let scalars: Vec<Fr> = (0..n).map(|_| Fr::rand(&mut rng)).collect();

b.bench_local(|| {
G1Projective::msm(bases, &scalars).expect("CPU MSM failed")
})
b.bench_local(|| G1Projective::msm(bases, &scalars).expect("CPU MSM failed"))
}
}
16 changes: 11 additions & 5 deletions dp-crypto/benches/pcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ use ark_poly::DenseMultilinearExtension;
use ark_poly_commit::multilinear_pc::MultilinearPC;
use ark_std::rand::thread_rng;
use divan::Bencher;
#[cfg(feature = "cuda")]
use dp_crypto::arkyper::HyperKZGGpu;
use dp_crypto::{
arkyper::{CommitmentScheme, HyperKZG},
poly::{dense::DensePolynomial as ADensePolynomial, slice::SmartSlice},
};
#[cfg(feature = "cuda")]
use dp_crypto::arkyper::HyperKZGGpu;
#[allow(unused_imports)]
use jolt_core::poly::{
commitment::{
Expand Down Expand Up @@ -68,7 +68,10 @@ mod commit {
fn arkyper_gpu_commit(b: Bencher, n: usize) {
b.with_inputs(|| {
let evals = arkworks_static_evals(2u32.pow(n as u32) as usize);
(evals, HyperKZGGpu::<Bn254>::test_setup(&mut thread_rng(), n))
(
evals,
HyperKZGGpu::<Bn254>::test_setup(&mut thread_rng(), n),
)
})
.bench_local_values(|(s, (pp, _))| {
let poly = ADensePolynomial::new(s);
Expand All @@ -83,7 +86,10 @@ mod commit {
let polys = (0..NUM_BATCHED_POLYS)
.map(|_| ADensePolynomial::new(arkworks_static_evals(2u32.pow(n as u32) as usize)))
.collect::<Vec<_>>();
(polys, HyperKZGGpu::<Bn254>::test_setup(&mut thread_rng(), n))
(
polys,
HyperKZGGpu::<Bn254>::test_setup(&mut thread_rng(), n),
)
})
.bench_local_values(|(polys, (pp, _))| {
HyperKZGGpu::<Bn254>::batch_commit(&pp, &polys).unwrap()
Expand Down Expand Up @@ -196,8 +202,8 @@ mod commit {
mod open {
use ark_bn254::Fr;
use ark_ff::AdditiveGroup;
use dp_crypto::arkyper::transcript::blake3::Blake3Transcript;
use dp_crypto::arkyper::transcript::Transcript;
use dp_crypto::arkyper::transcript::blake3::Blake3Transcript;
#[allow(unused_imports)]
use jolt_core::field::JoltField;
#[allow(unused_imports)]
Expand Down
17 changes: 12 additions & 5 deletions dp-crypto/examples/hyperkzg_gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
//! ```

use ark_bn254::{Bn254, Fr};
use ark_std::rand::SeedableRng;
use ark_std::UniformRand;
use ark_std::rand::SeedableRng;
use dp_crypto::{
arkyper::{
transcript::blake3::Blake3Transcript, CommitmentScheme, HyperKZG, HyperKZGGpu,
HyperKZGGpuProverKey, HyperKZGSRS,
CommitmentScheme, HyperKZG, HyperKZGGpu, HyperKZGGpuProverKey, HyperKZGSRS,
transcript::blake3::Blake3Transcript,
},
poly::dense::DensePolynomial,
};
Expand Down Expand Up @@ -66,7 +66,10 @@ fn main() -> anyhow::Result<()> {
println!(" GPU commit time: {:?}", gpu_commit_time);

// Verify they match
assert_eq!(cpu_commitment.0, gpu_commitment.0, "Commitments should match!");
assert_eq!(
cpu_commitment.0, gpu_commitment.0,
"Commitments should match!"
);
println!(" Commitments match!");
println!(
" Speedup: {:.2}x\n",
Expand Down Expand Up @@ -99,7 +102,11 @@ fn main() -> anyhow::Result<()> {
println!(" GPU batch commit time: {:?}", gpu_batch_time);

// Verify they match
for (i, ((cpu_c, _), (gpu_c, _))) in cpu_commitments.iter().zip(gpu_commitments.iter()).enumerate() {
for (i, ((cpu_c, _), (gpu_c, _))) in cpu_commitments
.iter()
.zip(gpu_commitments.iter())
.enumerate()
{
assert_eq!(cpu_c.0, gpu_c.0, "Commitment {} should match!", i);
}
println!(" All {} commitments match!", num_polys);
Expand Down
5 changes: 1 addition & 4 deletions dp-crypto/src/arkyper/gpu_msm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@ use ark_bn254::{Fq, Fr, G1Affine, G1Projective};
use ark_ec::AffineRepr;
use ark_ff::PrimeField;
use ec_gpu::arkworks_bn254::G1Affine as GpuG1Affine;
use ec_gpu_gen::{
program, rust_gpu_tools::Device, threadpool::Worker, G1AffineM, MultiexpKernel,
};
use ec_gpu_gen::{G1AffineM, MultiexpKernel, program, rust_gpu_tools::Device, threadpool::Worker};
use rayon::prelude::*;

pub static GPU_MSM: std::sync::LazyLock<Mutex<GpuMsm>> =
Expand Down Expand Up @@ -88,7 +86,6 @@ impl GpuMsm {
}
Ok(results)
}

}

fn fq_to_montgomery_bytes(x: &Fq) -> [u8; 32] {
Expand Down
14 changes: 10 additions & 4 deletions dp-crypto/src/arkyper/hyperkzg_gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,11 @@ pub fn gpu_batch_commit(
results[idx] = r;
}
}
tracing::trace!("[gpu_batch_commit] CPU fallback: {} polys, {:.1}ms",
cpu_poly_count, t_cpu.elapsed().as_secs_f64() * 1000.0);
tracing::trace!(
"[gpu_batch_commit] CPU fallback: {} polys, {:.1}ms",
cpu_poly_count,
t_cpu.elapsed().as_secs_f64() * 1000.0
);
}

// Join GPU results
Expand All @@ -449,8 +452,11 @@ pub fn gpu_batch_commit(
Ok(())
})?;

tracing::trace!("[gpu_batch_commit] TOTAL: {:.1}ms ({} polys)",
overall_start.elapsed().as_secs_f64() * 1000.0, polys.len());
tracing::trace!(
"[gpu_batch_commit] TOTAL: {:.1}ms ({} polys)",
overall_start.elapsed().as_secs_f64() * 1000.0,
polys.len()
);

Ok(results)
}
Expand Down
Loading