26#include <benchmark/benchmark.h>
30using namespace benchmark;
// Element count shared by this benchmark: 2^20. It is the argument to
// get_crs(), the length of the scalar vector, and the length of the point
// subspan.
38constexpr size_t MSM_SIZE = 1 << 20;
// Selects how build_scalars() fills the scalar vector:
//   Clustered    - first half from small_scalar(), second half from full_scalar()
//   UniformMixed - each slot picks small_scalar() or full_scalar() from one random bit
//   AllFull      - every slot from full_scalar()
40enum class Distribution { Clustered, UniformMixed, AllFull };
// Google Benchmark fixture used by the Clustered / UniformMixed / AllFull
// benchmarks defined further down.
// NOTE(review): this listing is missing several original lines (braces, the
// `switch` header, break/return statements, the declarations of `srs` and
// `engine`, and full_scalar()); comments below describe only what is visible.
42class ThreadScalingBench :
public benchmark::Fixture {
// Fixture SetUp override: asks the Curve CRS factory for a CRS via
// get_crs(MSM_SIZE) and stores the result in `srs`. The `state` argument is
// deliberately unused ([[maybe_unused]]).
47 void SetUp([[maybe_unused]] const ::benchmark::State& state)
override
53 srs = bb::srs::get_crs_factory<Curve>()->get_crs(MSM_SIZE);
// Builds an Fr from one engine.get_random_uint32() draw widened to uint64_t,
// so the integer handed to the Fr constructor is always below 2^32.
59 Fr small_scalar() {
return Fr(
static_cast<uint64_t
>(engine.get_random_uint32())); }
// Fills a vector of MSM_SIZE scalars according to `dist`.
// NOTE(review): the return statement is not in this listing; presumably the
// function returns `scalars` - confirm against the original file.
62 std::vector<Fr> build_scalars(Distribution dist)
64 std::vector<Fr> scalars(MSM_SIZE);
// Clustered: indices [0, MSM_SIZE/2) get small scalars, indices
// [MSM_SIZE/2, MSM_SIZE) get full_scalar() values.
66 case Distribution::Clustered:
67 for (
size_t i = 0; i < MSM_SIZE / 2; ++i) {
68 scalars[i] = small_scalar();
70 for (
size_t i = MSM_SIZE / 2; i < MSM_SIZE; ++i) {
71 scalars[i] = full_scalar();
// UniformMixed: one random 32-bit draw per slot; low bit set selects
// small_scalar(), low bit clear selects full_scalar().
74 case Distribution::UniformMixed:
75 for (
size_t i = 0; i < MSM_SIZE; ++i) {
76 scalars[i] = (engine.get_random_uint32() & 1U) ? small_scalar() : full_scalar();
// AllFull: every slot comes from full_scalar().
79 case Distribution::AllFull:
80 for (
size_t i = 0; i < MSM_SIZE; ++i) {
81 scalars[i] = full_scalar();
// Shared driver for the three fixture benchmarks.
//   fx    - fixture instance supplying build_scalars() and `srs`
//   state - benchmark state; range(0) is read as the thread count
//   dist  - scalar distribution forwarded to build_scalars()
// NOTE(review): the lines that consume `num_threads`, declare
// `scalar_spans` / `point_spans`, and form the timed loop body are not in this
// listing. Presumably they call set_parallel_for_concurrency() and
// batch_multi_scalar_mul() - confirm against the original file.
89static void run_msm(ThreadScalingBench& fx, benchmark::State& state, Distribution dist)
91 const size_t num_threads =
static_cast<size_t>(state.range(0));
// Scalars are built before the `for (auto _ : state)` loop below.
95 std::vector<Fr> scalars = fx.build_scalars(dist);
// One entry is appended to each span list: all scalars, and the first
// MSM_SIZE monomial points of the fixture's SRS.
99 scalar_spans.emplace_back(scalars);
100 point_spans.emplace_back(fx.srs->get_monomial_points().subspan(0, MSM_SIZE));
// Google Benchmark timed loop.
105 for (
auto _ : state) {
// Fixture benchmark body for Distribution::Clustered; delegates to run_msm().
113BENCHMARK_DEFINE_F(ThreadScalingBench, Clustered)(benchmark::State& state)
115 run_msm(*
this, state, Distribution::Clustered);
// Fixture benchmark body for Distribution::UniformMixed; delegates to run_msm().
117BENCHMARK_DEFINE_F(ThreadScalingBench, UniformMixed)(benchmark::State& state)
119 run_msm(*
this, state, Distribution::UniformMixed);
// Fixture benchmark body for Distribution::AllFull; delegates to run_msm().
121BENCHMARK_DEFINE_F(ThreadScalingBench, AllFull)(benchmark::State& state)
123 run_msm(*
this, state, Distribution::AllFull);
// Registration helper handed to ->Apply() on each benchmark below; iterates
// over the thread counts 1, 2, 4 and 8.
// NOTE(review): the loop body is not in this listing. Presumably it registers
// `t` as the benchmark argument that run_msm() reads via state.range(0) -
// confirm against the original file.
126static void ThreadSweep(benchmark::internal::Benchmark*
b)
128 for (int64_t t : { 1, 2, 4, 8 }) {
// Register the three fixture benchmarks: each uses benchmark::kMillisecond as
// its time unit and is passed through ThreadSweep via Apply().
133BENCHMARK_REGISTER_F(ThreadScalingBench, Clustered)->Unit(benchmark::kMillisecond)->Apply(ThreadSweep);
134BENCHMARK_REGISTER_F(ThreadScalingBench, UniformMixed)->Unit(benchmark::kMillisecond)->Apply(ThreadSweep);
135BENCHMARK_REGISTER_F(ThreadScalingBench, AllFull)->Unit(benchmark::kMillisecond)->Apply(ThreadSweep);
typename Group::affine_element AffineElement
static std::vector< AffineElement > batch_multi_scalar_mul(std::span< std::span< const AffineElement > > points, std::span< std::span< ScalarField > > scalars, bool handle_edge_cases=true) noexcept
Compute multiple MSMs in parallel with work balancing.
#define GOOGLE_BB_BENCH_REPORTER(state)
RNG & get_debug_randomness(bool reset, std::uint_fast64_t seed)
std::filesystem::path bb_crs_path()
void init_file_crs_factory(const std::filesystem::path &path)
void set_parallel_for_concurrency(size_t num_cores)
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept