11#include <gtest/gtest.h>
41 "test_batch_multi_scalar_mul can exceed num_points; "
42 "raise num_points or lower kMaxBatchMSMs / kMaxBatchPointsPerMSM");
44 "test_consume_point_batch* reads past end of generators; "
45 "raise num_points or lower kMaxBucketTestPoints");
48 static inline std::vector<ScalarField>
scalars{};
52 size_t total_points = input_scalars.size();
54 std::vector<Element> expected_accs(num_threads);
55 size_t range_per_thread = (total_points + num_threads - 1) / num_threads;
58 expected_thread_acc.self_set_infinity();
59 size_t start = thread_idx * range_per_thread;
60 size_t end = ((thread_idx + 1) * range_per_thread > total_points) ? total_points
61 : (thread_idx + 1) * range_per_thread;
62 bool skip = start >= total_points;
64 for (
size_t i = start; i < end; ++i) {
65 expected_thread_acc += input_points[i] * input_scalars[i];
68 expected_accs[thread_idx] = expected_thread_acc;
72 expected_acc.self_set_infinity();
73 for (
auto& acc : expected_accs) {
84 for (
size_t i = start; i < end; ++i) {
98 constexpr uint32_t fr_size = 254;
99 constexpr uint32_t slice_bits = 7;
100 constexpr uint32_t num_slices = (fr_size + 6) / 7;
101 constexpr uint32_t last_slice_bits = fr_size - ((num_slices - 1) * slice_bits);
103 for (
size_t x = 0; x < 100; ++x) {
105 input_u256.
data[3] = input_u256.
data[3] & 0x3FFFFFFFFFFFFFFF;
106 while (input_u256 > ScalarField::modulus) {
107 input_u256 -= ScalarField::modulus;
109 std::vector<uint32_t> slices(num_slices);
112 for (uint32_t i = 0; i < num_slices; ++i) {
113 uint32_t mask = ((1U << slice_bits) - 1U);
114 uint32_t shift = slice_bits;
116 mask = ((1U << last_slice_bits) - 1U);
117 shift = last_slice_bits;
119 slices[num_slices - 1 - i] =
static_cast<uint32_t
>((acc & mask).
data[0]);
124 input.self_from_montgomery_form_reduced();
126 ASSERT_EQ(input.data[0], input_u256.
data[0]);
127 ASSERT_EQ(input.data[1], input_u256.
data[1]);
128 ASSERT_EQ(input.data[2], input_u256.
data[2]);
129 ASSERT_EQ(input.data[3], input_u256.
data[3]);
131 for (uint32_t i = 0; i < num_slices; ++i) {
133 EXPECT_EQ(result, slices[i]);
141 const size_t num_buckets = 128;
143 std::vector<uint64_t> input_point_schedule;
144 for (
size_t i = 0; i < total_points; ++i) {
146 uint64_t schedule =
static_cast<uint64_t
>(bucket) + (
static_cast<uint64_t
>(i) << 32);
147 input_point_schedule.push_back(schedule);
152 input_point_schedule,
generators, affine_data, bucket_data);
154 std::vector<Element> expected_buckets(num_buckets);
155 for (
auto& e : expected_buckets) {
156 e.self_set_infinity();
158 for (
size_t i = 0; i < total_points; ++i) {
159 uint64_t bucket = input_point_schedule[i] & 0xFFFFFFFF;
160 EXPECT_LT(
static_cast<size_t>(bucket), num_buckets);
161 expected_buckets[
static_cast<size_t>(bucket)] +=
generators[i];
163 for (
size_t i = 0; i < num_buckets; ++i) {
164 if (!expected_buckets[i].is_point_at_infinity()) {
166 EXPECT_EQ(expected, bucket_data.
buckets[i]);
176 const size_t num_buckets = 128;
178 std::vector<uint64_t> input_point_schedule;
179 for (
size_t i = 0; i < total_points; ++i) {
181 uint64_t schedule =
static_cast<uint64_t
>(bucket) + (
static_cast<uint64_t
>(i) << 32);
182 input_point_schedule.push_back(schedule);
187 input_point_schedule,
generators, affine_data, bucket_data);
192 expected_acc.self_set_infinity();
194 std::vector<Element> expected_accs(num_threads);
195 size_t range_per_thread = (total_points + num_threads - 1) / num_threads;
198 expected_thread_acc.self_set_infinity();
199 size_t start = thread_idx * range_per_thread;
200 size_t end = (thread_idx == num_threads - 1) ? total_points : (thread_idx + 1) * range_per_thread;
201 bool skip = start >= total_points;
203 for (
size_t i = start; i < end; ++i) {
204 ScalarField scalar = input_point_schedule[i] & 0xFFFFFFFF;
205 expected_thread_acc +=
generators[i] * scalar;
208 expected_accs[thread_idx] = expected_thread_acc;
211 for (
size_t i = 0; i < num_threads; ++i) {
212 expected_acc += expected_accs[i];
220 const size_t total_points = 30071;
222 std::vector<uint64_t> input_point_schedule;
223 for (
size_t i = 0; i < total_points; ++i) {
225 uint64_t schedule =
static_cast<uint64_t
>(bucket) + (
static_cast<uint64_t
>(i) << 32);
226 input_point_schedule.push_back(schedule);
230 &input_point_schedule[0], input_point_schedule.size(), 7);
234 for (
size_t i = 0; i < total_points; ++i) {
235 expected +=
static_cast<size_t>((input_point_schedule[i] & 0xFFFFFFFF) == 0);
237 EXPECT_EQ(result, expected);
240 for (
size_t i = 1; i < total_points; ++i) {
241 uint32_t prev_bucket =
static_cast<uint32_t
>(input_point_schedule[i - 1]);
242 uint32_t curr_bucket =
static_cast<uint32_t
>(input_point_schedule[i]);
243 EXPECT_LE(prev_bucket, curr_bucket) <<
"Array not sorted at index " << i;
255 constexpr uint32_t bucket_index_bits = 17;
256 constexpr size_t num_entries = 1000;
258 std::vector<uint64_t> schedule(num_entries);
261 const size_t num_true_zeros = 10;
262 for (
size_t i = 0; i < num_true_zeros; ++i) {
263 schedule[i] =
static_cast<uint64_t
>(i) << 32;
269 const size_t num_false_zeros = 20;
270 for (
size_t i = 0; i < num_false_zeros; ++i) {
271 size_t idx = num_true_zeros + i;
272 schedule[idx] = (
static_cast<uint64_t
>(idx) << 32) | 65536ULL;
276 for (
size_t i = num_true_zeros + num_false_zeros; i < num_entries; ++i) {
279 if ((bucket & 0xFFFF) == 0) {
282 schedule[i] = (
static_cast<uint64_t
>(i) << 32) |
static_cast<uint64_t
>(bucket);
286 schedule.data(), num_entries, bucket_index_bits);
290 for (
size_t i = 0; i < num_entries; ++i) {
296 EXPECT_EQ(result, expected) <<
"Zero-bucket count is wrong for bucket_index_bits=" << bucket_index_bits
297 <<
". Got " << result <<
", expected " << expected
298 <<
" (likely overwritten by count from a non-zero bucket)";
301 for (
size_t i = 1; i < num_entries; ++i) {
302 uint32_t prev =
static_cast<uint32_t
>(schedule[i - 1]);
303 uint32_t curr =
static_cast<uint32_t
>(schedule[i]);
304 EXPECT_LE(prev, curr) <<
"Array not sorted at index " << i;
314 EXPECT_EQ(result, expected);
322 std::vector<AffineElement> expected(num_msms);
328 size_t vector_offset = 0;
329 for (
size_t k = 0; k < num_msms; ++k) {
332 ASSERT_LT(vector_offset + num_pts,
num_points);
333 std::span<const AffineElement> batch_points(&
generators[vector_offset], num_pts);
335 batch_scalars_copies[k].resize(num_pts);
336 for (
size_t i = 0; i < num_pts; ++i) {
337 batch_scalars_copies[k][i] =
scalars[vector_offset + i];
340 vector_offset += num_pts;
341 batch_points_span.push_back(batch_points);
342 batch_scalars_spans.push_back(batch_scalars_copies[k]);
344 expected[k] =
naive_msm(batch_scalars_spans[k], batch_points_span[k]);
347 std::vector<AffineElement> result =
350 EXPECT_EQ(result, expected);
355 const size_t num_msms = 10;
356 std::vector<AffineElement> expected(num_msms);
362 for (
size_t k = 0; k < num_msms; ++k) {
363 const size_t num_pts = 33;
364 auto& test_scalars = batch_scalars[k];
366 test_scalars.resize(num_pts);
368 size_t fixture_offset = k * num_pts;
371 for (
size_t i = 0; i < 13; ++i) {
374 for (
size_t i = 13; i < 23; ++i) {
375 test_scalars[i] =
scalars[fixture_offset + i + 13];
377 for (
size_t i = 23; i < num_pts; ++i) {
380 batch_points_span.push_back(batch_points);
381 batch_scalars_spans.push_back(batch_scalars[k]);
383 expected[k] =
naive_msm(batch_scalars[k], batch_points);
386 std::vector<AffineElement> result =
389 EXPECT_EQ(result, expected);
394 const size_t start_index = 1234;
395 const size_t num_pts =
num_points - start_index;
403 EXPECT_EQ(result, expected);
408 const size_t start_index = 1234;
409 const size_t num_pts =
num_points - start_index;
410 std::vector<ScalarField> test_scalars(num_pts, ScalarField::zero());
415 EXPECT_EQ(result, Group::affine_point_at_infinity);
420 std::vector<ScalarField> test_scalars;
421 std::vector<AffineElement> input_points;
425 EXPECT_EQ(result, Group::affine_point_at_infinity);
430 const size_t num_pts = 100;
431 std::vector<ScalarField> test_scalars(num_pts);
432 std::vector<ScalarField> scalars_copy(num_pts);
434 for (
size_t i = 0; i < num_pts; ++i) {
436 scalars_copy[i] = test_scalars[i];
439 std::span<const AffineElement> points(&
generators[0], num_pts);
444 for (
size_t i = 0; i < num_pts; ++i) {
445 EXPECT_EQ(test_scalars[i], scalars_copy[i]) <<
"Scalar at index " << i <<
" was modified";
451 const size_t num_msms = 3;
452 const size_t num_pts = 100;
459 for (
size_t k = 0; k < num_msms; ++k) {
460 batch_scalars[k].resize(num_pts);
461 scalars_copies[k].resize(num_pts);
463 for (
size_t i = 0; i < num_pts; ++i) {
464 batch_scalars[k][i] =
scalars[k * num_pts + i];
465 scalars_copies[k][i] = batch_scalars[k][i];
468 batch_points.push_back(std::span<const AffineElement>(&
generators[k * num_pts], num_pts));
469 batch_scalar_spans.push_back(batch_scalars[k]);
474 for (
size_t k = 0; k < num_msms; ++k) {
475 for (
size_t i = 0; i < num_pts; ++i) {
476 EXPECT_EQ(batch_scalars[k][i], scalars_copies[k][i])
477 <<
"Scalar at MSM " << k <<
", index " << i <<
" was modified";
484 const size_t num_pts = 5;
485 std::vector<ScalarField> test_scalars(num_pts, ScalarField::one());
486 std::span<const AffineElement> points(&
generators[0], num_pts);
492 expected.self_set_infinity();
493 for (
size_t i = 0; i < num_pts; ++i) {
494 expected += points[i];
502 const size_t num_pts = 5;
503 std::vector<ScalarField> test_scalars(num_pts, -ScalarField::one());
504 std::span<const AffineElement> points(&
generators[0], num_pts);
510 expected.self_set_infinity();
511 for (
size_t i = 0; i < num_pts; ++i) {
512 expected -= points[i];
520 std::vector<ScalarField> test_scalars = {
scalars[0] };
521 std::span<const AffineElement> points(&
generators[0], 1);
527 EXPECT_EQ(result, expected);
532 std::vector<size_t> test_sizes = { 1, 2, 15, 16, 17, 50, 127, 128, 129, 256, 512 };
534 for (
size_t num_pts : test_sizes) {
537 std::vector<ScalarField> test_scalars(num_pts);
538 for (
size_t i = 0; i < num_pts; ++i) {
542 std::span<const AffineElement> points(&
generators[0], num_pts);
548 EXPECT_EQ(result, expected) <<
"Failed for size " << num_pts;
555 const size_t num_pts = 32;
558 std::vector<AffineElement> points(num_pts, base_point);
559 std::vector<ScalarField> test_scalars(num_pts);
562 for (
size_t i = 0; i < num_pts; ++i) {
564 scalar_sum += test_scalars[i];
573 EXPECT_EQ(result, expected);
578 const size_t num_pts = 100;
579 std::vector<ScalarField> test_scalars(num_pts);
581 expected.self_set_infinity();
583 for (
size_t i = 0; i < num_pts; ++i) {
585 test_scalars[i] = ScalarField::zero();
592 std::span<const AffineElement> points(&
generators[0], num_pts);
601 const size_t num_pts = 200;
602 std::vector<ScalarField> test_scalars(num_pts);
603 for (
size_t i = 0; i < num_pts; ++i) {
607 std::span<const AffineElement> points(&
generators[0], num_pts);
610 auto result = scalar_multiplication::pippenger<Curve>(scalar_span, points);
618 const size_t num_pts = 200;
619 std::vector<ScalarField> test_scalars(num_pts);
620 for (
size_t i = 0; i < num_pts; ++i) {
624 std::span<const AffineElement> points(&
generators[0], num_pts);
627 auto result = scalar_multiplication::pippenger_unsafe<Curve>(scalar_span, points);
634using CurveTypes = ::testing::Types<bb::curve::BN254, bb::curve::Grumpkin>;
641 this->test_get_scalar_slice();
645 this->test_consume_point_batch();
649 this->test_consume_point_batch_and_accumulate();
653 this->test_radix_sort_count_zero_entries();
657 this->test_radix_sort_count_zero_entries_wide_buckets();
661 this->test_pippenger_low_memory();
665 this->test_batch_multi_scalar_mul();
669 this->test_batch_multi_scalar_mul_sparse();
677 this->test_msm_all_zeroes();
681 this->test_msm_empty_polynomial();
685 this->test_scalars_unchanged_after_msm();
689 this->test_scalars_unchanged_after_batch_multi_scalar_mul();
693 this->test_scalar_one();
697 this->test_scalar_minus_one();
701 this->test_single_point();
705 this->test_size_thresholds();
709 this->test_duplicate_points();
713 this->test_mixed_zero_scalars();
717 this->test_pippenger_free_function();
721 this->test_pippenger_unsafe_free_function();
731using WorkUnit = PartitionMSM::MSMWorkUnit;
737 for (
const auto& u : units) {
738 for (
size_t k = 0; k < u.size; ++k) {
739 total += weights[u.batch_msm_index][u.start_index + k];
747TEST(PartitionByWeight, NoMsmsReturnsEmptyThreads)
749 auto units = PartitionMSM::partition_by_weight({}, 8);
750 ASSERT_EQ(units.size(), 8U);
751 for (
const auto& t : units) {
752 EXPECT_TRUE(t.empty());
756TEST(PartitionByWeight, AllEmptyMsmsReturnsEmptyThreads)
759 auto units = PartitionMSM::partition_by_weight(weights, 4);
760 ASSERT_EQ(units.size(), 4U);
761 for (
const auto& t : units) {
762 EXPECT_TRUE(t.empty());
766TEST(PartitionByWeight, SingleThreadGetsEverything)
769 auto units = PartitionMSM::partition_by_weight(weights, 1);
770 ASSERT_EQ(units.size(), 1U);
771 ASSERT_EQ(units[0].size(), 1U);
772 EXPECT_EQ(units[0][0].batch_msm_index, 0U);
773 EXPECT_EQ(units[0][0].start_index, 0U);
774 EXPECT_EQ(units[0][0].size, 5U);
777TEST(PartitionByWeight, EvenSplitAcrossThreads)
781 auto units = PartitionMSM::partition_by_weight(weights, 4);
782 ASSERT_EQ(units.size(), 4U);
783 for (
size_t t = 0; t < 4; ++t) {
784 ASSERT_EQ(units[t].size(), 1U) <<
"thread " << t;
785 EXPECT_EQ(units[t][0].size, 2U) <<
"thread " << t;
786 EXPECT_EQ(thread_weight(units[t], weights), 10U) <<
"thread " << t;
790TEST(PartitionByWeight, HeavyFirstWeightClosesFirstThreadEarly)
794 auto units = PartitionMSM::partition_by_weight(weights, 4);
795 ASSERT_EQ(units.size(), 4U);
797 ASSERT_FALSE(units[0].empty());
798 EXPECT_EQ(units[0][0].start_index, 0U);
799 EXPECT_EQ(units[0][0].size, 1U);
801 size_t total_assigned = 0;
802 for (
const auto& t : units) {
803 for (
const auto& u : t) {
804 total_assigned += u.size;
807 EXPECT_EQ(total_assigned, 5U);
810TEST(PartitionByWeight, BoundaryStraddlesMsm)
815 auto units = PartitionMSM::partition_by_weight(weights, 4);
816 ASSERT_EQ(units.size(), 4U);
817 size_t total_assigned = 0;
818 for (
const auto& t : units) {
819 for (
const auto& u : t) {
820 total_assigned += u.size;
823 EXPECT_EQ(total_assigned, 8U);
825 for (
size_t t = 0; t < 4; ++t) {
826 EXPECT_EQ(thread_weight(units[t], weights), 10U) <<
"thread " << t;
830TEST(PartitionByWeight, LastThreadAbsorbsRemainder)
838 auto units = PartitionMSM::partition_by_weight(weights, 3);
839 ASSERT_EQ(units.size(), 3U);
840 size_t total_assigned = 0;
841 for (
const auto& t : units) {
842 for (
const auto& u : t) {
843 total_assigned += u.size;
846 EXPECT_EQ(total_assigned, 3U);
847 ASSERT_EQ(units[2].size(), 1U);
848 EXPECT_EQ(units[2][0].start_index, 2U);
849 EXPECT_EQ(units[2][0].size, 1U);
850 EXPECT_EQ(thread_weight(units[2], weights), 1U);
853TEST(PartitionByWeight, MoreThreadsThanScalars)
858 auto units = PartitionMSM::partition_by_weight(weights, 8);
859 ASSERT_EQ(units.size(), 8U);
860 for (
size_t t = 0; t < 3; ++t) {
861 ASSERT_EQ(units[t].size(), 1U) <<
"thread " << t;
862 EXPECT_EQ(units[t][0].size, 1U);
864 for (
size_t t = 3; t < 8; ++t) {
865 EXPECT_TRUE(units[t].empty()) <<
"thread " << t;
870TEST(ScalarMultiplication, SmallInputsExplicit)
872 uint256_t x0(0x68df84429941826a, 0xeb08934ed806781c, 0xc14b6a2e4f796a73, 0x08dc1a9a11a3c8db);
873 uint256_t y0(0x8ae5c31aa997f141, 0xe85f20c504f2c11b, 0x81a94193f3b1ce2b, 0x26f2c37372adb5b7);
874 uint256_t x1(0x80f5a592d919d32f, 0x1362652b984e51ca, 0xa0b26666f770c2a1, 0x142c6e1964e5c3c5);
875 uint256_t y1(0xb6c322ebb5ae4bc5, 0xf9fef6c7909c00f8, 0xb37ca1cc9af3b421, 0x1e331c7fa73d6a59);
876 uint256_t s0(0xe48bf12a24272e08, 0xf8dd0182577f3567, 0xec8fd222b8a6becb, 0x102d76b945612c9b);
877 uint256_t s1(0x098ae8d69f1e4e9e, 0xb5c8313c0f6040ed, 0xf78041e30cc46c44, 0x1d1e6e0c21892e13);
#define BB_BENCH_NAME(name)
BB_INLINE bool get(size_t index) const noexcept
void test_pippenger_low_memory()
typename Curve::ScalarField ScalarField
static std::vector< AffineElement > generators
void test_radix_sort_count_zero_entries_wide_buckets()
static constexpr size_t kMaxBatchMSMs
void test_mixed_zero_scalars()
void test_batch_multi_scalar_mul_sparse()
void test_duplicate_points()
void test_consume_point_batch()
void test_scalars_unchanged_after_batch_multi_scalar_mul()
void test_msm_all_zeroes()
void test_pippenger_unsafe_free_function()
void test_batch_multi_scalar_mul()
static constexpr size_t num_points
static void SetUpTestSuite()
void test_scalar_minus_one()
typename Curve::Element Element
void test_consume_point_batch_and_accumulate()
static constexpr size_t kMaxBatchPointsPerMSM
void test_msm_empty_polynomial()
void test_pippenger_free_function()
typename Curve::Group Group
void test_get_scalar_slice()
void test_scalars_unchanged_after_msm()
static std::vector< ScalarField > scalars
static constexpr size_t kMaxBucketTestPoints
typename Curve::AffineElement AffineElement
static AffineElement naive_msm(std::span< ScalarField > input_scalars, std::span< const AffineElement > input_points)
void test_radix_sort_count_zero_entries()
void test_size_thresholds()
typename Group::element Element
typename grumpkin::g1 Group
typename Group::affine_element AffineElement
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
group_elements::affine_element< Fq, Fr, Params > affine_element
virtual uint8_t get_random_uint8()=0
virtual uint16_t get_random_uint16()=0
virtual uint32_t get_random_uint32()=0
virtual uint256_t get_random_uint256()=0
static Element accumulate_buckets(BucketType &bucket_accumulators) noexcept
Reduce buckets to single point using running (suffix) sum from high to low: R = sum(k * B_k)
static uint32_t get_scalar_slice(const ScalarField &scalar, size_t round, size_t slice_size) noexcept
Extract c-bit slice from scalar for bucket index computation.
static AffineElement msm(std::span< const AffineElement > points, PolynomialSpan< const ScalarField > scalars, bool handle_edge_cases=false) noexcept
Main entry point for single MSM computation.
static std::vector< AffineElement > batch_multi_scalar_mul(std::span< std::span< const AffineElement > > points, std::span< std::span< ScalarField > > scalars, bool handle_edge_cases=true) noexcept
Compute multiple MSMs in parallel with work balancing.
static void batch_accumulate_points_into_buckets(std::span< const uint64_t > point_schedule, std::span< const AffineElement > points, AffineAdditionData &affine_data, BucketAccumulators &bucket_data) noexcept
Process sorted point schedule into bucket accumulators using batched affine additions.
const std::vector< MemoryValue > data
size_t sort_point_schedule_and_count_zero_buckets(uint64_t *point_schedule, const size_t num_entries, const uint32_t bucket_index_bits) noexcept
Sort point schedule by bucket index and count zero-bucket entries.
constexpr uint64_t BUCKET_INDEX_MASK
Entry point for Barretenberg command-line interface.
TYPED_TEST_SUITE(CommitmentKeyTest, Curves)
::testing::Types< curve::BN254, curve::Grumpkin > CurveTypes
TYPED_TEST(CommitmentKeyTest, CommitToZeroPoly)
TEST(BoomerangMegaCircuitBuilder, BasicCircuit)
void parallel_for(size_t num_iterations, const std::function< void(size_t)> &func)
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Scratch space for batched affine point additions (one per thread)
Affine bucket accumulators for the fast affine-trick Pippenger variant.
std::vector< AffineElement > buckets