19template <
typename Curve>
class MSM {
43 "get_scalar_slice uses 1ULL << lo_slice_bits where lo_slice_bits <= MAX_SLICE_BITS - 1; "
44 "shifting uint64_t by >= 64 is UB.");
83 return offset_generator;
116 std::span<std::span<const AffineElement>> all_points,
117 const std::vector<std::vector<uint32_t>>& all_indices,
118 std::span<uint64_t> point_schedule_buffer,
121 const auto& indices = all_indices[work_unit.batch_msm_index];
124 work_unit.size > 0 ? std::span<const uint32_t>{ &indices[work_unit.start_index], work_unit.size }
125 : std::span<const uint32_t>{};
127 .
scalars = all_scalars[work_unit.batch_msm_index],
128 .points = all_points[work_unit.batch_msm_index],
130 .point_schedule = point_schedule_buffer,
212 bool handle_edge_cases =
false)
noexcept;
221 bool handle_edge_cases = true)
noexcept;
229 return static_cast<uint32_t
>((
NUM_BITS_IN_FIELD + bits_per_slice - 1) / bits_per_slice);
234 const size_t num_points,
249 size_t num_threads)
noexcept;
253 std::span<const AffineElement> points,
260 auto& buckets = bucket_accumulators.buckets;
262 int starting_index =
static_cast<int>(buckets.size() - 1);
264 bool found_start =
false;
265 while (!found_start && starting_index > 0) {
266 const size_t idx =
static_cast<size_t>(starting_index);
267 if (bucket_accumulators.bucket_exists.get(idx)) {
269 running_sum = buckets[idx];
276 return Curve::Group::point_at_infinity;
281 for (
int i = starting_index - 1; i > 0; --i) {
282 size_t idx =
static_cast<size_t>(i);
284 if (bucket_accumulators.bucket_exists.get(idx)) {
285 running_sum += buckets[idx];
289 return sum - offset_generator;
297 std::vector<uint32_t>& nonzero_scalar_indices)
noexcept;
304 std::span<const uint32_t> nonzero_indices,
305 uint32_t bits_per_slice,
314 std::vector<std::vector<uint32_t>>& msm_scalar_indices)
noexcept;
317 static bool use_affine_trick(
size_t num_points,
size_t num_buckets)
noexcept;
328 __attribute__((always_inline))
static void process_single_point(
size_t bucket,
335 bool has_accumulator =
bucket_data.bucket_exists.get(bucket);
336 if (has_accumulator) {
351 __attribute__((always_inline))
static void process_bucket_pair(
size_t lhs_bucket,
360 bool has_bucket_accumulator =
bucket_data.bucket_exists.get(lhs_bucket);
384template <
typename Curve>
387 bool handle_edge_cases =
true) noexcept;
390template <typename
Curve>
392 std::span<const typename
Curve::AffineElement> points) noexcept;
394extern template class MSM<curve::Grumpkin>;
395extern template class MSM<curve::
BN254>;
#define BB_ASSERT_DEBUG(expression,...)
Custom class to handle packed vectors of bits.
typename Group::element Element
typename grumpkin::g1 Group
typename Group::affine_element AffineElement
const AffineElement AffineAdditionData BucketAccumulators size_t & scratch_it
typename Curve::BaseField BaseField
static constexpr size_t BUCKET_ACCUMULATION_COST
static bool use_affine_trick(size_t num_points, size_t num_buckets) noexcept
Decide if batch inversion saves work vs Jacobian additions.
const AffineElement * point_source
static Element jacobian_pippenger_with_transformed_scalars(MSMData &msm_data) noexcept
Pippenger using Jacobian buckets (handles edge cases: doubling, infinity)
__attribute__((always_inline)) static void process_single_point(size_t bucket
static Element accumulate_buckets(BucketType &bucket_accumulators) noexcept
Reduce buckets to single point using running (suffix) sum from high to low: R = sum(k * B_k)
static constexpr size_t INVERSION_TABLE_COST
static uint32_t get_scalar_slice(const ScalarField &scalar, size_t round, size_t slice_size) noexcept
Extract c-bit slice from scalar for bucket index computation.
static constexpr size_t PREFETCH_INTERVAL
static std::vector< ThreadWorkUnits > partition_by_weight(std::span< const std::vector< uint16_t > > msm_scalar_weights, size_t num_threads) noexcept
Partition per-MSM scalar weights into num_threads work units of approximately equal cumulative weight...
static constexpr size_t PREFETCH_INTERVAL_MASK
const AffineElement AffineAdditionData & affine_data
static const AffineElement & get_offset_generator() noexcept
static AffineElement msm(std::span< const AffineElement > points, PolynomialSpan< const ScalarField > scalars, bool handle_edge_cases=false) noexcept
Main entry point for single MSM computation.
static Element affine_pippenger_with_transformed_scalars(MSMData &msm_data) noexcept
Pippenger using affine buckets with batch inversion (faster, no edge case handling)
static constexpr size_t AFFINE_TRICK_SAVINGS_PER_OP
static void compute_scalar_slice_weights(std::span< const ScalarField > scalars, std::span< const uint32_t > nonzero_indices, uint32_t bits_per_slice, std::vector< uint16_t > &weights) noexcept
Compute per-scalar slice-count weights ceil(bit_length / bits_per_slice).
size_t const AffineElement const AffineElement * rhs_source_if_match
static void add_affine_points(AffineElement *points, const size_t num_points, typename Curve::BaseField *scratch_space) noexcept
Batch add n/2 independent point pairs using Montgomery's trick.
const AffineElement AffineAdditionData BucketAccumulators & bucket_data
static constexpr size_t PREFETCH_LOOKAHEAD
AffineElement * lhs_destination
static constexpr size_t PIPPENGER_THRESHOLD
static uint32_t get_num_rounds(size_t num_points) noexcept
static constexpr size_t JACOBIAN_Z_NOT_ONE_PENALTY
__attribute__((always_inline)) static void process_bucket_pair(size_t lhs_bucket
std::vector< MSMWorkUnit > ThreadWorkUnits
const AffineElement * rhs_source
static std::vector< ThreadWorkUnits > get_work_units(std::span< std::span< ScalarField > > scalars, std::vector< std::vector< uint32_t > > &msm_scalar_indices) noexcept
Distribute multiple MSMs across threads with balanced bucket-accumulation work.
size_t const AffineElement * lhs_source
typename Curve::Element Element
static constexpr size_t NUM_BITS_IN_FIELD
AffineElement * rhs_destination
static uint32_t get_optimal_log_num_buckets(size_t num_points) noexcept
Compute optimal bits per slice by minimizing cost over c in [1, MAX_SLICE_BITS)
static std::vector< AffineElement > batch_multi_scalar_mul(std::span< std::span< const AffineElement > > points, std::span< std::span< ScalarField > > scalars, bool handle_edge_cases=true) noexcept
Compute multiple MSMs in parallel with work balancing.
static constexpr size_t AFFINE_TRICK_THRESHOLD
static void batch_accumulate_points_into_buckets(std::span< const uint64_t > point_schedule, std::span< const AffineElement > points, AffineAdditionData &affine_data, BucketAccumulators &bucket_data) noexcept
Process sorted point schedule into bucket accumulators using batched affine additions.
typename Curve::ScalarField ScalarField
const AffineElement AffineAdditionData BucketAccumulators size_t size_t &point_it noexcept
static constexpr size_t MAX_SLICE_BITS
typename Curve::AffineElement AffineElement
static void transform_scalar_and_get_nonzero_scalar_indices(std::span< ScalarField > scalars, std::vector< uint32_t > &nonzero_scalar_indices) noexcept
Convert scalars from Montgomery form and collect indices of nonzero scalars.
bb::curve::BN254::Element Element
Curve::Element pippenger(PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points, bool handle_edge_cases) noexcept
Safe MSM wrapper (defaults to handle_edge_cases=true)
Curve::Element pippenger_unsafe(PolynomialSpan< const typename Curve::ScalarField > scalars, std::span< const typename Curve::AffineElement > points) noexcept
Fast MSM wrapper for linearly independent points (no edge case handling)
Inner sum(Cont< Inner, Args... > const &in)
constexpr std::span< const typename Group::affine_element > get_precomputed_generators()
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Scratch space for batched affine point additions (one per thread)
std::vector< AffineElement > points_to_add
std::vector< uint32_t > addition_result_bucket_destinations
AffineAdditionData() noexcept
AffineElement null_location
std::vector< BaseField > inversion_scratch_space
static constexpr size_t BATCH_OVERFLOW_SIZE
static constexpr size_t BATCH_SIZE
Affine bucket accumulators for the fast affine-trick Pippenger variant.
BucketAccumulators(size_t num_buckets) noexcept
std::vector< AffineElement > buckets
Jacobian bucket accumulators for the safe Pippenger variant.
std::vector< Element > buckets
JacobianBucketAccumulators(size_t num_buckets) noexcept
Container for MSM input data passed between algorithm stages.
std::span< uint64_t > point_schedule
std::span< const ScalarField > scalars
static MSMData from_work_unit(std::span< std::span< ScalarField > > all_scalars, std::span< std::span< const AffineElement > > all_points, const std::vector< std::vector< uint32_t > > &all_indices, std::span< uint64_t > point_schedule_buffer, const MSMWorkUnit &work_unit) noexcept
Factory method to construct MSMData from a work unit.
std::span< const AffineElement > points
std::span< const uint32_t > scalar_indices
MSMWorkUnit describes an MSM that may be part of a larger MSM.
Packed point schedule entry: (point_index << 32) | bucket_index.
constexpr uint32_t point_index() const noexcept
constexpr uint32_t bucket_index() const noexcept
static constexpr PointScheduleEntry create(uint32_t point_index, uint32_t bucket_index) noexcept