91 const std::vector<MSM>& msms,
const uint32_t total_number_of_muls,
const size_t num_msm_rows)
123 const size_t num_rows_in_read_counts_table =
124 static_cast<size_t>(total_number_of_muls) *
125 (eccvm::POINT_TABLE_SIZE >> 1);
129 point_table_read_counts[0].reserve(num_rows_in_read_counts_table);
130 point_table_read_counts[1].reserve(num_rows_in_read_counts_table);
131 for (
size_t i = 0; i < num_rows_in_read_counts_table; ++i) {
132 point_table_read_counts[0].emplace_back(0);
133 point_table_read_counts[1].emplace_back(0);
136 const auto update_read_count = [&point_table_read_counts](
const size_t point_idx,
const int slice) {
146 const size_t row_index_offset = point_idx * 8;
149 const auto table_index =
static_cast<size_t>((
slice + 15) / 2);
150 point_table_read_counts[1][row_index_offset + table_index]++;
153 const auto table_index =
static_cast<size_t>((15 -
slice) / 2);
154 point_table_read_counts[0][row_index_offset + table_index]++;
159 std::vector<size_t> msm_row_counts;
160 msm_row_counts.reserve(msms.size() + 1);
161 msm_row_counts.push_back(1);
164 std::vector<size_t> pc_values;
165 pc_values.reserve(msms.size() + 1);
166 pc_values.push_back(total_number_of_muls);
167 for (
const auto& msm : msms) {
169 msm_row_counts.push_back(msm_row_counts.back() + num_rows_required);
170 pc_values.push_back(pc_values.back() - msm.size());
183 for (
size_t msm_idx = 0; msm_idx < msms.size(); ++msm_idx) {
185 auto pc =
static_cast<uint32_t
>(pc_values[msm_idx]);
186 const auto& msm = msms[msm_idx];
187 const size_t msm_size = msm.size();
188 const size_t num_rows_per_digit =
191 for (
size_t relative_row_idx = 0; relative_row_idx < num_rows_per_digit; ++relative_row_idx) {
192 const size_t num_points_in_row = (relative_row_idx + 1) *
ADDITIONS_PER_ROW > msm_size
196 for (
size_t relative_point_idx = 0; relative_point_idx <
ADDITIONS_PER_ROW; ++relative_point_idx) {
197 const size_t point_idx =
offset + relative_point_idx;
198 const bool add = num_points_in_row > relative_point_idx;
200 int slice = msm[point_idx].wnaf_digits[digit_idx];
202 update_read_count((total_number_of_muls - pc) + point_idx,
slice);
209 for (
size_t row_idx = 0; row_idx < num_rows_per_digit; ++row_idx) {
215 ++relative_point_idx) {
216 bool add = num_points_in_row > relative_point_idx;
217 const size_t point_idx =
offset + relative_point_idx;
224 int slice = msm[point_idx].wnaf_skew ? -1 : -15;
225 update_read_count((total_number_of_muls - pc) + point_idx,
slice);
243 const size_t num_point_adds_and_doubles =
244 (num_msm_rows - 2) * 4;
251 const size_t num_accumulators = num_msm_rows - 1;
255 static constexpr size_t NUM_POINTS_IN_ADDITION_RELATION = 3;
256 const size_t num_points_to_normalize =
257 (num_point_adds_and_doubles * NUM_POINTS_IN_ADDITION_RELATION) + num_accumulators;
258 std::vector<Element> points_to_normalize(num_points_to_normalize);
260 std::span<Element> p2_trace(&points_to_normalize[num_point_adds_and_doubles], num_point_adds_and_doubles);
261 std::span<Element> p3_trace(&points_to_normalize[num_point_adds_and_doubles * 2], num_point_adds_and_doubles);
265 std::vector<bool> is_double_or_add(num_point_adds_and_doubles);
267 std::span<Element> accumulator_trace(&points_to_normalize[num_point_adds_and_doubles * 3], num_accumulators);
271 accumulator_trace[0] = offset_generator;
276 for (
size_t msm_idx = 0; msm_idx < msms.size(); msm_idx++) {
277 Element accumulator = offset_generator;
278 const auto& msm = msms[msm_idx];
279 size_t msm_row_index = msm_row_counts[msm_idx];
280 const size_t msm_size = msm.size();
281 const size_t num_rows_per_digit =
288 (msm_row_counts[msm_idx] - 1) * 4;
295 const auto pc =
static_cast<uint32_t
>(pc_values[msm_idx]);
297 for (
size_t row_idx = 0; row_idx < num_rows_per_digit; ++row_idx) {
301 auto& row = msm_rows[msm_row_index];
303 row.msm_transition = (digit_idx == 0) && (row_idx == 0);
306 auto& add_state = row.add_state[point_idx];
307 add_state.add = num_points_in_row > point_idx;
308 int slice = add_state.add ? msm[
offset + point_idx].wnaf_digits[digit_idx] : 0;
318 add_state.slice = add_state.add ? (
slice + 15) / 2 : 0;
321 ? msm[
offset + point_idx].precomputed_table[
static_cast<size_t>(add_state.slice)]
326 accumulator = add_state.add ? (accumulator + add_state.point) :
Element(p1);
327 p1_trace[trace_index] = p1;
328 p2_trace[trace_index] = p2;
329 p3_trace[trace_index] = accumulator;
330 is_double_or_add[trace_index] =
false;
334 accumulator_trace[msm_row_index] = accumulator;
336 row.q_double =
false;
338 row.msm_round =
static_cast<uint32_t
>(digit_idx);
339 row.msm_size =
static_cast<uint32_t
>(msm_size);
340 row.msm_count =
static_cast<uint32_t
>(
offset);
350 auto& row = msm_rows[msm_row_index];
351 row.msm_transition =
false;
352 row.msm_round =
static_cast<uint32_t
>(digit_idx + 1);
353 row.msm_size =
static_cast<uint32_t
>(msm_size);
354 row.msm_count =
static_cast<uint32_t
>(0);
359 auto& add_state = row.add_state[point_idx];
360 add_state.add =
false;
362 add_state.point = { 0, 0 };
363 add_state.collision_inverse = 0;
365 p1_trace[trace_index] = accumulator;
366 p2_trace[trace_index] = accumulator;
367 accumulator = accumulator.dbl();
368 p3_trace[trace_index] = accumulator;
369 is_double_or_add[trace_index] =
true;
372 accumulator_trace[msm_row_index] = accumulator;
376 for (
size_t row_idx = 0; row_idx < num_rows_per_digit; ++row_idx) {
377 auto& row = msm_rows[msm_row_index];
383 row.msm_transition =
false;
384 Element acc_expected = accumulator;
386 auto& add_state = row.add_state[point_idx];
387 add_state.add = num_points_in_row > point_idx;
388 add_state.slice = add_state.add ? msm[
offset + point_idx].wnaf_skew ? 7 : 0 : 0;
392 ? msm[
offset + point_idx].precomputed_table[
static_cast<size_t>(add_state.slice)]
397 bool add_predicate = add_state.add ? msm[
offset + point_idx].wnaf_skew :
false;
398 auto p1 = accumulator;
399 accumulator = add_predicate ? accumulator + add_state.point : accumulator;
400 p1_trace[trace_index] = p1;
401 p2_trace[trace_index] = add_state.point;
402 p3_trace[trace_index] = accumulator;
403 is_double_or_add[trace_index] =
false;
407 row.q_double =
false;
409 row.msm_round =
static_cast<uint32_t
>(digit_idx + 1);
410 row.msm_size =
static_cast<uint32_t
>(msm_size);
411 row.msm_count =
static_cast<uint32_t
>(
offset);
413 accumulator_trace[msm_row_index] = accumulator;
422 Element::batch_normalize(&points_to_normalize[start], end - start);
426 std::vector<FF> inverse_trace(num_point_adds_and_doubles);
427 if (num_point_adds_and_doubles > 0) {
429 for (
size_t operation_idx = start; operation_idx < end; ++operation_idx) {
430 if (is_double_or_add[operation_idx]) {
431 inverse_trace[operation_idx] = (p1_trace[operation_idx].y + p1_trace[operation_idx].y);
433 inverse_trace[operation_idx] = (p2_trace[operation_idx].x - p1_trace[operation_idx].x);
443 for (
size_t msm_idx = 0; msm_idx < msms.size(); msm_idx++) {
444 const auto& msm = msms[msm_idx];
446 size_t msm_row_index = msm_row_counts[msm_idx];
448 size_t accumulator_index = msm_row_counts[msm_idx] - 1;
449 const size_t msm_size = msm.size();
450 const size_t num_rows_per_digit =
454 for (
size_t row_idx = 0; row_idx < num_rows_per_digit; ++row_idx) {
455 auto& row = msm_rows[msm_row_index];
459 const Element& normalized_accumulator = accumulator_trace[accumulator_index];
460 BB_ASSERT_EQ(normalized_accumulator.is_point_at_infinity(), 0);
461 row.accumulator_x = normalized_accumulator.x;
462 row.accumulator_y = normalized_accumulator.y;
464 auto& add_state = row.add_state[point_idx];
466 const auto& inverse = inverse_trace[trace_index];
467 const auto& p1 = p1_trace[trace_index];
468 const auto& p2 = p2_trace[trace_index];
469 add_state.collision_inverse = add_state.add ? inverse : 0;
470 add_state.lambda = add_state.add ? (p2.y - p1.y) * inverse : 0;
480 MSMRow& row = msm_rows[msm_row_index];
481 const Element& normalized_accumulator = accumulator_trace[accumulator_index];
482 const FF& acc_x = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.x;
483 const FF& acc_y = normalized_accumulator.is_point_at_infinity() ? 0 : normalized_accumulator.y;
487 auto& add_state = row.
add_state[point_idx];
488 add_state.collision_inverse = 0;
489 const FF& dx = p1_trace[trace_index].x;
490 const FF& inverse = inverse_trace[trace_index];
491 add_state.lambda = ((dx + dx + dx) * dx) * inverse;
501 for (
size_t row_idx = 0; row_idx < num_rows_per_digit; ++row_idx) {
502 MSMRow& row = msm_rows[msm_row_index];
503 const Element& normalized_accumulator = accumulator_trace[accumulator_index];
504 BB_ASSERT_EQ(normalized_accumulator.is_point_at_infinity(), 0);
509 auto& add_state = row.
add_state[point_idx];
510 bool add_predicate = add_state.add ? msm[
offset + point_idx].wnaf_skew :
false;
512 const auto& inverse = inverse_trace[trace_index];
513 const auto& p1 = p1_trace[trace_index];
514 const auto& p2 = p2_trace[trace_index];
515 add_state.collision_inverse = add_predicate ? inverse : 0;
516 add_state.lambda = add_predicate ? (p2.y - p1.y) * inverse : 0;
529 Element final_accumulator(accumulator_trace.back());
530 MSMRow& final_row = msm_rows.back();
531 final_row.
pc =
static_cast<uint32_t
>(pc_values.back());
533 final_row.
accumulator_x = final_accumulator.is_point_at_infinity() ? 0 : final_accumulator.x;
534 final_row.
accumulator_y = final_accumulator.is_point_at_infinity() ? 0 : final_accumulator.y;
537 final_row.
q_add =
false;
545 return { msm_rows, point_table_read_counts };