Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
element_impl.hpp
Go to the documentation of this file.
1// === AUDIT STATUS ===
2// internal: { status: Planned, auditors: [], commit: }
3// external_1: { status: not started, auditors: [], commit: }
4// external_2: { status: not started, auditors: [], commit: }
5// =====================
6
7#pragma once
12#include "element.hpp"
13#include <cstdint>
14
15// NOLINTBEGIN(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays)
16namespace bb::group_elements {
17template <class Fq, class Fr, class T>
18constexpr element<Fq, Fr, T>::element(const Fq& a, const Fq& b, const Fq& c) noexcept
19 : x(a)
20 , y(b)
21 , z(c)
22{}
23
24template <class Fq, class Fr, class T>
26 : x(other.x)
27 , y(other.y)
28 , z(other.z)
29{}
30
31template <class Fq, class Fr, class T>
33 : x(other.x)
34 , y(other.y)
35 , z(other.z)
36{}
37
38template <class Fq, class Fr, class T>
40 : x(other.x)
41 , y(other.y)
42 , z(Fq::one())
43{}
44
45template <class Fq, class Fr, class T>
47{
48 if (this == &other) {
49 return *this;
50 }
51 x = other.x;
52 y = other.y;
53 z = other.z;
54 return *this;
55}
56
57template <class Fq, class Fr, class T>
59{
60 x = other.x;
61 y = other.y;
62 z = other.z;
63 return *this;
64}
65
66template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T>::operator affine_element<Fq, Fr, T>() const noexcept
67{
68 if (is_point_at_infinity()) {
70 result.x = Fq(0);
71 result.y = Fq(0);
72 result.self_set_infinity();
73 return result;
74 }
75 Fq z_inv = z.invert();
76 Fq zz_inv = z_inv.sqr();
77 Fq zzz_inv = zz_inv * z_inv;
78 affine_element<Fq, Fr, T> result(x * zz_inv, y * zzz_inv);
79 return result;
80}
81
82template <class Fq, class Fr, class T> constexpr void element<Fq, Fr, T>::self_dbl() noexcept
83{
84 if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
85 if (is_point_at_infinity()) {
86 return;
87 }
88 } else {
89 if (x.is_msb_set_word()) {
90 return;
91 }
92 }
93
94 // T0 = x*x
95 Fq T0 = x.sqr();
96
97 // T1 = y*y
98 Fq T1 = y.sqr();
99
100 // T2 = T1*T1 = y*y*y*y
101 Fq T2 = T1.sqr();
102
103 // T1 = T1 + x = x + y*y
104 T1 += x;
105
106 // T1 = T1 * T1
107 T1.self_sqr();
108
109 // T3 = T0 + T2 = xx + y*y*y*y
110 Fq T3 = T0 + T2;
111
112 // T1 = T1 - T3 = x*x + y*y*y*y + 2*x*x*y*y*y*y - x*x - y*y*y*y = 2*x*x*y*y*y*y = 2*S
113 T1 -= T3;
114
115 // T1 = 2T1 = 4*S
116 T1 += T1;
117
118 // T3 = 3T0
119 T3 = T0 + T0;
120 T3 += T0;
121 if constexpr (T::has_a) {
122 T3 += (T::a * z.sqr().sqr());
123 }
124
125 // z2 = 2*y*z
126 z += z;
127 z *= y;
128
129 // T0 = 2T1
130 T0 = T1 + T1;
131
132 // x2 = T3*T3
133 x = T3.sqr();
134
135 // x2 = x2 - 2T1
136 x -= T0;
137
138 // T2 = 8T2
139 T2 += T2;
140 T2 += T2;
141 T2 += T2;
142
143 // y2 = T1 - x2
144 y = T1 - x;
145
146 // y2 = y2 * T3 - T2
147 y *= T3;
148 y -= T2;
149}
150
151template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::dbl() const noexcept
152{
153 element result(*this);
154 result.self_dbl();
155 return result;
156}
157
158template <class Fq, class Fr, class T>
160{
161 if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
162 // If either point is infinity, return the other point
163 if (other.is_point_at_infinity()) {
164 return *this;
165 }
166 if (is_point_at_infinity()) {
167 *this = { other.x, other.y, Fq::one() };
168 return *this;
169 }
170 } else {
171 const bool edge_case_trigger = x.is_msb_set() || other.x.is_msb_set();
172 if (edge_case_trigger) {
173 if (x.is_msb_set()) {
174 *this = { other.x, other.y, Fq::one() };
175 }
176 return *this;
177 }
178 }
179
180 // T0 = z1.z1
181 Fq T0 = z.sqr();
182
183 // T1 = x2.t0 - x1 = x2.z1.z1 - x1
184 Fq T1 = other.x * T0;
185 T1 -= x;
186
187 // T2 = T0.z1 = z1.z1.z1
188 // T2 = T2.y2 - y1 = y2.z1.z1.z1 - y1
189 Fq T2 = z * T0;
190 T2 *= other.y;
191 T2 -= y;
192
193 if (__builtin_expect(T1.is_zero(), 0)) {
194 if (T2.is_zero()) {
195 self_dbl();
196 return *this;
197 }
198 self_set_infinity();
199 return *this;
200 }
201
202 // T2 = 2T2 = 2(y2.z1.z1.z1 - y1) = R
203 // z3 = z1 + H
204 T2 += T2;
205 z += T1;
206
207 // T3 = T1*T1 = HH
208 Fq T3 = T1.sqr();
209
210 // z3 = z3 - z1z1 - HH
211 T0 += T3;
212
213 // z3 = (z1 + H)*(z1 + H)
214 z.self_sqr();
215 z -= T0;
216
217 // T3 = 4HH
218 T3 += T3;
219 T3 += T3;
220
221 // T1 = T1*T3 = 4HHH
222 T1 *= T3;
223
224 // T3 = T3 * x1 = 4HH*x1
225 T3 *= x;
226
227 // T0 = 2T3
228 T0 = T3 + T3;
229
230 // T0 = T0 + T1 = 2(4HH*x1) + 4HHH
231 T0 += T1;
232 x = T2.sqr();
233
234 // x3 = x3 - T0 = R*R - 8HH*x1 -4HHH
235 x -= T0;
236
237 // T3 = T3 - x3 = 4HH*x1 - x3
238 T3 -= x;
239
240 T1 *= y;
241 T1 += T1;
242
243 // T3 = T2 * T3 = R*(4HH*x1 - x3)
244 T3 *= T2;
245
246 // y3 = T3 - T1
247 y = T3 - T1;
248 return *this;
249}
250
251template <class Fq, class Fr, class T>
252constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+(const affine_element<Fq, Fr, T>& other) const noexcept
253{
254 element result(*this);
255 return (result += other);
256}
257
258template <class Fq, class Fr, class T>
259constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-=(const affine_element<Fq, Fr, T>& other) noexcept
260{
261 const affine_element<Fq, Fr, T> to_add{ other.x, -other.y };
262 return operator+=(to_add);
263}
264
265template <class Fq, class Fr, class T>
266constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-(const affine_element<Fq, Fr, T>& other) const noexcept
267{
268 element result(*this);
269 return (result -= other);
270}
271
272template <class Fq, class Fr, class T>
274{
275 if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
276 bool p1_zero = is_point_at_infinity();
277 bool p2_zero = other.is_point_at_infinity();
278 if (__builtin_expect((p1_zero || p2_zero), 0)) {
279 if (p1_zero && !p2_zero) {
280 *this = other;
281 return *this;
282 }
283 if (p2_zero && !p1_zero) {
284 return *this;
285 }
286 self_set_infinity();
287 return *this;
288 }
289 } else {
290 bool p1_zero = x.is_msb_set();
291 bool p2_zero = other.x.is_msb_set();
292 if (__builtin_expect((p1_zero || p2_zero), 0)) {
293 if (p1_zero && !p2_zero) {
294 *this = other;
295 return *this;
296 }
297 if (p2_zero && !p1_zero) {
298 return *this;
299 }
300 self_set_infinity();
301 return *this;
302 }
303 }
304 Fq Z1Z1(z.sqr());
305 Fq Z2Z2(other.z.sqr());
306 Fq S2(Z1Z1 * z);
307 Fq U2(Z1Z1 * other.x);
308 S2 *= other.y;
309 Fq U1(Z2Z2 * x);
310 Fq S1(Z2Z2 * other.z);
311 S1 *= y;
312
313 Fq F(S2 - S1);
314
315 Fq H(U2 - U1);
316
317 if (__builtin_expect(H.is_zero(), 0)) {
318 if (F.is_zero()) {
319 self_dbl();
320 return *this;
321 }
322 self_set_infinity();
323 return *this;
324 }
325
326 F += F;
327
328 Fq I(H + H);
329 I.self_sqr();
330
331 Fq J(H * I);
332
333 U1 *= I;
334
335 U2 = U1 + U1;
336 U2 += J;
337
338 x = F.sqr();
339
340 x -= U2;
341
342 J *= S1;
343 J += J;
344
345 y = U1 - x;
346
347 y *= F;
348
349 y -= J;
350
351 z += other.z;
352
353 Z1Z1 += Z2Z2;
354
355 z.self_sqr();
356 z -= Z1Z1;
357 z *= H;
358 return *this;
359}
360
361template <class Fq, class Fr, class T>
362constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+(const element& other) const noexcept
363{
364 element result(*this);
365 return (result += other);
366}
367
368template <class Fq, class Fr, class T>
370{
371 const element to_add{ other.x, -other.y, other.z };
372 return operator+=(to_add);
373}
374
375template <class Fq, class Fr, class T>
376constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-(const element& other) const noexcept
377{
378 element result(*this);
379 return (result -= other);
380}
381
382template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-() const noexcept
383{
384 return { x, -y, z };
385}
386
387template <class Fq, class Fr, class T>
389{
390 if constexpr (T::USE_ENDOMORPHISM) {
391 return mul_with_endomorphism(exponent);
392 }
393 return mul_without_endomorphism(exponent);
394}
395
396template <class Fq, class Fr, class T> element<Fq, Fr, T> element<Fq, Fr, T>::operator*=(const Fr& exponent) noexcept
397{
398 *this = operator*(exponent);
399 return *this;
400}
401
402template <class Fq, class Fr, class T>
404{
405 if (engine == nullptr) {
407 }
408
409 // Convert the scalar to canonical u256 form
410 const uint256_t k = uint256_t(scalar);
411
412 // Coron's first DPA countermeasure (J.-S. Coron, "Resistance against Differential Power Analysis
413 // for Elliptic Curve Cryptosystems", CHES 1999, LNCS 1717, pp. 292-302, Section 5.1): blind the
414 // scalar with k' = k + r * n where r is a fresh random 64-bit value sampled per call. Since
415 // n * P = O for any P in the prime-order subgroup, k' * P = k * P. The randomization defeats
416 // DPA: per-bit traces of two signings with the same k decorrelate because the bit pattern of k'
417 // differs across calls.
418 //
419 // We force the high bit of r to be 1 so that r is sampled uniformly from [2^63, 2^64). This
420 // guarantees r * n has a fixed-width range (MSB at position M+63 or M+64 for n with MSB at M),
421 // so the iteration count remains exactly NUM_BITS regardless of the sampled r.
422 const uint64_t r = engine->get_random_uint64() | (UINT64_C(1) << 63);
424 const uint512_t k_blinded = uint512_t(k) + r_times_n;
425
426 // For n with MSB at position M, r * n < 2^(M + 65), so k_blinded < 2^(M + 65) + n < 2^(M + 66).
427 // Iterating M+65 bits is safe because k < n means the additional bit from k cannot push k_blinded
428 // past 2^(M + 65) when n is at the lower end of [2^M, 2^(M+1)); we add one extra bit (M + 66
429 // total) to cover the worst case where n is close to 2^(M+1).
430 constexpr size_t NUM_BITS = static_cast<size_t>(uint256_t(Fr::modulus).get_msb()) + 66;
431
432 // Constant-time conditional swap of two Fq coordinates. `mask` is 0 (no swap) or all-ones (swap),
433 // derived from the secret bit via integer subtraction so no branch is emitted.
434 auto cs_fq = [](Fq& a, Fq& b, uint64_t mask) {
435 constexpr size_t NUM_LIMBS = sizeof(Fq) / sizeof(uint64_t);
436 for (size_t i = 0; i < NUM_LIMBS; ++i) {
437 uint64_t t = mask & (a.data[i] ^ b.data[i]);
438 a.data[i] ^= t;
439 b.data[i] ^= t;
440 }
441 };
442 auto cswap = [&cs_fq](element& a, element& b, uint64_t mask) {
443 cs_fq(a.x, b.x, mask);
444 cs_fq(a.y, b.y, mask);
445 cs_fq(a.z, b.z, mask);
446 };
447
448 // Montgomery ladder. Invariant after each iteration: R1 - R0 = P.
449 // Once R0 first becomes non-infinity (after the first 1-bit of k_blinded is processed), the
450 // invariant guarantees R0 + R1 and 2 * R0 do not hit the doubling/infinity special-case branches.
452 element R1(*this);
453
454 for (size_t i = NUM_BITS; i-- > 0;) {
455 const uint64_t mask = 0ULL - static_cast<uint64_t>(k_blinded.get_bit(i));
456 cswap(R0, R1, mask);
457 R1 = R0 + R1;
458 R0 = R0.dbl();
459 cswap(R0, R1, mask);
460 }
461 return R0;
462}
463
464template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::normalize() const noexcept
465{
466 const affine_element<Fq, Fr, T> converted = *this;
467 return element(converted);
468}
469
470template <class Fq, class Fr, class T> element<Fq, Fr, T> element<Fq, Fr, T>::infinity()
471{
474 return e;
475}
476
477template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::set_infinity() const noexcept
478{
479 element result(*this);
480 result.self_set_infinity();
481 return result;
482}
483
484template <class Fq, class Fr, class T> constexpr void element<Fq, Fr, T>::self_set_infinity() noexcept
485{
486 if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
487 // We set the value of x equal to modulus to represent inifinty
488 x.data[0] = Fq::modulus.data[0];
489 x.data[1] = Fq::modulus.data[1];
490 x.data[2] = Fq::modulus.data[2];
491 x.data[3] = Fq::modulus.data[3];
492
493 // Clear y and z so the infinity representation is canonical regardless of prior state
494 y = Fq::zero();
495 z = Fq::zero();
496 } else {
497 (*this).x = Fq::zero();
498 (*this).y = Fq::zero();
499 (*this).z = Fq::zero();
500 x.self_set_msb();
501 }
502}
503
504template <class Fq, class Fr, class T> constexpr bool element<Fq, Fr, T>::is_point_at_infinity() const noexcept
505{
506 if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
507 // We check if the value of x is equal to modulus to represent inifinty
508 return ((x.data[0] ^ Fq::modulus.data[0]) | (x.data[1] ^ Fq::modulus.data[1]) |
509 (x.data[2] ^ Fq::modulus.data[2]) | (x.data[3] ^ Fq::modulus.data[3])) == 0;
510 } else {
511 return (x.is_msb_set());
512 }
513}
514
515template <class Fq, class Fr, class T> constexpr bool element<Fq, Fr, T>::on_curve() const noexcept
516{
517 if (is_point_at_infinity()) {
518 return true;
519 }
520 // We specify the point at inifinity not by (0 \lambda 0), so z should not be 0
521 if (z.is_zero()) {
522 return false;
523 }
524 Fq zz = z.sqr();
525 Fq zzzz = zz.sqr();
526 Fq bz_6 = zzzz * zz * T::b;
527 if constexpr (T::has_a) {
528 bz_6 += (x * T::a) * zzzz;
529 }
530 Fq xxx = x.sqr() * x + bz_6;
531 Fq yy = y.sqr();
532 return (xxx == yy);
533}
534
535template <class Fq, class Fr, class T>
536constexpr bool element<Fq, Fr, T>::operator==(const element& other) const noexcept
537{
538 // If one of points is not on curve, we have no business comparing them.
539 if ((!on_curve()) || (!other.on_curve())) {
540 return false;
541 }
542 bool am_infinity = is_point_at_infinity();
543 bool is_infinity = other.is_point_at_infinity();
544 bool both_infinity = am_infinity && is_infinity;
545 // If just one is infinity, then they are obviously not equal.
546 if ((!both_infinity) && (am_infinity || is_infinity)) {
547 return false;
548 }
549 const Fq lhs_zz = z.sqr();
550 const Fq lhs_zzz = lhs_zz * z;
551 const Fq rhs_zz = other.z.sqr();
552 const Fq rhs_zzz = rhs_zz * other.z;
553
554 const Fq lhs_x = x * rhs_zz;
555 const Fq lhs_y = y * rhs_zzz;
556
557 const Fq rhs_x = other.x * lhs_zz;
558 const Fq rhs_y = other.y * lhs_zzz;
559 return both_infinity || ((lhs_x == rhs_x) && (lhs_y == rhs_y));
560}
561
562template <class Fq, class Fr, class T>
564{
565 if constexpr (T::can_hash_to_curve) {
566 element result = random_coordinates_on_curve(engine);
567 result.z = Fq::random_element(engine);
568 Fq zz = result.z.sqr();
569 Fq zzz = zz * result.z;
570 result.x *= zz;
571 result.y *= zzz;
572 return result;
573 } else {
574 Fr scalar = Fr::random_element(engine);
575 return (element{ T::one_x, T::one_y, Fq::one() } * scalar);
576 }
577}
578
579template <class Fq, class Fr, class T>
581{
582 const uint256_t converted_scalar(scalar);
583
584 if (converted_scalar == 0) {
585 return element::infinity();
586 }
587
588 element accumulator(*this);
589 const uint64_t maximum_set_bit = converted_scalar.get_msb();
590 // NOT constant-time: the loop bound leaks bit-length and the per-bit branch leaks Hamming
591 // weight. This is acceptable only for public scalars; secret scalars must go through
592 // mul_const_time.
593 for (uint64_t i = maximum_set_bit - 1; i < maximum_set_bit; --i) {
594 accumulator.self_dbl();
595 if (converted_scalar.get_bit(i)) {
596 accumulator += *this;
597 }
598 }
599 return accumulator;
600}
601
602namespace detail {
603// Represents the result of
605
614template <typename Element, std::size_t NUM_ROUNDS> struct EndomorphismWnaf {
615 // NUM_WNAF_BITS: Number of bits per window in the WNAF representation.
616 static constexpr size_t NUM_WNAF_BITS = 4;
617 // table: Stores the WNAF representation of the scalars.
618 std::array<uint64_t, NUM_ROUNDS * 2> table;
619 // skew and endo_skew: Indicate if our original scalar is even or odd.
620 bool skew = false;
621 bool endo_skew = false;
622
627 {
628 wnaf::fixed_wnaf(&scalars.first[0], &table[0], skew, 0, 2, NUM_WNAF_BITS);
629 wnaf::fixed_wnaf(&scalars.second[0], &table[1], endo_skew, 0, 2, NUM_WNAF_BITS);
630 }
631};
632
633} // namespace detail
634
635template <class Fq, class Fr, class T>
637{
638 // Consider the infinity flag, return infinity if set
639 if (is_point_at_infinity()) {
640 return element::infinity();
641 }
642 constexpr size_t NUM_ROUNDS = 32;
643 const Fr converted_scalar = scalar.from_montgomery_form();
644
645 if (converted_scalar.is_zero()) {
646 return element::infinity();
647 }
648 static constexpr size_t LOOKUP_SIZE = 8;
650
651 element d2 = dbl();
652 lookup_table[0] = element(*this);
653 for (size_t i = 1; i < LOOKUP_SIZE; ++i) {
654 lookup_table[i] = lookup_table[i - 1] + d2;
655 }
656
657 detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar);
659 element accumulator{ T::one_x, T::one_y, Fq::one() };
660 accumulator.self_set_infinity();
661 Fq beta = Fq::cube_root_of_unity();
662
663 for (size_t i = 0; i < NUM_ROUNDS * 2; ++i) {
664 uint64_t wnaf_entry = wnaf.table[i];
665 uint64_t index = wnaf_entry & 0x0fffffffU;
666 bool sign = static_cast<bool>((wnaf_entry >> 31) & 1);
667 const bool is_odd = ((i & 1) == 1);
668 auto to_add = lookup_table[static_cast<size_t>(index)];
669 to_add.y.self_conditional_negate(sign ^ is_odd);
670 if (is_odd) {
671 to_add.x *= beta;
672 }
673 accumulator += to_add;
674
675 if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) {
676 for (size_t j = 0; j < 4; ++j) {
677 accumulator.self_dbl();
678 }
679 }
680 }
681
682 if (wnaf.skew) {
683 accumulator += -lookup_table[0];
684 }
685 if (wnaf.endo_skew) {
686 accumulator += element{ lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z };
687 }
688
689 return accumulator;
690}
691
706template <typename AffineElement, typename Fq>
707__attribute__((always_inline)) inline void batch_affine_add_impl(const AffineElement* lhs,
708 AffineElement* rhs,
709 const size_t num_pairs,
710 Fq* scratch_space) noexcept
711{
713
714 // Forward pass: prepare batch inversion
715 for (size_t i = 0; i < num_pairs; ++i) {
716 scratch_space[i] = lhs[i].x + rhs[i].x;
717 rhs[i].x -= lhs[i].x;
718 rhs[i].y -= lhs[i].y;
721 }
722
724 throw_or_abort("attempted to invert zero in batch_affine_add_impl");
725 }
727
728 // Backward pass: compute additions
729 for (size_t i = num_pairs - 1; i < num_pairs; --i) {
730 // lambda = (y2 - y1) / (x2 - x1)
733 rhs[i].x = rhs[i].y.sqr();
734 rhs[i].x -= scratch_space[i]; // x3 = lambda^2 - (x1 + x2)
735
736 // y3 = lambda * (x1 - x3) - y1
737 Fq temp = lhs[i].x - rhs[i].x;
738 temp *= rhs[i].y;
739 rhs[i].y = temp - lhs[i].y;
740 }
741}
742
754template <typename AffineElement, typename Fq>
755__attribute__((always_inline)) inline void batch_affine_add_interleaved(AffineElement* points,
756 const size_t num_points,
757 Fq* scratch_space) noexcept
758{
760
761 // Forward pass: accumulate (x2 - x1) products for batch inversion
762 for (size_t i = 0; i < num_points; i += 2) {
763 scratch_space[i >> 1] = points[i].x + points[i + 1].x; // x1 + x2 (saved for later)
764 points[i + 1].x -= points[i].x; // x2 - x1
765 points[i + 1].y -= points[i].y; // y2 - y1
766 points[i + 1].y *= batch_inversion_accumulator;
767 batch_inversion_accumulator *= points[i + 1].x;
768 }
769
771 throw_or_abort("attempted to invert zero in batch_affine_add_interleaved");
772 }
774
775 // Backward pass: complete inversions and compute additions
776 for (size_t i = num_points - 2; i < num_points; i -= 2) {
777 // lambda = (y2 - y1) / (x2 - x1)
778 points[i + 1].y *= batch_inversion_accumulator;
779 batch_inversion_accumulator *= points[i + 1].x;
780 points[i + 1].x = points[i + 1].y.sqr();
781 // x3 = lambda^2 - (x1 + x2)
782 points[(i + num_points) >> 1].x = points[i + 1].x - scratch_space[i >> 1];
783
784 if (i >= 2) {
785 __builtin_prefetch(points + i - 2);
786 __builtin_prefetch(points + i - 1);
787 __builtin_prefetch(points + ((i + num_points - 2) >> 1));
788 __builtin_prefetch(scratch_space + ((i - 2) >> 1));
789 }
790
791 // y3 = lambda * (x1 - x3) - y1
792 points[i].x -= points[(i + num_points) >> 1].x;
793 points[i].x *= points[i + 1].y;
794 points[(i + num_points) >> 1].y = points[i].x - points[i].y;
795 }
796}
797
812template <typename AffineElement, typename Fq, typename T>
813__attribute__((always_inline)) inline void batch_affine_double_impl(AffineElement* points,
814 const size_t num_points,
815 Fq* scratch_space) noexcept
816{
818
819 // Forward pass: prepare batch inversion
820 for (size_t i = 0; i < num_points; ++i) {
821 scratch_space[i] = points[i].x.sqr();
822 if constexpr (T::has_a) {
823 scratch_space[i] += T::a; // adjust slope in numerator
824 }
825 scratch_space[i] = scratch_space[i] + scratch_space[i] + scratch_space[i];
826 scratch_space[i] *= batch_inversion_accumulator;
827 batch_inversion_accumulator *= (points[i].y + points[i].y);
828 }
829
831 throw_or_abort("attempted to invert zero in batch_affine_double_impl");
832 }
834
835 // Backward pass: compute doublings
837 for (size_t i_plus_1 = num_points; i_plus_1 > 0; --i_plus_1) {
838 size_t i = i_plus_1 - 1;
839
840 scratch_space[i] *= batch_inversion_accumulator;
841 batch_inversion_accumulator *= (points[i].y + points[i].y);
842
843 temp_x = points[i].x;
844 points[i].x = scratch_space[i].sqr() - (points[i].x + points[i].x);
845 points[i].y = scratch_space[i] * (temp_x - points[i].x) - points[i].y;
846 }
847}
848
860template <class Fq, class Fr, class T>
862 const std::span<affine_element<Fq, Fr, T>>& second_group,
863 const std::span<affine_element<Fq, Fr, T>>& results) noexcept
864{
866 const size_t num_points = first_group.size();
867 BB_ASSERT_EQ(second_group.size(), first_group.size());
868
869 // Space for temporary values
870 std::vector<Fq> scratch_space(num_points);
871
873 num_points, [&](size_t i) { results[i] = first_group[i]; }, thread_heuristics::FF_COPY_COST * 2);
874
875 // Perform batch affine addition: (lhs[i], rhs[i]) -> rhs[i]
878 [&](size_t start, size_t end, BB_UNUSED size_t chunk_index) {
879 batch_affine_add_impl<affine_element, Fq>(
880 &second_group[start], &results[start], end - start, &scratch_space[start]);
881 },
883}
884
895template <class Fq, class Fr, class T>
897 const std::span<const affine_element<Fq, Fr, T>>& points, const Fr& scalar) noexcept
898{
899 BB_BENCH();
901 const size_t num_points = points.size();
902
903 // Space for temporary values
904 std::vector<Fq> scratch_space(num_points);
905
906 // We compute the resulting point through WNAF by evaluating (the (\sum_i (16ⁱ⋅
907 // (a_i ∈ {-15,-13,-11,-9,-7,-5,-3,-1,1,3,5,7,9,11,13,15}))) - skew), where skew is 0 or 1. The result of the sum is
908 // always odd and skew is used to reconstruct an even scalar. This means that to construct scalar p-1, where p is
909 // the order of the scalar field, we first compute p through the sums and then subtract -1. Howver, since we are
910 // computing p⋅Point, we get a point at infinity, which is an edgecase, and we don't want to handle edgecases in the
911 // hot loop since the slow the computation down. So it's better to just handle it here.
912 if (scalar == -Fr::one()) {
914 parallel_for_heuristic(num_points, [&](size_t i) { results[i] = -points[i]; }, thread_heuristics::FF_COPY_COST);
915 return results;
916 }
917 // Compute wnaf for scalar
918 const Fr converted_scalar = scalar.from_montgomery_form();
919
920 // If the scalar is zero, just set results to the point at infinity
921 if (converted_scalar.is_zero()) {
922 affine_element result{ Fq::zero(), Fq::zero() };
923 result.self_set_infinity();
925 parallel_for_heuristic(num_points, [&](size_t i) { results[i] = result; }, thread_heuristics::FF_COPY_COST);
926 return results;
927 }
928
929 constexpr size_t LOOKUP_SIZE = 8;
930 constexpr size_t NUM_ROUNDS = 32;
931
932 detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar);
934
936 std::array<std::vector<affine_element>, LOOKUP_SIZE> lookup_table;
937 for (auto& table : lookup_table) {
938 table.resize(num_points);
939 }
940 std::vector<affine_element> temp_point_vector(num_points);
941
942 auto execute_range = [&](size_t start, size_t end) {
943 BB_BENCH_TRACY_NAME("batch_mul_with_endo/execute_range");
944 // Perform batch affine addition in parallel
945 const auto add_chunked = [&](const affine_element* lhs, affine_element* rhs) {
946 batch_affine_add_impl<affine_element, Fq>(&lhs[start], &rhs[start], end - start, &scratch_space[start]);
947 };
948
949 // Perform point doubling in parallel
950 const auto double_chunked = [&](affine_element* lhs) {
951 batch_affine_double_impl<affine_element, Fq, T>(&lhs[start], end - start, &scratch_space[start]);
952 };
953
954 // Initialize first entries in lookup table
955 for (size_t i = start; i < end; ++i) {
956 if (points[i].is_point_at_infinity()) {
957 temp_point_vector[i] = affine_element::one();
958 lookup_table[0][i] = affine_element::one();
959 } else {
960 temp_point_vector[i] = points[i];
961 lookup_table[0][i] = points[i];
962 }
963 }
964 // Costruct lookup table
965 double_chunked(&temp_point_vector[0]);
966 for (size_t j = 1; j < LOOKUP_SIZE; ++j) {
967 for (size_t i = start; i < end; ++i) {
968 lookup_table[j][i] = lookup_table[j - 1][i];
969 }
970 add_chunked(&temp_point_vector[0], &lookup_table[j][0]);
971 }
972
973 constexpr Fq beta = Fq::cube_root_of_unity();
974 uint64_t wnaf_entry = 0;
975 uint64_t index = 0;
976 bool sign = 0;
977 // Prepare elements for the first batch addition
978 for (size_t j = 0; j < 2; ++j) {
979 wnaf_entry = wnaf.table[j];
980 index = wnaf_entry & 0x0fffffffU;
981 sign = static_cast<bool>((wnaf_entry >> 31) & 1);
982 const bool is_odd = ((j & 1) == 1);
983 for (size_t i = start; i < end; ++i) {
984 auto to_add = lookup_table[static_cast<size_t>(index)][i];
985 to_add.y.self_conditional_negate(sign ^ is_odd);
986 if (is_odd) {
987 to_add.x *= beta;
988 }
989 if (j == 0) {
990 work_elements[i] = to_add;
991 } else {
992 temp_point_vector[i] = to_add;
993 }
994 }
995 }
996 add_chunked(&temp_point_vector[0], &work_elements[0]);
997 // Run through SM logic in wnaf form (excluding the skew)
998 for (size_t j = 2; j < NUM_ROUNDS * 2; ++j) {
999 wnaf_entry = wnaf.table[j];
1000 index = wnaf_entry & 0x0fffffffU;
1001 sign = static_cast<bool>((wnaf_entry >> 31) & 1);
1002 const bool is_odd = ((j & 1) == 1);
1003 if (!is_odd) {
1004 for (size_t k = 0; k < 4; ++k) {
1005 double_chunked(&work_elements[0]);
1006 }
1007 }
1008 for (size_t i = start; i < end; ++i) {
1009 auto to_add = lookup_table[static_cast<size_t>(index)][i];
1010 to_add.y.self_conditional_negate(sign ^ is_odd);
1011 if (is_odd) {
1012 to_add.x *= beta;
1013 }
1014 temp_point_vector[i] = to_add;
1015 }
1016 add_chunked(&temp_point_vector[0], &work_elements[0]);
1017 }
1018 // Apply skew for the first endo scalar
1019 // Use affine_element::operator+ (via Jacobian) to handle edge cases related to the point at infinity.
1020 if (wnaf.skew) {
1021 for (size_t i = start; i < end; ++i) {
1022 work_elements[i] = work_elements[i] + (-lookup_table[0][i]);
1023 }
1024 }
1025 // Apply skew for the second endo scalar
1026 if (wnaf.endo_skew) {
1027 for (size_t i = start; i < end; ++i) {
1028 affine_element endo_point = lookup_table[0][i];
1029 endo_point.x *= beta;
1030 work_elements[i] = work_elements[i] + endo_point;
1031 }
1032 }
1033 // Handle points at infinity explicitly
1034 for (size_t i = start; i < end; ++i) {
1035 work_elements[i] = points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i];
1036 }
1037 };
1038 parallel_for_range(num_points, execute_range);
1039
1040 return work_elements;
1041}
1042
1043template <typename Fq, typename Fr, typename T>
1044void element<Fq, Fr, T>::batch_normalize(element* elements, const size_t num_elements) noexcept
1045{
1046 std::vector<Fq> temporaries;
1047 temporaries.reserve(num_elements * 2);
1048 Fq accumulator = Fq::one();
1049
1050 // Iterate over the points, computing the product of their z-coordinates.
1051 // At each iteration, store the currently-accumulated z-coordinate in `temporaries`
1052 for (size_t i = 0; i < num_elements; ++i) {
1053 temporaries.emplace_back(accumulator);
1054 if (!elements[i].is_point_at_infinity()) {
1055 accumulator *= elements[i].z;
1056 }
1057 }
1058 // For the rest of this method we refer to the product of all z-coordinates as the 'global' z-coordinate
1059 // Invert the global z-coordinate and store in `accumulator`
1060 accumulator = accumulator.invert();
1061
1084 for (size_t i = num_elements - 1; i < num_elements; --i) {
1085 if (!elements[i].is_point_at_infinity()) {
1086 Fq z_inv = accumulator * temporaries[i];
1087 Fq zz_inv = z_inv.sqr();
1088 elements[i].x *= zz_inv;
1089 elements[i].y *= (zz_inv * z_inv);
1090 accumulator *= elements[i].z;
1091 }
1092 elements[i].z = Fq::one();
1093 }
1094}
1095
1096template <typename Fq, typename Fr, typename T>
1097template <typename>
1099{
1100 bool found_one = false;
1101 Fq yy;
1102 Fq x;
1103 Fq y;
1104 while (!found_one) {
1106 yy = x.sqr() * x + T::b;
1107 if constexpr (T::has_a) {
1108 yy += (x * T::a);
1109 }
1110 auto [found_root, y1] = yy.sqrt();
1111 y = y1;
1112 found_one = found_root;
1113 }
1114 return { x, y, Fq::one() };
1115}
1116
1117} // namespace bb::group_elements
1118// NOLINTEND(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays)
#define BB_ASSERT_EQ(actual, expected,...)
Definition assert.hpp:83
#define BB_BENCH_TRACY_NAME(name)
Definition bb_bench.hpp:256
#define BB_BENCH()
Definition bb_bench.hpp:268
constexpr bool is_point_at_infinity() const noexcept
constexpr void self_set_infinity() noexcept
static constexpr affine_element one() noexcept
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
Definition element.hpp:35
element operator*=(const Fr &exponent) noexcept
BB_INLINE constexpr element set_infinity() const noexcept
element mul_with_endomorphism(const Fr &scalar) const noexcept
static std::vector< affine_element< Fq, Fr, Params > > batch_mul_with_endomorphism(const std::span< const affine_element< Fq, Fr, Params > > &points, const Fr &scalar) noexcept
Multiply each point by the same scalar.
constexpr element operator-=(const element &other) noexcept
constexpr element operator-() const noexcept
friend constexpr element operator+(const affine_element< Fq, Fr, Params > &left, const element &right) noexcept
Definition element.hpp:76
constexpr element dbl() const noexcept
constexpr element normalize() const noexcept
constexpr void self_dbl() noexcept
static element random_element(numeric::RNG *engine=nullptr) noexcept
static void batch_normalize(element *elements, size_t num_elements) noexcept
constexpr element operator+=(const element &other) noexcept
static void batch_affine_add(const std::span< affine_element< Fq, Fr, Params > > &first_group, const std::span< affine_element< Fq, Fr, Params > > &second_group, const std::span< affine_element< Fq, Fr, Params > > &results) noexcept
Pairwise affine add points in first and second group.
element mul_const_time(const Fr &scalar, numeric::RNG *engine=nullptr) const noexcept
Constant-time scalar multiplication intended for secret scalars (e.g. ECDSA / Schnorr nonces).
BB_INLINE constexpr bool on_curve() const noexcept
BB_INLINE constexpr bool operator==(const element &other) const noexcept
element operator*(const Fr &exponent) const noexcept
element() noexcept=default
static element random_coordinates_on_curve(numeric::RNG *engine=nullptr) noexcept
element mul_without_endomorphism(const Fr &scalar) const noexcept
constexpr element & operator=(const element &other) noexcept
BB_INLINE constexpr void self_set_infinity() noexcept
BB_INLINE constexpr bool is_point_at_infinity() const noexcept
constexpr bool get_bit(uint64_t bit_index) const
constexpr uint64_t get_msb() const
bool get_bit(uint64_t bit_index) const
#define BB_UNUSED
FF a
FF b
numeric::RNG & engine
std::pair< std::array< uint64_t, 2 >, std::array< uint64_t, 2 > > EndoScalars
__attribute__((always_inline)) inline void batch_affine_add_impl(const AffineElement *lhs
Batch affine addition for parallel arrays: (lhs[i], rhs[i]) → rhs[i].
AffineElement const size_t num_pairs
const size_t num_points
AffineElement * rhs
AffineElement const size_t Fq *scratch_space noexcept
uintx< uint256_t > uint512_t
Definition uintx.hpp:309
RNG & get_randomness()
Definition engine.cpp:258
std::conditional_t< IsGoblinBigGroup< C, Fq, Fr, G >, element_goblin::goblin_element< C, goblin_field< C >, Fr, G >, element_default::element< C, Fq, Fr, G > > element
element wraps either element_default::element or element_goblin::goblin_element depending on parametr...
constexpr size_t FF_COPY_COST
Definition thread.hpp:144
constexpr size_t FF_ADDITION_COST
Definition thread.hpp:132
constexpr size_t FF_MULTIPLICATION_COST
Definition thread.hpp:134
void fixed_wnaf(const uint64_t *scalar, uint64_t *wnaf, bool &skew_map, const uint64_t point_index, const uint64_t num_points, const size_t wnaf_bits) noexcept
Performs fixed-window non-adjacent form (WNAF) computation for scalar multiplication.
Definition wnaf.hpp:117
Univariate< Fr, domain_end > operator*(const Fr &ff, const Univariate< Fr, domain_end > &uv)
void parallel_for_heuristic(size_t num_points, const std::function< void(size_t, size_t, size_t)> &func, size_t heuristic_cost)
Split a loop into several loops running in parallel based on operations in 1 iteration.
Definition thread.cpp:171
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
Definition thread.cpp:141
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
grumpkin::fq Fq
static constexpr field cube_root_of_unity()
static constexpr field one()
static constexpr uint256_t modulus
static void split_into_endomorphism_scalars(const field &k, field &k1, field &k2)
Full-width endomorphism decomposition: k ≡ k1 - k2·λ (mod r). Modifies the field elements k1 and k2.
BB_INLINE constexpr void self_sqr() &noexcept
constexpr field invert() const noexcept
BB_INLINE constexpr bool is_msb_set() const noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept
BB_INLINE constexpr field sqr() const noexcept
BB_INLINE constexpr bool is_zero() const noexcept
BB_INLINE constexpr field from_montgomery_form() const noexcept
static constexpr field zero()
Handles the WNAF computation for scalars that are split using an endomorphism, achieved through split...
EndomorphismWnaf(const EndoScalars &scalars)
std::array< uint64_t, NUM_ROUNDS *2 > table
void throw_or_abort(std::string const &err)