17template <
class Fq,
class Fr,
class T>
24template <
class Fq,
class Fr,
class T>
31template <
class Fq,
class Fr,
class T>
38template <
class Fq,
class Fr,
class T>
45template <
class Fq,
class Fr,
class T>
57template <
class Fq,
class Fr,
class T>
68 if (is_point_at_infinity()) {
76 Fq zz_inv = z_inv.
sqr();
77 Fq zzz_inv = zz_inv * z_inv;
78 affine_element<Fq, Fr, T> result(x * zz_inv, y * zzz_inv);
84 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
85 if (is_point_at_infinity()) {
89 if (x.is_msb_set_word()) {
121 if constexpr (T::has_a) {
122 T3 += (T::a * z.sqr().sqr());
158template <
class Fq,
class Fr,
class T>
161 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
163 if (other.is_point_at_infinity()) {
166 if (is_point_at_infinity()) {
167 *
this = { other.
x, other.y,
Fq::one() };
171 const bool edge_case_trigger = x.
is_msb_set() || other.x.is_msb_set();
172 if (edge_case_trigger) {
173 if (x.is_msb_set()) {
174 *
this = { other.x, other.y,
Fq::one() };
184 Fq T1 = other.x * T0;
193 if (__builtin_expect(T1.
is_zero(), 0)) {
251template <
class Fq,
class Fr,
class T>
255 return (result += other);
258template <
class Fq,
class Fr,
class T>
261 const affine_element<Fq, Fr, T> to_add{ other.
x, -other.y };
262 return operator+=(to_add);
265template <
class Fq,
class Fr,
class T>
269 return (result -= other);
272template <
class Fq,
class Fr,
class T>
275 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
276 bool p1_zero = is_point_at_infinity();
277 bool p2_zero = other.is_point_at_infinity();
278 if (__builtin_expect((p1_zero || p2_zero), 0)) {
279 if (p1_zero && !p2_zero) {
283 if (p2_zero && !p1_zero) {
290 bool p1_zero = x.is_msb_set();
291 bool p2_zero = other.x.is_msb_set();
292 if (__builtin_expect((p1_zero || p2_zero), 0)) {
293 if (p1_zero && !p2_zero) {
297 if (p2_zero && !p1_zero) {
305 Fq Z2Z2(other.z.sqr());
307 Fq U2(Z1Z1 * other.x);
310 Fq S1(Z2Z2 * other.z);
317 if (__builtin_expect(H.is_zero(), 0)) {
361template <
class Fq,
class Fr,
class T>
365 return (result += other);
368template <
class Fq,
class Fr,
class T>
371 const element to_add{ other.
x, -other.y, other.z };
372 return operator+=(to_add);
375template <
class Fq,
class Fr,
class T>
379 return (result -= other);
387template <
class Fq,
class Fr,
class T>
390 if constexpr (T::USE_ENDOMORPHISM) {
391 return mul_with_endomorphism(exponent);
393 return mul_without_endomorphism(exponent);
402template <
class Fq,
class Fr,
class T>
422 const uint64_t r =
engine->get_random_uint64() | (UINT64_C(1) << 63);
434 auto cs_fq = [](
Fq&
a,
Fq&
b, uint64_t mask) {
435 constexpr size_t NUM_LIMBS =
sizeof(
Fq) /
sizeof(uint64_t);
436 for (
size_t i = 0; i < NUM_LIMBS; ++i) {
443 cs_fq(
a.x,
b.x, mask);
444 cs_fq(
a.y,
b.y, mask);
445 cs_fq(
a.z,
b.z, mask);
454 for (
size_t i = NUM_BITS; i-- > 0;) {
455 const uint64_t mask = 0ULL -
static_cast<uint64_t
>(k_blinded.
get_bit(i));
486 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
506 if constexpr (
Fq::modulus.
data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
511 return (x.is_msb_set());
517 if (is_point_at_infinity()) {
526 Fq bz_6 = zzzz * zz * T::b;
527 if constexpr (T::has_a) {
528 bz_6 += (x * T::a) * zzzz;
530 Fq xxx = x.
sqr() * x + bz_6;
535template <
class Fq,
class Fr,
class T>
539 if ((!on_curve()) || (!other.on_curve())) {
542 bool am_infinity = is_point_at_infinity();
543 bool is_infinity = other.is_point_at_infinity();
544 bool both_infinity = am_infinity && is_infinity;
546 if ((!both_infinity) && (am_infinity || is_infinity)) {
549 const Fq lhs_zz = z.sqr();
550 const Fq lhs_zzz = lhs_zz * z;
551 const Fq rhs_zz = other.z.sqr();
552 const Fq rhs_zzz = rhs_zz * other.z;
554 const Fq lhs_x = x * rhs_zz;
555 const Fq lhs_y = y * rhs_zzz;
557 const Fq rhs_x = other.x * lhs_zz;
558 const Fq rhs_y = other.y * lhs_zzz;
559 return both_infinity || ((lhs_x == rhs_x) && (lhs_y == rhs_y));
562template <
class Fq,
class Fr,
class T>
565 if constexpr (T::can_hash_to_curve) {
568 Fq zz = result.
z.sqr();
569 Fq zzz = zz * result.
z;
579template <
class Fq,
class Fr,
class T>
582 const uint256_t converted_scalar(scalar);
584 if (converted_scalar == 0) {
589 const uint64_t maximum_set_bit = converted_scalar.
get_msb();
593 for (uint64_t i = maximum_set_bit - 1; i < maximum_set_bit; --i) {
595 if (converted_scalar.
get_bit(i)) {
596 accumulator += *
this;
618 std::array<uint64_t, NUM_ROUNDS * 2>
table;
635template <
class Fq,
class Fr,
class T>
639 if (is_point_at_infinity()) {
642 constexpr size_t NUM_ROUNDS = 32;
645 if (converted_scalar.
is_zero()) {
648 static constexpr size_t LOOKUP_SIZE = 8;
652 lookup_table[0] =
element(*
this);
653 for (
size_t i = 1; i < LOOKUP_SIZE; ++i) {
654 lookup_table[i] = lookup_table[i - 1] + d2;
660 accumulator.self_set_infinity();
663 for (
size_t i = 0; i < NUM_ROUNDS * 2; ++i) {
664 uint64_t wnaf_entry = wnaf.table[i];
665 uint64_t
index = wnaf_entry & 0x0fffffffU;
666 bool sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
667 const bool is_odd = ((i & 1) == 1);
668 auto to_add = lookup_table[
static_cast<size_t>(
index)];
669 to_add.y.self_conditional_negate(sign ^ is_odd);
673 accumulator += to_add;
675 if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) {
676 for (
size_t j = 0; j < 4; ++j) {
677 accumulator.self_dbl();
683 accumulator += -lookup_table[0];
685 if (wnaf.endo_skew) {
686 accumulator +=
element{ lookup_table[0].
x * beta, lookup_table[0].y, lookup_table[0].z };
706template <
typename AffineElement,
typename Fq>
707__attribute__((always_inline))
inline void batch_affine_add_impl(
const AffineElement* lhs,
710 Fq* scratch_space)
noexcept
716 scratch_space[i] = lhs[i].x +
rhs[i].x;
717 rhs[i].x -= lhs[i].x;
718 rhs[i].y -= lhs[i].y;
724 throw_or_abort(
"attempted to invert zero in batch_affine_add_impl");
733 rhs[i].x =
rhs[i].y.sqr();
734 rhs[i].x -= scratch_space[i];
737 Fq temp = lhs[i].x -
rhs[i].x;
739 rhs[i].y = temp - lhs[i].y;
754template <
typename AffineElement,
typename Fq>
755__attribute__((always_inline))
inline void batch_affine_add_interleaved(AffineElement* points,
757 Fq* scratch_space)
noexcept
763 scratch_space[i >> 1] = points[i].x + points[i + 1].x;
764 points[i + 1].x -= points[i].x;
765 points[i + 1].y -= points[i].y;
771 throw_or_abort(
"attempted to invert zero in batch_affine_add_interleaved");
780 points[i + 1].x = points[i + 1].y.sqr();
782 points[(i +
num_points) >> 1].x = points[i + 1].x - scratch_space[i >> 1];
785 __builtin_prefetch(points + i - 2);
786 __builtin_prefetch(points + i - 1);
787 __builtin_prefetch(points + ((i +
num_points - 2) >> 1));
788 __builtin_prefetch(scratch_space + ((i - 2) >> 1));
792 points[i].x -= points[(i +
num_points) >> 1].x;
793 points[i].x *= points[i + 1].y;
794 points[(i +
num_points) >> 1].y = points[i].x - points[i].y;
812template <
typename AffineElement,
typename Fq,
typename T>
813__attribute__((always_inline))
inline void batch_affine_double_impl(AffineElement* points,
815 Fq* scratch_space)
noexcept
821 scratch_space[i] = points[i].x.sqr();
822 if constexpr (T::has_a) {
823 scratch_space[i] += T::a;
825 scratch_space[i] = scratch_space[i] + scratch_space[i] + scratch_space[i];
831 throw_or_abort(
"attempted to invert zero in batch_affine_double_impl");
837 for (
size_t i_plus_1 =
num_points; i_plus_1 > 0; --i_plus_1) {
838 size_t i = i_plus_1 - 1;
844 points[i].x = scratch_space[i].
sqr() - (points[i].x + points[i].x);
845 points[i].y = scratch_space[i] * (
temp_x - points[i].x) - points[i].y;
860template <
class Fq,
class Fr,
class T>
878 [&](
size_t start,
size_t end,
BB_UNUSED size_t chunk_index) {
879 batch_affine_add_impl<affine_element, Fq>(
880 &second_group[start], &results[start], end - start, &scratch_space[start]);
895template <
class Fq,
class Fr,
class T>
921 if (converted_scalar.
is_zero()) {
929 constexpr size_t LOOKUP_SIZE = 8;
930 constexpr size_t NUM_ROUNDS = 32;
937 for (
auto& table : lookup_table) {
942 auto execute_range = [&](
size_t start,
size_t end) {
946 batch_affine_add_impl<affine_element, Fq>(&lhs[start], &
rhs[start], end - start, &scratch_space[start]);
951 batch_affine_double_impl<affine_element, Fq, T>(&lhs[start], end - start, &scratch_space[start]);
955 for (
size_t i = start; i < end; ++i) {
956 if (points[i].is_point_at_infinity()) {
960 temp_point_vector[i] = points[i];
961 lookup_table[0][i] = points[i];
965 double_chunked(&temp_point_vector[0]);
966 for (
size_t j = 1; j < LOOKUP_SIZE; ++j) {
967 for (
size_t i = start; i < end; ++i) {
968 lookup_table[j][i] = lookup_table[j - 1][i];
970 add_chunked(&temp_point_vector[0], &lookup_table[j][0]);
974 uint64_t wnaf_entry = 0;
978 for (
size_t j = 0; j < 2; ++j) {
979 wnaf_entry = wnaf.table[j];
980 index = wnaf_entry & 0x0fffffffU;
981 sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
982 const bool is_odd = ((j & 1) == 1);
983 for (
size_t i = start; i < end; ++i) {
984 auto to_add = lookup_table[
static_cast<size_t>(
index)][i];
985 to_add.y.self_conditional_negate(sign ^ is_odd);
990 work_elements[i] = to_add;
992 temp_point_vector[i] = to_add;
996 add_chunked(&temp_point_vector[0], &work_elements[0]);
998 for (
size_t j = 2; j < NUM_ROUNDS * 2; ++j) {
999 wnaf_entry = wnaf.table[j];
1000 index = wnaf_entry & 0x0fffffffU;
1001 sign =
static_cast<bool>((wnaf_entry >> 31) & 1);
1002 const bool is_odd = ((j & 1) == 1);
1004 for (
size_t k = 0; k < 4; ++k) {
1005 double_chunked(&work_elements[0]);
1008 for (
size_t i = start; i < end; ++i) {
1009 auto to_add = lookup_table[
static_cast<size_t>(
index)][i];
1010 to_add.y.self_conditional_negate(sign ^ is_odd);
1014 temp_point_vector[i] = to_add;
1016 add_chunked(&temp_point_vector[0], &work_elements[0]);
1021 for (
size_t i = start; i < end; ++i) {
1022 work_elements[i] = work_elements[i] + (-lookup_table[0][i]);
1026 if (wnaf.endo_skew) {
1027 for (
size_t i = start; i < end; ++i) {
1029 endo_point.
x *= beta;
1030 work_elements[i] = work_elements[i] + endo_point;
1034 for (
size_t i = start; i < end; ++i) {
1035 work_elements[i] = points[i].
is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i];
1040 return work_elements;
1043template <
typename Fq,
typename Fr,
typename T>
1047 temporaries.reserve(num_elements * 2);
1052 for (
size_t i = 0; i < num_elements; ++i) {
1053 temporaries.emplace_back(accumulator);
1054 if (!elements[i].is_point_at_infinity()) {
1055 accumulator *= elements[i].z;
1060 accumulator = accumulator.invert();
1084 for (
size_t i = num_elements - 1; i < num_elements; --i) {
1085 if (!elements[i].is_point_at_infinity()) {
1086 Fq z_inv = accumulator * temporaries[i];
1087 Fq zz_inv = z_inv.
sqr();
1088 elements[i].x *= zz_inv;
1089 elements[i].y *= (zz_inv * z_inv);
1090 accumulator *= elements[i].z;
1096template <
typename Fq,
typename Fr,
typename T>
1100 bool found_one =
false;
1104 while (!found_one) {
1106 yy = x.sqr() * x + T::b;
1107 if constexpr (T::has_a) {
1110 auto [found_root, y1] = yy.sqrt();
1112 found_one = found_root;
#define BB_ASSERT_EQ(actual, expected,...)
#define BB_BENCH_TRACY_NAME(name)
constexpr bool is_point_at_infinity() const noexcept
constexpr void self_set_infinity() noexcept
static constexpr affine_element one() noexcept
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
element operator*=(const Fr &exponent) noexcept
BB_INLINE constexpr element set_infinity() const noexcept
element mul_with_endomorphism(const Fr &scalar) const noexcept
static element infinity()
static std::vector< affine_element< Fq, Fr, Params > > batch_mul_with_endomorphism(const std::span< const affine_element< Fq, Fr, Params > > &points, const Fr &scalar) noexcept
Multiply each point by the same scalar.
constexpr element operator-=(const element &other) noexcept
constexpr element operator-() const noexcept
friend constexpr element operator+(const affine_element< Fq, Fr, Params > &left, const element &right) noexcept
constexpr element dbl() const noexcept
constexpr element normalize() const noexcept
constexpr void self_dbl() noexcept
static element random_element(numeric::RNG *engine=nullptr) noexcept
static void batch_normalize(element *elements, size_t num_elements) noexcept
constexpr element operator+=(const element &other) noexcept
static void batch_affine_add(const std::span< affine_element< Fq, Fr, Params > > &first_group, const std::span< affine_element< Fq, Fr, Params > > &second_group, const std::span< affine_element< Fq, Fr, Params > > &results) noexcept
Pairwise affine add points in first and second group.
element mul_const_time(const Fr &scalar, numeric::RNG *engine=nullptr) const noexcept
Constant-time scalar multiplication intended for secret scalars (e.g. ECDSA / Schnorr nonces).
BB_INLINE constexpr bool on_curve() const noexcept
BB_INLINE constexpr bool operator==(const element &other) const noexcept
element operator*(const Fr &exponent) const noexcept
element() noexcept=default
static element random_coordinates_on_curve(numeric::RNG *engine=nullptr) noexcept
element mul_without_endomorphism(const Fr &scalar) const noexcept
constexpr element & operator=(const element &other) noexcept
BB_INLINE constexpr void self_set_infinity() noexcept
BB_INLINE constexpr bool is_point_at_infinity() const noexcept
constexpr bool get_bit(uint64_t bit_index) const
constexpr uint64_t get_msb() const
bool get_bit(uint64_t bit_index) const
std::pair< std::array< uint64_t, 2 >, std::array< uint64_t, 2 > > EndoScalars
__attribute__((always_inline)) inline void batch_affine_add_impl(const AffineElement *lhs
Batch affine addition for parallel arrays: (lhs[i], rhs[i]) → rhs[i].
AffineElement const size_t num_pairs
AffineElement const size_t Fq *scratch_space noexcept
batch_inversion_accumulator
uintx< uint256_t > uint512_t
std::conditional_t< IsGoblinBigGroup< C, Fq, Fr, G >, element_goblin::goblin_element< C, goblin_field< C >, Fr, G >, element_default::element< C, Fq, Fr, G > > element
element wraps either element_default::element or element_goblin::goblin_element depending on parametr...
constexpr size_t FF_COPY_COST
constexpr size_t FF_ADDITION_COST
constexpr size_t FF_MULTIPLICATION_COST
void fixed_wnaf(const uint64_t *scalar, uint64_t *wnaf, bool &skew_map, const uint64_t point_index, const uint64_t num_points, const size_t wnaf_bits) noexcept
Performs fixed-window non-adjacent form (WNAF) computation for scalar multiplication.
Univariate< Fr, domain_end > operator*(const Fr &ff, const Univariate< Fr, domain_end > &uv)
void parallel_for_heuristic(size_t num_points, const std::function< void(size_t, size_t, size_t)> &func, size_t heuristic_cost)
Split a loop into several loops running in parallel based on operations in 1 iteration.
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
static constexpr field cube_root_of_unity()
static constexpr field one()
static constexpr uint256_t modulus
static void split_into_endomorphism_scalars(const field &k, field &k1, field &k2)
Full-width endomorphism decomposition: k ≡ k1 - k2·λ (mod r). Modifies the field elements k1 and k2.
BB_INLINE constexpr void self_sqr() &noexcept
constexpr field invert() const noexcept
BB_INLINE constexpr bool is_msb_set() const noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept
BB_INLINE constexpr field sqr() const noexcept
BB_INLINE constexpr bool is_zero() const noexcept
BB_INLINE constexpr field from_montgomery_form() const noexcept
static constexpr field zero()
Handles the WNAF computation for scalars that are split using an endomorphism, achieved through split...
EndomorphismWnaf(const EndoScalars &scalars)
std::array< uint64_t, NUM_ROUNDS *2 > table
static constexpr size_t NUM_WNAF_BITS
void throw_or_abort(std::string const &err)