// Barretenberg: src/barretenberg/ecc/groups/element_impl.hpp

// === AUDIT STATUS ===

// internal:    { status: Planned, auditors: [], commit: }

// external_1:  { status: not started, auditors: [], commit: }

// external_2:  { status: not started, auditors: [], commit: }

// =====================


#pragma once

#include "barretenberg/common/assert.hpp"

#include "barretenberg/common/bb_bench.hpp"

#include "barretenberg/common/thread.hpp"

#include "barretenberg/ecc/groups/element.hpp"

#include "element.hpp"

#include <cstdint>


// NOLINTBEGIN(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays)

namespace bb::group_elements {

/**
 * @brief Builds a point directly from three coordinates.
 *
 * @param a value stored in `x`
 * @param b value stored in `y`
 * @param c value stored in `z`
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T>::element(const Fq& a, const Fq& b, const Fq& c) noexcept
    : x(a), y(b), z(c)
{}


/**
 * @brief Copy constructor: duplicates the three coordinates of `other`.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T>::element(const element& other) noexcept
    : x(other.x), y(other.y), z(other.z)
{}


/**
 * @brief Move constructor.
 *
 * The coordinates are copied member by member; `other` is not modified.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T>::element(element&& other) noexcept
    : x(other.x), y(other.y), z(other.z)
{}


/**
 * @brief Lifts an affine point into this three-coordinate representation.
 *
 * `x` and `y` are taken unchanged from `other` and `z` is set to one. No special handling of an
 * affine point at infinity happens here: whatever `other.x` / `other.y` hold is copied verbatim.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T>::element(const affine_element<Fq, Fr, T>& other) noexcept
    : x(other.x), y(other.y), z(Fq::one())
{}


/**
 * @brief Copy assignment.
 *
 * @param other point whose coordinates are copied
 * @return reference to this point
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T>& element<Fq, Fr, T>::operator=(const element& other) noexcept
{
    // Nothing to copy when assigning an object to itself.
    if (this != &other) {
        x = other.x;
        y = other.y;
        z = other.z;
    }
    return *this;
}


/**
 * @brief Move assignment.
 *
 * Coordinates are copied one by one; `other` is left unchanged.
 *
 * @return reference to this point
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T>& element<Fq, Fr, T>::operator=(element&& other) noexcept
{
    this->x = other.x;
    this->y = other.y;
    this->z = other.z;
    return *this;
}


/**
 * @brief Converts this point to affine form.
 *
 * A finite point (x, y, z) maps to (x / z^2, y / z^3), which costs one field inversion.
 * The point at infinity maps to an affine element with zeroed coordinates on which
 * `self_set_infinity()` has been called.
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T>::operator affine_element<Fq, Fr, T>() const noexcept
{
    using affine_t = affine_element<Fq, Fr, T>;

    if (!is_point_at_infinity()) {
        const Fq inv_z = z.invert();
        const Fq inv_z_squared = inv_z.sqr();
        const Fq inv_z_cubed = inv_z_squared * inv_z;
        return affine_t(x * inv_z_squared, y * inv_z_cubed);
    }

    affine_t at_infinity;
    at_infinity.x = Fq(0);
    at_infinity.y = Fq(0);
    at_infinity.self_set_infinity();
    return at_infinity;
}


/**
 * @brief Doubles this point in place.
 *
 * @details The point is left untouched when the infinity check for the active representation
 * fires: `is_point_at_infinity()` when the top modulus limb is at least
 * MODULUS_TOP_LIMB_LARGE_THRESHOLD, `x.is_msb_set_word()` otherwise.
 *
 * Writing (x, y, z) for the input, the statements below compute
 *   S  = 4*x*y^2
 *   M  = 3*x^2            (+ a*z^4 when T::has_a)
 *   x' = M^2 - 2*S
 *   y' = M*(S - x') - 8*y^4
 *   z' = 2*y*z
 * The temporaries are reused aggressively, so the statement order matters.
 */
template <class Fq, class Fr, class T> constexpr void element<Fq, Fr, T>::self_dbl() noexcept
{
    if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
        if (is_point_at_infinity()) {
            return;
        }
    } else {
        if (x.is_msb_set_word()) {
            return;
        }
    }

    // T0 = x*x
    Fq T0 = x.sqr();

    // T1 = y*y
    Fq T1 = y.sqr();

    // T2 = T1*T1 = y*y*y*y
    Fq T2 = T1.sqr();

    // T1 = T1 + x = x + y*y
    T1 += x;

    // T1 = T1 * T1 = (x + y*y)^2 = x*x + 2*x*y*y + y*y*y*y
    T1.self_sqr();

    // T3 = T0 + T2 = x*x + y*y*y*y
    Fq T3 = T0 + T2;

    // T1 = T1 - T3 = (x + y*y)^2 - x*x - y*y*y*y = 2*x*y*y
    T1 -= T3;

    // T1 = 2T1 = 4*x*y*y = S
    T1 += T1;

    // T3 = 3T0 = 3*x*x (plus a*z^4 for curves with a non-zero `a`) = M
    T3 = T0 + T0;
    T3 += T0;
    if constexpr (T::has_a) {
        T3 += (T::a * z.sqr().sqr());
    }

    // z2 = 2*y*z (must happen before y is overwritten below)
    z += z;
    z *= y;

    // T0 = 2T1 = 2*S
    T0 = T1 + T1;

    // x2 = T3*T3 = M*M
    x = T3.sqr();

    // x2 = x2 - 2*S
    x -= T0;

    // T2 = 8T2 = 8*y*y*y*y
    T2 += T2;
    T2 += T2;
    T2 += T2;

    // y2 = T1 - x2 = S - x2
    y = T1 - x;

    // y2 = y2 * T3 - T2 = M*(S - x2) - 8*y*y*y*y
    y *= T3;
    y -= T2;
}


/**
 * @brief Returns twice this point without modifying it.
 *
 * @return a copy of this point on which `self_dbl()` has been applied
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::dbl() const noexcept
{
    element doubled(*this);
    doubled.self_dbl();
    return doubled;
}


/**
 * @brief Adds an affine point to this point in place (mixed addition, the other point has z = 1).
 *
 * @details Special cases handled before the general formula:
 *  - `other` is infinity: this point is returned unchanged;
 *  - this point is infinity: it becomes (other.x, other.y, 1);
 *  - both points share the same affine x (H == 0): the point is doubled when the y values
 *    also agree, otherwise the result is set to infinity.
 *
 * With (x1, y1, z1) this point and (x2, y2) the affine operand the general case computes
 *   H  = x2*z1^2 - x1
 *   R  = 2*(y2*z1^3 - y1)
 *   x3 = R^2 - 4*H^3 - 8*H^2*x1
 *   y3 = R*(4*H^2*x1 - x3) - 8*H^3*y1
 *   z3 = (z1 + H)^2 - z1^2 - H^2
 *
 * @param other affine point to add
 * @return a copy of this point after the addition
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+=(const affine_element<Fq, Fr, T>& other) noexcept
{
    if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
        // If either point is infinity, return the other point
        if (other.is_point_at_infinity()) {
            return *this;
        }
        if (is_point_at_infinity()) {
            *this = { other.x, other.y, Fq::one() };
            return *this;
        }
    } else {
        // In this representation the most significant bit of x flags the point at infinity.
        const bool edge_case_trigger = x.is_msb_set() || other.x.is_msb_set();
        if (edge_case_trigger) {
            // Only overwrite when this point is the infinite one; if `other` is infinite too,
            // the copied x still carries the flag.
            if (x.is_msb_set()) {
                *this = { other.x, other.y, Fq::one() };
            }
            return *this;
        }
    }

    // T0 = z1.z1
    Fq T0 = z.sqr();

    // T1 = x2.t0 - x1 = x2.z1.z1 - x1 = H
    Fq T1 = other.x * T0;
    T1 -= x;

    // T2 = T0.z1 = z1.z1.z1
    // T2 = T2.y2 - y1 = y2.z1.z1.z1 - y1
    Fq T2 = z * T0;
    T2 *= other.y;
    T2 -= y;

    // H == 0 means both points have the same affine x: either the same point (double it) or
    // mutually inverse points (the sum is infinity).
    if (__builtin_expect(T1.is_zero(), 0)) {
        if (T2.is_zero()) {
            self_dbl();
            return *this;
        }
        self_set_infinity();
        return *this;
    }

    // T2 = 2T2 = 2(y2.z1.z1.z1 - y1) = R
    // z3 = z1 + H
    T2 += T2;
    z += T1;

    // T3 = T1*T1 = HH
    Fq T3 = T1.sqr();

    // T0 = z1z1 + HH (the amount subtracted from z3 below)
    T0 += T3;

    // z3 = (z1 + H)*(z1 + H) - z1z1 - HH
    z.self_sqr();
    z -= T0;

    // T3 = 4HH
    T3 += T3;
    T3 += T3;

    // T1 = T1*T3 = 4HHH
    T1 *= T3;

    // T3 = T3 * x1 = 4HH*x1
    T3 *= x;

    // T0 = 2T3
    T0 = T3 + T3;

    // T0 = T0 + T1 = 2(4HH*x1) + 4HHH
    T0 += T1;
    x = T2.sqr();

    // x3 = x3 - T0 = R*R - 8HH*x1 -4HHH
    x -= T0;

    // T3 = T3 - x3 = 4HH*x1 - x3
    T3 -= x;

    // T1 = 2*(4HHH*y1) = 8HHH*y1
    T1 *= y;
    T1 += T1;

    // T3 = T2 * T3 = R*(4HH*x1 - x3)
    T3 *= T2;

    // y3 = T3 - T1
    y = T3 - T1;
    return *this;
}


/**
 * @brief Returns the sum of this point and an affine point, leaving both operands untouched.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+(const affine_element<Fq, Fr, T>& other) const noexcept
{
    element sum(*this);
    sum += other;
    return sum;
}


/**
 * @brief Subtracts an affine point from this point in place.
 *
 * Implemented as an addition of (other.x, -other.y).
 *
 * @return a copy of this point after the subtraction
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-=(const affine_element<Fq, Fr, T>& other) noexcept
{
    const affine_element<Fq, Fr, T> negated_other{ other.x, -other.y };
    return *this += negated_other;
}


/**
 * @brief Returns this point minus an affine point, leaving both operands untouched.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-(const affine_element<Fq, Fr, T>& other) const noexcept
{
    element difference(*this);
    difference -= other;
    return difference;
}


/**
 * @brief Adds another point to this point in place.
 *
 * @details Special cases handled before the general formula:
 *  - exactly one operand is infinity: the result is the other operand;
 *  - both operands are infinity: the result is set to infinity;
 *  - both operands share the same affine x (H == 0): the point is doubled when the scaled y
 *    values also agree, otherwise the result is set to infinity.
 *
 * With (x1, y1, z1) this point and (x2, y2, z2) the other operand the general case computes
 *   U1 = x1*z2^2,  U2 = x2*z1^2,  S1 = y1*z2^3,  S2 = y2*z1^3
 *   H  = U2 - U1,  I = (2H)^2,    J = H*I,       F = 2*(S2 - S1)
 *   x3 = F^2 - J - 2*U1*I
 *   y3 = F*(U1*I - x3) - 2*S1*J
 *   z3 = ((z1 + z2)^2 - z1^2 - z2^2) * H
 *
 * @param other point to add
 * @return a copy of this point after the addition
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+=(const element& other) noexcept
{
    if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
        bool p1_zero = is_point_at_infinity();
        bool p2_zero = other.is_point_at_infinity();
        if (__builtin_expect((p1_zero || p2_zero), 0)) {
            if (p1_zero && !p2_zero) {
                *this = other;
                return *this;
            }
            if (p2_zero && !p1_zero) {
                return *this;
            }
            // Both operands are infinity.
            self_set_infinity();
            return *this;
        }
    } else {
        // In this representation the most significant bit of x flags the point at infinity.
        bool p1_zero = x.is_msb_set();
        bool p2_zero = other.x.is_msb_set();
        if (__builtin_expect((p1_zero || p2_zero), 0)) {
            if (p1_zero && !p2_zero) {
                *this = other;
                return *this;
            }
            if (p2_zero && !p1_zero) {
                return *this;
            }
            // Both operands are infinity.
            self_set_infinity();
            return *this;
        }
    }
    Fq Z1Z1(z.sqr());
    Fq Z2Z2(other.z.sqr());
    Fq S2(Z1Z1 * z);
    Fq U2(Z1Z1 * other.x);
    S2 *= other.y;
    Fq U1(Z2Z2 * x);
    Fq S1(Z2Z2 * other.z);
    S1 *= y;

    // F = S2 - S1 (doubled further down)
    Fq F(S2 - S1);

    // H = U2 - U1
    Fq H(U2 - U1);

    // H == 0 means both points have the same affine x: either the same point (double it) or
    // mutually inverse points (the sum is infinity).
    if (__builtin_expect(H.is_zero(), 0)) {
        if (F.is_zero()) {
            self_dbl();
            return *this;
        }
        self_set_infinity();
        return *this;
    }

    // F = 2*(S2 - S1)
    F += F;

    // I = (2H)^2
    Fq I(H + H);
    I.self_sqr();

    // J = H*I
    Fq J(H * I);

    // U1 = U1*I
    U1 *= I;

    // U2 = 2*U1*I + J
    U2 = U1 + U1;
    U2 += J;

    // x3 = F^2 - J - 2*U1*I
    x = F.sqr();

    x -= U2;

    // J = 2*S1*J
    J *= S1;
    J += J;

    // y3 = F*(U1*I - x3) - 2*S1*J
    y = U1 - x;

    y *= F;

    y -= J;

    // z3 = ((z1 + z2)^2 - z1^2 - z2^2) * H
    z += other.z;

    Z1Z1 += Z2Z2;

    z.self_sqr();
    z -= Z1Z1;
    z *= H;
    return *this;
}


/**
 * @brief Returns the sum of two points, leaving both operands untouched.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+(const element& other) const noexcept
{
    element sum(*this);
    sum += other;
    return sum;
}


/**
 * @brief Subtracts another point from this point in place.
 *
 * Implemented as an addition of (other.x, -other.y, other.z).
 *
 * @return a copy of this point after the subtraction
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-=(const element& other) noexcept
{
    const element negated_other(other.x, -other.y, other.z);
    return *this += negated_other;
}


/**
 * @brief Returns this point minus another point, leaving both operands untouched.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-(const element& other) const noexcept
{
    element difference(*this);
    difference -= other;
    return difference;
}


/**
 * @brief Returns the negation of this point: same x and z, y replaced by -y.
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-() const noexcept
{
    return element(x, -y, z);
}


/**
 * @brief Scalar multiplication.
 *
 * Dispatches at compile time on `T::USE_ENDOMORPHISM` to `mul_with_endomorphism` or
 * `mul_without_endomorphism`.
 *
 * @param exponent scalar to multiply by
 * @return exponent * (this point)
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::operator*(const Fr& exponent) const noexcept
{
    if constexpr (T::USE_ENDOMORPHISM) {
        return mul_with_endomorphism(exponent);
    } else {
        return mul_without_endomorphism(exponent);
    }
}


/**
 * @brief Multiplies this point by a scalar in place.
 *
 * @return a copy of this point after the multiplication
 */
template <class Fq, class Fr, class T> element<Fq, Fr, T> element<Fq, Fr, T>::operator*=(const Fr& exponent) noexcept
{
    const element product = (*this) * exponent;
    *this = product;
    return *this;
}


/**
 * @brief Scalar multiplication with a blinded scalar, a fixed iteration count and branch-free
 *        conditional swaps (Montgomery ladder).
 *
 * @details The scalar is blinded with a random multiple of the scalar-field modulus and then
 * processed bit by bit, most significant bit first, for exactly NUM_BITS iterations. Selection
 * between the two ladder registers is done with masked XOR swaps rather than branches.
 * NOTE(review): the point addition and doubling called inside the loop do contain branches for
 * their special cases; the comment on the ladder explains why those are not expected to be taken
 * once R0 is finite.
 *
 * @param scalar multiplier
 * @param engine randomness source for the blinding factor; when null, `numeric::get_randomness()`
 *               is used
 * @return scalar * (this point)
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::mul_const_time(const Fr& scalar, numeric::RNG* engine) const noexcept
{
    if (engine == nullptr) {
        engine = &numeric::get_randomness();
    }

    // Convert the scalar to canonical u256 form
    const uint256_t k = uint256_t(scalar);

    // Coron's first DPA countermeasure (J.-S. Coron, "Resistance against Differential Power Analysis
    // for Elliptic Curve Cryptosystems", CHES 1999, LNCS 1717, pp. 292-302, Section 5.1): blind the
    // scalar with k' = k + r * n where r is a fresh random 64-bit value sampled per call. Since
    // n * P = O for any P in the prime-order subgroup, k' * P = k * P. The randomization defeats
    // DPA: per-bit traces of two signings with the same k decorrelate because the bit pattern of k'
    // differs across calls.
    //
    // We force the high bit of r to be 1 so that r is sampled uniformly from [2^63, 2^64). This
    // guarantees r * n has a fixed-width range (MSB at position M+63 or M+64 for n with MSB at M),
    // so the iteration count remains exactly NUM_BITS regardless of the sampled r.
    const uint64_t r = engine->get_random_uint64() | (UINT64_C(1) << 63);
    const uint512_t r_times_n = uint512_t(uint256_t(Fr::modulus)) * uint512_t(uint256_t(r));
    const uint512_t k_blinded = uint512_t(k) + r_times_n;

    // Bound on k_blinded, for n with MSB at position M (so n < 2^(M + 1)):
    //   r < 2^64          =>  r * n < 2^(M + 65)
    //   k < n < 2^(M + 1) (k is the canonical representative)
    //   k_blinded = k + r * n < 2^(M + 65) + 2^(M + 1) < 2^(M + 66)
    // Bits 0 .. M + 65 therefore cover every possible k_blinded, which is M + 66 iterations.
    constexpr size_t NUM_BITS = static_cast<size_t>(uint256_t(Fr::modulus).get_msb()) + 66;

    // Constant-time conditional swap of two Fq coordinates. `mask` is 0 (no swap) or all-ones (swap),
    // derived from the secret bit via integer subtraction so no branch is emitted.
    auto cs_fq = [](Fq& a, Fq& b, uint64_t mask) {
        constexpr size_t NUM_LIMBS = sizeof(Fq) / sizeof(uint64_t);
        for (size_t i = 0; i < NUM_LIMBS; ++i) {
            uint64_t t = mask & (a.data[i] ^ b.data[i]);
            a.data[i] ^= t;
            b.data[i] ^= t;
        }
    };
    // Conditional swap of two whole points: all three coordinates use the same mask.
    auto cswap = [&cs_fq](element& a, element& b, uint64_t mask) {
        cs_fq(a.x, b.x, mask);
        cs_fq(a.y, b.y, mask);
        cs_fq(a.z, b.z, mask);
    };

    // Montgomery ladder. Invariant after each iteration: R1 - R0 = P.
    // Once R0 first becomes non-infinity (after the first 1-bit of k_blinded is processed), the
    // invariant guarantees R0 + R1 and 2 * R0 do not hit the doubling/infinity special-case branches.
    element R0 = element::infinity();
    element R1(*this);

    // Walk the bits from NUM_BITS - 1 down to 0.
    for (size_t i = NUM_BITS; i-- > 0;) {
        // mask is all-ones when bit i is set, zero otherwise.
        const uint64_t mask = 0ULL - static_cast<uint64_t>(k_blinded.get_bit(i));
        cswap(R0, R1, mask);
        R1 = R0 + R1;
        R0 = R0.dbl();
        // Undo the swap so R0/R1 keep their roles for the next bit.
        cswap(R0, R1, mask);
    }
    return R0;
}


/**
 * @brief Returns this point rebuilt from its affine form.
 *
 * The point is converted to an affine element and then lifted back through the
 * affine-element constructor, which sets z to one.
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::normalize() const noexcept
{
    const affine_element<Fq, Fr, T> affine_form = *this;
    return element(affine_form);
}


/**
 * @brief Returns the point at infinity in this class's representation.
 */
template <class Fq, class Fr, class T> element<Fq, Fr, T> element<Fq, Fr, T>::infinity()
{
    element<Fq, Fr, T> point_at_infinity{};
    point_at_infinity.self_set_infinity();
    return point_at_infinity;
}


/**
 * @brief Returns a copy of this point on which `self_set_infinity()` has been applied.
 *
 * This point itself is not modified.
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::set_infinity() const noexcept
{
    element as_infinity(*this);
    as_infinity.self_set_infinity();
    return as_infinity;
}


/**
 * @brief Turns this point into the point at infinity.
 *
 * Two encodings exist, selected at compile time by the top limb of the base-field modulus:
 *  - top limb >= MODULUS_TOP_LIMB_LARGE_THRESHOLD: the four limbs of x are set to the modulus;
 *  - otherwise: x is zeroed and then its most significant bit is set.
 * In both cases y and z are zeroed so the encoding does not depend on the previous state.
 */
template <class Fq, class Fr, class T> constexpr void element<Fq, Fr, T>::self_set_infinity() noexcept
{
    if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
        // x == modulus (limb for limb) is the marker for infinity.
        for (size_t limb = 0; limb < 4; ++limb) {
            x.data[limb] = Fq::modulus.data[limb];
        }
        y = Fq::zero();
        z = Fq::zero();
    } else {
        x = Fq::zero();
        y = Fq::zero();
        z = Fq::zero();
        // The most significant bit of x is the marker for infinity.
        x.self_set_msb();
    }
}


/**
 * @brief Reports whether this point is encoded as the point at infinity.
 *
 * For moduli whose top limb is at least MODULUS_TOP_LIMB_LARGE_THRESHOLD the check is
 * "x equals the modulus limb for limb"; otherwise it is "the most significant bit of x is set".
 */
template <class Fq, class Fr, class T> constexpr bool element<Fq, Fr, T>::is_point_at_infinity() const noexcept
{
    if constexpr (Fq::modulus.data[3] >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) {
        // OR together the limb-wise differences; the result is zero only if every limb matches.
        uint64_t limb_difference = 0;
        for (size_t limb = 0; limb < 4; ++limb) {
            limb_difference |= (x.data[limb] ^ Fq::modulus.data[limb]);
        }
        return limb_difference == 0;
    } else {
        return x.is_msb_set();
    }
}


/**
 * @brief Checks that this point satisfies the curve equation.
 *
 * The point at infinity is accepted. A point that is not flagged as infinity but has z == 0 is
 * rejected. Otherwise the check is y^2 == x^3 + a*x*z^4 + b*z^6 (the `a` term only when
 * `T::has_a`).
 */
template <class Fq, class Fr, class T> constexpr bool element<Fq, Fr, T>::on_curve() const noexcept
{
    if (is_point_at_infinity()) {
        return true;
    }
    // Infinity has its own encoding, so a zero z on a non-infinite point is invalid.
    if (z.is_zero()) {
        return false;
    }

    const Fq z_squared = z.sqr();
    const Fq z_fourth = z_squared.sqr();

    // Everything on the right-hand side except x^3.
    Fq scaled_terms = z_fourth * z_squared * T::b;
    if constexpr (T::has_a) {
        scaled_terms += (x * T::a) * z_fourth;
    }

    const Fq rhs = x.sqr() * x + scaled_terms;
    const Fq lhs = y.sqr();
    return rhs == lhs;
}


/**
 * @brief Compares two points as group elements.
 *
 * Returns false whenever either operand is not on the curve. Two points at infinity compare
 * equal; infinity never equals a finite point. Finite points are compared by cross-multiplying
 * with the other operand's powers of z, so no inversion is needed.
 */
template <class Fq, class Fr, class T>
constexpr bool element<Fq, Fr, T>::operator==(const element& other) const noexcept
{
    // Points that are not on the curve are never considered equal to anything.
    if (!on_curve() || !other.on_curve()) {
        return false;
    }

    const bool this_is_infinity = is_point_at_infinity();
    const bool other_is_infinity = other.is_point_at_infinity();
    if (this_is_infinity || other_is_infinity) {
        // Equal only when both are infinity.
        return this_is_infinity && other_is_infinity;
    }

    const Fq this_zz = z.sqr();
    const Fq this_zzz = this_zz * z;
    const Fq other_zz = other.z.sqr();
    const Fq other_zzz = other_zz * other.z;

    // x1 * z2^2 == x2 * z1^2  and  y1 * z2^3 == y2 * z1^3
    const Fq this_x_scaled = x * other_zz;
    const Fq this_y_scaled = y * other_zzz;
    const Fq other_x_scaled = other.x * this_zz;
    const Fq other_y_scaled = other.y * this_zzz;

    return (this_x_scaled == other_x_scaled) && (this_y_scaled == other_y_scaled);
}


/**
 * @brief Produces a random point.
 *
 * When `T::can_hash_to_curve` is set, random affine coordinates are drawn via
 * `random_coordinates_on_curve` and then rescaled by a random z (x *= z^2, y *= z^3).
 * Otherwise the generator (T::one_x, T::one_y) is multiplied by a random scalar.
 *
 * @param engine randomness source forwarded to the field `random_element` calls
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::random_element(numeric::RNG* engine) noexcept
{
    if constexpr (T::can_hash_to_curve) {
        element point = random_coordinates_on_curve(engine);
        // Randomise the projective representation of the sampled point.
        const Fq random_z = Fq::random_element(engine);
        const Fq random_z_squared = random_z.sqr();
        const Fq random_z_cubed = random_z_squared * random_z;
        point.x *= random_z_squared;
        point.y *= random_z_cubed;
        point.z = random_z;
        return point;
    } else {
        const Fr random_scalar = Fr::random_element(engine);
        return element{ T::one_x, T::one_y, Fq::one() } * random_scalar;
    }
}


/**
 * @brief Plain double-and-add scalar multiplication.
 *
 * @param scalar multiplier
 * @return scalar * (this point); infinity when the scalar is zero
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::mul_without_endomorphism(const Fr& scalar) const noexcept
{
    const uint256_t scalar_bits(scalar);
    if (scalar_bits == 0) {
        return element::infinity();
    }

    // The top set bit is accounted for by starting from one copy of the point.
    element result(*this);

    // NOT constant-time: the loop bound leaks bit-length and the per-bit branch leaks Hamming
    // weight. This is acceptable only for public scalars; secret scalars must go through
    // mul_const_time.
    for (uint64_t bit = scalar_bits.get_msb(); bit-- > 0;) {
        result.self_dbl();
        if (scalar_bits.get_bit(bit)) {
            result += *this;
        }
    }
    return result;
}


namespace detail {
// A pair of scalars, each stored as two 64-bit limbs. In this file it holds the value returned by
// Fr::split_into_endomorphism_scalars: `first` drives the even WNAF slots and `second` the odd
// (endomorphism) slots.
using EndoScalars = std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>>;

// WNAF encoding of an EndoScalars pair, computed once in the constructor.
// NOTE(review): the `Element` template parameter is not used inside this struct.
template <typename Element, std::size_t NUM_ROUNDS> struct EndomorphismWnaf {
    // NUM_WNAF_BITS: Number of bits per window in the WNAF representation.
    static constexpr size_t NUM_WNAF_BITS = 4;
    // table: Stores the WNAF representation of the scalars. It has NUM_ROUNDS * 2 entries; the
    // first scalar is written starting at table[0] and the second starting at table[1].
    // NOTE(review): the `0, 2` arguments passed to fixed_wnaf presumably select an interleaved
    // layout (stride 2), matching how callers treat even/odd entries - confirm against
    // wnaf::fixed_wnaf.
    std::array<uint64_t, NUM_ROUNDS * 2> table;
    // skew and endo_skew: Indicate if our original scalar is even or odd.
    bool skew = false;
    bool endo_skew = false;

    // Fills `table`, `skew` and `endo_skew` from the two scalars.
    EndomorphismWnaf(const EndoScalars& scalars)
    {
        wnaf::fixed_wnaf(&scalars.first[0], &table[0], skew, 0, 2, NUM_WNAF_BITS);
        wnaf::fixed_wnaf(&scalars.second[0], &table[1], endo_skew, 0, 2, NUM_WNAF_BITS);
    }
};

} // namespace detail


/**
 * @brief Scalar multiplication using the curve endomorphism and a 4-bit WNAF.
 *
 * @details The scalar is split into two scalars by `Fr::split_into_endomorphism_scalars` and
 * both are WNAF-encoded into one interleaved table. Even table entries add a multiple of this
 * point; odd entries add the same multiple with x multiplied by the cube root of unity `beta`.
 * After every odd entry except the last, the accumulator is doubled four times. The two skew
 * flags are applied at the end.
 *
 * @param scalar multiplier
 * @return scalar * (this point); infinity when this point is infinity or the scalar is zero
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::mul_with_endomorphism(const Fr& scalar) const noexcept
{
    // Consider the infinity flag, return infinity if set
    if (is_point_at_infinity()) {
        return element::infinity();
    }
    constexpr size_t NUM_ROUNDS = 32;
    const Fr converted_scalar = scalar.from_montgomery_form();

    if (converted_scalar.is_zero()) {
        return element::infinity();
    }
    static constexpr size_t LOOKUP_SIZE = 8;
    std::array<element, LOOKUP_SIZE> lookup_table;

    // lookup_table[i] = (2i + 1) * P: start from P and repeatedly add 2P.
    element d2 = dbl();
    lookup_table[0] = element(*this);
    for (size_t i = 1; i < LOOKUP_SIZE; ++i) {
        lookup_table[i] = lookup_table[i - 1] + d2;
    }

    detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar);
    detail::EndomorphismWnaf<element, NUM_ROUNDS> wnaf{ endo_scalars };
    // The coordinates given here are overwritten immediately: the accumulator starts at infinity.
    element accumulator{ T::one_x, T::one_y, Fq::one() };
    accumulator.self_set_infinity();
    Fq beta = Fq::cube_root_of_unity();

    for (size_t i = 0; i < NUM_ROUNDS * 2; ++i) {
        uint64_t wnaf_entry = wnaf.table[i];
        // Low 28 bits select the lookup-table entry, bit 31 carries the sign.
        uint64_t index = wnaf_entry & 0x0fffffffU;
        bool sign = static_cast<bool>((wnaf_entry >> 31) & 1);
        // Odd slots belong to the second (endomorphism) scalar.
        const bool is_odd = ((i & 1) == 1);
        auto to_add = lookup_table[static_cast<size_t>(index)];
        // For odd slots the negation condition is inverted.
        // NOTE(review): presumably because the endomorphism term enters with the opposite sign -
        // confirm against split_into_endomorphism_scalars.
        to_add.y.self_conditional_negate(sign ^ is_odd);
        if (is_odd) {
            to_add.x *= beta;
        }
        accumulator += to_add;

        // After each completed (even, odd) pair, shift the accumulator by one 4-bit window,
        // except after the final pair.
        if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) {
            for (size_t j = 0; j < 4; ++j) {
                accumulator.self_dbl();
            }
        }
    }

    // Skew correction for the first scalar: subtract P.
    if (wnaf.skew) {
        accumulator += -lookup_table[0];
    }
    // Skew correction for the second scalar: add (beta * x, y, z) of P.
    if (wnaf.endo_skew) {
        accumulator += element{ lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z };
    }

    return accumulator;
}


/**
 * @brief Adds `num_pairs` pairs of affine points using a single field inversion.
 *
 * @details For every i the result lhs[i] + rhs[i] is written into rhs[i]; lhs is only read.
 * The denominators (x2 - x1) of all pairs are multiplied together, inverted once, and the
 * individual inverses are recovered while walking the array backwards.
 * No special cases are handled: if any pair has equal x coordinates the accumulated product is
 * zero and `throw_or_abort` is called. Points at infinity are not treated specially.
 *
 * @param lhs           first operands (read only), at least `num_pairs` entries
 * @param rhs           second operands; overwritten with the sums
 * @param num_pairs     number of pairs to add
 * @param scratch_space temporary storage, at least `num_pairs` entries
 */
template <typename AffineElement, typename Fq>
__attribute__((always_inline)) inline void batch_affine_add_impl(const AffineElement* lhs,
                                                                 AffineElement* rhs,
                                                                 const size_t num_pairs,
                                                                 Fq* scratch_space) noexcept
{
    Fq batch_inversion_accumulator = Fq::one();

    // Forward pass: prepare batch inversion
    // After iteration i: scratch_space[i] = x1 + x2, rhs[i].x = x2 - x1,
    // rhs[i].y = (y2 - y1) * (product of the earlier denominators).

    for (size_t i = 0; i < num_pairs; ++i) {
        scratch_space[i] = lhs[i].x + rhs[i].x;
        rhs[i].x -= lhs[i].x;
        rhs[i].y -= lhs[i].y;
        rhs[i].y *= batch_inversion_accumulator;
        batch_inversion_accumulator *= rhs[i].x;
    }

    // A zero product means at least one pair had x1 == x2.
    if (batch_inversion_accumulator == Fq::zero()) {
        throw_or_abort("attempted to invert zero in batch_affine_add_impl");
    }

    batch_inversion_accumulator = batch_inversion_accumulator.invert();

    // Backward pass: compute additions
    // The index wraps around below zero, which ends the loop (and skips it when num_pairs == 0).
    for (size_t i = num_pairs - 1; i < num_pairs; --i) {
        // lambda = (y2 - y1) / (x2 - x1)
        rhs[i].y *= batch_inversion_accumulator;
        // Strip this pair's denominator so the accumulator is ready for pair i - 1.
        batch_inversion_accumulator *= rhs[i].x;
        rhs[i].x = rhs[i].y.sqr();
        rhs[i].x -= scratch_space[i]; // x3 = lambda^2 - (x1 + x2)

        // y3 = lambda * (x1 - x3) - y1
        Fq temp = lhs[i].x - rhs[i].x;
        temp *= rhs[i].y;
        rhs[i].y = temp - lhs[i].y;
    }
}


/**
 * @brief Adds adjacent pairs of affine points in place using a single field inversion.
 *
 * @details Pairs are (points[i], points[i + 1]) for even i. The sum of the pair starting at i
 * is written to points[(i + num_points) / 2], so the results end up in the upper half of the
 * array. The inputs are destroyed in the process.
 * No special cases are handled: if any pair has equal x coordinates the accumulated product is
 * zero and `throw_or_abort` is called.
 * NOTE(review): the indexing reads points[i + 1] for every even i < num_points, so `num_points`
 * is assumed to be even - confirm with callers.
 *
 * @param points        array of `num_points` affine points, modified in place
 * @param num_points    number of input points
 * @param scratch_space temporary storage, at least num_points / 2 entries
 */
template <typename AffineElement, typename Fq>
__attribute__((always_inline)) inline void batch_affine_add_interleaved(AffineElement* points,
                                                                        const size_t num_points,
                                                                        Fq* scratch_space) noexcept
{
    Fq batch_inversion_accumulator = Fq::one();

    // Forward pass: accumulate (x2 - x1) products for batch inversion
    for (size_t i = 0; i < num_points; i += 2) {
        scratch_space[i >> 1] = points[i].x + points[i + 1].x; // x1 + x2 (saved for later)
        points[i + 1].x -= points[i].x;                        // x2 - x1
        points[i + 1].y -= points[i].y;                        // y2 - y1
        points[i + 1].y *= batch_inversion_accumulator;
        batch_inversion_accumulator *= points[i + 1].x;
    }

    // A zero product means at least one pair had x1 == x2.
    if (batch_inversion_accumulator == Fq::zero()) {
        throw_or_abort("attempted to invert zero in batch_affine_add_interleaved");
    }
    batch_inversion_accumulator = batch_inversion_accumulator.invert();

    // Backward pass: complete inversions and compute additions
    // The index wraps around below zero, which ends the loop (and skips it when num_points == 0).
    for (size_t i = num_points - 2; i < num_points; i -= 2) {
        // lambda = (y2 - y1) / (x2 - x1)
        points[i + 1].y *= batch_inversion_accumulator;
        // Strip this pair's denominator so the accumulator is ready for the previous pair.
        batch_inversion_accumulator *= points[i + 1].x;
        points[i + 1].x = points[i + 1].y.sqr();
        // x3 = lambda^2 - (x1 + x2)
        points[(i + num_points) >> 1].x = points[i + 1].x - scratch_space[i >> 1];

        // Prefetch the data touched by the next iteration (the pair at i - 2, its output slot
        // and its scratch entry).
        if (i >= 2) {
            __builtin_prefetch(points + i - 2);
            __builtin_prefetch(points + i - 1);
            __builtin_prefetch(points + ((i + num_points - 2) >> 1));
            __builtin_prefetch(scratch_space + ((i - 2) >> 1));
        }

        // y3 = lambda * (x1 - x3) - y1
        points[i].x -= points[(i + num_points) >> 1].x;
        points[i].x *= points[i + 1].y;
        points[(i + num_points) >> 1].y = points[i].x - points[i].y;
    }
}


/**
 * @brief Doubles `num_points` affine points in place using a single field inversion.
 *
 * @details For a point (x, y) on y^2 = x^3 + a*x + b the doubling slope is
 *   lambda = (3*x^2 + a) / (2*y)
 * and the result is x3 = lambda^2 - 2*x, y3 = lambda*(x - x3) - y.
 * All denominators 2*y are multiplied together, inverted once, and the individual inverses are
 * recovered while walking the array backwards.
 * No special cases are handled: if any point has y == 0 the accumulated product is zero and
 * `throw_or_abort` is called. Points at infinity are not treated specially.
 *
 * @tparam T curve parameters; `T::has_a` / `T::a` describe the `a` coefficient
 * @param points        points to double, overwritten with the results
 * @param num_points    number of points
 * @param scratch_space temporary storage, at least `num_points` entries
 */
template <typename AffineElement, typename Fq, typename T>
__attribute__((always_inline)) inline void batch_affine_double_impl(AffineElement* points,
                                                                    const size_t num_points,
                                                                    Fq* scratch_space) noexcept
{
    Fq batch_inversion_accumulator = Fq::one();

    // Forward pass: prepare batch inversion
    for (size_t i = 0; i < num_points; ++i) {
        // Slope numerator 3*x^2 + a. The `a` coefficient must be added AFTER tripling x^2;
        // adding it first would yield 3*x^2 + 3*a and a wrong result on curves with a != 0.
        scratch_space[i] = points[i].x.sqr();
        scratch_space[i] = scratch_space[i] + scratch_space[i] + scratch_space[i];
        if constexpr (T::has_a) {
            scratch_space[i] += T::a;
        }
        // Pre-multiply by the product of the earlier denominators (undone in the backward pass).
        scratch_space[i] *= batch_inversion_accumulator;
        batch_inversion_accumulator *= (points[i].y + points[i].y);
    }

    // A zero product means at least one point had y == 0.
    if (batch_inversion_accumulator == Fq::zero()) {
        throw_or_abort("attempted to invert zero in batch_affine_double_impl");
    }
    batch_inversion_accumulator = batch_inversion_accumulator.invert();

    // Backward pass: compute doublings
    Fq temp_x;

    for (size_t i_plus_1 = num_points; i_plus_1 > 0; --i_plus_1) {
        size_t i = i_plus_1 - 1;

        // lambda = (3*x^2 + a) / (2*y)
        scratch_space[i] *= batch_inversion_accumulator;
        // Strip this point's denominator so the accumulator is ready for point i - 1.
        batch_inversion_accumulator *= (points[i].y + points[i].y);

        temp_x = points[i].x;
        // x3 = lambda^2 - 2*x
        points[i].x = scratch_space[i].sqr() - (points[i].x + points[i].x);
        // y3 = lambda * (x - x3) - y
        points[i].y = scratch_space[i] * (temp_x - points[i].x) - points[i].y;
    }
}


/**
 * @brief Adds two equally sized groups of affine points pairwise.
 *
 * `results` is first filled with a copy of `first_group`; then, chunk by chunk,
 * `batch_affine_add_impl` adds `second_group` into it, so that
 * results[i] = first_group[i] + second_group[i].
 * The restrictions of `batch_affine_add_impl` apply (no handling of equal x coordinates or
 * points at infinity).
 *
 * @param first_group  first operands
 * @param second_group second operands; must have the same size as `first_group`
 * @param results      output span; written at every index below first_group.size()
 */
template <class Fq, class Fr, class T>
void element<Fq, Fr, T>::batch_affine_add(const std::span<affine_element<Fq, Fr, T>>& first_group,
                                          const std::span<affine_element<Fq, Fr, T>>& second_group,
                                          const std::span<affine_element<Fq, Fr, T>>& results) noexcept
{
    using affine_element = affine_element<Fq, Fr, T>;

    BB_ASSERT_EQ(second_group.size(), first_group.size());
    const size_t num_points = first_group.size();

    // One scratch entry per pair; each chunk only touches its own slice.
    std::vector<Fq> scratch_space(num_points);

    // Step 1: results <- first_group
    parallel_for_heuristic(
        num_points, [&](size_t i) { results[i] = first_group[i]; }, thread_heuristics::FF_COPY_COST * 2);

    // Step 2: results[i] <- second_group[i] + results[i], processed in contiguous chunks
    parallel_for_heuristic(
        num_points,
        [&](size_t start, size_t end, BB_UNUSED size_t chunk_index) {
            const size_t chunk_size = end - start;
            batch_affine_add_impl<affine_element, Fq>(
                &second_group[start], &results[start], chunk_size, &scratch_space[start]);
        },
        thread_heuristics::FF_ADDITION_COST * 6 + thread_heuristics::FF_MULTIPLICATION_COST * 6);
}


/**
 * @brief Multiplies every point in `points` by the same scalar, using batched affine arithmetic.
 *
 * @details The scalar is split with `Fr::split_into_endomorphism_scalars` and WNAF-encoded once.
 * Each index range handed out by `parallel_for_range` then builds its own slice of an 8-entry
 * table of odd multiples and runs the WNAF schedule with `batch_affine_add_impl` /
 * `batch_affine_double_impl`, so that one field inversion is shared by all points of the range
 * per step. Input points at infinity are replaced by `affine_element::one()` during the
 * computation and the corresponding results are set back to infinity at the end.
 *
 * @param points points to multiply
 * @param scalar common multiplier
 * @return vector with one result per input point; all entries are infinity when the scalar is
 *         zero, and the negated inputs when the scalar equals -1
 */
template <class Fq, class Fr, class T>
std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomorphism(
    const std::span<const affine_element<Fq, Fr, T>>& points, const Fr& scalar) noexcept
{
    BB_BENCH();
    using affine_element = affine_element<Fq, Fr, T>;
    const size_t num_points = points.size();

    // Space for temporary values
    std::vector<Fq> scratch_space(num_points);

    // We compute the resulting point through WNAF by evaluating (sum_i 16^i * a_i) - skew, where each
    // a_i is in {-15,-13,-11,-9,-7,-5,-3,-1,1,3,5,7,9,11,13,15} and skew is 0 or 1. The result of the sum is
    // always odd and skew is used to reconstruct an even scalar. This means that to construct scalar p-1, where p is
    // the order of the scalar field, we first compute p through the sums and then subtract 1. However, since we are
    // computing p * Point, we get a point at infinity, which is an edge case, and we don't want to handle edge cases
    // in the hot loop since they slow the computation down. So it's better to just handle it here.
    if (scalar == -Fr::one()) {
        std::vector<affine_element> results(num_points);
        parallel_for_heuristic(num_points, [&](size_t i) { results[i] = -points[i]; }, thread_heuristics::FF_COPY_COST);
        return results;
    }
    // Compute wnaf for scalar
    const Fr converted_scalar = scalar.from_montgomery_form();

    // If the scalar is zero, just set results to the point at infinity
    if (converted_scalar.is_zero()) {
        affine_element result{ Fq::zero(), Fq::zero() };
        result.self_set_infinity();
        std::vector<affine_element> results(num_points);
        parallel_for_heuristic(num_points, [&](size_t i) { results[i] = result; }, thread_heuristics::FF_COPY_COST);
        return results;
    }

    constexpr size_t LOOKUP_SIZE = 8;
    constexpr size_t NUM_ROUNDS = 32;

    detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar);
    detail::EndomorphismWnaf<element, NUM_ROUNDS> wnaf{ endo_scalars };

    // work_elements holds the running accumulators and is what gets returned.
    std::vector<affine_element> work_elements(num_points);
    // lookup_table[j][i] = (2j + 1) * points[i]
    std::array<std::vector<affine_element>, LOOKUP_SIZE> lookup_table;
    for (auto& table : lookup_table) {
        table.resize(num_points);
    }
    // Holds the left-hand operands of each batched addition.
    std::vector<affine_element> temp_point_vector(num_points);

    // Processes the points with indices in [start, end). Every buffer is indexed by point, so
    // different ranges touch disjoint slices.
    auto execute_range = [&](size_t start, size_t end) {
        BB_BENCH_TRACY_NAME("batch_mul_with_endo/execute_range");
        // Batched affine addition over this range: rhs[i] <- lhs[i] + rhs[i]
        const auto add_chunked = [&](const affine_element* lhs, affine_element* rhs) {
            batch_affine_add_impl<affine_element, Fq>(&lhs[start], &rhs[start], end - start, &scratch_space[start]);
        };

        // Batched affine doubling over this range: lhs[i] <- 2 * lhs[i]
        const auto double_chunked = [&](affine_element* lhs) {
            batch_affine_double_impl<affine_element, Fq, T>(&lhs[start], end - start, &scratch_space[start]);
        };

        // Initialize first entries in lookup table
        // Points at infinity are replaced by `one` here so the batched formulas never see them;
        // the matching results are reset to infinity at the end of this lambda.
        for (size_t i = start; i < end; ++i) {
            if (points[i].is_point_at_infinity()) {
                temp_point_vector[i] = affine_element::one();
                lookup_table[0][i] = affine_element::one();
            } else {
                temp_point_vector[i] = points[i];
                lookup_table[0][i] = points[i];
            }
        }
        // Construct lookup table: temp_point_vector becomes 2P, then each row is the previous row plus 2P
        double_chunked(&temp_point_vector[0]);
        for (size_t j = 1; j < LOOKUP_SIZE; ++j) {
            for (size_t i = start; i < end; ++i) {
                lookup_table[j][i] = lookup_table[j - 1][i];
            }
            add_chunked(&temp_point_vector[0], &lookup_table[j][0]);
        }

        constexpr Fq beta = Fq::cube_root_of_unity();
        uint64_t wnaf_entry = 0;
        uint64_t index = 0;
        bool sign = 0;
        // Prepare elements for the first batch addition
        // Entry 0 seeds work_elements, entry 1 (the endomorphism slot) seeds temp_point_vector.
        for (size_t j = 0; j < 2; ++j) {
            wnaf_entry = wnaf.table[j];
            // Low 28 bits select the lookup-table row, bit 31 carries the sign.
            index = wnaf_entry & 0x0fffffffU;
            sign = static_cast<bool>((wnaf_entry >> 31) & 1);
            const bool is_odd = ((j & 1) == 1);
            for (size_t i = start; i < end; ++i) {
                auto to_add = lookup_table[static_cast<size_t>(index)][i];
                to_add.y.self_conditional_negate(sign ^ is_odd);
                if (is_odd) {
                    to_add.x *= beta;
                }
                if (j == 0) {
                    work_elements[i] = to_add;
                } else {
                    temp_point_vector[i] = to_add;
                }
            }
        }
        add_chunked(&temp_point_vector[0], &work_elements[0]);
        // Run through SM logic in wnaf form (excluding the skew)
        for (size_t j = 2; j < NUM_ROUNDS * 2; ++j) {
            wnaf_entry = wnaf.table[j];
            index = wnaf_entry & 0x0fffffffU;
            sign = static_cast<bool>((wnaf_entry >> 31) & 1);
            const bool is_odd = ((j & 1) == 1);
            // Before each even slot, shift the accumulators by one 4-bit window.
            if (!is_odd) {
                for (size_t k = 0; k < 4; ++k) {
                    double_chunked(&work_elements[0]);
                }
            }
            for (size_t i = start; i < end; ++i) {
                auto to_add = lookup_table[static_cast<size_t>(index)][i];
                to_add.y.self_conditional_negate(sign ^ is_odd);
                if (is_odd) {
                    to_add.x *= beta;
                }
                temp_point_vector[i] = to_add;
            }
            add_chunked(&temp_point_vector[0], &work_elements[0]);
        }
        // Apply skew for the first endo scalar
        // Use affine_element::operator+ (via Jacobian) to handle edge cases related to the point at infinity.
        if (wnaf.skew) {
            for (size_t i = start; i < end; ++i) {
                work_elements[i] = work_elements[i] + (-lookup_table[0][i]);
            }
        }
        // Apply skew for the second endo scalar
        if (wnaf.endo_skew) {
            for (size_t i = start; i < end; ++i) {
                affine_element endo_point = lookup_table[0][i];
                endo_point.x *= beta;
                work_elements[i] = work_elements[i] + endo_point;
            }
        }
        // Handle points at infinity explicitly
        for (size_t i = start; i < end; ++i) {
            work_elements[i] = points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i];
        }
    };
    parallel_for_range(num_points, execute_range);

    return work_elements;
}


/**
 * @brief Rescale a batch of elements in-place so that every z-coordinate becomes one, using one field inversion.
 *
 * @details Instead of inverting each z-coordinate separately, the z-coordinates of all elements that are not the
 * point at infinity are multiplied together, that single product is inverted, and the individual inverses are
 * peeled off one at a time while walking the array backwards. For each such element the coordinates are updated as
 * x <- x * z^{-2}, y <- y * z^{-3}. Elements that report `is_point_at_infinity()` do not contribute to the product
 * and keep their x and y untouched; their z is nevertheless overwritten with one, like every other element.
 *
 * @param elements Pointer to the first of `num_elements` elements; modified in place.
 * @param num_elements Number of elements to normalize. Zero is allowed (nothing is modified).
 */
template <typename Fq, typename Fr, typename T>
void element<Fq, Fr, T>::batch_normalize(element* elements, const size_t num_elements) noexcept
{
    // prefix_products[i] = product of the z-coordinates of all non-infinity elements with index < i.
    // Exactly one entry is stored per element, so reserve exactly that many slots.
    std::vector<Fq> prefix_products;
    prefix_products.reserve(num_elements);

    Fq accumulator = Fq::one();
    for (size_t i = 0; i < num_elements; ++i) {
        prefix_products.emplace_back(accumulator);
        if (!elements[i].is_point_at_infinity()) {
            accumulator *= elements[i].z;
        }
    }

    // `accumulator` now holds the product of all contributing z-coordinates; invert it once.
    accumulator = accumulator.invert();

    // Walk backwards. Invariant at the top of each iteration: `accumulator` is the inverse of the product of the
    // contributing z-coordinates with index <= i, so multiplying by prefix_products[i] isolates 1 / z_i.
    for (size_t i = num_elements; i-- > 0;) {
        if (!elements[i].is_point_at_infinity()) {
            Fq z_inv = accumulator * prefix_products[i];
            Fq zz_inv = z_inv.sqr();
            elements[i].x *= zz_inv;
            elements[i].y *= (zz_inv * z_inv);
            // Strip z_i from the running inverse (uses the original z, before it is reset below).
            accumulator *= elements[i].z;
        }
        elements[i].z = Fq::one();
    }
}


/**
 * @brief Sample x-coordinates until one is found for which the curve equation has a solution, and return that point.
 *
 * @details Each attempt draws x via `Fq::random_element(engine)`, evaluates x^3 + b (plus a*x when `T::has_a` is
 * set) and asks `sqrt()` for a root. The first x for which a root is reported is returned together with that root
 * as y and with z equal to one.
 *
 * @param engine Randomness source forwarded unchanged to `Fq::random_element`.
 * @return The element {x, y, 1} for the first successful sample.
 */
template <typename Fq, typename Fr, typename T>
template <typename>
element<Fq, Fr, T> element<Fq, Fr, T>::random_coordinates_on_curve(numeric::RNG* engine) noexcept
{
    for (;;) {
        Fq x_candidate = Fq::random_element(engine);

        // Right-hand side of the curve equation evaluated at the candidate x.
        Fq y_squared = x_candidate.sqr() * x_candidate + T::b;
        if constexpr (T::has_a) {
            y_squared += (x_candidate * T::a);
        }

        auto [is_square, root] = y_squared.sqrt();
        if (is_square) {
            return { x_candidate, root, Fq::one() };
        }
        // No root reported for this x: draw again.
    }
}


} // namespace bb::group_elements

// NOLINTEND(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays)

assert.hpp

BB_ASSERT_EQ
#define BB_ASSERT_EQ(actual, expected,...)
Definition assert.hpp:83

bb_bench.hpp

BB_BENCH_TRACY_NAME
#define BB_BENCH_TRACY_NAME(name)
Definition bb_bench.hpp:256

BB_BENCH
#define BB_BENCH()
Definition bb_bench.hpp:268

bb::group_elements::affine_element
Definition affine_element.hpp:27

bb::group_elements::affine_element::is_point_at_infinity
constexpr bool is_point_at_infinity() const noexcept
Definition affine_element_impl.hpp:113

bb::group_elements::affine_element::self_set_infinity
constexpr void self_set_infinity() noexcept
Definition affine_element_impl.hpp:95

bb::group_elements::affine_element::x
Fq x
Definition affine_element.hpp:223

bb::group_elements::affine_element::y
Fq y
Definition affine_element.hpp:224

bb::group_elements::affine_element::one
static constexpr affine_element one() noexcept
Definition affine_element.hpp:52

bb::group_elements::element
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
Definition element.hpp:35

bb::group_elements::element::operator*=
element operator*=(const Fr &exponent) noexcept
Definition element_impl.hpp:396

bb::group_elements::element::set_infinity
BB_INLINE constexpr element set_infinity() const noexcept
Definition element_impl.hpp:477

bb::group_elements::element::mul_with_endomorphism
element mul_with_endomorphism(const Fr &scalar) const noexcept
Definition element_impl.hpp:636

bb::group_elements::element::infinity
static element infinity()
Definition element_impl.hpp:470

bb::group_elements::element::y
Fq y
Definition element.hpp:144

bb::group_elements::element::batch_mul_with_endomorphism
static std::vector< affine_element< Fq, Fr, Params > > batch_mul_with_endomorphism(const std::span< const affine_element< Fq, Fr, Params > > &points, const Fr &scalar) noexcept
Multiply each point by the same scalar.
Definition element_impl.hpp:896

bb::group_elements::element::operator-=
constexpr element operator-=(const element &other) noexcept
Definition element_impl.hpp:369

bb::group_elements::element::operator-
constexpr element operator-() const noexcept
Definition element_impl.hpp:382

bb::group_elements::element::z
Fq z
Definition element.hpp:145

bb::group_elements::element::operator+
friend constexpr element operator+(const affine_element< Fq, Fr, Params > &left, const element &right) noexcept
Definition element.hpp:76

bb::group_elements::element::dbl
constexpr element dbl() const noexcept
Definition element_impl.hpp:151

bb::group_elements::element::normalize
constexpr element normalize() const noexcept
Definition element_impl.hpp:464

bb::group_elements::element::self_dbl
constexpr void self_dbl() noexcept
Definition element_impl.hpp:82

bb::group_elements::element::random_element
static element random_element(numeric::RNG *engine=nullptr) noexcept
Definition element_impl.hpp:563

bb::group_elements::element::batch_normalize
static void batch_normalize(element *elements, size_t num_elements) noexcept
Definition element_impl.hpp:1044

bb::group_elements::element::operator+=
constexpr element operator+=(const element &other) noexcept
Definition element_impl.hpp:273

bb::group_elements::element::batch_affine_add
static void batch_affine_add(const std::span< affine_element< Fq, Fr, Params > > &first_group, const std::span< affine_element< Fq, Fr, Params > > &second_group, const std::span< affine_element< Fq, Fr, Params > > &results) noexcept
Pairwise affine add points in first and second group.
Definition element_impl.hpp:861

bb::group_elements::element::mul_const_time
element mul_const_time(const Fr &scalar, numeric::RNG *engine=nullptr) const noexcept
Constant-time scalar multiplication intended for secret scalars (e.g. ECDSA / Schnorr nonces).
Definition element_impl.hpp:403

bb::group_elements::element::on_curve
BB_INLINE constexpr bool on_curve() const noexcept
Definition element_impl.hpp:515

bb::group_elements::element::operator==
BB_INLINE constexpr bool operator==(const element &other) const noexcept
Definition element_impl.hpp:536

bb::group_elements::element::operator*
element operator*(const Fr &exponent) const noexcept
Definition element_impl.hpp:388

bb::group_elements::element::x
Fq x
Definition element.hpp:143

bb::group_elements::element::element
element() noexcept=default

bb::group_elements::element::random_coordinates_on_curve
static element random_coordinates_on_curve(numeric::RNG *engine=nullptr) noexcept

bb::group_elements::element::mul_without_endomorphism
element mul_without_endomorphism(const Fr &scalar) const noexcept
Definition element_impl.hpp:580

bb::group_elements::element::operator=
constexpr element & operator=(const element &other) noexcept
Definition element_impl.hpp:46

bb::group_elements::element::self_set_infinity
BB_INLINE constexpr void self_set_infinity() noexcept
Definition element_impl.hpp:484

bb::group_elements::element::is_point_at_infinity
BB_INLINE constexpr bool is_point_at_infinity() const noexcept
Definition element_impl.hpp:504

bb::numeric::RNG
Definition engine.hpp:17

bb::numeric::uint256_t
Definition uint256.hpp:32

bb::numeric::uint256_t::get_bit
constexpr bool get_bit(uint64_t bit_index) const
Definition uint256_impl.hpp:366

bb::numeric::uint256_t::data
uint64_t data[4]
Definition uint256.hpp:219

bb::numeric::uint256_t::get_msb
constexpr uint64_t get_msb() const
Definition uint256_impl.hpp:376

bb::numeric::uintx< uint256_t >

bb::numeric::uintx::get_bit
bool get_bit(uint64_t bit_index) const
Definition uintx_impl.hpp:117

BB_UNUSED
#define BB_UNUSED
Definition compiler_hints.hpp:30

MemoryTagOptions::U1
@ U1

VariableRefMutationOptions::index
@ index

a
FF a
Definition field_gt.test.cpp:52

b
FF b
Definition field_gt.test.cpp:53

engine
numeric::RNG & engine
Definition eccvm_transcript.test.cpp:282

element.hpp

bb::group_elements::detail::EndoScalars
std::pair< std::array< uint64_t, 2 >, std::array< uint64_t, 2 > > EndoScalars
Definition element_impl.hpp:604

bb::group_elements
Definition affine_element.hpp:19

bb::group_elements::__attribute__
__attribute__((always_inline)) inline void batch_affine_add_impl(const AffineElement *lhs
Batch affine addition for parallel arrays: (lhs[i], rhs[i]) → rhs[i].

bb::group_elements::num_pairs
AffineElement const size_t num_pairs
Definition element_impl.hpp:709

bb::group_elements::temp_x
Fq temp_x
Definition element_impl.hpp:836

bb::group_elements::num_points
const size_t num_points
Definition element_impl.hpp:756

bb::group_elements::rhs
AffineElement * rhs
Definition element_impl.hpp:708

bb::group_elements::noexcept
AffineElement const size_t Fq *scratch_space noexcept
Definition element_impl.hpp:711

bb::group_elements::batch_inversion_accumulator
batch_inversion_accumulator
Definition element_impl.hpp:726

bb::numeric::uint512_t
uintx< uint256_t > uint512_t
Definition uintx.hpp:309

bb::numeric::get_randomness
RNG & get_randomness()
Definition engine.cpp:258

bb::stdlib::element
std::conditional_t< IsGoblinBigGroup< C, Fq, Fr, G >, element_goblin::goblin_element< C, goblin_field< C >, Fr, G >, element_default::element< C, Fq, Fr, G > > element
element wraps either element_default::element or element_goblin::goblin_element depending on parametr...
Definition biggroup.hpp:1023

bb::thread_heuristics::FF_COPY_COST
constexpr size_t FF_COPY_COST
Definition thread.hpp:144

bb::thread_heuristics::FF_ADDITION_COST
constexpr size_t FF_ADDITION_COST
Definition thread.hpp:132

bb::thread_heuristics::FF_MULTIPLICATION_COST
constexpr size_t FF_MULTIPLICATION_COST
Definition thread.hpp:134

bb::wnaf::fixed_wnaf
void fixed_wnaf(const uint64_t *scalar, uint64_t *wnaf, bool &skew_map, const uint64_t point_index, const uint64_t num_points, const size_t wnaf_bits) noexcept
Performs fixed-window non-adjacent form (WNAF) computation for scalar multiplication.
Definition wnaf.hpp:117

bb::operator*
Univariate< Fr, domain_end > operator*(const Fr &ff, const Univariate< Fr, domain_end > &uv)
Definition univariate.hpp:566

bb::parallel_for_heuristic
void parallel_for_heuristic(size_t num_points, const std::function< void(size_t, size_t, size_t)> &func, size_t heuristic_cost)
Split a loop into several loops running in parallel based on operations in 1 iteration.
Definition thread.cpp:171

bb::parallel_for_range
void parallel_for_range(size_t num_points, const std::function< void(size_t, size_t)> &func, size_t no_multhreading_if_less_or_equal)
Split a loop into several loops running in parallel.
Definition thread.cpp:141

std::get
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13

Fq
grumpkin::fq Fq
Definition schnorr.test.cpp:11

bb::field< Bn254FrParams >

bb::field< Bn254FrParams >::cube_root_of_unity
static constexpr field cube_root_of_unity()
Definition field_declarations.hpp:255

bb::field< Bn254FrParams >::one
static constexpr field one()
Definition field_declarations.hpp:279

bb::field< Bn254FrParams >::modulus
static constexpr uint256_t modulus
Definition field_declarations.hpp:234

bb::field< Bn254FrParams >::split_into_endomorphism_scalars
static void split_into_endomorphism_scalars(const field &k, field &k1, field &k2)
Full-width endomorphism decomposition: k ≡ k1 - k2·λ (mod r). Modifies the field elements k1 and k2.
Definition field_declarations.hpp:472

bb::field::self_sqr
BB_INLINE constexpr void self_sqr() &noexcept
Definition field_impl.hpp:87

bb::field::invert
constexpr field invert() const noexcept
Definition field_impl.hpp:386

bb::field::is_msb_set
BB_INLINE constexpr bool is_msb_set() const noexcept
Definition field_impl.hpp:743

bb::field< Bn254FrParams >::random_element
static field random_element(numeric::RNG *engine=nullptr) noexcept
Definition field_impl.hpp:777

bb::field::sqr
BB_INLINE constexpr field sqr() const noexcept
Definition field_impl.hpp:72

bb::field::data
uint64_t data[4]
Definition field_declarations.hpp:232

bb::field::is_zero
BB_INLINE constexpr bool is_zero() const noexcept
Definition field_impl.hpp:753

bb::field::from_montgomery_form
BB_INLINE constexpr field from_montgomery_form() const noexcept
Definition field_impl.hpp:292

bb::field< Bn254FrParams >::zero
static constexpr field zero()
Definition field_declarations.hpp:277

bb::group_elements::detail::EndomorphismWnaf
Handles the WNAF computation for scalars that are split using an endomorphism, achieved through split...
Definition element_impl.hpp:614

bb::group_elements::detail::EndomorphismWnaf::EndomorphismWnaf
EndomorphismWnaf(const EndoScalars &scalars)
Definition element_impl.hpp:626

bb::group_elements::detail::EndomorphismWnaf::table
std::array< uint64_t, NUM_ROUNDS *2 > table
Definition element_impl.hpp:618

bb::group_elements::detail::EndomorphismWnaf::endo_skew
bool endo_skew
Definition element_impl.hpp:621

bb::group_elements::detail::EndomorphismWnaf::skew
bool skew
Definition element_impl.hpp:620

bb::group_elements::detail::EndomorphismWnaf::NUM_WNAF_BITS
static constexpr size_t NUM_WNAF_BITS
Definition element_impl.hpp:616

thread.hpp

throw_or_abort
void throw_or_abort(std::string const &err)
Definition throw_or_abort.hpp:6