9#include "../bitop/get_msb.hpp"
17 const uint32_t a_lo =
a & 0xffffULL;
18 const uint32_t a_hi =
a >> 16ULL;
19 const uint32_t b_lo =
b & 0xffffULL;
20 const uint32_t b_hi =
b >> 16ULL;
22 const uint32_t lo_lo = a_lo * b_lo;
23 const uint32_t hi_lo = a_hi * b_lo;
24 const uint32_t lo_hi = a_lo * b_hi;
25 const uint32_t hi_hi = a_hi * b_hi;
27 const uint32_t cross = (lo_lo >> 16) + (hi_lo & 0xffffULL) + lo_hi;
29 return { (cross << 16ULL) | (lo_lo & 0xffffULL), (hi_lo >> 16ULL) + (cross >> 16ULL) + hi_hi };
35 const uint32_t
sum =
a +
b;
36 const auto carry_temp =
static_cast<uint32_t
>(
sum <
a);
37 const uint32_t r =
sum + carry_in;
38 const uint32_t carry_out = carry_temp +
static_cast<unsigned int>(r < carry_in);
39 return { r, carry_out };
42constexpr uint32_t uint128_t::addc_discard_hi(
const uint32_t
a,
const uint32_t
b,
const uint32_t carry_in)
44 return a +
b + carry_in;
49 const uint32_t t_1 =
a - (borrow_in >> 31ULL);
50 const auto borrow_temp_1 =
static_cast<uint32_t
>(t_1 >
a);
51 const uint32_t t_2 = t_1 -
b;
52 const auto borrow_temp_2 =
static_cast<uint32_t
>(t_2 > t_1);
54 return { t_2, 0ULL - (borrow_temp_1 | borrow_temp_2) };
57constexpr uint32_t uint128_t::sbb_discard_hi(
const uint32_t
a,
const uint32_t
b,
const uint32_t borrow_in)
59 return a -
b - (borrow_in >> 31ULL);
66 const uint32_t carry_in)
70 const auto overflow_c =
static_cast<uint32_t
>(result.first <
a);
71 result.first += carry_in;
72 const auto overflow_carry =
static_cast<uint32_t
>(result.first < carry_in);
73 result.second += (overflow_c + overflow_carry);
77constexpr uint32_t uint128_t::mac_discard_hi(
const uint32_t
a,
80 const uint32_t carry_in)
82 return (
b * c +
a + carry_in);
106 uint64_t bit_difference =
get_msb() -
b.get_msb();
112 if (divisor > remainder) {
119 while (remainder >=
b) {
123 if (remainder >= divisor) {
124 remainder -= divisor;
127 quotient |= accumulator;
133 return { quotient, remainder };
138 const auto [r0, t0] = mul_wide(
data[0], other.data[0]);
139 const auto [q0, t1] = mac(t0,
data[0], other.data[1], 0);
140 const auto [q1, t2] = mac(t1,
data[0], other.data[2], 0);
141 const auto [q2, z0] = mac(t2,
data[0], other.data[3], 0);
143 const auto [r1, t3] = mac(q0,
data[1], other.data[0], 0);
144 const auto [q3, t4] = mac(q1,
data[1], other.data[1], t3);
145 const auto [q4, t5] = mac(q2,
data[1], other.data[2], t4);
146 const auto [q5, z1] = mac(z0,
data[1], other.data[3], t5);
148 const auto [r2, t6] = mac(q3,
data[2], other.data[0], 0);
149 const auto [q6, t7] = mac(q4,
data[2], other.data[1], t6);
150 const auto [q7, t8] = mac(q5,
data[2], other.data[2], t7);
151 const auto [q8, z2] = mac(z1,
data[2], other.data[3], t8);
153 const auto [r3, t9] = mac(q6,
data[3], other.data[0], 0);
154 const auto [r4, t10] = mac(q7,
data[3], other.data[1], t9);
155 const auto [r5, t11] = mac(q8,
data[3], other.data[2], t10);
156 const auto [r6, r7] = mac(z2,
data[3], other.data[3], t11);
168constexpr uint128_t uint128_t::slice(
const uint64_t start,
const uint64_t end)
const
172 assert(start <= end);
173 const uint64_t range = end - start;
175 return ((*
this) >> start) & mask;
182 const uint64_t maximum_set_bit = exponent.get_msb();
184 for (
int i =
static_cast<int>(maximum_set_bit) - 1; i >= 0; --i) {
185 accumulator *= accumulator;
186 if (exponent.get_bit(
static_cast<uint64_t
>(i))) {
187 accumulator *= to_mul;
198constexpr bool uint128_t::get_bit(
const uint64_t bit_index)
const
201 if (bit_index > 127) {
204 const auto idx =
static_cast<size_t>(bit_index >> 5);
205 const size_t shift = bit_index & 31;
206 return static_cast<bool>((
data[idx] >> shift) & 1);
209constexpr uint64_t uint128_t::get_msb()
const
220 const auto [r0, t0] = addc(
data[0], other.data[0], 0);
221 const auto [r1, t1] = addc(
data[1], other.data[1], t0);
222 const auto [r2, t2] = addc(
data[2], other.data[2], t1);
223 const auto r3 = addc_discard_hi(
data[3], other.data[3], t2);
224 return { r0, r1, r2, r3 };
230 const auto [r0, t0] = sbb(
data[0], other.data[0], 0);
231 const auto [r1, t1] = sbb(
data[1], other.data[1], t0);
232 const auto [r2, t2] = sbb(
data[2], other.data[2], t1);
233 const auto r3 = sbb_discard_hi(
data[3], other.data[3], t2);
234 return { r0, r1, r2, r3 };
237constexpr uint128_t uint128_t::operator-()
const
244 const auto [r0, t0] = mac(0,
data[0], other.data[0], 0ULL);
245 const auto [q0, t1] = mac(0,
data[0], other.data[1], t0);
246 const auto [q1, t2] = mac(0,
data[0], other.data[2], t1);
247 const auto q2 = mac_discard_hi(0,
data[0], other.data[3], t2);
249 const auto [r1, t3] = mac(q0,
data[1], other.data[0], 0ULL);
250 const auto [q3, t4] = mac(q1,
data[1], other.data[1], t3);
251 const auto q4 = mac_discard_hi(q2,
data[1], other.data[2], t4);
253 const auto [r2, t5] = mac(q3,
data[2], other.data[0], 0ULL);
254 const auto q5 = mac_discard_hi(q4,
data[2], other.data[1], t5);
256 const auto r3 = mac_discard_hi(q5,
data[3], other.data[0], 0ULL);
258 return { r0, r1, r2, r3 };
263 return divmod(other).first;
268 return divmod(other).second;
273 return {
data[0] & other.data[0],
data[1] & other.data[1],
data[2] & other.data[2],
data[3] & other.data[3] };
278 return {
data[0] ^ other.data[0],
data[1] ^ other.data[1],
data[2] ^ other.data[2],
data[3] ^ other.data[3] };
283 return {
data[0] | other.data[0],
data[1] | other.data[1],
data[2] | other.data[2],
data[3] | other.data[3] };
286constexpr uint128_t uint128_t::operator~()
const
288 return { ~data[0], ~data[1], ~data[2], ~data[3] };
291constexpr bool uint128_t::operator==(
const uint128_t& other)
const
293 return data[0] == other.data[0] &&
data[1] == other.data[1] &&
data[2] == other.data[2] &&
data[3] == other.data[3];
296constexpr bool uint128_t::operator!=(
const uint128_t& other)
const
298 return !(*
this == other);
301constexpr bool uint128_t::operator!()
const
306constexpr bool uint128_t::operator>(
const uint128_t& other)
const
308 bool t0 =
data[3] > other.data[3];
309 bool t1 =
data[3] == other.data[3] &&
data[2] > other.data[2];
310 bool t2 =
data[3] == other.data[3] &&
data[2] == other.data[2] &&
data[1] > other.data[1];
312 data[3] == other.data[3] &&
data[2] == other.data[2] &&
data[1] == other.data[1] &&
data[0] > other.data[0];
313 return t0 || t1 || t2 || t3;
316constexpr bool uint128_t::operator>=(
const uint128_t& other)
const
318 return (*
this > other) || (*
this == other);
321constexpr bool uint128_t::operator<(
const uint128_t& other)
const
323 return other > *
this;
326constexpr bool uint128_t::operator<=(
const uint128_t& other)
const
328 return (*
this < other) || (*
this == other);
333 uint32_t total_shift = other.data[0];
335 if (total_shift >= 128 || (other.data[1] != 0U) || (other.data[2] != 0U) || (other.data[3] != 0U)) {
339 if (total_shift == 0) {
343 uint32_t num_shifted_limbs = total_shift >> 5ULL;
344 uint32_t limb_shift = total_shift & 31ULL;
346 std::array<uint32_t, 4> shifted_limbs = { 0, 0, 0, 0 };
348 if (limb_shift == 0) {
349 shifted_limbs[0] =
data[0];
350 shifted_limbs[1] =
data[1];
351 shifted_limbs[2] =
data[2];
352 shifted_limbs[3] =
data[3];
354 uint32_t remainder_shift = 32ULL - limb_shift;
356 shifted_limbs[3] =
data[3] >> limb_shift;
358 uint32_t remainder = (
data[3]) << remainder_shift;
360 shifted_limbs[2] = (
data[2] >> limb_shift) + remainder;
362 remainder = (
data[2]) << remainder_shift;
364 shifted_limbs[1] = (
data[1] >> limb_shift) + remainder;
366 remainder = (
data[1]) << remainder_shift;
368 shifted_limbs[0] = (
data[0] >> limb_shift) + remainder;
372 for (
size_t i = 0; i < 4 - num_shifted_limbs; ++i) {
373 result.data[i] = shifted_limbs[
static_cast<size_t>(i + num_shifted_limbs)];
381 uint32_t total_shift = other.data[0];
383 if (total_shift >= 128 || (other.data[1] != 0U) || (other.data[2] != 0U) || (other.data[3] != 0U)) {
387 if (total_shift == 0) {
390 uint32_t num_shifted_limbs = total_shift >> 5ULL;
391 uint32_t limb_shift = total_shift & 31ULL;
393 std::array<uint32_t, 4> shifted_limbs{ 0, 0, 0, 0 };
395 if (limb_shift == 0) {
396 shifted_limbs[0] =
data[0];
397 shifted_limbs[1] =
data[1];
398 shifted_limbs[2] =
data[2];
399 shifted_limbs[3] =
data[3];
401 uint32_t remainder_shift = 32ULL - limb_shift;
403 shifted_limbs[0] =
data[0] << limb_shift;
405 uint32_t remainder =
data[0] >> remainder_shift;
407 shifted_limbs[1] = (
data[1] << limb_shift) + remainder;
409 remainder =
data[1] >> remainder_shift;
411 shifted_limbs[2] = (
data[2] << limb_shift) + remainder;
413 remainder =
data[2] >> remainder_shift;
415 shifted_limbs[3] = (
data[3] << limb_shift) + remainder;
419 for (
size_t i = 0; i < 4 - num_shifted_limbs; ++i) {
420 result.data[
static_cast<size_t>(i + num_shifted_limbs)] = shifted_limbs[i];
#define BB_ASSERT(expression,...)
const std::vector< MemoryValue > data
constexpr uint64_t get_msb64(const uint64_t in)
constexpr T get_msb(const T in)
Inner sum(Cont< Inner, Args... > const &in)
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
unsigned __int128 uint128_t
void throw_or_abort(std::string const &err)