/*
 * CLEAR_FLAGS(empty_reg)
 *
 * Emits a single `xorq empty_reg, empty_reg` for use inside an extended-asm
 * template (register names elsewhere in this file are spelled `%%reg`).
 * XOR-ing a register with itself zeroes it and, by the x86 definition of XOR,
 * clears both CF and OF.
 *
 * empty_reg: C string literal naming a 64-bit register; its previous contents
 *            are destroyed.
 */
13#define CLEAR_FLAGS(empty_reg) \
14 "xorq " empty_reg ", " empty_reg " \n\t" /* empty_reg = 0; CF = OF = 0 */
/*
 * LOAD_FIELD_ELEMENT(a, lolo, lohi, hilo, hihi)
 *
 * Loads four consecutive 64-bit words from memory into four registers.
 * All arguments are C string literals spliced into the asm text.
 *
 * a:    register holding the base address; words are read at byte offsets
 *       0, 8, 16 and 24 from it.
 * lolo, lohi, hilo, hihi: destination registers for the words at offsets
 *       0, 8, 16 and 24 respectively.
 *
 * movq does not modify the flags.
 */
20#define LOAD_FIELD_ELEMENT(a, lolo, lohi, hilo, hihi) \
21 "movq 0(" a "), " lolo " \n\t" /* lolo = word at a + 0 */ \
22 "movq 8(" a "), " lohi " \n\t" /* lohi = word at a + 8 */ \
23 "movq 16(" a "), " hilo " \n\t" /* hilo = word at a + 16 */ \
24 "movq 24(" a "), " hihi " \n\t" /* hihi = word at a + 24 */
/*
 * STORE_FIELD_ELEMENT(r, lolo, lohi, hilo, hihi)
 *
 * Stores four 64-bit registers to four consecutive memory words.
 * All arguments are C string literals spliced into the asm text.
 *
 * r:    register holding the destination base address; words are written at
 *       byte offsets 0, 8, 16 and 24 from it.
 * lolo, lohi, hilo, hihi: source registers written to offsets 0, 8, 16 and 24
 *       respectively.
 *
 * movq does not modify the flags.
 */
31#define STORE_FIELD_ELEMENT(r, lolo, lohi, hilo, hihi) \
32 "movq " lolo ", 0(" r ") \n\t" /* word at r + 0 = lolo */ \
33 "movq " lohi ", 8(" r ") \n\t" /* word at r + 8 = lohi */ \
34 "movq " hilo ", 16(" r ") \n\t" /* word at r + 16 = hilo */ \
35 "movq " hihi ", 24(" r ") \n\t" /* word at r + 24 = hihi */
37#if !defined(__ADX__) || defined(DISABLE_ADX)
43 "addq 0(" b "), %%r12 \n\t" /* r12 += word at b + 0; addq ignores incoming CF, so this starts a fresh carry chain. NOTE(review): the #define line for this body is not visible in this excerpt -- presumably ADD(b), confirm */ \
44 "adcq 8(" b "), %%r13 \n\t" /* r13 += word at b + 8 + CF */ \
45 "adcq 16(" b "), %%r14 \n\t" /* r14 += word at b + 16 + CF */ \
46 "adcq 24(" b "), %%r15 \n\t" /* r15 += word at b + 24 + CF; CF afterwards is the carry out of the 4-word sum */
53 "subq 0(" b "), %%r12 \n\t" /* r12 -= word at b + 0; subq ignores incoming CF, so this starts a fresh borrow chain. NOTE(review): the #define line for this body is not visible in this excerpt -- presumably SUB(b), confirm */ \
54 "sbbq 8(" b "), %%r13 \n\t" /* r13 -= word at b + 8 + CF (borrow) */ \
55 "sbbq 16(" b "), %%r14 \n\t" /* r14 -= word at b + 16 + CF (borrow) */ \
56 "sbbq 24(" b "), %%r15 \n\t" /* r15 -= word at b + 24 + CF (borrow); CF afterwards is the borrow out of the 4-word difference */
/*
 * ADD_REDUCE(b, twice_not_modulus_0 .. twice_not_modulus_3) -- add/adc variant.
 *
 * Operates on the 4-word value held in r12 (lowest) .. r15 (highest):
 *   1. adds the four 64-bit words at b (offsets 0, 8, 16, 24) to r12..r15;
 *   2. copies that sum to r8..r11;
 *   3. adds the four twice_not_modulus_* operands to r12..r15;
 *   4. if step 3 produced NO carry out (CF = 0), restores the step-2 sum from
 *      r8..r11; otherwise the step-3 value is kept.
 *
 * The carry out of step 1 is not consumed: the movq copies leave flags alone
 * and the addq that opens step 3 overwrites CF.
 *
 * Parameters are C string literals spliced into the asm text.
 * NOTE(review): the parameter names suggest the constants are the limbs of the
 * two's complement of 2 * modulus, which would make step 3 a subtraction of
 * 2 * modulus -- confirm against the caller.
 *
 * Overwrites r8..r11 and the flags.
 */
66#define ADD_REDUCE(b, twice_not_modulus_0, twice_not_modulus_1, twice_not_modulus_2, twice_not_modulus_3) \
67 "addq 0(" b "), %%r12 \n\t" /* r12 += word at b + 0 */ \
68 "adcq 8(" b "), %%r13 \n\t" /* r13 += word at b + 8 + CF */ \
69 "adcq 16(" b "), %%r14 \n\t" /* r14 += word at b + 16 + CF */ \
70 "adcq 24(" b "), %%r15 \n\t" /* r15 += word at b + 24 + CF */ \
71 "movq %%r12, %%r8 \n\t" /* save the sum in r8..r11 (movq leaves flags untouched) */ \
72 "movq %%r13, %%r9 \n\t" \
73 "movq %%r14, %%r10 \n\t" \
74 "movq %%r15, %%r11 \n\t" \
75 "addq " twice_not_modulus_0 ", %%r12 \n\t" /* r12 += twice_not_modulus_0; fresh carry chain */ \
76 "adcq " twice_not_modulus_1 ", %%r13 \n\t" /* r13 += twice_not_modulus_1 + CF */ \
77 "adcq " twice_not_modulus_2 ", %%r14 \n\t" /* r14 += twice_not_modulus_2 + CF */ \
78 "adcq " twice_not_modulus_3 ", %%r15 \n\t" /* r15 += twice_not_modulus_3 + CF; CF = carry out */ \
79 "cmovncq %%r8, %%r12 \n\t" /* CF == 0: put the saved sum back, word by word */ \
80 "cmovncq %%r9, %%r13 \n\t" \
81 "cmovncq %%r10, %%r14 \n\t" \
82 "cmovncq %%r11, %%r15 \n\t"
/*
 * CONDITIONAL_ADD(b_0, b_1, b_2, b_3) -- add/adc variant.
 *
 * Operates on the 4-word value held in r12 (lowest) .. r15 (highest):
 *   1. copies r12..r15 to r8..r11;
 *   2. adds the operands b_0..b_3 to r12..r15 as one carry chain;
 *   3. if the addition produced NO carry out (CF = 0), restores the original
 *      value from r8..r11; otherwise the sum is kept.
 * In other words the addition only "takes effect" when it wraps past 2^256.
 *
 * b_0..b_3: C string literals naming the four source operands, lowest first.
 *
 * Overwrites r8..r11 and the flags.
 */
95#define CONDITIONAL_ADD(b_0, b_1, b_2, b_3) \
97 "movq %%r12, %%r8 \n\t" /* keep a copy of the input in r8..r11 */ \
98 "movq %%r13, %%r9 \n\t" \
99 "movq %%r14, %%r10 \n\t" \
100 "movq %%r15, %%r11 \n\t" \
101 "addq " b_0 ", %%r12 \n\t" /* r12 += b_0; fresh carry chain */ \
102 "adcq " b_1 ", %%r13 \n\t" /* r13 += b_1 + CF */ \
103 "adcq " b_2 ", %%r14 \n\t" /* r14 += b_2 + CF */ \
104 "adcq " b_3 ", %%r15 \n\t" /* r15 += b_3 + CF; CF = carry out */ \
107 "cmovncq %%r8, %%r12 \n\t" /* CF == 0: revert to the copy, word by word */ \
108 "cmovncq %%r9, %%r13 \n\t" \
109 "cmovncq %%r10, %%r14 \n\t" \
110 "cmovncq %%r11, %%r15 \n\t"
/*
 * MUL(a1, a2, a3, a4, b) -- variant built from mulx plus ordinary add/adc
 * carry chains.
 *
 * Emits four rounds. In round i the code
 *   - adds a_i * (b[0], b[1], b[2], b[3]) into a 5-word accumulator window,
 *   - computes k = low 64 bits of (lowest window word * r_inv),
 *   - adds k * (modulus_0 .. modulus_3) into the window,
 * after which the lowest window word is never read again and the window
 * slides up by one word.
 * This has the shape of word-by-word Montgomery multiplication, presumably
 * with r_inv = -modulus^-1 mod 2^64 -- confirm against the operand list of the
 * enclosing asm statement.
 *
 * Parameters (all C string literals spliced into the asm text):
 *   a1..a4  source operands, moved into rdx one per round (mulx takes its
 *           implicit multiplicand from rdx)
 *   b       register holding a base address; words are read at byte offsets
 *           0, 8, 16 and 24 from it
 * Named asm operands referenced: %[r_inv], %[modulus_0] .. %[modulus_3].
 *
 * Accumulator window, lowest word first:
 *   round 1: r13 r14 r15 r10 r12        round 2: r14 r15 r10 r12 r13
 *   round 3: r15 r10 r12 r13 r14        round 4: r10 r12 r13 r14 r15
 * When the visible code ends, the surviving words are r12, r13, r14, r15
 * (lowest first). No conditional subtraction is performed in this macro.
 *
 * Scratch registers: rdx, rdi, r8, r9, r11. Flags are overwritten.
 * Notation below: (hi:lo) = x * y is the 128-bit product; mulx does not touch
 * the flags, so a carry produced before a mulx is still live after it.
 */
150#define MUL(a1, a2, a3, a4, b) \
155 "movq " a1 ", %%rdx \n\t" /* round 1: rdx = a1 */ \
156 "xorq %%r8, %%r8 \n\t" /* r8 = 0; also clears CF and OF */ \
159 "mulxq 8(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a1 * b[1] */ \
160 "mulxq 24(" b "), %%rdi, %%r12 \n\t" /* (r12:rdi) = a1 * b[3] */ \
161 "mulxq 0(" b "), %%r13, %%r14 \n\t" /* (r14:r13) = a1 * b[0] */ \
162 "mulxq 16(" b "), %%r15, %%r10 \n\t" /* (r10:r15) = a1 * b[2] */ \
165 "movq %%r13, %%rdx \n\t" /* rdx = lowest window word */ \
166 "mulxq %[r_inv], %%rdx, %%r11 \n\t" /* rdx = k = low64(r13 * r_inv); high half lands in r11 and is overwritten later */ \
170 "addq %%r8, %%r14 \n\t" /* r14 += r8 (merge the partial products) */ \
171 "adcq %%r9, %%r15 \n\t" /* r15 += r9 + CF */ \
172 "adcq %%rdi, %%r10 \n\t" /* r10 += rdi + CF */ \
173 "adcq $0, %%r12 \n\t" /* r12 += CF */ \
182 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
183 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_1 */ \
186 "addq %%r8, %%r13 \n\t" /* r13 += r8; r13 is not read again before a2 * b[3] overwrites it, only the carry matters */ \
187 "adcq %%rdi, %%r14 \n\t" /* r14 += rdi + CF */ \
188 "adcq %%r11, %%r15 \n\t" /* r15 += r11 + CF */ \
189 "adcq $0, %%r10 \n\t" /* r10 += CF */ \
190 "adcq $0, %%r12 \n\t" /* r12 += CF */ \
193 "addq %%r9, %%r14 \n\t" /* r14 += r9; new carry chain, continued after the two mulx below */ \
194 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
195 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_3 */ \
196 "adcq %%r8, %%r15 \n\t" /* r15 += r8 + CF */ \
197 "adcq %%rdi, %%r10 \n\t" /* r10 += rdi + CF */ \
198 "adcq %%r11, %%r12 \n\t" /* r12 += r11 + CF */ \
201 "addq %%r9, %%r10 \n\t" /* r10 += r9 */ \
202 "adcq $0, %%r12 \n\t" /* r12 += CF */ \
210 "movq " a2 ", %%rdx \n\t" /* round 2: rdx = a2; window is now r14 r15 r10 r12, r13 becomes the new top word */ \
211 "mulxq 0(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a2 * b[0] */ \
212 "mulxq 8(" b "), %%rdi, %%r11 \n\t" /* (r11:rdi) = a2 * b[1] */ \
215 "addq %%r8, %%r14 \n\t" /* r14 += r8 */ \
216 "adcq %%rdi, %%r15 \n\t" /* r15 += rdi + CF */ \
217 "adcq %%r11, %%r10 \n\t" /* r10 += r11 + CF */ \
218 "adcq $0, %%r12 \n\t" /* r12 += CF */ \
221 "addq %%r9, %%r15 \n\t" /* r15 += r9; new carry chain, continued after the two mulx below */ \
222 "mulxq 16(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a2 * b[2] */ \
223 "mulxq 24(" b "), %%rdi, %%r13 \n\t" /* (r13:rdi) = a2 * b[3]; r13 starts its life as the top word */ \
224 "adcq %%r8, %%r10 \n\t" /* r10 += r8 + CF */ \
225 "adcq %%rdi, %%r12 \n\t" /* r12 += rdi + CF */ \
226 "adcq $0, %%r13 \n\t" /* r13 += CF */ \
229 "addq %%r9, %%r12 \n\t" /* r12 += r9 */ \
230 "adcq $0, %%r13 \n\t" /* r13 += CF */ \
239 "movq %%r14, %%rdx \n\t" /* rdx = lowest window word */ \
240 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = k = low64(r14 * r_inv); high half in r8 is overwritten by the next mulx */ \
241 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
242 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_1 */ \
245 "addq %%r8, %%r14 \n\t" /* r14 += r8; only the carry matters, r14 is overwritten by a3 * b[3] */ \
246 "adcq %%rdi, %%r15 \n\t" /* r15 += rdi + CF */ \
247 "adcq %%r11, %%r10 \n\t" /* r10 += r11 + CF */ \
248 "adcq $0, %%r12 \n\t" /* r12 += CF */ \
249 "adcq $0, %%r13 \n\t" /* r13 += CF */ \
252 "addq %%r9, %%r15 \n\t" /* r15 += r9; new carry chain, continued after the two mulx below */ \
253 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
254 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_3 */ \
255 "adcq %%r8, %%r10 \n\t" /* r10 += r8 + CF */ \
256 "adcq %%r9, %%r12 \n\t" /* r12 += r9 + CF */ \
257 "adcq %%r11, %%r13 \n\t" /* r13 += r11 + CF */ \
260 "addq %%rdi, %%r12 \n\t" /* r12 += rdi */ \
261 "adcq $0, %%r13 \n\t" /* r13 += CF */ \
269 "movq " a3 ", %%rdx \n\t" /* round 3: rdx = a3; window is now r15 r10 r12 r13, r14 becomes the new top word */ \
270 "mulxq 0(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a3 * b[0] */ \
271 "mulxq 8(" b "), %%rdi, %%r11 \n\t" /* (r11:rdi) = a3 * b[1] */ \
274 "addq %%r8, %%r15 \n\t" /* r15 += r8 */ \
275 "adcq %%r9, %%r10 \n\t" /* r10 += r9 + CF */ \
276 "adcq %%r11, %%r12 \n\t" /* r12 += r11 + CF */ \
277 "adcq $0, %%r13 \n\t" /* r13 += CF */ \
280 "addq %%rdi, %%r10 \n\t" /* r10 += rdi; new carry chain, continued after the two mulx below */ \
281 "mulxq 16(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a3 * b[2] */ \
282 "mulxq 24(" b "), %%rdi, %%r14 \n\t" /* (r14:rdi) = a3 * b[3]; r14 starts its life as the top word */ \
283 "adcq %%r8, %%r12 \n\t" /* r12 += r8 + CF */ \
284 "adcq %%r9, %%r13 \n\t" /* r13 += r9 + CF */ \
285 "adcq $0, %%r14 \n\t" /* r14 += CF */ \
287 "addq %%rdi, %%r13 \n\t" /* r13 += rdi */ \
288 "adcq $0, %%r14 \n\t" /* r14 += CF */ \
297 "movq %%r15, %%rdx \n\t" /* rdx = lowest window word */ \
298 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = k = low64(r15 * r_inv); high half in r8 is overwritten by the next mulx */ \
299 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
300 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_1 */ \
305 "addq %%r8, %%r15 \n\t" /* r15 += r8; only the carry matters, r15 is overwritten by a4 * b[3] */ \
306 "adcq %%r9, %%r10 \n\t" /* r10 += r9 + CF */ \
307 "adcq %%r11, %%r12 \n\t" /* r12 += r11 + CF */ \
308 "adcq $0, %%r13 \n\t" /* r13 += CF */ \
309 "adcq $0, %%r14 \n\t" /* r14 += CF */ \
312 "addq %%rdi, %%r10 \n\t" /* r10 += rdi; new carry chain, continued after the two mulx below */ \
313 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
314 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_3 */ \
315 "adcq %%r8, %%r12 \n\t" /* r12 += r8 + CF */ \
316 "adcq %%r9, %%r13 \n\t" /* r13 += r9 + CF */ \
317 "adcq %%r11, %%r14 \n\t" /* r14 += r11 + CF */ \
320 "addq %%rdi, %%r13 \n\t" /* r13 += rdi */ \
321 "adcq $0, %%r14 \n\t" /* r14 += CF */ \
330 "movq " a4 ", %%rdx \n\t" /* round 4: rdx = a4; window is now r10 r12 r13 r14, r15 becomes the new top word */ \
331 "mulxq 0(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a4 * b[0] */ \
332 "mulxq 8(" b "), %%rdi, %%r11 \n\t" /* (r11:rdi) = a4 * b[1] */ \
335 "addq %%r8, %%r10 \n\t" /* r10 += r8 */ \
336 "adcq %%r9, %%r12 \n\t" /* r12 += r9 + CF */ \
337 "adcq %%r11, %%r13 \n\t" /* r13 += r11 + CF */ \
338 "adcq $0, %%r14 \n\t" /* r14 += CF */ \
341 "addq %%rdi, %%r12 \n\t" /* r12 += rdi; new carry chain, continued after the two mulx below */ \
342 "mulxq 16(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a4 * b[2] */ \
343 "mulxq 24(" b "), %%rdi, %%r15 \n\t" /* (r15:rdi) = a4 * b[3]; r15 starts its life as the top word */ \
344 "adcq %%r8, %%r13 \n\t" /* r13 += r8 + CF */ \
345 "adcq %%r9, %%r14 \n\t" /* r14 += r9 + CF */ \
346 "adcq $0, %%r15 \n\t" /* r15 += CF */ \
348 "addq %%rdi, %%r14 \n\t" /* r14 += rdi */ \
349 "adcq $0, %%r15 \n\t" /* r15 += CF */ \
358 "movq %%r10, %%rdx \n\t" /* rdx = lowest window word */ \
359 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = k = low64(r10 * r_inv); high half in r8 is overwritten by the next mulx */ \
360 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
361 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_1 */ \
364 "addq %%r8, %%r10 \n\t" /* r10 += r8; only the carry is used afterwards */ \
365 "adcq %%r9, %%r12 \n\t" /* r12 += r9 + CF */ \
366 "adcq %%r11, %%r13 \n\t" /* r13 += r11 + CF */ \
367 "adcq $0, %%r14 \n\t" /* r14 += CF */ \
368 "adcq $0, %%r15 \n\t" /* r15 += CF */ \
371 "addq %%rdi, %%r12 \n\t" /* r12 += rdi; new carry chain, continued after the two mulx below */ \
372 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
373 "mulxq %[modulus_3], %%rdi, %%rdx \n\t" /* (rdx:rdi) = k * modulus_3; k in rdx is no longer needed and is overwritten */ \
374 "adcq %%r8, %%r13 \n\t" /* r13 += r8 + CF */ \
375 "adcq %%r9, %%r14 \n\t" /* r14 += r9 + CF */ \
376 "adcq %%rdx, %%r15 \n\t" /* r15 += rdx + CF */ \
379 "addq %%rdi, %%r14 \n\t" /* r14 += rdi */ \
380 "adcq $0, %%r15 \n\t" /* r15 += CF; r12..r15 now hold the four surviving words */ \
392 "adcxq 0(" b "), %%r12 \n\t" /* r12 += word at b + 0 + CF; adcx reads and writes only CF, so CF must already be 0 for a plain sum. NOTE(review): the #define line for this body is not visible in this excerpt -- presumably the ADX build's ADD(b), confirm */ \
393 "adcxq 8(" b "), %%r13 \n\t" /* r13 += word at b + 8 + CF */ \
394 "adcxq 16(" b "), %%r14 \n\t" /* r14 += word at b + 16 + CF */ \
395 "adcxq 24(" b "), %%r15 \n\t" /* r15 += word at b + 24 + CF; CF afterwards is the carry out */
402 "subq 0(" b "), %%r12 \n\t" /* r12 -= word at b + 0; subq ignores incoming CF, so this starts a fresh borrow chain. NOTE(review): the #define line for this body is not visible in this excerpt -- presumably the ADX build's SUB(b), confirm */ \
403 "sbbq 8(" b "), %%r13 \n\t" /* r13 -= word at b + 8 + CF (borrow) */ \
404 "sbbq 16(" b "), %%r14 \n\t" /* r14 -= word at b + 16 + CF (borrow) */ \
405 "sbbq 24(" b "), %%r15 \n\t" /* r15 -= word at b + 24 + CF (borrow); CF afterwards is the borrow out */
/*
 * ADD_REDUCE(b, twice_not_modulus_0 .. twice_not_modulus_3) -- adcx/adox variant.
 *
 * Operates on the 4-word value held in r12 (lowest) .. r15 (highest) using two
 * interleaved carry chains:
 *   CF chain (adcxq): r12..r15 += the four 64-bit words at b
 *   OF chain (adoxq): r12..r15 += twice_not_modulus_0 .. twice_not_modulus_3
 * Between the two additions of each word, the intermediate sum (input + b) is
 * copied to r8..r11; movq leaves the flags alone so neither chain is disturbed.
 * Finally cmovnoq restores r8..r11 into r12..r15 when OF = 0, i.e. when the
 * second addition produced no carry out.
 *
 * adcxq/adoxq take CF/OF as carry-in, so the result is a plain sum only if
 * both flags are 0 on entry (presumably arranged with CLEAR_FLAGS -- confirm at
 * the call sites).
 *
 * Parameters are C string literals spliced into the asm text.
 * NOTE(review): the parameter names suggest the constants are the limbs of the
 * two's complement of 2 * modulus -- confirm against the caller.
 *
 * Overwrites r8..r11, CF and OF.
 */
411#define ADD_REDUCE(b, twice_not_modulus_0, twice_not_modulus_1, twice_not_modulus_2, twice_not_modulus_3) \
412 "adcxq 0(" b "), %%r12 \n\t" /* r12 += word at b + 0 + CF */ \
413 "movq %%r12, %%r8 \n\t" /* r8 = word 0 of (input + b) */ \
414 "adoxq " twice_not_modulus_0 ", %%r12 \n\t" /* r12 += twice_not_modulus_0 + OF */ \
415 "adcxq 8(" b "), %%r13 \n\t" /* r13 += word at b + 8 + CF */ \
416 "movq %%r13, %%r9 \n\t" /* r9 = word 1 of (input + b) */ \
417 "adoxq " twice_not_modulus_1 ", %%r13 \n\t" /* r13 += twice_not_modulus_1 + OF */ \
418 "adcxq 16(" b "), %%r14 \n\t" /* r14 += word at b + 16 + CF */ \
419 "movq %%r14, %%r10 \n\t" /* r10 = word 2 of (input + b) */ \
420 "adoxq " twice_not_modulus_2 ", %%r14 \n\t" /* r14 += twice_not_modulus_2 + OF */ \
421 "adcxq 24(" b "), %%r15 \n\t" /* r15 += word at b + 24 + CF */ \
422 "movq %%r15, %%r11 \n\t" /* r11 = word 3 of (input + b) */ \
423 "adoxq " twice_not_modulus_3 ", %%r15 \n\t" /* r15 += twice_not_modulus_3 + OF; OF = carry out of the second sum */ \
424 "cmovnoq %%r8, %%r12 \n\t" /* OF == 0: fall back to (input + b), word by word */ \
425 "cmovnoq %%r9, %%r13 \n\t" \
426 "cmovnoq %%r10, %%r14 \n\t" \
427 "cmovnoq %%r11, %%r15 \n\t"
/*
 * CONDITIONAL_ADD(b_0, b_1, b_2, b_3) -- adox variant.
 *
 * Operates on the 4-word value held in r12 (lowest) .. r15 (highest):
 *   1. copies r12..r15 to r8..r11;
 *   2. adds b_0..b_3 to r12..r15 with adoxq, i.e. with the carry travelling
 *      through OF instead of CF (CF is left untouched);
 *   3. if the addition produced NO carry out (OF = 0), restores the original
 *      value from r8..r11; otherwise the sum is kept.
 *
 * The first adoxq consumes OF as carry-in, so OF must be 0 on entry for the
 * result to be a plain sum (presumably arranged with CLEAR_FLAGS -- confirm at
 * the call sites).
 *
 * b_0..b_3: C string literals naming the four source operands, lowest first.
 *
 * Overwrites r8..r11 and OF.
 */
438#define CONDITIONAL_ADD(b_0, b_1, b_2, b_3) \
440 "movq %%r12, %%r8 \n\t" /* keep a copy of the input in r8..r11 */ \
441 "movq %%r13, %%r9 \n\t" \
442 "movq %%r14, %%r10 \n\t" \
443 "movq %%r15, %%r11 \n\t" \
444 "adoxq " b_0 ", %%r12 \n\t" /* r12 += b_0 + OF */ \
445 "adoxq " b_1 ", %%r13 \n\t" /* r13 += b_1 + OF */ \
446 "adoxq " b_2 ", %%r14 \n\t" /* r14 += b_2 + OF */ \
447 "adoxq " b_3 ", %%r15 \n\t" /* r15 += b_3 + OF; OF = carry out */ \
450 "cmovnoq %%r8, %%r12 \n\t" /* OF == 0: revert to the copy, word by word */ \
451 "cmovnoq %%r9, %%r13 \n\t" \
452 "cmovnoq %%r10, %%r14 \n\t" \
453 "cmovnoq %%r11, %%r15 \n\t"
495 "movq 0(" a "), %%rdx \n\t" /* rdx = a[0]. NOTE(review): the #define line for this body is not visible in this excerpt -- presumably SQR(a), confirm. The body squares the 4-word value at `a` into r8..r15, then runs four k * modulus reduction rounds; r12..r15 are the surviving words. Scratch: rdx, rdi, rcx, r8..r11 */ \
496 "xorq %%r8, %%r8 \n\t" /* r8 = 0; clears CF and OF, which the adcx/adox chains below rely on */ \
499 "mulxq 8(" a "), %%r9, %%r10 \n\t" /* cross products: (r10:r9) = a[0] * a[1] */ \
500 "mulxq 16(" a "), %%r8, %%r15 \n\t" /* (r15:r8) = a[0] * a[2] */ \
501 "mulxq 24(" a "), %%r11, %%r12 \n\t" /* (r12:r11) = a[0] * a[3] */ \
504 "adoxq %%r8, %%r10 \n\t" /* r10 += r8 + OF */ \
505 "adcxq %%r15, %%r11 \n\t" /* r11 += r15 + CF */ \
508 "movq 8(" a "), %%rdx \n\t" /* rdx = a[1] (movq and mulx leave both carry flags untouched) */ \
509 "mulxq 16(" a "), %%r8, %%r15 \n\t" /* (r15:r8) = a[1] * a[2] */ \
510 "mulxq 24(" a "), %%rdi, %%rcx \n\t" /* (rcx:rdi) = a[1] * a[3] */ \
513 "movq 24(" a "), %%rdx \n\t" /* rdx = a[3] */ \
514 "mulxq 16(" a "), %%r13, %%r14 \n\t" /* (r14:r13) = a[3] * a[2] */ \
517 "adoxq %%r8, %%r11 \n\t" /* r11 += r8 + OF */ \
518 "adcxq %%rdi, %%r12 \n\t" /* r12 += rdi + CF */ \
519 "adoxq %%r15, %%r12 \n\t" /* r12 += r15 + OF */ \
520 "adcxq %%rcx, %%r13 \n\t" /* r13 += rcx + CF */ \
521 "adoxq %[zero_reference], %%r13 \n\t" /* r13 += zero_reference + OF (folds in the carry; presumes the operand holds 0 -- confirm) */ \
522 "adcxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + CF */ \
525 "adoxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + OF; r9..r14 now hold the summed cross products */ \
542 "adoxq %%r9, %%r9 \n\t" /* double the cross products: r9 = 2 * r9 + OF */ \
543 "adcxq %%r12, %%r12 \n\t" /* r12 = 2 * r12 + CF */ \
544 "adoxq %%r10, %%r10 \n\t" /* r10 = 2 * r10 + OF */ \
545 "adcxq %%r13, %%r13 \n\t" /* r13 = 2 * r13 + CF */ \
546 "adoxq %%r11, %%r11 \n\t" /* r11 = 2 * r11 + OF; the OF carry out is picked up by the adoxq into r12 below */ \
547 "adcxq %%r14, %%r14 \n\t" /* r14 = 2 * r14 + CF */ \
565 "movq 0(" a "), %%rdx \n\t" /* squares: rdx = a[0] */ \
566 "mulxq %%rdx, %%r8, %%rcx \n\t" /* (rcx:r8) = a[0] * a[0]; r8 becomes result word 0 */ \
567 "movq 16(" a "), %%rdx \n\t" /* rdx = a[2] */ \
568 "mulxq %%rdx, %%rdx, %%rdi \n\t" /* (rdi:rdx) = a[2] * a[2] */ \
571 "adcxq %%rcx, %%r9 \n\t" /* r9 += rcx + CF */ \
572 "adoxq %%rdx, %%r12 \n\t" /* r12 += rdx + OF */ \
573 "adoxq %%rdi, %%r13 \n\t" /* r13 += rdi + OF */ \
574 "movq 24(" a "), %%rdx \n\t" /* rdx = a[3] */ \
575 "mulxq %%rdx, %%rcx, %%r15 \n\t" /* (r15:rcx) = a[3] * a[3]; r15 becomes result word 7 */ \
576 "movq 8(" a "), %%rdx \n\t" /* rdx = a[1] */ \
577 "mulxq %%rdx, %%rdi, %%rdx \n\t" /* (rdx:rdi) = a[1] * a[1] */ \
578 "adcxq %%rdi, %%r10 \n\t" /* r10 += rdi + CF */ \
579 "adcxq %%rdx, %%r11 \n\t" /* r11 += rdx + CF */ \
580 "adoxq %%rcx, %%r14 \n\t" /* r14 += rcx + OF */ \
581 "adoxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + OF; r8..r15 now hold the 8-word square */ \
617 "movq %%r8, %%rdx \n\t" /* reduction round 1 (eliminates r8): rdx = r8 */ \
618 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = k = low64(r8 * r_inv); high half in rdi is overwritten by the next mulx */ \
619 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" /* (rcx:rdi) = k * modulus_0 */ \
620 "adoxq %%rdi, %%r8 \n\t" /* r8 += rdi + OF; only the carry is used, r8 is reused as scratch next */ \
621 "mulxq %[modulus_3], %%r8, %%rdi \n\t" /* (rdi:r8) = k * modulus_3 */ \
622 "adcxq %%rdi, %%r12 \n\t" /* r12 += rdi + CF (CF still carries out of the r11 update above) */ \
623 "adoxq %%rcx, %%r9 \n\t" /* r9 += rcx + OF */ \
624 "adcxq %[zero_reference], %%r13 \n\t" /* r13 += zero_reference + CF */ \
625 "adcxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + CF */ \
626 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" /* (rcx:rdi) = k * modulus_1 */ \
627 "adcxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + CF */ \
629 "adoxq %%rcx, %%r10 \n\t" /* r10 += rcx + OF */ \
630 "adcxq %%rdi, %%r9 \n\t" /* r9 += rdi + CF */ \
631 "adoxq %%r8, %%r11 \n\t" /* r11 += r8 + OF */ \
632 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" /* (rcx:rdi) = k * modulus_2 */ \
633 "adcxq %%rdi, %%r10 \n\t" /* r10 += rdi + CF */ \
634 "adcxq %%rcx, %%r11 \n\t" /* r11 += rcx + CF */ \
647 "movq %%r9, %%rdx \n\t" /* reduction round 2 (eliminates r9): rdx = r9 */ \
648 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = k = low64(r9 * r_inv); high half in rdi is overwritten by the next mulx */ \
649 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" /* (rcx:rdi) = k * modulus_2; rdi is held until the final add of this round */ \
650 "adoxq %%rcx, %%r12 \n\t" /* r12 += rcx + OF */ \
651 "mulxq %[modulus_3], %%r8, %%rcx \n\t" /* (rcx:r8) = k * modulus_3 */ \
652 "adcxq %%r8, %%r12 \n\t" /* r12 += r8 + CF */ \
653 "adoxq %%rcx, %%r13 \n\t" /* r13 += rcx + OF */ \
654 "adcxq %[zero_reference], %%r13 \n\t" /* r13 += zero_reference + CF */ \
655 "adoxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + OF */ \
656 "adcxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + CF */ \
657 "adoxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + OF */ \
658 "adcxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + CF */ \
661 "mulxq %[modulus_0], %%r8, %%rcx \n\t" /* (rcx:r8) = k * modulus_0 */ \
662 "adcxq %%r8, %%r9 \n\t" /* r9 += r8 + CF; only the carry is used afterwards */ \
663 "adoxq %%rcx, %%r10 \n\t" /* r10 += rcx + OF */ \
664 "mulxq %[modulus_1], %%r8, %%rcx \n\t" /* (rcx:r8) = k * modulus_1 */ \
665 "adcxq %%r8, %%r10 \n\t" /* r10 += r8 + CF */ \
666 "adoxq %%rcx, %%r11 \n\t" /* r11 += rcx + OF */ \
667 "adcxq %%rdi, %%r11 \n\t" /* r11 += rdi + CF (rdi = low half of k * modulus_2 from above) */ \
680 "movq %%r10, %%rdx \n\t" /* reduction round 3 (eliminates r10): rdx = r10 */ \
681 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = k = low64(r10 * r_inv); high half in rdi is overwritten by the next mulx */ \
682 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" /* (rcx:rdi) = k * modulus_1; rdi is held until the r11 update below */ \
683 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 (r9 is free after round 2) */ \
684 "adoxq %%rcx, %%r12 \n\t" /* r12 += rcx + OF */ \
685 "adcxq %%r8, %%r12 \n\t" /* r12 += r8 + CF */ \
686 "adoxq %%r9, %%r13 \n\t" /* r13 += r9 + OF */ \
687 "mulxq %[modulus_3], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_3 */ \
688 "adcxq %%r8, %%r13 \n\t" /* r13 += r8 + CF */ \
689 "adoxq %%r9, %%r14 \n\t" /* r14 += r9 + OF */ \
690 "adcxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + CF */ \
691 "adoxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + OF */ \
692 "adcxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + CF */ \
695 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
696 "adcxq %%r8, %%r10 \n\t" /* r10 += r8 + CF; only the carry is used afterwards */ \
697 "adoxq %%r9, %%r11 \n\t" /* r11 += r9 + OF */ \
698 "adcxq %%rdi, %%r11 \n\t" /* r11 += rdi + CF (rdi = low half of k * modulus_1 from above) */ \
699 "adoxq %[zero_reference], %%r12 \n\t" /* r12 += zero_reference + OF */ \
700 "adoxq %[zero_reference], %%r13 \n\t" /* r13 += zero_reference + OF */ \
715 "movq %%r11, %%rdx \n\t" /* reduction round 4 (eliminates r11): rdx = r11 */ \
716 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = k = low64(r11 * r_inv); high half in rdi is overwritten by the next mulx */ \
717 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" /* (rcx:rdi) = k * modulus_0 */ \
718 "mulxq %[modulus_1], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_1 */ \
719 "adoxq %%rdi, %%r11 \n\t" /* r11 += rdi + OF; only the carry is used, r11 is reused as scratch below */ \
720 "adcxq %%r8, %%r12 \n\t" /* r12 += r8 + CF */ \
721 "adoxq %%rcx, %%r12 \n\t" /* r12 += rcx + OF */ \
722 "adcxq %%r9, %%r13 \n\t" /* r13 += r9 + CF */ \
723 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
724 "mulxq %[modulus_3], %%r10, %%r11 \n\t" /* (r11:r10) = k * modulus_3 (r10, r11 are free by now) */ \
725 "adoxq %%r8, %%r13 \n\t" /* r13 += r8 + OF */ \
726 "adcxq %%r10, %%r14 \n\t" /* r14 += r10 + CF */ \
727 "adoxq %%r9, %%r14 \n\t" /* r14 += r9 + OF */ \
728 "adcxq %%r11, %%r15 \n\t" /* r15 += r11 + CF */ \
731 "adoxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + OF; r12..r15 now hold the four surviving words */ \
/*
 * MUL(a1, a2, a3, a4, b) -- variant built from mulx plus the adcx (CF) and
 * adox (OF) carry chains, which run interleaved and independently.
 *
 * Emits four rounds. In round i the code
 *   - adds a_i * (b[0], b[1], b[2], b[3]) into a 5-word accumulator window,
 *   - computes k = low 64 bits of (lowest window word * r_inv),
 *   - adds k * (modulus_0 .. modulus_3) into the window,
 * after which the lowest window word is never read again and the window
 * slides up by one word.
 * This has the shape of word-by-word Montgomery multiplication, presumably
 * with r_inv = -modulus^-1 mod 2^64 -- confirm against the operand list of the
 * enclosing asm statement.
 *
 * Parameters (all C string literals spliced into the asm text):
 *   a1..a4  source operands, moved into rdx one per round (mulx takes its
 *           implicit multiplicand from rdx)
 *   b       register holding a base address; words are read at byte offsets
 *           0, 8, 16 and 24 from it
 * Named asm operands referenced: %[r_inv], %[modulus_0] .. %[modulus_3] and
 * %[zero_reference]. The latter is only ever added with adcx/adox to fold a
 * pending carry into a word, which presumes it holds 0 -- confirm.
 *
 * Accumulator window, lowest word first:
 *   round 1: r13 r14 r15 r10 r12        round 2: r14 r15 r10 r12 r13
 *   round 3: r15 r10 r12 r13 r14        round 4: r10 r12 r13 r14 r15
 * At the last visible line the surviving words are r12, r13, r14, r15 (lowest
 * first). NOTE(review): that line ends with a continuation backslash, so the
 * macro may continue beyond this excerpt.
 *
 * Scratch registers: rdx, rdi, r8, r9, r11. CF and OF are overwritten.
 * Notation below: (hi:lo) = x * y is the 128-bit product; mulx and movq do not
 * touch the flags, so both carry chains stay live across them.
 */
770#define MUL(a1, a2, a3, a4, b) \
775 "movq " a1 ", %%rdx \n\t" /* round 1: rdx = a1 */ \
776 "xorq %%r8, %%r8 \n\t" /* r8 = 0; clears CF and OF so both carry chains start at 0 */ \
779 "mulxq 0(" b "), %%r13, %%r14 \n\t" /* (r14:r13) = a1 * b[0] */ \
780 "mulxq 8(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a1 * b[1] */ \
781 "mulxq 16(" b "), %%r15, %%r10 \n\t" /* (r10:r15) = a1 * b[2] */ \
782 "mulxq 24(" b "), %%rdi, %%r12 \n\t" /* (r12:rdi) = a1 * b[3] */ \
785 "movq %%r13, %%rdx \n\t" /* rdx = lowest window word */ \
786 "mulxq %[r_inv], %%rdx, %%r11 \n\t" /* rdx = k = low64(r13 * r_inv); high half lands in r11 and is overwritten later */ \
805 "adcxq %%r8, %%r14 \n\t" /* r14 += r8 + CF */ \
806 "adoxq %%rdi, %%r10 \n\t" /* r10 += rdi + OF */ \
807 "adcxq %%r9, %%r15 \n\t" /* r15 += r9 + CF */ \
809 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_3 */ \
810 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
811 "adcxq %%rdi, %%r10 \n\t" /* r10 += rdi + CF */ \
812 "adoxq %%r11, %%r12 \n\t" /* r12 += r11 + OF */ \
815 "adcxq %[zero_reference], %%r12 \n\t" /* r12 += zero_reference + CF */ \
817 "adoxq %%r8, %%r13 \n\t" /* r13 += r8 + OF; only the carry is used, r13 is overwritten by a2 * b[3] */ \
818 "adcxq %%r9, %%r14 \n\t" /* r14 += r9 + CF */ \
819 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_1 */ \
820 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
821 "adoxq %%rdi, %%r14 \n\t" /* r14 += rdi + OF */ \
822 "adcxq %%r11, %%r15 \n\t" /* r15 += r11 + CF */ \
823 "adoxq %%r8, %%r15 \n\t" /* r15 += r8 + OF */ \
824 "adcxq %%r9, %%r10 \n\t" /* r10 += r9 + CF */ \
848 "movq " a2 ", %%rdx \n\t" /* round 2: rdx = a2; window is now r14 r15 r10 r12, r13 becomes the new top word */ \
849 "mulxq 16(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a2 * b[2] */ \
850 "mulxq 24(" b "), %%rdi, %%r13 \n\t" /* (r13:rdi) = a2 * b[3]; r13 starts its life as the top word */ \
852 "adoxq %%r8, %%r10 \n\t" /* r10 += r8 + OF */ \
853 "adcxq %%rdi, %%r12 \n\t" /* r12 += rdi + CF */ \
854 "adoxq %%r9, %%r12 \n\t" /* r12 += r9 + OF */ \
855 "adcxq %[zero_reference], %%r13 \n\t" /* r13 += zero_reference + CF */ \
857 "adoxq %[zero_reference], %%r13 \n\t" /* r13 += zero_reference + OF */ \
859 "mulxq 0(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a2 * b[0] */ \
860 "mulxq 8(" b "), %%rdi, %%r11 \n\t" /* (r11:rdi) = a2 * b[1] */ \
861 "adcxq %%r8, %%r14 \n\t" /* r14 += r8 + CF */ \
862 "adoxq %%r9, %%r15 \n\t" /* r15 += r9 + OF */ \
863 "adcxq %%rdi, %%r15 \n\t" /* r15 += rdi + CF */ \
864 "adoxq %%r11, %%r10 \n\t" /* r10 += r11 + OF */ \
867 "movq %%r14, %%rdx \n\t" /* rdx = lowest window word */ \
868 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = k = low64(r14 * r_inv); high half in r8 is overwritten by the next mulx */ \
869 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
870 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_3 */ \
871 "adcxq %%r8, %%r10 \n\t" /* r10 += r8 + CF */ \
872 "adoxq %%r9, %%r12 \n\t" /* r12 += r9 + OF */ \
873 "adcxq %%rdi, %%r12 \n\t" /* r12 += rdi + CF */ \
874 "adoxq %%r11, %%r13 \n\t" /* r13 += r11 + OF */ \
875 "adcxq %[zero_reference], %%r13 \n\t" /* r13 += zero_reference + CF */ \
877 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
878 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_1 */ \
879 "adoxq %%r8, %%r14 \n\t" /* r14 += r8 + OF; only the carry is used, r14 is overwritten by a3 * b[3] */ \
880 "adcxq %%rdi, %%r15 \n\t" /* r15 += rdi + CF */ \
881 "adoxq %%r9, %%r15 \n\t" /* r15 += r9 + OF */ \
882 "adcxq %%r11, %%r10 \n\t" /* r10 += r11 + CF */ \
906 "movq " a3 ", %%rdx \n\t" /* round 3: rdx = a3; window is now r15 r10 r12 r13, r14 becomes the new top word */ \
907 "mulxq 8(" b "), %%rdi, %%r11 \n\t" /* (r11:rdi) = a3 * b[1] */ \
908 "mulxq 16(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a3 * b[2] */ \
910 "adoxq %%rdi, %%r10 \n\t" /* r10 += rdi + OF */ \
911 "adcxq %%r11, %%r12 \n\t" /* r12 += r11 + CF */ \
912 "adoxq %%r8, %%r12 \n\t" /* r12 += r8 + OF */ \
913 "adcxq %%r9, %%r13 \n\t" /* r13 += r9 + CF */ \
914 "mulxq 24(" b "), %%rdi, %%r14 \n\t" /* (r14:rdi) = a3 * b[3]; r14 starts its life as the top word */ \
915 "mulxq 0(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a3 * b[0] */ \
916 "adoxq %%rdi, %%r13 \n\t" /* r13 += rdi + OF */ \
917 "adcxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + CF */ \
919 "adoxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + OF */ \
921 "adcxq %%r8, %%r15 \n\t" /* r15 += r8 + CF */ \
922 "adoxq %%r9, %%r10 \n\t" /* r10 += r9 + OF */ \
925 "movq %%r15, %%rdx \n\t" /* rdx = lowest window word */ \
926 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = k = low64(r15 * r_inv); high half in r8 is overwritten two mulx later */ \
927 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_1 */ \
928 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
929 "adcxq %%rdi, %%r10 \n\t" /* r10 += rdi + CF */ \
930 "adoxq %%r11, %%r12 \n\t" /* r12 += r11 + OF */ \
931 "adcxq %%r8, %%r12 \n\t" /* r12 += r8 + CF */ \
932 "adoxq %%r9, %%r13 \n\t" /* r13 += r9 + OF */ \
933 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_3 */ \
934 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
935 "adcxq %%rdi, %%r13 \n\t" /* r13 += rdi + CF */ \
936 "adoxq %%r11, %%r14 \n\t" /* r14 += r11 + OF */ \
937 "adcxq %[zero_reference], %%r14 \n\t" /* r14 += zero_reference + CF */ \
939 "adoxq %%r8, %%r15 \n\t" /* r15 += r8 + OF; only the carry is used, r15 is overwritten by a4 * b[3] */ \
940 "adcxq %%r9, %%r10 \n\t" /* r10 += r9 + CF */ \
950 "movq " a4 ", %%rdx \n\t" /* round 4: rdx = a4; window is now r10 r12 r13 r14, r15 becomes the new top word */ \
951 "mulxq 0(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a4 * b[0] */ \
952 "mulxq 8(" b "), %%rdi, %%r11 \n\t" /* (r11:rdi) = a4 * b[1] */ \
954 "adoxq %%r8, %%r10 \n\t" /* r10 += r8 + OF */ \
955 "adcxq %%r9, %%r12 \n\t" /* r12 += r9 + CF */ \
956 "adoxq %%rdi, %%r12 \n\t" /* r12 += rdi + OF */ \
957 "adcxq %%r11, %%r13 \n\t" /* r13 += r11 + CF */ \
959 "mulxq 16(" b "), %%r8, %%r9 \n\t" /* (r9:r8) = a4 * b[2] */ \
960 "mulxq 24(" b "), %%rdi, %%r15 \n\t" /* (r15:rdi) = a4 * b[3]; r15 starts its life as the top word */ \
961 "adoxq %%r8, %%r13 \n\t" /* r13 += r8 + OF */ \
962 "adcxq %%r9, %%r14 \n\t" /* r14 += r9 + CF */ \
963 "adoxq %%rdi, %%r14 \n\t" /* r14 += rdi + OF */ \
964 "adcxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + CF */ \
966 "adoxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + OF */ \
977 "movq %%r10, %%rdx \n\t" /* rdx = lowest window word */ \
978 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = k = low64(r10 * r_inv); high half in r8 is overwritten by the next mulx */ \
979 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_0 */ \
980 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* (r11:rdi) = k * modulus_1 */ \
982 "adoxq %%r8, %%r10 \n\t" /* r10 += r8 + OF; only the carry is used afterwards */ \
983 "adcxq %%r9, %%r12 \n\t" /* r12 += r9 + CF */ \
984 "adoxq %%rdi, %%r12 \n\t" /* r12 += rdi + OF */ \
985 "adcxq %%r11, %%r13 \n\t" /* r13 += r11 + CF */ \
987 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* (r9:r8) = k * modulus_2 */ \
988 "mulxq %[modulus_3], %%rdi, %%rdx \n\t" /* (rdx:rdi) = k * modulus_3; k in rdx is no longer needed and is overwritten */ \
989 "adoxq %%r8, %%r13 \n\t" /* r13 += r8 + OF */ \
990 "adcxq %%r9, %%r14 \n\t" /* r14 += r9 + CF */ \
991 "adoxq %%rdi, %%r14 \n\t" /* r14 += rdi + OF */ \
992 "adcxq %%rdx, %%r15 \n\t" /* r15 += rdx + CF */ \
994 "adoxq %[zero_reference], %%r15 \n\t" /* r15 += zero_reference + OF; r12..r15 now hold the four surviving words */ \